macro-assembler-aarch32.cc revision e8ce9f0ec7fe9484fca0c446ecc8a9d7929bea66
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
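// A minimal usage sketch of this scope, assuming a MacroAssembler named
// `masm` (the scope's constructor calls Open() and its destructor calls
// Close()):
//
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register scratch = temps.Acquire();  // Borrow a scratch core register.
//     masm.Mov(scratch, 42);               // Use it like any other register.
//   }  // `scratch` is returned to the scratch pool here.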
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT(masm_ == NULL);
41  VIXL_ASSERT(masm != NULL);
42  masm_ = masm;
43
44  old_available_ = masm_->GetScratchRegisterList()->GetList();
45  old_available_vfp_ = masm_->GetScratchVRegisterList()->GetList();
46
47  parent_ = masm->GetCurrentScratchRegisterScope();
48  masm->SetCurrentScratchRegisterScope(this);
49}
50
51
52void UseScratchRegisterScope::Close() {
53  if (masm_ != NULL) {
54    // Ensure that scopes nest perfectly, and do not outlive their parents.
55    // This is a run-time check because the order of destruction of objects in
56    // the _same_ scope is implementation-defined, and is likely to change in
57    // optimised builds.
58    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
59    masm_->SetCurrentScratchRegisterScope(parent_);
60
61    masm_->GetScratchRegisterList()->SetList(old_available_);
62    masm_->GetScratchVRegisterList()->SetList(old_available_vfp_);
63
64    masm_ = NULL;
65  }
66}
67
68
69bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
70  VIXL_ASSERT(masm_ != NULL);
71  VIXL_ASSERT(reg.IsValid());
72  return masm_->GetScratchRegisterList()->Includes(reg);
73}
74
75
76bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
77  VIXL_ASSERT(masm_ != NULL);
78  VIXL_ASSERT(reg.IsValid());
79  return masm_->GetScratchVRegisterList()->IncludesAllOf(reg);
80}
81
82
83Register UseScratchRegisterScope::Acquire() {
84  VIXL_ASSERT(masm_ != NULL);
85  Register reg = masm_->GetScratchRegisterList()->GetFirstAvailableRegister();
86  VIXL_CHECK(reg.IsValid());
87  masm_->GetScratchRegisterList()->Remove(reg);
88  return reg;
89}
90
91
92VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
93  switch (size_in_bits) {
94    case kSRegSizeInBits:
95      return AcquireS();
96    case kDRegSizeInBits:
97      return AcquireD();
98    case kQRegSizeInBits:
99      return AcquireQ();
100    default:
101      VIXL_UNREACHABLE();
102      return NoVReg;
103  }
104}
105
106
107QRegister UseScratchRegisterScope::AcquireQ() {
108  VIXL_ASSERT(masm_ != NULL);
109  QRegister reg =
110      masm_->GetScratchVRegisterList()->GetFirstAvailableQRegister();
111  VIXL_CHECK(reg.IsValid());
112  masm_->GetScratchVRegisterList()->Remove(reg);
113  return reg;
114}
115
116
117DRegister UseScratchRegisterScope::AcquireD() {
118  VIXL_ASSERT(masm_ != NULL);
119  DRegister reg =
120      masm_->GetScratchVRegisterList()->GetFirstAvailableDRegister();
121  VIXL_CHECK(reg.IsValid());
122  masm_->GetScratchVRegisterList()->Remove(reg);
123  return reg;
124}
125
126
127SRegister UseScratchRegisterScope::AcquireS() {
128  VIXL_ASSERT(masm_ != NULL);
129  SRegister reg =
130      masm_->GetScratchVRegisterList()->GetFirstAvailableSRegister();
131  VIXL_CHECK(reg.IsValid());
132  masm_->GetScratchVRegisterList()->Remove(reg);
133  return reg;
134}
135
136
137void UseScratchRegisterScope::Release(const Register& reg) {
138  VIXL_ASSERT(masm_ != NULL);
139  VIXL_ASSERT(reg.IsValid());
140  VIXL_ASSERT(!masm_->GetScratchRegisterList()->Includes(reg));
141  masm_->GetScratchRegisterList()->Combine(reg);
142}
143
144
145void UseScratchRegisterScope::Release(const VRegister& reg) {
146  VIXL_ASSERT(masm_ != NULL);
147  VIXL_ASSERT(reg.IsValid());
148  VIXL_ASSERT(!masm_->GetScratchVRegisterList()->IncludesAliasOf(reg));
149  masm_->GetScratchVRegisterList()->Combine(reg);
150}
151
152
153void UseScratchRegisterScope::Include(const RegisterList& list) {
154  VIXL_ASSERT(masm_ != NULL);
155  RegisterList excluded_registers(sp, lr, pc);
156  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
157  RegisterList* available = masm_->GetScratchRegisterList();
158  available->SetList(available->GetList() | mask);
159}
160
161
162void UseScratchRegisterScope::Include(const VRegisterList& list) {
163  VIXL_ASSERT(masm_ != NULL);
164  VRegisterList* available = masm_->GetScratchVRegisterList();
165  available->SetList(available->GetList() | list.GetList());
166}
167
168
169void UseScratchRegisterScope::Exclude(const RegisterList& list) {
170  VIXL_ASSERT(masm_ != NULL);
171  RegisterList* available = masm_->GetScratchRegisterList();
172  available->SetList(available->GetList() & ~list.GetList());
173}
174
175
176void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
177  VIXL_ASSERT(masm_ != NULL);
178  VRegisterList* available = masm_->GetScratchVRegisterList();
179  available->SetList(available->GetList() & ~list.GetList());
180}
181
182
183void UseScratchRegisterScope::Exclude(const Operand& operand) {
184  if (operand.IsImmediateShiftedRegister()) {
185    Exclude(operand.GetBaseRegister());
186  } else if (operand.IsRegisterShiftedRegister()) {
187    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
188  } else {
189    VIXL_ASSERT(operand.IsImmediate());
190  }
191}
192
193
194void UseScratchRegisterScope::ExcludeAll() {
195  VIXL_ASSERT(masm_ != NULL);
196  masm_->GetScratchRegisterList()->SetList(0);
197  masm_->GetScratchVRegisterList()->SetList(0);
198}
199
200
201void VeneerPoolManager::AddLabel(Label* label) {
202  if (last_label_reference_offset_ != 0) {
203    // If the pool grows faster than the instruction stream, we must adjust
204    // the checkpoint to compensate. The veneer pool entries take 32 bits, so
205    // this can only occur when two consecutive 16-bit instructions add veneer
206    // pool entries.
207    // This is typically the case for cbz and cbnz (other forward branches
208    // have a 32 bit variant which is always used).
209    if (last_label_reference_offset_ + 2 * k16BitT32InstructionSizeInBytes ==
210        static_cast<uint32_t>(masm_->GetCursorOffset())) {
211      // We found two 16 bit forward branches generated one after the other.
      // That means that the pool will grow by one 32-bit branch while the
      // cursor offset only moves forward by one 16-bit instruction.
      // Update the cbz/cbnz checkpoint to account for the difference.
215      near_checkpoint_ -=
216          k32BitT32InstructionSizeInBytes - k16BitT32InstructionSizeInBytes;
217    }
218  }
219  Label::ForwardReference& back = label->GetBackForwardRef();
220  VIXL_ASSERT(back.GetMaxForwardDistance() >= kCbzCbnzRange);
221  if (!label->IsInVeneerPool()) {
222    if (back.GetMaxForwardDistance() <= kNearLabelRange) {
223      near_labels_.push_back(label);
224      label->SetVeneerPoolManager(this, true);
225    } else {
226      far_labels_.push_back(label);
227      label->SetVeneerPoolManager(this, false);
228    }
229  } else if (back.GetMaxForwardDistance() <= kNearLabelRange) {
230    if (!label->IsNear()) {
231      far_labels_.remove(label);
232      near_labels_.push_back(label);
233      label->SetVeneerPoolManager(this, true);
234    }
235  }
236
237  back.SetIsBranch();
238  last_label_reference_offset_ = back.GetLocation();
239  label->UpdateCheckpoint();
240  Label::Offset tmp = label->GetCheckpoint();
241  if (label->IsNear()) {
242    if (near_checkpoint_ > tmp) near_checkpoint_ = tmp;
243  } else {
244    if (far_checkpoint_ > tmp) far_checkpoint_ = tmp;
245  }
  // Always recompute the global checkpoint, as adding veneers shortens the
  // literals' checkpoint.
248  masm_->ComputeCheckpoint();
249}
250
251
252void VeneerPoolManager::RemoveLabel(Label* label) {
253  label->ClearVeneerPoolManager();
254  std::list<Label*>& list = label->IsNear() ? near_labels_ : far_labels_;
255  Label::Offset* checkpoint_reference =
256      label->IsNear() ? &near_checkpoint_ : &far_checkpoint_;
257  if (label->GetCheckpoint() == *checkpoint_reference) {
258    // We have to compute checkpoint again.
259    *checkpoint_reference = Label::kMaxOffset;
260    for (std::list<Label*>::iterator it = list.begin(); it != list.end();) {
261      if (*it == label) {
262        it = list.erase(it);
263      } else {
264        *checkpoint_reference =
265            std::min(*checkpoint_reference, (*it)->GetCheckpoint());
266        ++it;
267      }
268    }
269    masm_->ComputeCheckpoint();
270  } else {
271    // We only have to remove the label from the list.
272    list.remove(label);
273  }
274}
275
276
277void VeneerPoolManager::EmitLabel(Label* label, Label::Offset emitted_target) {
278  // Define the veneer.
279  Label veneer;
280  masm_->Bind(&veneer);
281  Label::Offset label_checkpoint = Label::kMaxOffset;
282  // Check all uses of this label.
283  for (Label::ForwardRefList::iterator ref = label->GetFirstForwardRef();
284       ref != label->GetEndForwardRef();) {
285    if (ref->IsBranch()) {
286      if (ref->GetCheckpoint() <= emitted_target) {
287        // Use the veneer.
288        masm_->EncodeLabelFor(*ref, &veneer);
289        ref = label->Erase(ref);
290      } else {
291        // Don't use the veneer => update checkpoint.
292        label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
293        ++ref;
294      }
295    } else {
296      ++ref;
297    }
298  }
299  label->SetCheckpoint(label_checkpoint);
300  if (label->IsNear()) {
301    near_checkpoint_ = std::min(near_checkpoint_, label_checkpoint);
302  } else {
303    far_checkpoint_ = std::min(far_checkpoint_, label_checkpoint);
304  }
305  // Generate the veneer.
306  masm_->B(label);
307}
308
309
310void VeneerPoolManager::Emit(Label::Offset target) {
311  VIXL_ASSERT(!IsBlocked());
  // Sort the labels by checkpoint so that no veneer goes out of range.
  // The near labels are always sorted because the near list holds only one
  // range.
315  far_labels_.sort(Label::CompareLabels);
316  // To avoid too many veneers, generate veneers which will be necessary soon.
317  static const size_t kVeneerEmissionMargin = 1 * KBytes;
  // To avoid too many veneers, reuse the generated veneers for other uses
  // which are not too far away.
320  static const size_t kVeneerEmittedMargin = 2 * KBytes;
321  Label::Offset emitted_target = target + kVeneerEmittedMargin;
322  target += kVeneerEmissionMargin;
323  // Reset the checkpoints. They will be computed again in the loop.
324  near_checkpoint_ = Label::kMaxOffset;
325  far_checkpoint_ = Label::kMaxOffset;
326  for (std::list<Label*>::iterator it = near_labels_.begin();
327       it != near_labels_.end();) {
328    Label* label = *it;
    // Move the label from the near list to the far list, because the veneer
    // we are about to generate is an unconditional (far) branch.
331    // The label is pushed at the end of the list. The list remains sorted as
332    // we use an unconditional jump which has the biggest range. However, it
333    // wouldn't be a problem if the items at the end of the list were not
334    // sorted as they won't be used by this generation (their range will be
335    // greater than kVeneerEmittedMargin).
336    it = near_labels_.erase(it);
337    far_labels_.push_back(label);
338    label->SetVeneerPoolManager(this, false);
339    EmitLabel(label, emitted_target);
340  }
341  for (std::list<Label*>::iterator it = far_labels_.begin();
342       it != far_labels_.end();) {
343    // The labels are sorted. As soon as a veneer is not needed, we can stop.
344    if ((*it)->GetCheckpoint() > target) {
345      far_checkpoint_ = std::min(far_checkpoint_, (*it)->GetCheckpoint());
346      break;
347    }
348    // Even if we no longer have use of this label, we can keep it in the list
349    // as the next "B" would add it back.
350    EmitLabel(*it, emitted_target);
351    ++it;
352  }
353#ifdef VIXL_DEBUG
354  for (std::list<Label*>::iterator it = near_labels_.begin();
355       it != near_labels_.end();
356       ++it) {
357    VIXL_ASSERT((*it)->GetCheckpoint() >= near_checkpoint_);
358  }
359  for (std::list<Label*>::iterator it = far_labels_.begin();
360       it != far_labels_.end();
361       ++it) {
362    VIXL_ASSERT((*it)->GetCheckpoint() >= far_checkpoint_);
363  }
364#endif
365  masm_->ComputeCheckpoint();
366}
367
368
369// We use a subclass to access the protected `ExactAssemblyScope` constructor
370// giving us control over the pools, and make the constructor private to limit
371// usage to code paths emitting pools.
372class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
373 private:
374  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm,
375                                      size_t size,
376                                      SizePolicy size_policy = kExactSize)
377      : ExactAssemblyScope(masm,
378                           size,
379                           size_policy,
380                           ExactAssemblyScope::kIgnorePools) {}
381
382  friend void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
383                                              EmitOption option);
384
385  // TODO: `PerformEnsureEmit` is `private`, so we have to make the
386  // `MacroAssembler` a friend.
387  friend class MacroAssembler;
388};
389
390
391void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
392  if (!doing_veneer_pool_generation_) {
393    EmitOption option = kBranchRequired;
394    Label after_pools;
395    Label::Offset literal_target = GetTargetForLiteralEmission();
396    VIXL_ASSERT(literal_target >= 0);
397    bool generate_veneers = target > veneer_pool_manager_.GetCheckpoint();
398    if (target > literal_target) {
399      // We will generate the literal pool. Generate all the veneers which
400      // would become out of range.
401      size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize() +
402                                 kMaxInstructionSizeInBytes;
403      VIXL_ASSERT(IsInt32(literal_pool_size));
404      Label::Offset veneers_target =
405          AlignUp(target + static_cast<Label::Offset>(literal_pool_size), 4);
406      VIXL_ASSERT(veneers_target >= 0);
407      if (veneers_target > veneer_pool_manager_.GetCheckpoint()) {
408        generate_veneers = true;
409      }
410    }
411    if (generate_veneers) {
412      {
413        ExactAssemblyScopeWithoutPoolsCheck
414            guard(this,
415                  kMaxInstructionSizeInBytes,
416                  ExactAssemblyScope::kMaximumSize);
417        b(&after_pools);
418      }
419      doing_veneer_pool_generation_ = true;
420      veneer_pool_manager_.Emit(target);
421      doing_veneer_pool_generation_ = false;
422      option = kNoBranchRequired;
423    }
424    // Check if the macro-assembler's internal literal pool should be emitted
425    // to avoid any overflow. If we already generated the veneers, we can
426    // emit the pool (the branch is already done).
427    if ((target > literal_target) || (option == kNoBranchRequired)) {
428      EmitLiteralPool(option);
429    }
430    BindHelper(&after_pools);
431  }
432  if (GetBuffer()->IsManaged()) {
433    bool grow_requested;
434    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
435    if (grow_requested) ComputeCheckpoint();
436  }
437}
438
439
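// A worked example of the computation below, with illustrative values: if the
// veneer pool checkpoint is 0x1042 and the literal emission target is 0x2000,
// then checkpoint_ = AlignDown(min(0x1042, 0x2000), 4) = 0x1040, further
// clamped to the buffer capacity.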
440void MacroAssembler::ComputeCheckpoint() {
441  checkpoint_ = AlignDown(std::min(veneer_pool_manager_.GetCheckpoint(),
442                                   GetTargetForLiteralEmission()),
443                          4);
444  size_t buffer_size = GetBuffer()->GetCapacity();
445  VIXL_ASSERT(IsInt32(buffer_size));
446  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
447  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
448}
449
450
451void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
452                                     EmitOption option) {
453  if (literal_pool->GetSize() > 0) {
454#ifdef VIXL_DEBUG
455    for (LiteralPool::RawLiteralListIterator literal_it =
456             literal_pool->GetFirst();
457         literal_it != literal_pool->GetEnd();
458         literal_it++) {
459      RawLiteral* literal = *literal_it;
460      VIXL_ASSERT(GetCursorOffset() < literal->GetCheckpoint());
461    }
462#endif
463    Label after_literal;
464    if (option == kBranchRequired) {
465      GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
466      VIXL_ASSERT(!AllowAssembler());
467      {
468        ExactAssemblyScopeWithoutPoolsCheck
469            guard(this,
470                  kMaxInstructionSizeInBytes,
471                  ExactAssemblyScope::kMaximumSize);
472        b(&after_literal);
473      }
474    }
475    GetBuffer()->Align();
476    GetBuffer()->EnsureSpaceFor(literal_pool->GetSize());
477    for (LiteralPool::RawLiteralListIterator it = literal_pool->GetFirst();
478         it != literal_pool->GetEnd();
479         it++) {
480      PlaceHelper(*it);
481      GetBuffer()->Align();
482    }
483    if (option == kBranchRequired) BindHelper(&after_literal);
484    literal_pool->Clear();
485  }
486}
487
488
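// A usage sketch of the switch/case helpers implemented below. The concrete
// jump table type, its constructor argument and the `masm` object are
// assumptions made for illustration:
//
//   JumpTable32bitOffset table(2);   // Hypothetical concrete type; two cases.
//   masm.Switch(r0, &table);
//   masm.Case(&table, 0);
//   //   ... code for case 0 ...
//   masm.Break(&table);
//   masm.Case(&table, 1);
//   //   ... code for case 1 ...
//   masm.Break(&table);
//   masm.Default(&table);
//   //   ... default path ...
//   masm.EndSwitch(&table);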
489void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
490  // 32-bit table A32:
491  // adr ip, table
// add ip, ip, r1, lsl #2
493  // ldr ip, [ip]
494  // jmp: add pc, pc, ip, lsl 2
495  // table:
496  // .int (case_0 - (jmp + 8)) >> 2
497  // .int (case_1 - (jmp + 8)) >> 2
498  // .int (case_2 - (jmp + 8)) >> 2
499
500  // 16-bit table T32:
501  // adr ip, table
502  // jmp: tbh ip, r1
503  // table:
504  // .short (case_0 - (jmp + 4)) >> 1
505  // .short (case_1 - (jmp + 4)) >> 1
506  // .short (case_2 - (jmp + 4)) >> 1
507  // case_0:
508  //   ...
509  //   b end_switch
510  // case_1:
511  //   ...
512  //   b end_switch
513  // ...
514  // end_switch:
515  Label jump_table;
516  UseScratchRegisterScope temps(this);
517  Register scratch = temps.Acquire();
518  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
519
520  // Jump to default if reg is not in [0, table->GetLength()[
521  Cmp(reg, table->GetLength());
522  B(ge, table->GetDefaultLabel());
523
524  Adr(scratch, &jump_table);
525  if (IsUsingA32()) {
526    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
527    switch (table->GetOffsetShift()) {
528      case 0:
529        Ldrb(scratch, MemOperand(scratch));
530        break;
531      case 1:
532        Ldrh(scratch, MemOperand(scratch));
533        break;
534      case 2:
535        Ldr(scratch, MemOperand(scratch));
536        break;
537      default:
538        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
539    }
540    // Emit whatever needs to be emitted if we want to
541    // correctly record the position of the branch instruction
542    uint32_t branch_location = GetCursorOffset();
543    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
544    ExactAssemblyScope scope(this,
545                             table_size + kA32InstructionSizeInBytes,
546                             ExactAssemblyScope::kMaximumSize);
547    add(pc, pc, Operand(scratch, LSL, 2));
548    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
549    bind(&jump_table);
550    GenerateSwitchTable(table, table_size);
551  } else {
552    // Thumb mode - We have tbb and tbh to do this for 8 or 16bit offsets.
553    //  But for 32bit offsets, we use the same coding as for A32
554    if (table->GetOffsetShift() == 2) {
555      // 32bit offsets
556      Add(scratch, scratch, Operand(reg, LSL, 2));
557      Ldr(scratch, MemOperand(scratch));
558      // Cannot use add pc, pc, r lsl 1 as this is unpredictable in T32,
559      // so let's do the shift before
560      Lsl(scratch, scratch, 1);
561      // Emit whatever needs to be emitted if we want to
562      // correctly record the position of the branch instruction
563      uint32_t branch_location = GetCursorOffset();
564      table->SetBranchLocation(branch_location +
565                               GetArchitectureStatePCOffset());
566      ExactAssemblyScope scope(this,
567                               table_size + kMaxInstructionSizeInBytes,
568                               ExactAssemblyScope::kMaximumSize);
569      add(pc, pc, scratch);
570      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
571      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
572      bind(&jump_table);
573      GenerateSwitchTable(table, table_size);
574    } else {
575      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
576                  (table->GetOffsetShift() == 1));
577      // Emit whatever needs to be emitted if we want to
578      // correctly record the position of the branch instruction
579      uint32_t branch_location = GetCursorOffset();
580      table->SetBranchLocation(branch_location +
581                               GetArchitectureStatePCOffset());
582      ExactAssemblyScope scope(this,
583                               table_size + kMaxInstructionSizeInBytes,
584                               ExactAssemblyScope::kMaximumSize);
585      if (table->GetOffsetShift() == 0) {
586        // 8bit offsets
587        tbb(scratch, reg);
588      } else {
589        // 16bit offsets
590        tbh(scratch, reg);
591      }
592      // tbb/tbh is a 32bit instruction
593      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
594      bind(&jump_table);
595      GenerateSwitchTable(table, table_size);
596    }
597  }
598}
599
600
601void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
602  table->BindTable(GetCursorOffset());
603  for (int i = 0; i < table_size / 4; i++) {
604    GetBuffer()->Emit32(0);
605  }
606}
607
608
609// switch/case/default : case
610// case_index is assumed to be < table->GetLength()
611// which is checked in JumpTable::Link and Table::SetPresenceBit
612void MacroAssembler::Case(JumpTableBase* table, int case_index) {
613  table->Link(this, case_index, GetCursorOffset());
614  table->SetPresenceBitForCase(case_index);
615}
616
617// switch/case/default : default
618void MacroAssembler::Default(JumpTableBase* table) {
619  Bind(table->GetDefaultLabel());
620}
621
622// switch/case/default : break
623void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
624
625// switch/case/default : finalize
// Manage the default path, mostly. All empty offsets in the jump table
// will point to default.
628// All values not in [0, table->GetLength()[ are already pointing here anyway.
629void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
630
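// Move an immediate into `tmp` when it cannot be used directly as an operand.
// A sketch of the strategy implemented below, with illustrative values:
//   0x00001234 -> mov  tmp, #0x1234       (fits in 16 bits)
//   0xfffffffc -> mvn  tmp, #0x3          (the inverse is encodable)
//   0x12345678 -> mov  tmp, #0x5678       (low half)
//                 movt tmp, #0x1234       (high half)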
631void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
632                                                Register tmp,
633                                                uint32_t imm) {
634  if (IsUintN(16, imm)) {
635    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
636    mov(cond, tmp, imm & 0xffff);
637    return;
638  }
639  if (IsUsingT32()) {
640    if (ImmediateT32::IsImmediateT32(~imm)) {
641      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
642      mvn(cond, tmp, ~imm);
643      return;
644    }
645  } else {
646    if (ImmediateA32::IsImmediateA32(~imm)) {
647      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
648      mvn(cond, tmp, ~imm);
649      return;
650    }
651  }
652  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
653  mov(cond, tmp, imm & 0xffff);
654  movt(cond, tmp, imm >> 16);
655}
656
657
658void MacroAssembler::PadToMinimumBranchRange(Label* label) {
659  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
660  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
661    uint32_t location = last_reference->GetLocation();
662    if (location + k16BitT32InstructionSizeInBytes ==
663        static_cast<uint32_t>(GetCursorOffset())) {
664      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
665      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
666        VIXL_ASSERT(!InITBlock());
667        // A Cbz or a Cbnz can't jump immediately after the instruction. If the
668        // target is immediately after the Cbz or Cbnz, we insert a nop to
669        // avoid that.
670        EmitT32_16(k16BitT32NopOpcode);
671      }
672    }
673  }
674}
675
676
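// The helper below splits an offset which does not fit in the addressing mode
// into an `add` plus a smaller load/store offset. A sketch with illustrative
// values (mask 0xfff, as used by ldr/str in A32):
//   [r1, #0x12345]  ->  add scratch, r1, #0x12000
//                       [scratch, #0x345]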
677MemOperand MacroAssembler::MemOperandComputationHelper(
678    Condition cond,
679    Register scratch,
680    Register base,
681    uint32_t offset,
682    uint32_t extra_offset_mask) {
683  VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch));
684  VIXL_ASSERT(!AliasesAvailableScratchRegister(base));
685  VIXL_ASSERT(allow_macro_instructions_);
686  VIXL_ASSERT(OutsideITBlock());
687
688  // Check for the simple pass-through case.
689  if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset);
690
691  MacroEmissionCheckScope guard(this);
692  ITScope it_scope(this, &cond);
693
694  uint32_t load_store_offset = offset & extra_offset_mask;
695  uint32_t add_offset = offset & ~extra_offset_mask;
696
697  if (base.IsPC()) {
698    // Special handling for PC bases. We must read the PC in the first
699    // instruction (and only in that instruction), and we must also take care to
700    // keep the same address calculation as loads and stores. For T32, that
701    // means using something like ADR, which uses AlignDown(PC, 4).
702
703    // We don't handle positive offsets from PC because the intention is not
704    // clear; does the user expect the offset from the current
705    // GetCursorOffset(), or to allow a certain amount of space after the
706    // instruction?
707    VIXL_ASSERT((offset & 0x80000000) != 0);
708    if (IsUsingT32()) {
709      // T32: make the first instruction "SUB (immediate, from PC)" -- an alias
710      // of ADR -- to get behaviour like loads and stores. This ADR can handle
711      // at least as much offset as the load_store_offset so it can replace it.
712
713      uint32_t sub_pc_offset = (-offset) & 0xfff;
714      load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
715      add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;
716
717      ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes);
718      sub(cond, scratch, base, sub_pc_offset);
719
720      if (add_offset == 0) return MemOperand(scratch, load_store_offset);
721
722      // The rest of the offset can be generated in the usual way.
723      base = scratch;
724    }
725    // A32 can use any SUB instruction, so we don't have to do anything special
726    // here except to ensure that we read the PC first.
727  }
728
729  add(cond, scratch, base, add_offset);
730  return MemOperand(scratch, load_store_offset);
731}
732
733
734uint32_t MacroAssembler::GetOffsetMask(InstructionType type,
735                                       AddrMode addrmode) {
736  switch (type) {
737    case kLdr:
738    case kLdrb:
739    case kStr:
740    case kStrb:
741      if (IsUsingA32() || (addrmode == Offset)) {
742        return 0xfff;
743      } else {
744        return 0xff;
745      }
746    case kLdrsb:
747    case kLdrh:
748    case kLdrsh:
749    case kStrh:
750      if (IsUsingT32() && (addrmode == Offset)) {
751        return 0xfff;
752      } else {
753        return 0xff;
754      }
755    case kVldr:
756    case kVstr:
757      return 0x3fc;
758    case kLdrd:
759    case kStrd:
760      if (IsUsingA32()) {
761        return 0xff;
762      } else {
763        return 0x3fc;
764      }
765    default:
766      VIXL_UNREACHABLE();
767      return 0;
768  }
769}
770
771
772HARDFLOAT void PrintfTrampolineRRRR(
773    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
774  printf(format, a, b, c, d);
775}
776
777
778HARDFLOAT void PrintfTrampolineRRRD(
779    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
780  printf(format, a, b, c, d);
781}
782
783
784HARDFLOAT void PrintfTrampolineRRDR(
785    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
786  printf(format, a, b, c, d);
787}
788
789
790HARDFLOAT void PrintfTrampolineRRDD(
791    const char* format, uint32_t a, uint32_t b, double c, double d) {
792  printf(format, a, b, c, d);
793}
794
795
796HARDFLOAT void PrintfTrampolineRDRR(
797    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
798  printf(format, a, b, c, d);
799}
800
801
802HARDFLOAT void PrintfTrampolineRDRD(
803    const char* format, uint32_t a, double b, uint32_t c, double d) {
804  printf(format, a, b, c, d);
805}
806
807
808HARDFLOAT void PrintfTrampolineRDDR(
809    const char* format, uint32_t a, double b, double c, uint32_t d) {
810  printf(format, a, b, c, d);
811}
812
813
814HARDFLOAT void PrintfTrampolineRDDD(
815    const char* format, uint32_t a, double b, double c, double d) {
816  printf(format, a, b, c, d);
817}
818
819
820HARDFLOAT void PrintfTrampolineDRRR(
821    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
822  printf(format, a, b, c, d);
823}
824
825
826HARDFLOAT void PrintfTrampolineDRRD(
827    const char* format, double a, uint32_t b, uint32_t c, double d) {
828  printf(format, a, b, c, d);
829}
830
831
832HARDFLOAT void PrintfTrampolineDRDR(
833    const char* format, double a, uint32_t b, double c, uint32_t d) {
834  printf(format, a, b, c, d);
835}
836
837
838HARDFLOAT void PrintfTrampolineDRDD(
839    const char* format, double a, uint32_t b, double c, double d) {
840  printf(format, a, b, c, d);
841}
842
843
844HARDFLOAT void PrintfTrampolineDDRR(
845    const char* format, double a, double b, uint32_t c, uint32_t d) {
846  printf(format, a, b, c, d);
847}
848
849
850HARDFLOAT void PrintfTrampolineDDRD(
851    const char* format, double a, double b, uint32_t c, double d) {
852  printf(format, a, b, c, d);
853}
854
855
856HARDFLOAT void PrintfTrampolineDDDR(
857    const char* format, double a, double b, double c, uint32_t d) {
858  printf(format, a, b, c, d);
859}
860
861
862HARDFLOAT void PrintfTrampolineDDDD(
863    const char* format, double a, double b, double c, double d) {
864  printf(format, a, b, c, d);
865}
866
867
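// A usage sketch, assuming a MacroAssembler named `masm`; core registers map
// to integer conversions and D registers to floating-point conversions:
//
//   masm.Printf("a=%d b=%d c=%g d=%g\n", r0, r1, d0, d1);
//
// Under simulated code this traps via Hvc(kPrintfCode); natively it calls one
// of the PrintfTrampoline* helpers above via Blx.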
868void MacroAssembler::Printf(const char* format,
869                            CPURegister reg1,
870                            CPURegister reg2,
871                            CPURegister reg3,
872                            CPURegister reg4) {
873  // Exclude all registers from the available scratch registers, so
874  // that we are able to use ip below.
875  // TODO: Refactor this function to use UseScratchRegisterScope
876  // for temporary registers below.
877  UseScratchRegisterScope scratch(this);
878  scratch.ExcludeAll();
879  if (generate_simulator_code_) {
880    PushRegister(reg4);
881    PushRegister(reg3);
882    PushRegister(reg2);
883    PushRegister(reg1);
884    Push(RegisterList(r0, r1));
885    StringLiteral* format_literal =
886        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
887    Adr(r0, format_literal);
888    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
889                    (reg2.GetType() << 4) | reg1.GetType();
890    Mov(r1, args);
891    Hvc(kPrintfCode);
892    Pop(RegisterList(r0, r1));
893    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
894               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
895    Drop(size);
896  } else {
    // Generating code for a native platform (32-bit environment).
    // Preserve core registers r0-r3, r12 and r14, plus r5 which is used below
    // to re-align the stack.
899    const uint32_t saved_registers_mask =
900        kCallerSavedRegistersMask | (1 << r5.GetCode());
901    Push(RegisterList(saved_registers_mask));
902    // Push VFP registers.
903    Vpush(Untyped64, DRegisterList(d0, 8));
904    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
    // Find a register which has been saved and which doesn't need to be
    // printed.
907    RegisterList available_registers(kCallerSavedRegistersMask);
908    if (reg1.GetType() == CPURegister::kRRegister) {
909      available_registers.Remove(Register(reg1.GetCode()));
910    }
911    if (reg2.GetType() == CPURegister::kRRegister) {
912      available_registers.Remove(Register(reg2.GetCode()));
913    }
914    if (reg3.GetType() == CPURegister::kRRegister) {
915      available_registers.Remove(Register(reg3.GetCode()));
916    }
917    if (reg4.GetType() == CPURegister::kRRegister) {
918      available_registers.Remove(Register(reg4.GetCode()));
919    }
920    Register tmp = available_registers.GetFirstAvailableRegister();
921    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
922    // Push the flags.
923    Mrs(tmp, APSR);
924    Push(tmp);
925    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
926    Push(tmp);
927    // Push the registers to print on the stack.
928    PushRegister(reg4);
929    PushRegister(reg3);
930    PushRegister(reg2);
931    PushRegister(reg1);
932    int core_count = 1;
933    int vfp_count = 0;
934    uint32_t printf_type = 0;
    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
    // Reg4 may stay on the stack if all the registers to print are core
    // registers.
938    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
939    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
940    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
941    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
942    // Ensure that the stack is aligned on 8 bytes.
943    And(r5, sp, 0x7);
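    // r5 now holds sp modulo 8 (typically 0 or 4 here); it is subtracted from
    // sp below and added back after the call to restore the original stack
    // pointer.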
944    if (core_count == 5) {
      // One 32-bit argument (reg4) has been left on the stack: align the stack
      // before this argument.
948      Pop(r0);
949      Sub(sp, sp, r5);
950      Push(r0);
951    } else {
952      Sub(sp, sp, r5);
953    }
954    // Select the right trampoline depending on the arguments.
955    uintptr_t address;
956    switch (printf_type) {
957      case 0:
958        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
959        break;
960      case 1:
961        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
962        break;
963      case 2:
964        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
965        break;
966      case 3:
967        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
968        break;
969      case 4:
970        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
971        break;
972      case 5:
973        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
974        break;
975      case 6:
976        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
977        break;
978      case 7:
979        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
980        break;
981      case 8:
982        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
983        break;
984      case 9:
985        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
986        break;
987      case 10:
988        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
989        break;
990      case 11:
991        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
992        break;
993      case 12:
994        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
995        break;
996      case 13:
997        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
998        break;
999      case 14:
1000        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
1001        break;
1002      case 15:
1003        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
1004        break;
1005      default:
1006        VIXL_UNREACHABLE();
1007        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
1008        break;
1009    }
1010    StringLiteral* format_literal =
1011        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
1012    Adr(r0, format_literal);
1013    Mov(ip, Operand::From(address));
1014    Blx(ip);
    // If register reg4 was left on the stack, skip it.
1016    if (core_count == 5) Drop(kRegSizeInBytes);
1017    // Restore the stack as it was before alignment.
1018    Add(sp, sp, r5);
1019    // Restore the flags.
1020    Pop(tmp);
1021    Vmsr(FPSCR, tmp);
1022    Pop(tmp);
1023    Msr(APSR_nzcvqg, tmp);
    // Restore the registers.
1025    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
1026    Vpop(Untyped64, DRegisterList(d0, 8));
1027    Pop(RegisterList(saved_registers_mask));
1028  }
1029}
1030
1031
1032void MacroAssembler::PushRegister(CPURegister reg) {
1033  switch (reg.GetType()) {
1034    case CPURegister::kNoRegister:
1035      break;
1036    case CPURegister::kRRegister:
1037      Push(Register(reg.GetCode()));
1038      break;
1039    case CPURegister::kSRegister:
1040      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
1041      break;
1042    case CPURegister::kDRegister:
1043      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
1044      break;
1045    case CPURegister::kQRegister:
1046      VIXL_UNIMPLEMENTED();
1047      break;
1048  }
1049}
1050
1051
1052void MacroAssembler::PreparePrintfArgument(CPURegister reg,
1053                                           int* core_count,
1054                                           int* vfp_count,
1055                                           uint32_t* printf_type) {
1056  switch (reg.GetType()) {
1057    case CPURegister::kNoRegister:
1058      break;
1059    case CPURegister::kRRegister:
1060      VIXL_ASSERT(*core_count <= 4);
1061      if (*core_count < 4) Pop(Register(*core_count));
1062      *core_count += 1;
1063      break;
1064    case CPURegister::kSRegister:
1065      VIXL_ASSERT(*vfp_count < 4);
1066      *printf_type |= 1 << (*core_count + *vfp_count - 1);
1067      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
1068      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
1069      *vfp_count += 1;
1070      break;
1071    case CPURegister::kDRegister:
1072      VIXL_ASSERT(*vfp_count < 4);
1073      *printf_type |= 1 << (*core_count + *vfp_count - 1);
1074      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
1075      *vfp_count += 1;
1076      break;
1077    case CPURegister::kQRegister:
1078      VIXL_UNIMPLEMENTED();
1079      break;
1080  }
1081}
1082
1083
1084void MacroAssembler::Delegate(InstructionType type,
1085                              InstructionCondROp instruction,
1086                              Condition cond,
1087                              Register rn,
1088                              const Operand& operand) {
1089  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
1090              (type == kUxtb16));
1091
1092  if (type == kMovt) {
1093    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
1094  }
1095
1096  // This delegate only supports teq with immediates.
1097  CONTEXT_SCOPE;
1098  if ((type == kTeq) && operand.IsImmediate()) {
1099    UseScratchRegisterScope temps(this);
1100    Register scratch = temps.Acquire();
1101    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
1102    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1103    teq(cond, rn, scratch);
1104    return;
1105  }
1106  Assembler::Delegate(type, instruction, cond, rn, operand);
1107}
1108
1109
1110void MacroAssembler::Delegate(InstructionType type,
1111                              InstructionCondSizeROp instruction,
1112                              Condition cond,
1113                              EncodingSize size,
1114                              Register rn,
1115                              const Operand& operand) {
1116  CONTEXT_SCOPE;
1117  VIXL_ASSERT(size.IsBest());
1118  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
1119              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
1120              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
1121              (type == kUxtb) || (type == kUxth));
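  // T32 has no register-shifted-register form of these instructions, so the
  // shift is materialised into a scratch register first. For example (the
  // scratch register is whatever the scope below hands out):
  //   cmp r0, r1, LSL r2  ->  lsl scratch, r1, r2
  //                           cmp r0, scratch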
1122  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
    VIXL_ASSERT((type != kMov) && (type != kMovs));
1124    InstructionCondRROp shiftop = NULL;
1125    switch (operand.GetShift().GetType()) {
1126      case LSL:
1127        shiftop = &Assembler::lsl;
1128        break;
1129      case LSR:
1130        shiftop = &Assembler::lsr;
1131        break;
1132      case ASR:
1133        shiftop = &Assembler::asr;
1134        break;
1135      case RRX:
1136        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1137        VIXL_UNREACHABLE();
1138        break;
1139      case ROR:
1140        shiftop = &Assembler::ror;
1141        break;
1142      default:
1143        VIXL_UNREACHABLE();
1144    }
1145    if (shiftop != NULL) {
1146      UseScratchRegisterScope temps(this);
1147      Register scratch = temps.Acquire();
1148      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1149      (this->*shiftop)(cond,
1150                       scratch,
1151                       operand.GetBaseRegister(),
1152                       operand.GetShiftRegister());
1153      (this->*instruction)(cond, size, rn, scratch);
1154      return;
1155    }
1156  }
1157  if (operand.IsImmediate()) {
1158    uint32_t imm = operand.GetImmediate();
1159    switch (type) {
1160      case kMov:
1161      case kMovs:
1162        if (!rn.IsPC()) {
1163          // Immediate is too large, but not using PC, so handle with mov{t}.
1164          HandleOutOfBoundsImmediate(cond, rn, imm);
1165          if (type == kMovs) {
1166            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1167            tst(cond, rn, rn);
1168          }
1169          return;
1170        } else if (type == kMov) {
1171          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
1172          // Immediate is too large and using PC, so handle using a temporary
1173          // register.
1174          UseScratchRegisterScope temps(this);
1175          Register scratch = temps.Acquire();
1176          HandleOutOfBoundsImmediate(al, scratch, imm);
1177          EnsureEmitFor(kMaxInstructionSizeInBytes);
1178          bx(cond, scratch);
1179          return;
1180        }
1181        break;
1182      case kCmn:
1183      case kCmp:
1184        if (IsUsingA32() || !rn.IsPC()) {
1185          UseScratchRegisterScope temps(this);
1186          Register scratch = temps.Acquire();
1187          HandleOutOfBoundsImmediate(cond, scratch, imm);
1188          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1189          (this->*instruction)(cond, size, rn, scratch);
1190          return;
1191        }
1192        break;
1193      case kMvn:
1194      case kMvns:
1195        if (!rn.IsPC()) {
1196          UseScratchRegisterScope temps(this);
1197          Register scratch = temps.Acquire();
1198          HandleOutOfBoundsImmediate(cond, scratch, imm);
1199          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1200          (this->*instruction)(cond, size, rn, scratch);
1201          return;
1202        }
1203        break;
1204      case kTst:
1205        if (IsUsingA32() || !rn.IsPC()) {
1206          UseScratchRegisterScope temps(this);
1207          Register scratch = temps.Acquire();
1208          HandleOutOfBoundsImmediate(cond, scratch, imm);
1209          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1210          (this->*instruction)(cond, size, rn, scratch);
1211          return;
1212        }
1213        break;
      default:  // kSxtb, kSxth, kUxtb, kUxth
1215        break;
1216    }
1217  }
1218  Assembler::Delegate(type, instruction, cond, size, rn, operand);
1219}
1220
1221
1222void MacroAssembler::Delegate(InstructionType type,
1223                              InstructionCondRROp instruction,
1224                              Condition cond,
1225                              Register rd,
1226                              Register rn,
1227                              const Operand& operand) {
1228  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
1229      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
1230      (type == kPkhbt) || (type == kPkhtb)) {
1231    UnimplementedDelegate(type);
1232    return;
1233  }
1234
1235  // This delegate only handles the following instructions.
1236  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
1237              (type == kRscs));
1238  CONTEXT_SCOPE;
1239
  // T32 does not support register-shifted-register operands, so emulate them.
1241  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1242    InstructionCondRROp shiftop = NULL;
1243    switch (operand.GetShift().GetType()) {
1244      case LSL:
1245        shiftop = &Assembler::lsl;
1246        break;
1247      case LSR:
1248        shiftop = &Assembler::lsr;
1249        break;
1250      case ASR:
1251        shiftop = &Assembler::asr;
1252        break;
1253      case RRX:
1254        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1255        VIXL_UNREACHABLE();
1256        break;
1257      case ROR:
1258        shiftop = &Assembler::ror;
1259        break;
1260      default:
1261        VIXL_UNREACHABLE();
1262    }
1263    if (shiftop != NULL) {
1264      UseScratchRegisterScope temps(this);
1265      Register rm = operand.GetBaseRegister();
1266      Register rs = operand.GetShiftRegister();
1267      // Try to use rd as a scratch register. We can do this if it aliases rs or
1268      // rm (because we read them in the first instruction), but not rn.
1269      if (!rd.Is(rn)) temps.Include(rd);
1270      Register scratch = temps.Acquire();
1271      // TODO: The scope length was measured empirically. We should analyse the
      // worst-case size and add targeted tests.
1273      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1274      (this->*shiftop)(cond, scratch, rm, rs);
1275      (this->*instruction)(cond, rd, rn, scratch);
1276      return;
1277    }
1278  }
1279
  // T32 does not have an Rsc instruction, so invert the lhs input and turn it
  // into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
1282  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
1283  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1284    // The RegisterShiftRegister case should have been handled above.
1285    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1286    UseScratchRegisterScope temps(this);
1287    // Try to use rd as a scratch register. We can do this if it aliases rn
1288    // (because we read it in the first instruction), but not rm.
1289    temps.Include(rd);
1290    temps.Exclude(operand);
1291    Register negated_rn = temps.Acquire();
1292    {
1293      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1294      mvn(cond, negated_rn, rn);
1295    }
1296    if (type == kRsc) {
1297      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1298      adc(cond, rd, negated_rn, operand);
1299      return;
1300    }
1301    // TODO: We shouldn't have to specify how much space the next instruction
1302    // needs.
1303    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1304    adcs(cond, rd, negated_rn, operand);
1305    return;
1306  }
1307
1308  if (operand.IsImmediate()) {
    // If the immediate can be encoded when inverted, turn Orn into Orr.
    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
    // immediate into a scratch register.
1312    int32_t imm = operand.GetSignedImmediate();
1313    if (((type == kOrn) || (type == kOrns)) && IsModifiedImmediate(~imm)) {
1314      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1315      switch (type) {
1316        case kOrn:
1317          orr(cond, rd, rn, ~imm);
1318          return;
1319        case kOrns:
1320          orrs(cond, rd, rn, ~imm);
1321          return;
1322        default:
1323          VIXL_UNREACHABLE();
1324          break;
1325      }
1326    }
1327  }
1328
  // A32 does not have an Orn instruction, so invert the rhs input and turn it
  // into an Orr.
1331  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1332    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
1333    //  mvn r0, r2
1334    //  orr r0, r1, r0
1335    Register scratch;
1336    UseScratchRegisterScope temps(this);
    // Try to use rd as a scratch register. We can do this if it aliases the
    // operand registers (they are read in the first instruction), but not rn.
1339    if (!rd.Is(rn)) temps.Include(rd);
1340    scratch = temps.Acquire();
1341    {
1342      // TODO: We shouldn't have to specify how much space the next instruction
1343      // needs.
1344      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1345      mvn(cond, scratch, operand);
1346    }
1347    if (type == kOrns) {
1348      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1349      orrs(cond, rd, rn, scratch);
1350      return;
1351    }
1352    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1353    orr(cond, rd, rn, scratch);
1354    return;
1355  }
1356
1357  if (operand.IsImmediate()) {
1358    UseScratchRegisterScope temps(this);
1359    // Allow using the destination as a scratch register if possible.
1360    if (!rd.Is(rn)) temps.Include(rd);
1361    Register scratch = temps.Acquire();
1362    int32_t imm = operand.GetSignedImmediate();
1363    HandleOutOfBoundsImmediate(cond, scratch, imm);
1364    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1365    (this->*instruction)(cond, rd, rn, scratch);
1366    return;
1367  }
1368  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1369}
1370
1371
1372void MacroAssembler::Delegate(InstructionType type,
1373                              InstructionCondSizeRL instruction,
1374                              Condition cond,
1375                              EncodingSize size,
1376                              Register rd,
1377                              Label* label) {
1378  VIXL_ASSERT((type == kLdr) || (type == kAdr));
1379
1380  CONTEXT_SCOPE;
1381  VIXL_ASSERT(size.IsBest());
1382
1383  if ((type == kLdr) && label->IsBound()) {
1384    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
1385    UseScratchRegisterScope temps(this);
1386    temps.Include(rd);
1387    uint32_t mask = GetOffsetMask(type, Offset);
1388    ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), label, mask));
1389    return;
1390  }
1391
1392  Assembler::Delegate(type, instruction, cond, size, rd, label);
1393}
1394
1395
1396void MacroAssembler::Delegate(InstructionType type,
1397                              InstructionCondSizeRROp instruction,
1398                              Condition cond,
1399                              EncodingSize size,
1400                              Register rd,
1401                              Register rn,
1402                              const Operand& operand) {
1403  VIXL_ASSERT(
1404      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1405      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1406      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1407      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1408      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1409      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1410      (type == kSub) || (type == kSubs));
1411
1412  CONTEXT_SCOPE;
1413  VIXL_ASSERT(size.IsBest());
1414  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1415    InstructionCondRROp shiftop = NULL;
1416    switch (operand.GetShift().GetType()) {
1417      case LSL:
1418        shiftop = &Assembler::lsl;
1419        break;
1420      case LSR:
1421        shiftop = &Assembler::lsr;
1422        break;
1423      case ASR:
1424        shiftop = &Assembler::asr;
1425        break;
1426      case RRX:
1427        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1428        VIXL_UNREACHABLE();
1429        break;
1430      case ROR:
1431        shiftop = &Assembler::ror;
1432        break;
1433      default:
1434        VIXL_UNREACHABLE();
1435    }
1436    if (shiftop != NULL) {
1437      UseScratchRegisterScope temps(this);
1438      Register rm = operand.GetBaseRegister();
1439      Register rs = operand.GetShiftRegister();
1440      // Try to use rd as a scratch register. We can do this if it aliases rs or
1441      // rm (because we read them in the first instruction), but not rn.
1442      if (!rd.Is(rn)) temps.Include(rd);
1443      Register scratch = temps.Acquire();
1444      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1445      (this->*shiftop)(cond, scratch, rm, rs);
1446      (this->*instruction)(cond, size, rd, rn, scratch);
1447      return;
1448    }
1449  }
1450  if (operand.IsImmediate()) {
1451    int32_t imm = operand.GetSignedImmediate();
1452    if (ImmediateT32::IsImmediateT32(~imm)) {
1453      if (IsUsingT32()) {
1454        switch (type) {
1455          case kOrr:
1456            orn(cond, rd, rn, ~imm);
1457            return;
1458          case kOrrs:
1459            orns(cond, rd, rn, ~imm);
1460            return;
1461          default:
1462            break;
1463        }
1464      }
1465    }
1466    if (imm < 0) {
1467      InstructionCondSizeRROp asmcb = NULL;
1468      // Add and sub are equivalent using an arithmetic negation:
1469      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1470      // Add and sub with carry are equivalent using a bitwise NOT:
1471      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
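      // For example, with illustrative values:
      //   add r0, r1, #-4  ->  sub r0, r1, #4
      //   adc r0, r1, #-1  ->  sbc r0, r1, #0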
1472      switch (type) {
1473        case kAdd:
1474          asmcb = &Assembler::sub;
1475          imm = -imm;
1476          break;
1477        case kAdds:
1478          asmcb = &Assembler::subs;
1479          imm = -imm;
1480          break;
1481        case kSub:
1482          asmcb = &Assembler::add;
1483          imm = -imm;
1484          break;
1485        case kSubs:
1486          asmcb = &Assembler::adds;
1487          imm = -imm;
1488          break;
1489        case kAdc:
1490          asmcb = &Assembler::sbc;
1491          imm = ~imm;
1492          break;
1493        case kAdcs:
1494          asmcb = &Assembler::sbcs;
1495          imm = ~imm;
1496          break;
1497        case kSbc:
1498          asmcb = &Assembler::adc;
1499          imm = ~imm;
1500          break;
1501        case kSbcs:
1502          asmcb = &Assembler::adcs;
1503          imm = ~imm;
1504          break;
1505        default:
1506          break;
1507      }
1508      if (asmcb != NULL) {
1509        CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes);
1510        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1511        return;
1512      }
1513    }
1514    UseScratchRegisterScope temps(this);
1515    // Allow using the destination as a scratch register if possible.
1516    if (!rd.Is(rn)) temps.Include(rd);
1517
1518    if (rn.IsPC()) {
1519      // If we're reading the PC, we need to do it in the first instruction,
1520      // otherwise we'll read the wrong value. We rely on this to handle the
1521      // long-range PC-relative MemOperands which can result from user-managed
1522      // literals.
1523
1524      // Only handle negative offsets. The correct way to handle positive
1525      // offsets isn't clear; does the user want the offset from the start of
1526      // the macro, or from the end (to allow a certain amount of space)?
1527      bool offset_is_negative_or_zero = (imm <= 0);
1528      switch (type) {
1529        case kAdd:
1530        case kAdds:
1531          offset_is_negative_or_zero = (imm <= 0);
1532          break;
1533        case kSub:
1534        case kSubs:
1535          offset_is_negative_or_zero = (imm >= 0);
1536          break;
1537        case kAdc:
1538        case kAdcs:
1539          offset_is_negative_or_zero = (imm < 0);
1540          break;
1541        case kSbc:
1542        case kSbcs:
1543          offset_is_negative_or_zero = (imm > 0);
1544          break;
1545        default:
1546          break;
1547      }
1548      if (offset_is_negative_or_zero) {
1549        {
1550          rn = temps.Acquire();
1551          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1552          mov(cond, rn, pc);
1553        }
1554        // Recurse rather than falling through, to try to get the immediate into
1555        // a single instruction.
1556        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1557        (this->*instruction)(cond, size, rd, rn, operand);
1558        return;
1559      }
1560    } else {
1561      Register scratch = temps.Acquire();
1562      // TODO: The scope length was measured empirically. We should analyse the
1563      // worst-case size and add targeted tests.
1564      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1565      mov(cond, scratch, operand.GetImmediate());
1566      (this->*instruction)(cond, size, rd, rn, scratch);
1567      return;
1568    }
1569  }
1570  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1571}
1572
1573
1574void MacroAssembler::Delegate(InstructionType type,
1575                              InstructionRL instruction,
1576                              Register rn,
1577                              Label* label) {
1578  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1579
1580  CONTEXT_SCOPE;
1581  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1582  if (IsUsingA32()) {
1583    if (type == kCbz) {
1584      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1585    } else {
1586      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1587    }
1588  } else if (rn.IsLow()) {
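    // cbz/cbnz can only branch forwards over a small range, so when they
    // cannot be encoded directly, invert the test and branch over an
    // unconditional b, which has a much greater range.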
1589    switch (type) {
1590      case kCbnz: {
1591        Label done;
1592        cbz(rn, &done);
1593        b(label);
1594        Bind(&done);
1595        return;
1596      }
1597      case kCbz: {
1598        Label done;
1599        cbnz(rn, &done);
1600        b(label);
1601        Bind(&done);
1602        return;
1603      }
1604      default:
1605        break;
1606    }
1607  }
1608  Assembler::Delegate(type, instruction, rn, label);
1609}
1610
1611
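// Return true if every byte of imm is either 0x00 or 0xff, so that the value
// (once replicated to 64 bits) is encodable as a vmov.i64 byte-mask
// immediate. For example, IsI64BitPattern(0xff0000ffu) is true but
// IsI64BitPattern(0xff0001ffu) is not.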
1612template <typename T>
1613static inline bool IsI64BitPattern(T imm) {
1614  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1615    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1616  }
1617  return true;
1618}
1619
1620
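// Return true if all bytes of imm are equal, so that the value can be
// materialised with vmov.i8; e.g. IsI8BitPattern(0xabababab) is true.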
1621template <typename T>
1622static inline bool IsI8BitPattern(T imm) {
1623  uint8_t imm8 = imm & 0xff;
1624  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1625    imm >>= 8;
1626    if ((imm & 0xff) != imm8) return false;
1627  }
1628  return true;
1629}
1630
1631
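// Return true for 32-bit values whose bitwise NOT is encodable as a vmvn.i32
// immediate; the recognised patterns are shown in the comments below.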
1632static inline bool CanBeInverted(uint32_t imm32) {
1633  uint32_t fill8 = 0;
1634
1635  if ((imm32 & 0xffffff00) == 0xffffff00) {
1636    //    11111111 11111111 11111111 abcdefgh
1637    return true;
1638  }
1639  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1640    fill8 = imm32 & 0xff;
1641    imm32 >>= 8;
1642    if ((imm32 >> 8) == 0xffff) {
1643      //    11111111 11111111 abcdefgh 00000000
1644      // or 11111111 11111111 abcdefgh 11111111
1645      return true;
1646    }
1647    if ((imm32 & 0xff) == fill8) {
1648      imm32 >>= 8;
1649      if ((imm32 >> 8) == 0xff) {
1650        //    11111111 abcdefgh 00000000 00000000
1651        // or 11111111 abcdefgh 11111111 11111111
1652        return true;
1653      }
1654      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1655        //    abcdefgh 11111111 11111111 11111111
1656        return true;
1657      }
1658    }
1659  }
1660  return false;
1661}
1662
1663
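// Replicate imm across a wider integer type; for example,
// replicate<uint64_t>(uint32_t(0xff0000ff)) is 0xff0000ffff0000ff. This is
// used below to widen a 32-bit pattern into a vmov.i64 immediate.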
1664template <typename RES, typename T>
1665static inline RES replicate(T imm) {
1666  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1667              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1668  RES res = imm;
1669  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1670    res = (res << (sizeof(T) * 8)) | imm;
1671  }
1672  return res;
1673}
1674
1675
1676void MacroAssembler::Delegate(InstructionType type,
1677                              InstructionCondDtSSop instruction,
1678                              Condition cond,
1679                              DataType dt,
1680                              SRegister rd,
1681                              const SOperand& operand) {
1682  CONTEXT_SCOPE;
1683  if (type == kVmov) {
1684    if (operand.IsImmediate() && dt.Is(F32)) {
1685      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1686      if (neon_imm.CanConvert<float>()) {
1687        // movw ip, imm16
1688        // movt ip, imm16
1689        // vmov s0, ip
1690        UseScratchRegisterScope temps(this);
1691        Register scratch = temps.Acquire();
1692        float f = neon_imm.GetImmediate<float>();
1693        // TODO: The scope length was measured empirically. We should
1694        // analyse the worst-case size and add targeted tests.
1696        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1697        mov(cond, scratch, FloatToRawbits(f));
1698        vmov(cond, rd, scratch);
1699        return;
1700      }
1701    }
1702  }
1703  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1704}
1705
1706
1707void MacroAssembler::Delegate(InstructionType type,
1708                              InstructionCondDtDDop instruction,
1709                              Condition cond,
1710                              DataType dt,
1711                              DRegister rd,
1712                              const DOperand& operand) {
1713  CONTEXT_SCOPE;
1714  if (type == kVmov) {
1715    if (operand.IsImmediate()) {
1716      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1717      switch (dt.GetValue()) {
1718        case I32:
1719          if (neon_imm.CanConvert<uint32_t>()) {
1720            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1721            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1722            if (IsI8BitPattern(imm)) {
1723              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1724              vmov(cond, I8, rd, imm & 0xff);
1725              return;
1726            }
1727            // vmov.i32 d0, 0xff0000ff will translate into
1728            // vmov.i64 d0, 0xff0000ffff0000ff
1729            if (IsI64BitPattern(imm)) {
1730              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1731              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1732              return;
1733            }
1734            // vmov.i32 d0, 0xffab0000 will translate into
1735            // vmvn.i32 d0, 0x0054ffff
1736            if (cond.Is(al) && CanBeInverted(imm)) {
1737              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1738              vmvn(I32, rd, ~imm);
1739              return;
1740            }
1741          }
1742          break;
1743        case I16:
1744          if (neon_imm.CanConvert<uint16_t>()) {
1745            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1746            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1747            if (IsI8BitPattern(imm)) {
1748              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1749              vmov(cond, I8, rd, imm & 0xff);
1750              return;
1751            }
1752          }
1753          break;
1754        case I64:
1755          if (neon_imm.CanConvert<uint64_t>()) {
1756            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1757            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1758            if (IsI8BitPattern(imm)) {
1759              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1760              vmov(cond, I8, rd, imm & 0xff);
1761              return;
1762            }
1763            // mov ip, lo(imm64)
1764            // vdup d0, ip
1765            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1766            // preserved
1767            {
1768              UseScratchRegisterScope temps(this);
1769              Register scratch = temps.Acquire();
1770              {
1771                // TODO: The scope length was measured empirically. We should
1772                // analyse the worst-case size and add targeted tests.
1774                CodeBufferCheckScope scope(this,
1775                                           2 * kMaxInstructionSizeInBytes);
1776                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1777              }
1778              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1779              vdup(cond, Untyped32, rd, scratch);
1780            }
1781            // mov ip, hi(imm64)
1782            // vmov d0[1], ip
1783            {
1784              UseScratchRegisterScope temps(this);
1785              Register scratch = temps.Acquire();
1786              {
1787                // TODO: The scope length was measured empirically. We should
1788                // analyse the worst-case size and add targeted tests.
1790                CodeBufferCheckScope scope(this,
1791                                           2 * kMaxInstructionSizeInBytes);
1792                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1793              }
1794              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1795              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1796            }
1797            return;
1798          }
1799          break;
1800        default:
1801          break;
1802      }
1803      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1804      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1805        // mov ip, imm32
1806        // vdup.16/32 d0, ip
1807        UseScratchRegisterScope temps(this);
1808        Register scratch = temps.Acquire();
1809        {
1810          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1811          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1812        }
1813        DataTypeValue vdup_dt = Untyped32;
1814        switch (dt.GetValue()) {
1815          case I16:
1816            vdup_dt = Untyped16;
1817            break;
1818          case I32:
1819            vdup_dt = Untyped32;
1820            break;
1821          default:
1822            VIXL_UNREACHABLE();
1823        }
1824        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1825        vdup(cond, vdup_dt, rd, scratch);
1826        return;
1827      }
1828      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1829        float f = neon_imm.GetImmediate<float>();
1830        // Punt to vmov.i32
1831        // TODO: The scope length was guessed based on the double case below. We
1832        // should analyse the worst-case size and add targeted tests.
1833        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1834        vmov(cond, I32, rd, FloatToRawbits(f));
1835        return;
1836      }
1837      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1838        // Punt to vmov.i64
1839        double d = neon_imm.GetImmediate<double>();
1840        // TODO: The scope length was measured empirically. We should
1841        // analyse the worst-case size and add targeted tests.
1843        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1844        vmov(cond, I64, rd, DoubleToRawbits(d));
1845        return;
1846      }
1847    }
1848  }
1849  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1850}
1851
1852
1853void MacroAssembler::Delegate(InstructionType type,
1854                              InstructionCondDtQQop instruction,
1855                              Condition cond,
1856                              DataType dt,
1857                              QRegister rd,
1858                              const QOperand& operand) {
1859  CONTEXT_SCOPE;
1860  if (type == kVmov) {
1861    if (operand.IsImmediate()) {
1862      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1863      switch (dt.GetValue()) {
1864        case I32:
1865          if (neon_imm.CanConvert<uint32_t>()) {
1866            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1867            // vmov.i32 q0, 0xabababab will translate into vmov.i8 q0, 0xab
1868            if (IsI8BitPattern(imm)) {
1869              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1870              vmov(cond, I8, rd, imm & 0xff);
1871              return;
1872            }
1873            // vmov.i32 q0, 0xff0000ff will translate into
1874            // vmov.i64 q0, 0xff0000ffff0000ff
1875            if (IsI64BitPattern(imm)) {
1876              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1877              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1878              return;
1879            }
1880            // vmov.i32 q0, 0xffab0000 will translate into
1881            // vmvn.i32 q0, 0x0054ffff
1882            if (CanBeInverted(imm)) {
1883              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1884              vmvn(cond, I32, rd, ~imm);
1885              return;
1886            }
1887          }
1888          break;
1889        case I16:
1890          if (neon_imm.CanConvert<uint16_t>()) {
1891            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1892            // vmov.i16 q0, 0xabab will translate into vmov.i8 q0, 0xab
1893            if (IsI8BitPattern(imm)) {
1894              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1895              vmov(cond, I8, rd, imm & 0xff);
1896              return;
1897            }
1898          }
1899          break;
1900        case I64:
1901          if (neon_imm.CanConvert<uint64_t>()) {
1902            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1903            // vmov.i64 q0, -1 will translate into vmov.i8 q0, 0xff
1904            if (IsI8BitPattern(imm)) {
1905              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1906              vmov(cond, I8, rd, imm & 0xff);
1907              return;
1908            }
1909            // mov ip, lo(imm64)
1910            // vdup q0, ip
1911            // vdup is preferred to 'vmov d0[0]' as the remaining lanes of
1912            // q0 do not need to be preserved
1913            {
1914              UseScratchRegisterScope temps(this);
1915              Register scratch = temps.Acquire();
1916              {
1917                CodeBufferCheckScope scope(this,
1918                                           2 * kMaxInstructionSizeInBytes);
1919                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1920              }
1921              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1922              vdup(cond, Untyped32, rd, scratch);
1923            }
1924            // mov ip, hi(imm64)
1925            // vmov.i32 d0[1], ip
1926            // vmov d1, d0
1927            {
1928              UseScratchRegisterScope temps(this);
1929              Register scratch = temps.Acquire();
1930              {
1931                CodeBufferCheckScope scope(this,
1932                                           2 * kMaxInstructionSizeInBytes);
1933                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1934              }
1935              {
1936                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1937                vmov(cond,
1938                     Untyped32,
1939                     DRegisterLane(rd.GetLowDRegister(), 1),
1940                     scratch);
1941              }
1942              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1943              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1944            }
1945            return;
1946          }
1947          break;
1948        default:
1949          break;
1950      }
1951      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1952      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1953        // mov ip, imm32
1954        // vdup.16/32 q0, ip
1955        UseScratchRegisterScope temps(this);
1956        Register scratch = temps.Acquire();
1957        {
1958          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1959          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1960        }
1961        DataTypeValue vdup_dt = Untyped32;
1962        switch (dt.GetValue()) {
1963          case I16:
1964            vdup_dt = Untyped16;
1965            break;
1966          case I32:
1967            vdup_dt = Untyped32;
1968            break;
1969          default:
1970            VIXL_UNREACHABLE();
1971        }
1972        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1973        vdup(cond, vdup_dt, rd, scratch);
1974        return;
1975      }
1976      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1977        // Punt to vmov.i32
1978        float f = neon_imm.GetImmediate<float>();
1979        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1980        vmov(cond, I32, rd, FloatToRawbits(f));
1981        return;
1982      }
1983      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1984        // Use vmov to create the double in the low D register, then duplicate
1985        // it into the high D register.
1986        double d = neon_imm.GetImmediate<double>();
1987        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
1988        vmov(cond, F64, rd.GetLowDRegister(), d);
1989        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1990        return;
1991      }
1992    }
1993  }
1994  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1995}
1996
1997
1998void MacroAssembler::Delegate(InstructionType type,
1999                              InstructionCondRL instruction,
2000                              Condition cond,
2001                              Register rt,
2002                              Label* label) {
2003  VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) ||
2004              (type == kLdrsh));
2005
2006  CONTEXT_SCOPE;
2007
2008  if (label->IsBound()) {
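    // The label is already bound, so its PC-relative offset is known: compute
    // the address into a scratch register and load from there.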
2009    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2010    UseScratchRegisterScope temps(this);
2011    temps.Include(rt);
2012    Register scratch = temps.Acquire();
2013    uint32_t mask = GetOffsetMask(type, Offset);
2014    switch (type) {
2015      case kLdrb:
2016        ldrb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2017        return;
2018      case kLdrh:
2019        ldrh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2020        return;
2021      case kLdrsb:
2022        ldrsb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2023        return;
2024      case kLdrsh:
2025        ldrsh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2026        return;
2027      default:
2028        VIXL_UNREACHABLE();
2029    }
2030    return;
2031  }
2032
2033  Assembler::Delegate(type, instruction, cond, rt, label);
2034}
2035
2036
2037void MacroAssembler::Delegate(InstructionType type,
2038                              InstructionCondRRL instruction,
2039                              Condition cond,
2040                              Register rt,
2041                              Register rt2,
2042                              Label* label) {
2043  VIXL_ASSERT(type == kLdrd);
2044
2045  CONTEXT_SCOPE;
2046
2047  if (label->IsBound()) {
2048    CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
2049    UseScratchRegisterScope temps(this);
2050    temps.Include(rt, rt2);
2051    Register scratch = temps.Acquire();
2052    uint32_t mask = GetOffsetMask(type, Offset);
2053    ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, label, mask));
2054    return;
2055  }
2056
2057  Assembler::Delegate(type, instruction, cond, rt, rt2, label);
2058}
2059
2060
2061void MacroAssembler::Delegate(InstructionType type,
2062                              InstructionCondSizeRMop instruction,
2063                              Condition cond,
2064                              EncodingSize size,
2065                              Register rd,
2066                              const MemOperand& operand) {
2067  CONTEXT_SCOPE;
2068  VIXL_ASSERT(size.IsBest());
2069  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
2070              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
2071              (type == kStrb) || (type == kStrh));
2072  if (operand.IsImmediate()) {
2073    const Register& rn = operand.GetBaseRegister();
2074    AddrMode addrmode = operand.GetAddrMode();
2075    int32_t offset = operand.GetOffsetImmediate();
2076    uint32_t mask = GetOffsetMask(type, addrmode);
2077    bool negative;
2078    // Try to maximize the part of the offset encoded in the MemOperand
2079    // (load_store_offset). Add or subtract the remainder separately
2080    // (add_sub_offset).
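    // For illustration, with a 12-bit offset mask (0xfff) and a positive
    // offset of 0x12345, load_store_offset is 0x345 and add_sub_offset is
    // 0x12000.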
2081    int32_t add_sub_offset;
2082    int32_t load_store_offset;
2083    load_store_offset = offset & mask;
2084    if (offset >= 0) {
2085      negative = false;
2086      add_sub_offset = offset & ~mask;
2087    } else {
2088      negative = true;
2089      add_sub_offset = -offset & ~mask;
2090      if (load_store_offset > 0) add_sub_offset += mask + 1;
2091    }
2092    switch (addrmode) {
2093      case PreIndex:
2094        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2095        if (!rn.Is(rd)) {
2096          // Pre-Indexed case:
2097          // ldr r0, [r1, 12345]! will translate into
2098          //   add r1, r1, 12345
2099          //   ldr r0, [r1]
2100          {
2101            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2102            if (negative) {
2103              sub(cond, rn, rn, add_sub_offset);
2104            } else {
2105              add(cond, rn, rn, add_sub_offset);
2106            }
2107          }
2108          {
2109            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2110            (this->*instruction)(cond,
2111                                 size,
2112                                 rd,
2113                                 MemOperand(rn, load_store_offset, PreIndex));
2114          }
2115          return;
2116        }
2117        break;
2118      case Offset: {
2119        UseScratchRegisterScope temps(this);
2120        // Allow using the destination as a scratch register if possible.
2121        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2122            !rd.Is(rn)) {
2123          temps.Include(rd);
2124        }
2125        Register scratch = temps.Acquire();
2126        // Offset case:
2127        // ldr r0, [r1, 12345] will translate into
2128        //   add r0, r1, 12345
2129        //   ldr r0, [r0]
2130        {
2131          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2132          if (negative) {
2133            sub(cond, scratch, rn, add_sub_offset);
2134          } else {
2135            add(cond, scratch, rn, add_sub_offset);
2136          }
2137        }
2138        {
2139          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2140          (this->*instruction)(cond,
2141                               size,
2142                               rd,
2143                               MemOperand(scratch, load_store_offset));
2144        }
2145        return;
2146      }
2147      case PostIndex:
2148        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2149        if (!rn.Is(rd)) {
2150          // Post-indexed case:
2151          // ldr r0, [r1], imm32 will translate into
2152          //   ldr r0, [r1]
2153          //   movw ip, imm32 & 0xffff
2154          //   movt ip, imm32 >> 16
2155          //   add r1, r1, ip
2156          {
2157            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2158            (this->*instruction)(cond,
2159                                 size,
2160                                 rd,
2161                                 MemOperand(rn, load_store_offset, PostIndex));
2162          }
2163          {
2164            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2165            if (negative) {
2166              sub(cond, rn, rn, add_sub_offset);
2167            } else {
2168              add(cond, rn, rn, add_sub_offset);
2169            }
2170          }
2171          return;
2172        }
2173        break;
2174    }
2175  } else if (operand.IsPlainRegister()) {
2176    const Register& rn = operand.GetBaseRegister();
2177    AddrMode addrmode = operand.GetAddrMode();
2178    const Register& rm = operand.GetOffsetRegister();
2179    if (rm.IsPC()) {
2180      VIXL_ABORT_WITH_MSG(
2181          "The MacroAssembler does not convert loads and stores with a PC "
2182          "offset register.\n");
2183    }
2184    if (rn.IsPC()) {
2185      if (addrmode == Offset) {
2186        if (IsUsingT32()) {
2187          VIXL_ABORT_WITH_MSG(
2188              "The MacroAssembler does not convert loads and stores with a PC "
2189              "base register for T32.\n");
2190        }
2191      } else {
2192        VIXL_ABORT_WITH_MSG(
2193            "The MacroAssembler does not convert loads and stores with a PC "
2194            "base register in pre-index or post-index mode.\n");
2195      }
2196    }
2197    switch (addrmode) {
2198      case PreIndex:
2199        // Avoid the unpredictable case 'str r0, [r0, r1]!'
2200        if (!rn.Is(rd)) {
2201          // Pre-Indexed case:
2202          // ldr r0, [r1, r2]! will translate into
2203          //   add r1, r1, r2
2204          //   ldr r0, [r1]
2205          {
2206            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2207            if (operand.GetSign().IsPlus()) {
2208              add(cond, rn, rn, rm);
2209            } else {
2210              sub(cond, rn, rn, rm);
2211            }
2212          }
2213          {
2214            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2215            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2216          }
2217          return;
2218        }
2219        break;
2220      case Offset: {
2221        UseScratchRegisterScope temps(this);
2222        // Allow using the destination as a scratch register if this is not a
2223        // store.
2224        // Avoid using PC as a temporary as this has side-effects.
2225        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2226            !rd.IsPC()) {
2227          temps.Include(rd);
2228        }
2229        Register scratch = temps.Acquire();
2230        // Offset case:
2231        // ldr r0, [r1, r2] will translate into
2232        //   add r0, r1, r2
2233        //   ldr r0, [r0]
2234        {
2235          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2236          if (operand.GetSign().IsPlus()) {
2237            add(cond, scratch, rn, rm);
2238          } else {
2239            sub(cond, scratch, rn, rm);
2240          }
2241        }
2242        {
2243          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2244          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
2245        }
2246        return;
2247      }
2248      case PostIndex:
2249        // Avoid the unpredictable case 'ldr r0, [r0], r1'
2250        if (!rn.Is(rd)) {
2251          // Post-indexed case:
2252          // ldr r0, [r1], r2 will translate into
2253          //   ldr r0, [r1]
2254          //   add r1, r1, r2
2255          {
2256            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2257            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2258          }
2259          {
2260            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2261            if (operand.GetSign().IsPlus()) {
2262              add(cond, rn, rn, rm);
2263            } else {
2264              sub(cond, rn, rn, rm);
2265            }
2266          }
2267          return;
2268        }
2269        break;
2270    }
2271  }
2272  Assembler::Delegate(type, instruction, cond, size, rd, operand);
2273}
2274
2275
2276void MacroAssembler::Delegate(InstructionType type,
2277                              InstructionCondRRMop instruction,
2278                              Condition cond,
2279                              Register rt,
2280                              Register rt2,
2281                              const MemOperand& operand) {
2282  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
2283      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
2284      (type == kStrexb) || (type == kStrexh)) {
2285    UnimplementedDelegate(type);
2286    return;
2287  }
2288
2289  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
2290
2291  CONTEXT_SCOPE;
2292
2293  // TODO: Should we allow these cases?
2294  if (IsUsingA32()) {
2295    // The first register needs to be even.
2296    if ((rt.GetCode() & 1) != 0) {
2297      UnimplementedDelegate(type);
2298      return;
2299    }
2300    // Registers need to be adjacent.
2301    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
2302      UnimplementedDelegate(type);
2303      return;
2304    }
2305    // LDRD lr, pc, [...] is not allowed, as rt2 would be pc.
2306    if (rt.Is(lr)) {
2307      UnimplementedDelegate(type);
2308      return;
2309    }
2310  }
2311
2312  if (operand.IsImmediate()) {
2313    const Register& rn = operand.GetBaseRegister();
2314    AddrMode addrmode = operand.GetAddrMode();
2315    int32_t offset = operand.GetOffsetImmediate();
2316    switch (addrmode) {
2317      case PreIndex: {
2318        // Allow using the destinations as scratch registers if possible.
2319        UseScratchRegisterScope temps(this);
2320        if (type == kLdrd) {
2321          if (!rt.Is(rn)) temps.Include(rt);
2322          if (!rt2.Is(rn)) temps.Include(rt2);
2323        }
2324
2325        // Pre-Indexed case:
2326        // ldrd r0, r1, [r2, 12345]! will translate into
2327        //   add r2, 12345
2328        //   ldrd r0, r1, [r2]
2329        {
2330          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2331          add(cond, rn, rn, offset);
2332        }
2333        {
2334          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2335          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2336        }
2337        return;
2338      }
2339      case Offset: {
2340        UseScratchRegisterScope temps(this);
2341        // Allow using the destinations as scratch registers if possible.
2342        if (type == kLdrd) {
2343          if (!rt.Is(rn)) temps.Include(rt);
2344          if (!rt2.Is(rn)) temps.Include(rt2);
2345        }
2346        Register scratch = temps.Acquire();
2347        // Offset case:
2348        // ldrd r0, r1, [r2, 12345] will translate into
2349        //   add r0, r2, 12345
2350        //   ldrd r0, r1, [r0]
2351        {
2352          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2353          add(cond, scratch, rn, offset);
2354        }
2355        {
2356          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2357          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2358        }
2359        return;
2360      }
2361      case PostIndex:
2362        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2363        if (!rn.Is(rt) && !rn.Is(rt2)) {
2364          // Post-indexed case:
2365          // ldrd r0, r1, [r2], imm32 will translate into
2366          //   ldrd r0, r1, [r2]
2367          //   movw ip, imm32 & 0xffff
2368          //   movt ip, imm32 >> 16
2369          //   add r2, ip
2370          {
2371            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2372            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2373          }
2374          {
2375            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2376            add(cond, rn, rn, offset);
2377          }
2378          return;
2379        }
2380        break;
2381    }
2382  }
2383  if (operand.IsPlainRegister()) {
2384    const Register& rn = operand.GetBaseRegister();
2385    const Register& rm = operand.GetOffsetRegister();
2386    AddrMode addrmode = operand.GetAddrMode();
2387    switch (addrmode) {
2388      case PreIndex:
2389        // ldrd r0, r1, [r2, r3]! will translate into
2390        //   add r2, r3
2391        //   ldrd r0, r1, [r2]
2392        {
2393          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2394          if (operand.GetSign().IsPlus()) {
2395            add(cond, rn, rn, rm);
2396          } else {
2397            sub(cond, rn, rn, rm);
2398          }
2399        }
2400        {
2401          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2402          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2403        }
2404        return;
2405      case PostIndex:
2406        // ldrd r0, r1, [r2], r3 will translate into
2407        //   ldrd r0, r1, [r2]
2408        //   add r2, r3
2409        {
2410          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2411          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2412        }
2413        {
2414          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2415          if (operand.GetSign().IsPlus()) {
2416            add(cond, rn, rn, rm);
2417          } else {
2418            sub(cond, rn, rn, rm);
2419          }
2420        }
2421        return;
2422      case Offset: {
2423        UseScratchRegisterScope temps(this);
2424        // Allow using the destinations as scratch registers if possible.
2425        if (type == kLdrd) {
2426          if (!rt.Is(rn)) temps.Include(rt);
2427          if (!rt2.Is(rn)) temps.Include(rt2);
2428        }
2429        Register scratch = temps.Acquire();
2430        // Offset case:
2431        // ldrd r0, r1, [r2, r3] will translate into
2432        //   add r0, r2, r3
2433        //   ldrd r0, r1, [r0]
2434        {
2435          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2436          if (operand.GetSign().IsPlus()) {
2437            add(cond, scratch, rn, rm);
2438          } else {
2439            sub(cond, scratch, rn, rm);
2440          }
2441        }
2442        {
2443          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2444          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2445        }
2446        return;
2447      }
2448    }
2449  }
2450  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2451}
2452
2453
2454void MacroAssembler::Delegate(InstructionType type,
2455                              InstructionCondDtSMop instruction,
2456                              Condition cond,
2457                              DataType dt,
2458                              SRegister rd,
2459                              const MemOperand& operand) {
2460  CONTEXT_SCOPE;
2461  if (operand.IsImmediate()) {
2462    const Register& rn = operand.GetBaseRegister();
2463    AddrMode addrmode = operand.GetAddrMode();
2464    int32_t offset = operand.GetOffsetImmediate();
2465    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2466                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2467    if (rn.IsPC()) {
2468      VIXL_ABORT_WITH_MSG(
2469          "The MacroAssembler does not convert vldr or vstr with a PC base "
2470          "register.\n");
2471    }
2472    switch (addrmode) {
2473      case PreIndex:
2474        // Pre-Indexed case:
2475        // vldr.32 s0, [r1, 12345]! will translate into
2476        //   add r1, 12345
2477        //   vldr.32 s0, [r1]
2478        if (offset != 0) {
2479          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2480          add(cond, rn, rn, offset);
2481        }
2482        {
2483          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2484          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2485        }
2486        return;
2487      case Offset: {
2488        UseScratchRegisterScope temps(this);
2489        Register scratch = temps.Acquire();
2490        // Offset case:
2491        // vldr.32 s0, [r1, 12345] will translate into
2492        //   add ip, r1, 12345
2493        //   vldr.32 s0, [ip]
2494        {
2495          VIXL_ASSERT(offset != 0);
2496          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2497          add(cond, scratch, rn, offset);
2498        }
2499        {
2500          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2501          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2502        }
2503        return;
2504      }
2505      case PostIndex:
2506        // Post-indexed case:
2507        // vldr.32 s0, [r1], imm32 will translate into
2508        //   vldr.32 s0, [r1]
2509        //   movw ip, imm32 & 0xffff
2510        //   movt ip, imm32 >> 16
2511        //   add r1, ip
2512        {
2513          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2514          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2515        }
2516        if (offset != 0) {
2517          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2518          add(cond, rn, rn, offset);
2519        }
2520        return;
2521    }
2522  }
2523  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2524}
2525
2526
2527void MacroAssembler::Delegate(InstructionType type,
2528                              InstructionCondDtDMop instruction,
2529                              Condition cond,
2530                              DataType dt,
2531                              DRegister rd,
2532                              const MemOperand& operand) {
2533  CONTEXT_SCOPE;
2534  if (operand.IsImmediate()) {
2535    const Register& rn = operand.GetBaseRegister();
2536    AddrMode addrmode = operand.GetAddrMode();
2537    int32_t offset = operand.GetOffsetImmediate();
2538    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2539                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2540    if (rn.IsPC()) {
2541      VIXL_ABORT_WITH_MSG(
2542          "The MacroAssembler does not convert vldr or vstr with a PC base "
2543          "register.\n");
2544    }
2545    switch (addrmode) {
2546      case PreIndex:
2547        // Pre-Indexed case:
2548        // vldr.64 d0, [r1, 12345]! will translate into
2549        //   add r1, 12345
2550        //   vldr.64 d0, [r1]
2551        if (offset != 0) {
2552          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2553          add(cond, rn, rn, offset);
2554        }
2555        {
2556          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2557          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2558        }
2559        return;
2560      case Offset: {
2561        UseScratchRegisterScope temps(this);
2562        Register scratch = temps.Acquire();
2563        // Offset case:
2564        // vldr.64 d0, [r1, 12345] will translate into
2565        //   add ip, r1, 12345
2566        //   vldr.64 d0, [ip]
2567        {
2568          VIXL_ASSERT(offset != 0);
2569          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2570          add(cond, scratch, rn, offset);
2571        }
2572        {
2573          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2574          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2575        }
2576        return;
2577      }
2578      case PostIndex:
2579        // Post-indexed case:
2580        // vldr.64 d0, [r1], imm32 will translate into
2581        //   vldr.64 d0, [r1]
2582        //   movw ip, imm32 & 0xffff
2583        //   movt ip, imm32 >> 16
2584        //   add r1, ip
2585        {
2586          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2587          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2588        }
2589        if (offset != 0) {
2590          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2591          add(cond, rn, rn, offset);
2592        }
2593        return;
2594    }
2595  }
2596  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2597}
2598
2599
2600void MacroAssembler::Delegate(InstructionType type,
2601                              InstructionCondMsrOp instruction,
2602                              Condition cond,
2603                              MaskedSpecialRegister spec_reg,
2604                              const Operand& operand) {
2605  USE(type);
2606  VIXL_ASSERT(type == kMsr);
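  // The immediate could not be encoded directly, so materialise it in a
  // scratch register and use the register form of msr.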
2607  if (operand.IsImmediate()) {
2608    UseScratchRegisterScope temps(this);
2609    Register scratch = temps.Acquire();
2610    {
2611      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2612      mov(cond, scratch, operand);
2613    }
2614    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2615    msr(cond, spec_reg, scratch);
2616    return;
2617  }
2618  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2619}
2620
2621
2622void MacroAssembler::Delegate(InstructionType type,
2623                              InstructionCondDtDL instruction,
2624                              Condition cond,
2625                              DataType dt,
2626                              DRegister rd,
2627                              Label* label) {
2628  VIXL_ASSERT(type == kVldr);
2629
2630  CONTEXT_SCOPE;
2631
2632  if (label->IsBound()) {
2633    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2634    UseScratchRegisterScope temps(this);
2635    Register scratch = temps.Acquire();
2636    uint32_t mask = GetOffsetMask(type, Offset);
2637    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2638    return;
2639  }
2640
2641  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2642}
2643
2644
2645void MacroAssembler::Delegate(InstructionType type,
2646                              InstructionCondDtSL instruction,
2647                              Condition cond,
2648                              DataType dt,
2649                              SRegister rd,
2650                              Label* label) {
2651  VIXL_ASSERT(type == kVldr);
2652
2653  CONTEXT_SCOPE;
2654
2655  if (label->IsBound()) {
2656    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2657    UseScratchRegisterScope temps(this);
2658    Register scratch = temps.Acquire();
2659    uint32_t mask = GetOffsetMask(type, Offset);
2660    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2661    return;
2662  }
2663
2664  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2665}
2666
2667
2668#undef CONTEXT_SCOPE
2669#undef TOSTRING
2670#undef STRINGIFY
2671
2672// Start of generated code.
2673// End of generated code.
2674}  // namespace aarch32
2675}  // namespace vixl
2676