macro-assembler-aarch32.cc revision 80b4a1f554a92b2c4d4504265d0bac545c74c69b
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
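// As a rough illustration (the exact path depends on how __FILE__ is spelt by
// the build): a CONTEXT_SCOPE use at, say, line 1234 expands to
//   ContextScope context(this, "macro-assembler-aarch32.cc:1234");
// tagging the scope with the source location of the macro-assembler helper.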
35
36namespace vixl {
37namespace aarch32 {
38
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT(masm_ == NULL);
41  VIXL_ASSERT(masm != NULL);
42  masm_ = masm;
43
44  old_available_ = masm_->GetScratchRegisterList()->GetList();
45  old_available_vfp_ = masm_->GetScratchVRegisterList()->GetList();
46
47  parent_ = masm->GetCurrentScratchRegisterScope();
48  masm->SetCurrentScratchRegisterScope(this);
49}
50
51
52void UseScratchRegisterScope::Close() {
53  if (masm_ != NULL) {
54    // Ensure that scopes nest perfectly, and do not outlive their parents.
55    // This is a run-time check because the order of destruction of objects in
56    // the _same_ scope is implementation-defined, and is likely to change in
57    // optimised builds.
58    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
59    masm_->SetCurrentScratchRegisterScope(parent_);
60
61    masm_->GetScratchRegisterList()->SetList(old_available_);
62    masm_->GetScratchVRegisterList()->SetList(old_available_vfp_);
63
64    masm_ = NULL;
65  }
66}
67
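// A minimal usage sketch (assuming, as elsewhere in this file, that the scope
// is opened by the constructor and closed by the destructor):
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register scratch = temps.Acquire();  // Removed from the available list.
//     ...                                  // Use scratch freely here.
//   }  // Close() restores the previous lists of available scratch registers.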
68
69bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
70  VIXL_ASSERT(masm_ != NULL);
71  VIXL_ASSERT(reg.IsValid());
72  return masm_->GetScratchRegisterList()->Includes(reg);
73}
74
75
76bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
77  VIXL_ASSERT(masm_ != NULL);
78  VIXL_ASSERT(reg.IsValid());
79  return masm_->GetScratchVRegisterList()->IncludesAllOf(reg);
80}
81
82
83Register UseScratchRegisterScope::Acquire() {
84  VIXL_ASSERT(masm_ != NULL);
85  Register reg = masm_->GetScratchRegisterList()->GetFirstAvailableRegister();
86  VIXL_CHECK(reg.IsValid());
87  masm_->GetScratchRegisterList()->Remove(reg);
88  return reg;
89}
90
91
92VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
93  switch (size_in_bits) {
94    case kSRegSizeInBits:
95      return AcquireS();
96    case kDRegSizeInBits:
97      return AcquireD();
98    case kQRegSizeInBits:
99      return AcquireQ();
100    default:
101      VIXL_UNREACHABLE();
102      return NoVReg;
103  }
104}
105
106
107QRegister UseScratchRegisterScope::AcquireQ() {
108  VIXL_ASSERT(masm_ != NULL);
109  QRegister reg =
110      masm_->GetScratchVRegisterList()->GetFirstAvailableQRegister();
111  VIXL_CHECK(reg.IsValid());
112  masm_->GetScratchVRegisterList()->Remove(reg);
113  return reg;
114}
115
116
117DRegister UseScratchRegisterScope::AcquireD() {
118  VIXL_ASSERT(masm_ != NULL);
119  DRegister reg =
120      masm_->GetScratchVRegisterList()->GetFirstAvailableDRegister();
121  VIXL_CHECK(reg.IsValid());
122  masm_->GetScratchVRegisterList()->Remove(reg);
123  return reg;
124}
125
126
127SRegister UseScratchRegisterScope::AcquireS() {
128  VIXL_ASSERT(masm_ != NULL);
129  SRegister reg =
130      masm_->GetScratchVRegisterList()->GetFirstAvailableSRegister();
131  VIXL_CHECK(reg.IsValid());
132  masm_->GetScratchVRegisterList()->Remove(reg);
133  return reg;
134}
135
136
137void UseScratchRegisterScope::Release(const Register& reg) {
138  VIXL_ASSERT(masm_ != NULL);
139  VIXL_ASSERT(reg.IsValid());
140  VIXL_ASSERT(!masm_->GetScratchRegisterList()->Includes(reg));
141  masm_->GetScratchRegisterList()->Combine(reg);
142}
143
144
145void UseScratchRegisterScope::Release(const VRegister& reg) {
146  VIXL_ASSERT(masm_ != NULL);
147  VIXL_ASSERT(reg.IsValid());
148  VIXL_ASSERT(!masm_->GetScratchVRegisterList()->IncludesAliasOf(reg));
149  masm_->GetScratchVRegisterList()->Combine(reg);
150}
151
152
153void UseScratchRegisterScope::Include(const RegisterList& list) {
154  VIXL_ASSERT(masm_ != NULL);
155  RegisterList excluded_registers(sp, lr, pc);
156  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
157  RegisterList* available = masm_->GetScratchRegisterList();
158  available->SetList(available->GetList() | mask);
159}
160
161
162void UseScratchRegisterScope::Include(const VRegisterList& list) {
163  VIXL_ASSERT(masm_ != NULL);
164  VRegisterList* available = masm_->GetScratchVRegisterList();
165  available->SetList(available->GetList() | list.GetList());
166}
167
168
169void UseScratchRegisterScope::Exclude(const RegisterList& list) {
170  VIXL_ASSERT(masm_ != NULL);
171  RegisterList* available = masm_->GetScratchRegisterList();
172  available->SetList(available->GetList() & ~list.GetList());
173}
174
175
176void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
177  VIXL_ASSERT(masm_ != NULL);
178  VRegisterList* available = masm_->GetScratchVRegisterList();
179  available->SetList(available->GetList() & ~list.GetList());
180}
181
182
183void UseScratchRegisterScope::Exclude(const Operand& operand) {
184  if (operand.IsImmediateShiftedRegister()) {
185    Exclude(operand.GetBaseRegister());
186  } else if (operand.IsRegisterShiftedRegister()) {
187    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
188  } else {
189    VIXL_ASSERT(operand.IsImmediate());
190  }
191}
192
193
194void UseScratchRegisterScope::ExcludeAll() {
195  VIXL_ASSERT(masm_ != NULL);
196  masm_->GetScratchRegisterList()->SetList(0);
197  masm_->GetScratchVRegisterList()->SetList(0);
198}
199
200
201void VeneerPoolManager::AddLabel(Label* label) {
202  if (last_label_reference_offset_ != 0) {
203    // If the pool grows faster than the instruction stream, we must adjust
204    // the checkpoint to compensate. The veneer pool entries take 32 bits, so
205    // this can only occur when two consecutive 16-bit instructions add veneer
206    // pool entries.
207    // This is typically the case for cbz and cbnz (other forward branches
208    // have a 32 bit variant which is always used).
209    if (last_label_reference_offset_ + 2 * k16BitT32InstructionSizeInBytes ==
210        static_cast<uint32_t>(masm_->GetCursorOffset())) {
211      // We found two 16-bit forward branches generated one after the other.
212      // That means the pool will grow by one 32-bit branch while the cursor
213      // offset moves forward by only one 16-bit instruction.
214      // Update the near checkpoint margin to manage the difference.
215      near_checkpoint_margin_ +=
216          k32BitT32InstructionSizeInBytes - k16BitT32InstructionSizeInBytes;
217    }
218  }
219  Label::ForwardReference& back = label->GetBackForwardRef();
220  VIXL_ASSERT(back.GetMaxForwardDistance() >= kCbzCbnzRange);
221  if (!label->IsInVeneerPool()) {
222    if (back.GetMaxForwardDistance() <= kNearLabelRange) {
223      near_labels_.push_back(label);
224      label->SetVeneerPoolManager(this, true);
225    } else {
226      far_labels_.push_back(label);
227      label->SetVeneerPoolManager(this, false);
228    }
229  } else if (back.GetMaxForwardDistance() <= kNearLabelRange) {
230    if (!label->IsNear()) {
231      far_labels_.remove(label);
232      near_labels_.push_back(label);
233      label->SetVeneerPoolManager(this, true);
234    }
235  }
236
237  back.SetIsBranch();
238  last_label_reference_offset_ = back.GetLocation();
239  label->UpdateCheckpoint();
240  Label::Offset tmp = label->GetCheckpoint();
241  if (label->IsNear()) {
242    if (near_checkpoint_ > tmp) near_checkpoint_ = tmp;
243    if (max_near_checkpoint_ >= tmp) {
244      // This checkpoint is before some already in the near list. That means
245      // that the veneer (if needed) will be emitted before some of the veneers
246      // already in the list. We adjust the margin with the size of a veneer
247      // branch.
248      near_checkpoint_margin_ += k32BitT32InstructionSizeInBytes;
249    } else {
250      max_near_checkpoint_ = tmp;
251    }
252  } else {
253    if (far_checkpoint_ > tmp) far_checkpoint_ = tmp;
254  }
255  // Always recompute the global checkpoint, as adding veneers can shorten
256  // the literals' checkpoint.
257  masm_->ComputeCheckpoint();
258}
259
260
261void VeneerPoolManager::RemoveLabel(Label* label) {
262  label->ClearVeneerPoolManager();
263  std::list<Label*>& list = label->IsNear() ? near_labels_ : far_labels_;
264  Label::Offset* checkpoint_reference =
265      label->IsNear() ? &near_checkpoint_ : &far_checkpoint_;
266  if (label->GetCheckpoint() == *checkpoint_reference) {
267    // We have to compute checkpoint again.
268    *checkpoint_reference = Label::kMaxOffset;
269    for (std::list<Label*>::iterator it = list.begin(); it != list.end();) {
270      if (*it == label) {
271        it = list.erase(it);
272      } else {
273        *checkpoint_reference =
274            std::min(*checkpoint_reference, (*it)->GetCheckpoint());
275        ++it;
276      }
277    }
278    masm_->ComputeCheckpoint();
279  } else {
280    // We only have to remove the label from the list.
281    list.remove(label);
282  }
283}
284
285
286void VeneerPoolManager::EmitLabel(Label* label, Label::Offset emitted_target) {
287  // Define the veneer.
288  Label veneer;
289  masm_->Bind(&veneer);
290  Label::Offset label_checkpoint = Label::kMaxOffset;
291  // Check all uses of this label.
292  for (Label::ForwardRefList::iterator ref = label->GetFirstForwardRef();
293       ref != label->GetEndForwardRef();) {
294    if (ref->IsBranch()) {
295      if (ref->GetCheckpoint() <= emitted_target) {
296        // Use the veneer.
297        masm_->EncodeLabelFor(*ref, &veneer);
298        ref = label->Erase(ref);
299      } else {
300        // Don't use the veneer => update checkpoint.
301        label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
302        ++ref;
303      }
304    } else {
305      ++ref;
306    }
307  }
308  label->SetCheckpoint(label_checkpoint);
309  if (label->IsNear()) {
310    near_checkpoint_ = std::min(near_checkpoint_, label_checkpoint);
311  } else {
312    far_checkpoint_ = std::min(far_checkpoint_, label_checkpoint);
313  }
314  // Generate the veneer.
315  masm_->B(label);
316}
317
318
319void VeneerPoolManager::Emit(Label::Offset target) {
320  VIXL_ASSERT(!IsBlocked());
321  // Sort the labels by checkpoint, so that no veneer ends up being
322  // out of range.
323  near_labels_.sort(Label::CompareLabels);
324  far_labels_.sort(Label::CompareLabels);
325  // To avoid too many veneers, generate veneers which will be necessary soon.
326  static const size_t kVeneerEmissionMargin = 1 * KBytes;
327  // To avoid too many veneers, also let uses which are not too far away
328  // share the veneers generated here.
329  static const size_t kVeneerEmittedMargin = 2 * KBytes;
330  Label::Offset emitted_target = target + kVeneerEmittedMargin;
331  target += kVeneerEmissionMargin;
332  // Reset the checkpoints. They will be computed again in the loop.
333  near_checkpoint_ = Label::kMaxOffset;
334  far_checkpoint_ = Label::kMaxOffset;
335  max_near_checkpoint_ = 0;
336  near_checkpoint_margin_ = 0;
337  for (std::list<Label*>::iterator it = near_labels_.begin();
338       it != near_labels_.end();) {
339    Label* label = *it;
340    // Move the label from the near list to the far list, because the veneer
341    // we are about to emit is an unconditional (far-range) branch.
342    // The label is pushed at the end of the list. The list remains sorted as
343    // we use an unconditional jump which has the biggest range. However, it
344    // wouldn't be a problem if the items at the end of the list were not
345    // sorted as they won't be used by this generation (their range will be
346    // greater than kVeneerEmittedMargin).
347    it = near_labels_.erase(it);
348    far_labels_.push_back(label);
349    label->SetVeneerPoolManager(this, false);
350    EmitLabel(label, emitted_target);
351  }
352  for (std::list<Label*>::iterator it = far_labels_.begin();
353       it != far_labels_.end();) {
354    // The labels are sorted. As soon as a veneer is not needed, we can stop.
355    if ((*it)->GetCheckpoint() > target) {
356      far_checkpoint_ = std::min(far_checkpoint_, (*it)->GetCheckpoint());
357      break;
358    }
359    // Even if this label has no other users left, we can keep it in the list,
360    // as the "B" emitted by EmitLabel below would add it back anyway.
361    EmitLabel(*it, emitted_target);
362    ++it;
363  }
364#ifdef VIXL_DEBUG
365  for (std::list<Label*>::iterator it = near_labels_.begin();
366       it != near_labels_.end();
367       ++it) {
368    VIXL_ASSERT((*it)->GetCheckpoint() >= near_checkpoint_);
369  }
370  for (std::list<Label*>::iterator it = far_labels_.begin();
371       it != far_labels_.end();
372       ++it) {
373    VIXL_ASSERT((*it)->GetCheckpoint() >= far_checkpoint_);
374  }
375#endif
376  masm_->ComputeCheckpoint();
377}
378
379
380// We use a subclass to access the protected `ExactAssemblyScope` constructor
381// giving us control over the pools, and make the constructor private to limit
382// usage to code paths emitting pools.
383class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
384 private:
385  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm,
386                                      size_t size,
387                                      SizePolicy size_policy = kExactSize)
388      : ExactAssemblyScope(masm,
389                           size,
390                           size_policy,
391                           ExactAssemblyScope::kIgnorePools) {}
392
393  friend void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
394                                              EmitOption option);
395
396  // TODO: `PerformEnsureEmit` is `private`, so we have to make the
397  // `MacroAssembler` a friend.
398  friend class MacroAssembler;
399};
400
401
402void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
403  if (!doing_veneer_pool_generation_) {
404    EmitOption option = kBranchRequired;
405    Label after_pools;
406    Label::Offset literal_target = GetTargetForLiteralEmission();
407    VIXL_ASSERT(literal_target >= 0);
408    bool generate_veneers = target > veneer_pool_manager_.GetCheckpoint();
409    if (target > literal_target) {
410      // We will generate the literal pool. Generate all the veneers which
411      // would become out of range.
412      size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize() +
413                                 kMaxInstructionSizeInBytes;
414      VIXL_ASSERT(IsInt32(literal_pool_size));
415      Label::Offset veneers_target =
416          AlignUp(target + static_cast<Label::Offset>(literal_pool_size), 4);
417      VIXL_ASSERT(veneers_target >= 0);
418      if (veneers_target > veneer_pool_manager_.GetCheckpoint()) {
419        generate_veneers = true;
420      }
421    }
422    if (generate_veneers) {
423      {
424        ExactAssemblyScopeWithoutPoolsCheck
425            guard(this,
426                  kMaxInstructionSizeInBytes,
427                  ExactAssemblyScope::kMaximumSize);
428        b(&after_pools);
429      }
430      doing_veneer_pool_generation_ = true;
431      veneer_pool_manager_.Emit(target);
432      doing_veneer_pool_generation_ = false;
433      option = kNoBranchRequired;
434    }
435    // Check if the macro-assembler's internal literal pool should be emitted
436    // to avoid any overflow. If we already generated the veneers, we can
437    // emit the pool (the branch is already done).
438    if ((target > literal_target) || (option == kNoBranchRequired)) {
439      EmitLiteralPool(option);
440    }
441    BindHelper(&after_pools);
442  }
443  if (GetBuffer()->IsManaged()) {
444    bool grow_requested;
445    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
446    if (grow_requested) ComputeCheckpoint();
447  }
448}
449
450
451void MacroAssembler::ComputeCheckpoint() {
452  checkpoint_ = AlignDown(std::min(veneer_pool_manager_.GetCheckpoint(),
453                                   GetTargetForLiteralEmission()),
454                          4);
455  size_t buffer_size = GetBuffer()->GetCapacity();
456  VIXL_ASSERT(IsInt32(buffer_size));
457  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
458  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
459}
460
461
462void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
463                                     EmitOption option) {
464  if (literal_pool->GetSize() > 0) {
465#ifdef VIXL_DEBUG
466    for (LiteralPool::RawLiteralListIterator literal_it =
467             literal_pool->GetFirst();
468         literal_it != literal_pool->GetEnd();
469         literal_it++) {
470      RawLiteral* literal = *literal_it;
471      VIXL_ASSERT(GetCursorOffset() < literal->GetCheckpoint());
472    }
473#endif
474    Label after_literal;
475    if (option == kBranchRequired) {
476      GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
477      VIXL_ASSERT(!AllowAssembler());
478      {
479        ExactAssemblyScopeWithoutPoolsCheck
480            guard(this,
481                  kMaxInstructionSizeInBytes,
482                  ExactAssemblyScope::kMaximumSize);
483        b(&after_literal);
484      }
485    }
486    GetBuffer()->Align();
487    GetBuffer()->EnsureSpaceFor(literal_pool->GetSize());
488    for (LiteralPool::RawLiteralListIterator it = literal_pool->GetFirst();
489         it != literal_pool->GetEnd();
490         it++) {
491      PlaceHelper(*it);
492      GetBuffer()->Align();
493    }
494    if (option == kBranchRequired) BindHelper(&after_literal);
495    literal_pool->Clear();
496  }
497}
498
499
500void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
501  // 32-bit table A32:
502  // adr ip, table
503// add ip, ip, r1, lsl 2
504  // ldr ip, [ip]
505  // jmp: add pc, pc, ip, lsl 2
506  // table:
507  // .int (case_0 - (jmp + 8)) >> 2
508  // .int (case_1 - (jmp + 8)) >> 2
509  // .int (case_2 - (jmp + 8)) >> 2
510
511  // 16-bit table T32:
512  // adr ip, table
513  // jmp: tbh ip, r1
514  // table:
515  // .short (case_0 - (jmp + 4)) >> 1
516  // .short (case_1 - (jmp + 4)) >> 1
517  // .short (case_2 - (jmp + 4)) >> 1
518  // case_0:
519  //   ...
520  //   b end_switch
521  // case_1:
522  //   ...
523  //   b end_switch
524  // ...
525  // end_switch:
526  Label jump_table;
527  UseScratchRegisterScope temps(this);
528  Register scratch = temps.Acquire();
529  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
530
531  // Jump to default if reg is not in [0, table->GetLength()[
532  Cmp(reg, table->GetLength());
533  B(ge, table->GetDefaultLabel());
534
535  Adr(scratch, &jump_table);
536  if (IsUsingA32()) {
537    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
538    switch (table->GetOffsetShift()) {
539      case 0:
540        Ldrb(scratch, MemOperand(scratch));
541        break;
542      case 1:
543        Ldrh(scratch, MemOperand(scratch));
544        break;
545      case 2:
546        Ldr(scratch, MemOperand(scratch));
547        break;
548      default:
549        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
550    }
551    // Emit whatever needs to be emitted if we want to
552    // correctly record the position of the branch instruction
553    uint32_t branch_location = GetCursorOffset();
554    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
555    ExactAssemblyScope scope(this,
556                             table_size + kA32InstructionSizeInBytes,
557                             ExactAssemblyScope::kMaximumSize);
558    add(pc, pc, Operand(scratch, LSL, 2));
559    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
560    bind(&jump_table);
561    GenerateSwitchTable(table, table_size);
562  } else {
563    // Thumb mode - we have tbb and tbh to do this for 8- or 16-bit offsets,
564    // but for 32-bit offsets we use the same coding as for A32.
565    if (table->GetOffsetShift() == 2) {
566      // 32bit offsets
567      Add(scratch, scratch, Operand(reg, LSL, 2));
568      Ldr(scratch, MemOperand(scratch));
569      // Cannot use "add pc, pc, r, lsl 1" as this is unpredictable in T32,
570      // so do the shift beforehand.
571      Lsl(scratch, scratch, 1);
572      // Emit whatever needs to be emitted if we want to
573      // correctly record the position of the branch instruction
574      uint32_t branch_location = GetCursorOffset();
575      table->SetBranchLocation(branch_location +
576                               GetArchitectureStatePCOffset());
577      ExactAssemblyScope scope(this,
578                               table_size + kMaxInstructionSizeInBytes,
579                               ExactAssemblyScope::kMaximumSize);
580      add(pc, pc, scratch);
581      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
582      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
583      bind(&jump_table);
584      GenerateSwitchTable(table, table_size);
585    } else {
586      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
587                  (table->GetOffsetShift() == 1));
588      // Emit whatever needs to be emitted if we want to
589      // correctly record the position of the branch instruction
590      uint32_t branch_location = GetCursorOffset();
591      table->SetBranchLocation(branch_location +
592                               GetArchitectureStatePCOffset());
593      ExactAssemblyScope scope(this,
594                               table_size + kMaxInstructionSizeInBytes,
595                               ExactAssemblyScope::kMaximumSize);
596      if (table->GetOffsetShift() == 0) {
597        // 8bit offsets
598        tbb(scratch, reg);
599      } else {
600        // 16bit offsets
601        tbh(scratch, reg);
602      }
603      // tbb/tbh is a 32bit instruction
604      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
605      bind(&jump_table);
606      GenerateSwitchTable(table, table_size);
607    }
608  }
609}
610
611
612void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
613  table->BindTable(GetCursorOffset());
614  for (int i = 0; i < table_size / 4; i++) {
615    GetBuffer()->Emit32(0);
616  }
617}
618
619
620// switch/case/default : case
621// case_index is assumed to be < table->GetLength()
622// which is checked in JumpTable::Link and Table::SetPresenceBit
623void MacroAssembler::Case(JumpTableBase* table, int case_index) {
624  table->Link(this, case_index, GetCursorOffset());
625  table->SetPresenceBitForCase(case_index);
626}
627
628// switch/case/default : default
629void MacroAssembler::Default(JumpTableBase* table) {
630  Bind(table->GetDefaultLabel());
631}
632
633// switch/case/default : break
634void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
635
636// switch/case/default : finalize
637// Manage the default path, mostly. All empty offsets in the jumptable
638// will point to default.
639// All values not in [0, table->GetLength()[ are already pointing here anyway.
640void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
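
// A rough usage sketch of the Switch/Case/Break/Default/EndSwitch helpers
// above. The concrete JumpTableBase subclass, its constructor arguments and
// the registers are illustrative assumptions of this example:
//   JumpTable16bitOffset table(3);
//   masm.Switch(r0, &table);
//   masm.Case(&table, 0);  /* ... */  masm.Break(&table);
//   masm.Case(&table, 1);  /* ... */  masm.Break(&table);
//   masm.Default(&table);  /* ... */
//   masm.EndSwitch(&table);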
641
642void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
643                                                Register tmp,
644                                                uint32_t imm) {
645  if (IsUintN(16, imm)) {
646    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
647    mov(cond, tmp, imm & 0xffff);
648    return;
649  }
650  if (IsUsingT32()) {
651    if (ImmediateT32::IsImmediateT32(~imm)) {
652      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
653      mvn(cond, tmp, ~imm);
654      return;
655    }
656  } else {
657    if (ImmediateA32::IsImmediateA32(~imm)) {
658      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
659      mvn(cond, tmp, ~imm);
660      return;
661    }
662  }
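  // Fall back to a mov/movt pair. For example (a sketch): imm == 0x12345678
  // matches none of the cases above, so this emits "mov tmp, #0x5678"
  // followed by "movt tmp, #0x1234".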
663  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
664  mov(cond, tmp, imm & 0xffff);
665  movt(cond, tmp, imm >> 16);
666}
667
668
669void MacroAssembler::PadToMinimumBranchRange(Label* label) {
670  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
671  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
672    uint32_t location = last_reference->GetLocation();
673    if (location + k16BitT32InstructionSizeInBytes ==
674        static_cast<uint32_t>(GetCursorOffset())) {
675      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
676      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
677        VIXL_ASSERT(!InITBlock());
678        // A Cbz or a Cbnz can't jump immediately after the instruction. If the
679        // target is immediately after the Cbz or Cbnz, we insert a nop to
680        // avoid that.
681        EmitT32_16(k16BitT32NopOpcode);
682      }
683    }
684  }
685}
686
687
688MemOperand MacroAssembler::MemOperandComputationHelper(
689    Condition cond,
690    Register scratch,
691    Register base,
692    uint32_t offset,
693    uint32_t extra_offset_mask) {
694  VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch));
695  VIXL_ASSERT(!AliasesAvailableScratchRegister(base));
696  VIXL_ASSERT(allow_macro_instructions_);
697  VIXL_ASSERT(OutsideITBlock());
698
699  // Check for the simple pass-through case.
700  if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset);
701
702  MacroEmissionCheckScope guard(this);
703  ITScope it_scope(this, &cond);
704
705  uint32_t load_store_offset = offset & extra_offset_mask;
706  uint32_t add_offset = offset & ~extra_offset_mask;
707
708  if (base.IsPC()) {
709    // Special handling for PC bases. We must read the PC in the first
710    // instruction (and only in that instruction), and we must also take care to
711    // keep the same address calculation as loads and stores. For T32, that
712    // means using something like ADR, which uses AlignDown(PC, 4).
713
714    // We don't handle positive offsets from PC because the intention is not
715    // clear; does the user expect the offset from the current
716    // GetCursorOffset(), or to allow a certain amount of space after the
717    // instruction?
718    VIXL_ASSERT((offset & 0x80000000) != 0);
719    if (IsUsingT32()) {
720      // T32: make the first instruction "SUB (immediate, from PC)" -- an alias
721      // of ADR -- to get behaviour like loads and stores. This ADR can handle
722      // at least as much offset as the load_store_offset so it can replace it.
723
724      uint32_t sub_pc_offset = (-offset) & 0xfff;
725      load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
726      add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;
727
728      ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes);
729      sub(cond, scratch, base, sub_pc_offset);
730
731      if (add_offset == 0) return MemOperand(scratch, load_store_offset);
732
733      // The rest of the offset can be generated in the usual way.
734      base = scratch;
735    }
736    // A32 can use any SUB instruction, so we don't have to do anything special
737    // here except to ensure that we read the PC first.
738  }
739
740  add(cond, scratch, base, add_offset);
741  return MemOperand(scratch, load_store_offset);
742}
743
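// A worked example (a sketch): for an A32 ldr, GetOffsetMask below returns
// 0xfff, so a request for [r1, #0x12345] is split into
//   add scratch, r1, #0x12000
// followed by a MemOperand(scratch, #0x345), keeping the residual offset
// encodable in the load itself.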
744
745uint32_t MacroAssembler::GetOffsetMask(InstructionType type,
746                                       AddrMode addrmode) {
747  switch (type) {
748    case kLdr:
749    case kLdrb:
750    case kStr:
751    case kStrb:
752      if (IsUsingA32() || (addrmode == Offset)) {
753        return 0xfff;
754      } else {
755        return 0xff;
756      }
757    case kLdrsb:
758    case kLdrh:
759    case kLdrsh:
760    case kStrh:
761      if (IsUsingT32() && (addrmode == Offset)) {
762        return 0xfff;
763      } else {
764        return 0xff;
765      }
766    case kVldr:
767    case kVstr:
768      return 0x3fc;
769    case kLdrd:
770    case kStrd:
771      if (IsUsingA32()) {
772        return 0xff;
773      } else {
774        return 0x3fc;
775      }
776    default:
777      VIXL_UNREACHABLE();
778      return 0;
779  }
780}
781
782
783HARDFLOAT void PrintfTrampolineRRRR(
784    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
785  printf(format, a, b, c, d);
786}
787
788
789HARDFLOAT void PrintfTrampolineRRRD(
790    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
791  printf(format, a, b, c, d);
792}
793
794
795HARDFLOAT void PrintfTrampolineRRDR(
796    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
797  printf(format, a, b, c, d);
798}
799
800
801HARDFLOAT void PrintfTrampolineRRDD(
802    const char* format, uint32_t a, uint32_t b, double c, double d) {
803  printf(format, a, b, c, d);
804}
805
806
807HARDFLOAT void PrintfTrampolineRDRR(
808    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
809  printf(format, a, b, c, d);
810}
811
812
813HARDFLOAT void PrintfTrampolineRDRD(
814    const char* format, uint32_t a, double b, uint32_t c, double d) {
815  printf(format, a, b, c, d);
816}
817
818
819HARDFLOAT void PrintfTrampolineRDDR(
820    const char* format, uint32_t a, double b, double c, uint32_t d) {
821  printf(format, a, b, c, d);
822}
823
824
825HARDFLOAT void PrintfTrampolineRDDD(
826    const char* format, uint32_t a, double b, double c, double d) {
827  printf(format, a, b, c, d);
828}
829
830
831HARDFLOAT void PrintfTrampolineDRRR(
832    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
833  printf(format, a, b, c, d);
834}
835
836
837HARDFLOAT void PrintfTrampolineDRRD(
838    const char* format, double a, uint32_t b, uint32_t c, double d) {
839  printf(format, a, b, c, d);
840}
841
842
843HARDFLOAT void PrintfTrampolineDRDR(
844    const char* format, double a, uint32_t b, double c, uint32_t d) {
845  printf(format, a, b, c, d);
846}
847
848
849HARDFLOAT void PrintfTrampolineDRDD(
850    const char* format, double a, uint32_t b, double c, double d) {
851  printf(format, a, b, c, d);
852}
853
854
855HARDFLOAT void PrintfTrampolineDDRR(
856    const char* format, double a, double b, uint32_t c, uint32_t d) {
857  printf(format, a, b, c, d);
858}
859
860
861HARDFLOAT void PrintfTrampolineDDRD(
862    const char* format, double a, double b, uint32_t c, double d) {
863  printf(format, a, b, c, d);
864}
865
866
867HARDFLOAT void PrintfTrampolineDDDR(
868    const char* format, double a, double b, double c, uint32_t d) {
869  printf(format, a, b, c, d);
870}
871
872
873HARDFLOAT void PrintfTrampolineDDDD(
874    const char* format, double a, double b, double c, double d) {
875  printf(format, a, b, c, d);
876}
877
878
879void MacroAssembler::Printf(const char* format,
880                            CPURegister reg1,
881                            CPURegister reg2,
882                            CPURegister reg3,
883                            CPURegister reg4) {
884  // Exclude all registers from the available scratch registers, so
885  // that we are able to use ip below.
886  // TODO: Refactor this function to use UseScratchRegisterScope
887  // for temporary registers below.
888  UseScratchRegisterScope scratch(this);
889  scratch.ExcludeAll();
890  if (generate_simulator_code_) {
891    PushRegister(reg4);
892    PushRegister(reg3);
893    PushRegister(reg2);
894    PushRegister(reg1);
895    Push(RegisterList(r0, r1));
896    StringLiteral* format_literal =
897        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
898    Adr(r0, format_literal);
899    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
900                    (reg2.GetType() << 4) | reg1.GetType();
901    Mov(r1, args);
902    Hvc(kPrintfCode);
903    Pop(RegisterList(r0, r1));
904    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
905               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
906    Drop(size);
907  } else {
908    // Generate on a native platform => 32 bit environment.
909    // Preserve core registers r0-r3, r12, r14, plus r5 (used below for alignment).
910    const uint32_t saved_registers_mask =
911        kCallerSavedRegistersMask | (1 << r5.GetCode());
912    Push(RegisterList(saved_registers_mask));
913    // Push VFP registers.
914    Vpush(Untyped64, DRegisterList(d0, 8));
915    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
916    // Find one register which has been saved and which doesn't need to be
917    // printed.
918    RegisterList available_registers(kCallerSavedRegistersMask);
919    if (reg1.GetType() == CPURegister::kRRegister) {
920      available_registers.Remove(Register(reg1.GetCode()));
921    }
922    if (reg2.GetType() == CPURegister::kRRegister) {
923      available_registers.Remove(Register(reg2.GetCode()));
924    }
925    if (reg3.GetType() == CPURegister::kRRegister) {
926      available_registers.Remove(Register(reg3.GetCode()));
927    }
928    if (reg4.GetType() == CPURegister::kRRegister) {
929      available_registers.Remove(Register(reg4.GetCode()));
930    }
931    Register tmp = available_registers.GetFirstAvailableRegister();
932    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
933    // Push the flags.
934    Mrs(tmp, APSR);
935    Push(tmp);
936    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
937    Push(tmp);
938    // Push the registers to print on the stack.
939    PushRegister(reg4);
940    PushRegister(reg3);
941    PushRegister(reg2);
942    PushRegister(reg1);
943    int core_count = 1;
944    int vfp_count = 0;
945    uint32_t printf_type = 0;
946    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
947    // Reg4 may stay on the stack if all the registers to print are core
948    // registers.
949    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
950    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
951    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
952    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
953    // Ensure that the stack is aligned on 8 bytes.
954    And(r5, sp, 0x7);
955    if (core_count == 5) {
956      // One 32-bit argument (reg4) has been left on the stack => align the
957      // stack before the argument (pop it, align, then push it back) so it
958      // stays on top for the call.
959      Pop(r0);
960      Sub(sp, sp, r5);
961      Push(r0);
962    } else {
963      Sub(sp, sp, r5);
964    }
965    // Select the right trampoline depending on the arguments.
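    // (On the encoding, derived from PreparePrintfArgument: bit n of
    // printf_type is set when argument n+1 was passed in a VFP register. For
    // example, arguments of types (core, double, core, core) give
    // printf_type == 2, which selects PrintfTrampolineRDRR.)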
966    uintptr_t address;
967    switch (printf_type) {
968      case 0:
969        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
970        break;
971      case 1:
972        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
973        break;
974      case 2:
975        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
976        break;
977      case 3:
978        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
979        break;
980      case 4:
981        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
982        break;
983      case 5:
984        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
985        break;
986      case 6:
987        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
988        break;
989      case 7:
990        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
991        break;
992      case 8:
993        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
994        break;
995      case 9:
996        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
997        break;
998      case 10:
999        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
1000        break;
1001      case 11:
1002        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
1003        break;
1004      case 12:
1005        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
1006        break;
1007      case 13:
1008        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
1009        break;
1010      case 14:
1011        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
1012        break;
1013      case 15:
1014        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
1015        break;
1016      default:
1017        VIXL_UNREACHABLE();
1018        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
1019        break;
1020    }
1021    StringLiteral* format_literal =
1022        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
1023    Adr(r0, format_literal);
1024    Mov(ip, Operand::From(address));
1025    Blx(ip);
1026    // If register reg4 was left on the stack => skip it.
1027    if (core_count == 5) Drop(kRegSizeInBytes);
1028    // Restore the stack as it was before alignment.
1029    Add(sp, sp, r5);
1030    // Restore the flags.
1031    Pop(tmp);
1032    Vmsr(FPSCR, tmp);
1033    Pop(tmp);
1034    Msr(APSR_nzcvqg, tmp);
1035    // Restore the registers.
1036    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
1037    Vpop(Untyped64, DRegisterList(d0, 8));
1038    Pop(RegisterList(saved_registers_mask));
1039  }
1040}
1041
1042
1043void MacroAssembler::PushRegister(CPURegister reg) {
1044  switch (reg.GetType()) {
1045    case CPURegister::kNoRegister:
1046      break;
1047    case CPURegister::kRRegister:
1048      Push(Register(reg.GetCode()));
1049      break;
1050    case CPURegister::kSRegister:
1051      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
1052      break;
1053    case CPURegister::kDRegister:
1054      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
1055      break;
1056    case CPURegister::kQRegister:
1057      VIXL_UNIMPLEMENTED();
1058      break;
1059  }
1060}
1061
1062
1063void MacroAssembler::PreparePrintfArgument(CPURegister reg,
1064                                           int* core_count,
1065                                           int* vfp_count,
1066                                           uint32_t* printf_type) {
1067  switch (reg.GetType()) {
1068    case CPURegister::kNoRegister:
1069      break;
1070    case CPURegister::kRRegister:
1071      VIXL_ASSERT(*core_count <= 4);
1072      if (*core_count < 4) Pop(Register(*core_count));
1073      *core_count += 1;
1074      break;
1075    case CPURegister::kSRegister:
1076      VIXL_ASSERT(*vfp_count < 4);
1077      *printf_type |= 1 << (*core_count + *vfp_count - 1);
1078      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
1079      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
1080      *vfp_count += 1;
1081      break;
1082    case CPURegister::kDRegister:
1083      VIXL_ASSERT(*vfp_count < 4);
1084      *printf_type |= 1 << (*core_count + *vfp_count - 1);
1085      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
1086      *vfp_count += 1;
1087      break;
1088    case CPURegister::kQRegister:
1089      VIXL_UNIMPLEMENTED();
1090      break;
1091  }
1092}
1093
1094
1095void MacroAssembler::Delegate(InstructionType type,
1096                              InstructionCondROp instruction,
1097                              Condition cond,
1098                              Register rn,
1099                              const Operand& operand) {
1100  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
1101              (type == kUxtb16));
1102
1103  if (type == kMovt) {
1104    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.\n");
1105  }
1106
1107  // This delegate only supports teq with immediates.
1108  CONTEXT_SCOPE;
1109  if ((type == kTeq) && operand.IsImmediate()) {
1110    UseScratchRegisterScope temps(this);
1111    Register scratch = temps.Acquire();
1112    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
1113    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1114    teq(cond, rn, scratch);
1115    return;
1116  }
1117  Assembler::Delegate(type, instruction, cond, rn, operand);
1118}
1119
1120
1121void MacroAssembler::Delegate(InstructionType type,
1122                              InstructionCondSizeROp instruction,
1123                              Condition cond,
1124                              EncodingSize size,
1125                              Register rn,
1126                              const Operand& operand) {
1127  CONTEXT_SCOPE;
1128  VIXL_ASSERT(size.IsBest());
1129  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
1130              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
1131              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
1132              (type == kUxtb) || (type == kUxth));
1133  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1134    VIXL_ASSERT((type != kMov) || (type != kMovs));
1135    InstructionCondRROp shiftop = NULL;
1136    switch (operand.GetShift().GetType()) {
1137      case LSL:
1138        shiftop = &Assembler::lsl;
1139        break;
1140      case LSR:
1141        shiftop = &Assembler::lsr;
1142        break;
1143      case ASR:
1144        shiftop = &Assembler::asr;
1145        break;
1146      case RRX:
1147        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1148        VIXL_UNREACHABLE();
1149        break;
1150      case ROR:
1151        shiftop = &Assembler::ror;
1152        break;
1153      default:
1154        VIXL_UNREACHABLE();
1155    }
1156    if (shiftop != NULL) {
1157      UseScratchRegisterScope temps(this);
1158      Register scratch = temps.Acquire();
1159      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1160      (this->*shiftop)(cond,
1161                       scratch,
1162                       operand.GetBaseRegister(),
1163                       operand.GetShiftRegister());
1164      (this->*instruction)(cond, size, rn, scratch);
1165      return;
1166    }
1167  }
1168  if (operand.IsImmediate()) {
1169    uint32_t imm = operand.GetImmediate();
1170    switch (type) {
1171      case kMov:
1172      case kMovs:
1173        if (!rn.IsPC()) {
1174          // Immediate is too large, but not using PC, so handle with mov{t}.
1175          HandleOutOfBoundsImmediate(cond, rn, imm);
1176          if (type == kMovs) {
1177            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1178            tst(cond, rn, rn);
1179          }
1180          return;
1181        } else if (type == kMov) {
1182          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
1183          // Immediate is too large and using PC, so handle using a temporary
1184          // register.
1185          UseScratchRegisterScope temps(this);
1186          Register scratch = temps.Acquire();
1187          HandleOutOfBoundsImmediate(al, scratch, imm);
1188          EnsureEmitFor(kMaxInstructionSizeInBytes);
1189          bx(cond, scratch);
1190          return;
1191        }
1192        break;
1193      case kCmn:
1194      case kCmp:
1195        if (IsUsingA32() || !rn.IsPC()) {
1196          UseScratchRegisterScope temps(this);
1197          Register scratch = temps.Acquire();
1198          HandleOutOfBoundsImmediate(cond, scratch, imm);
1199          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1200          (this->*instruction)(cond, size, rn, scratch);
1201          return;
1202        }
1203        break;
1204      case kMvn:
1205      case kMvns:
1206        if (!rn.IsPC()) {
1207          UseScratchRegisterScope temps(this);
1208          Register scratch = temps.Acquire();
1209          HandleOutOfBoundsImmediate(cond, scratch, imm);
1210          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1211          (this->*instruction)(cond, size, rn, scratch);
1212          return;
1213        }
1214        break;
1215      case kTst:
1216        if (IsUsingA32() || !rn.IsPC()) {
1217          UseScratchRegisterScope temps(this);
1218          Register scratch = temps.Acquire();
1219          HandleOutOfBoundsImmediate(cond, scratch, imm);
1220          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1221          (this->*instruction)(cond, size, rn, scratch);
1222          return;
1223        }
1224        break;
1225      default:  // kSxtb, Sxth, Uxtb, Uxth
1226        break;
1227    }
1228  }
1229  Assembler::Delegate(type, instruction, cond, size, rn, operand);
1230}
1231
1232
1233void MacroAssembler::Delegate(InstructionType type,
1234                              InstructionCondRROp instruction,
1235                              Condition cond,
1236                              Register rd,
1237                              Register rn,
1238                              const Operand& operand) {
1239  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
1240      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
1241      (type == kPkhbt) || (type == kPkhtb)) {
1242    UnimplementedDelegate(type);
1243    return;
1244  }
1245
1246  // This delegate only handles the following instructions.
1247  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
1248              (type == kRscs));
1249  CONTEXT_SCOPE;
1250
1251  // T32 does not support register shifted register operands, emulate it.
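  // For example (a sketch): Orn(r0, r1, Operand(r2, LSL, r3)) on T32 is
  // emulated as "lsl scratch, r2, r3" followed by "orn r0, r1, scratch".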
1252  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1253    InstructionCondRROp shiftop = NULL;
1254    switch (operand.GetShift().GetType()) {
1255      case LSL:
1256        shiftop = &Assembler::lsl;
1257        break;
1258      case LSR:
1259        shiftop = &Assembler::lsr;
1260        break;
1261      case ASR:
1262        shiftop = &Assembler::asr;
1263        break;
1264      case RRX:
1265        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1266        VIXL_UNREACHABLE();
1267        break;
1268      case ROR:
1269        shiftop = &Assembler::ror;
1270        break;
1271      default:
1272        VIXL_UNREACHABLE();
1273    }
1274    if (shiftop != NULL) {
1275      UseScratchRegisterScope temps(this);
1276      Register rm = operand.GetBaseRegister();
1277      Register rs = operand.GetShiftRegister();
1278      // Try to use rd as a scratch register. We can do this if it aliases rs or
1279      // rm (because we read them in the first instruction), but not rn.
1280      if (!rd.Is(rn)) temps.Include(rd);
1281      Register scratch = temps.Acquire();
1282      // TODO: The scope length was measured empirically. We should analyse the
1283      // worst-case size and add targeted tests.
1284      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1285      (this->*shiftop)(cond, scratch, rm, rs);
1286      (this->*instruction)(cond, rd, rn, scratch);
1287      return;
1288    }
1289  }
1290
1291  // T32 does not have an Rsc instruction, so negate the lhs input and turn
1292  // it into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
1293  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
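  // (Both compute "operand - rn - 1 + C": RSC is "operand - rn - NOT(C)",
  // while ADC of NOT(rn) is "NOT(rn) + operand + C" with NOT(rn) == -rn - 1.)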
1294  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1295    // The RegisterShiftRegister case should have been handled above.
1296    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1297    UseScratchRegisterScope temps(this);
1298    // Try to use rd as a scratch register. We can do this if it aliases rn
1299    // (because we read it in the first instruction), but not rm.
1300    temps.Include(rd);
1301    temps.Exclude(operand);
1302    Register negated_rn = temps.Acquire();
1303    {
1304      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1305      mvn(cond, negated_rn, rn);
1306    }
1307    if (type == kRsc) {
1308      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1309      adc(cond, rd, negated_rn, operand);
1310      return;
1311    }
1312    // TODO: We shouldn't have to specify how much space the next instruction
1313    // needs.
1314    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1315    adcs(cond, rd, negated_rn, operand);
1316    return;
1317  }
1318
1319  if (operand.IsImmediate()) {
1320    // If the immediate can be encoded when inverted, turn Orn into Orr.
1321    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
1322    // immediate into a scratch register.
1323    int32_t imm = operand.GetSignedImmediate();
1324    if (((type == kOrn) || (type == kOrns)) && IsModifiedImmediate(~imm)) {
1325      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1326      switch (type) {
1327        case kOrn:
1328          orr(cond, rd, rn, ~imm);
1329          return;
1330        case kOrns:
1331          orrs(cond, rd, rn, ~imm);
1332          return;
1333        default:
1334          VIXL_UNREACHABLE();
1335          break;
1336      }
1337    }
1338  }
1339
1340  // A32 does not have an Orn instruction, so negate the rhs input and turn
1341  // it into an Orr.
1342  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1343    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
1344    //  mvn r0, r2
1345    //  orr r0, r1, r0
1346    Register scratch;
1347    UseScratchRegisterScope temps(this);
1348    // Try to use rd as a scratch register. We can do this if it aliases a
1349    // register used by the operand (read in the first instruction), but not rn.
1350    if (!rd.Is(rn)) temps.Include(rd);
1351    scratch = temps.Acquire();
1352    {
1353      // TODO: We shouldn't have to specify how much space the next instruction
1354      // needs.
1355      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1356      mvn(cond, scratch, operand);
1357    }
1358    if (type == kOrns) {
1359      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1360      orrs(cond, rd, rn, scratch);
1361      return;
1362    }
1363    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1364    orr(cond, rd, rn, scratch);
1365    return;
1366  }
1367
1368  if (operand.IsImmediate()) {
1369    UseScratchRegisterScope temps(this);
1370    // Allow using the destination as a scratch register if possible.
1371    if (!rd.Is(rn)) temps.Include(rd);
1372    Register scratch = temps.Acquire();
1373    int32_t imm = operand.GetSignedImmediate();
1374    HandleOutOfBoundsImmediate(cond, scratch, imm);
1375    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1376    (this->*instruction)(cond, rd, rn, scratch);
1377    return;
1378  }
1379  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1380}
1381
1382
1383void MacroAssembler::Delegate(InstructionType type,
1384                              InstructionCondSizeRL instruction,
1385                              Condition cond,
1386                              EncodingSize size,
1387                              Register rd,
1388                              Label* label) {
1389  VIXL_ASSERT((type == kLdr) || (type == kAdr));
1390
1391  CONTEXT_SCOPE;
1392  VIXL_ASSERT(size.IsBest());
1393
1394  if ((type == kLdr) && label->IsBound()) {
1395    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
1396    UseScratchRegisterScope temps(this);
1397    temps.Include(rd);
1398    uint32_t mask = GetOffsetMask(type, Offset);
1399    ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), label, mask));
1400    return;
1401  }
1402
1403  Assembler::Delegate(type, instruction, cond, size, rd, label);
1404}
1405
1406
1407bool MacroAssembler::GenerateSplitInstruction(
1408    InstructionCondSizeRROp instruction,
1409    Condition cond,
1410    Register rd,
1411    Register rn,
1412    uint32_t imm,
1413    uint32_t mask) {
1414  uint32_t high = imm & ~mask;
1415  if (!IsModifiedImmediate(high) && !rn.IsPC()) return false;
1416  // If high is a modified immediate, we can perform the operation with
1417  // only 2 instructions.
1418  // Otherwise, if rn is PC, we want to avoid moving PC into a temporary.
1419  // Therefore, we also use this pattern even if the second instruction may
1420  // itself expand to 3 instructions.
1421  uint32_t low = imm & mask;
1422  CodeBufferCheckScope scope(this,
1423                             (rn.IsPC() ? 4 : 2) * kMaxInstructionSizeInBytes);
1424  (this->*instruction)(cond, Best, rd, rn, low);
1425  (this->*instruction)(cond, Best, rd, rd, high);
1426  return true;
1427}
1428
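// A worked example of the split (a sketch, A32 case): Add(r0, r1, 0x1234) has
// no single-instruction encoding; the caller below computes mask == 0x3fc
// (CountTrailingZeros(0x1234) == 2), so low == 0x234 and high == 0x1000, and
// this expands to "add r0, r1, #0x234" followed by "add r0, r0, #0x1000".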
1429
1430void MacroAssembler::Delegate(InstructionType type,
1431                              InstructionCondSizeRROp instruction,
1432                              Condition cond,
1433                              EncodingSize size,
1434                              Register rd,
1435                              Register rn,
1436                              const Operand& operand) {
1437  VIXL_ASSERT(
1438      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1439      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1440      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1441      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1442      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1443      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1444      (type == kSub) || (type == kSubs));
1445
1446  CONTEXT_SCOPE;
1447  VIXL_ASSERT(size.IsBest());
1448  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1449    InstructionCondRROp shiftop = NULL;
1450    switch (operand.GetShift().GetType()) {
1451      case LSL:
1452        shiftop = &Assembler::lsl;
1453        break;
1454      case LSR:
1455        shiftop = &Assembler::lsr;
1456        break;
1457      case ASR:
1458        shiftop = &Assembler::asr;
1459        break;
1460      case RRX:
1461        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1462        VIXL_UNREACHABLE();
1463        break;
1464      case ROR:
1465        shiftop = &Assembler::ror;
1466        break;
1467      default:
1468        VIXL_UNREACHABLE();
1469    }
1470    if (shiftop != NULL) {
1471      UseScratchRegisterScope temps(this);
1472      Register rm = operand.GetBaseRegister();
1473      Register rs = operand.GetShiftRegister();
1474      // Try to use rd as a scratch register. We can do this if it aliases rs or
1475      // rm (because we read them in the first instruction), but not rn.
1476      if (!rd.Is(rn)) temps.Include(rd);
1477      Register scratch = temps.Acquire();
1478      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1479      (this->*shiftop)(cond, scratch, rm, rs);
1480      (this->*instruction)(cond, size, rd, rn, scratch);
1481      return;
1482    }
1483  }
1484  if (operand.IsImmediate()) {
1485    int32_t imm = operand.GetSignedImmediate();
1486    if (ImmediateT32::IsImmediateT32(~imm)) {
1487      if (IsUsingT32()) {
1488        switch (type) {
1489          case kOrr:
1490            orn(cond, rd, rn, ~imm);
1491            return;
1492          case kOrrs:
1493            orns(cond, rd, rn, ~imm);
1494            return;
1495          default:
1496            break;
1497        }
1498      }
1499    }
1500    if (imm < 0) {
1501      InstructionCondSizeRROp asmcb = NULL;
1502      // Add and sub are equivalent using an arithmetic negation:
1503      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1504      // Add and sub with carry are equivalent using a bitwise NOT:
1505      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
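      // (For the carry forms: SBC computes "rn - op - NOT(C)"; with
      // op == NOT(imm) == -imm - 1, that is "rn + imm + C", i.e. the ADC.)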
1506      switch (type) {
1507        case kAdd:
1508          asmcb = &Assembler::sub;
1509          imm = -imm;
1510          break;
1511        case kAdds:
1512          asmcb = &Assembler::subs;
1513          imm = -imm;
1514          break;
1515        case kSub:
1516          asmcb = &Assembler::add;
1517          imm = -imm;
1518          break;
1519        case kSubs:
1520          asmcb = &Assembler::adds;
1521          imm = -imm;
1522          break;
1523        case kAdc:
1524          asmcb = &Assembler::sbc;
1525          imm = ~imm;
1526          break;
1527        case kAdcs:
1528          asmcb = &Assembler::sbcs;
1529          imm = ~imm;
1530          break;
1531        case kSbc:
1532          asmcb = &Assembler::adc;
1533          imm = ~imm;
1534          break;
1535        case kSbcs:
1536          asmcb = &Assembler::adcs;
1537          imm = ~imm;
1538          break;
1539        default:
1540          break;
1541      }
1542      if (asmcb != NULL) {
1543        CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes);
1544        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1545        return;
1546      }
1547    }
1548
1549    // When rn is PC, only handle negative offsets. The correct way to handle
1550    // positive offsets isn't clear; does the user want the offset from the
1551    // start of the macro, or from the end (to allow a certain amount of space)?
1552    // When type is Add or Sub, imm is always positive (imm < 0 has just been
1553    // handled and imm == 0 would have been generated without the need of a
1554    // delegate). Therefore, only add to PC is forbidden here.
1555    if ((((type == kAdd) && !rn.IsPC()) || (type == kSub)) &&
1556        (IsUsingA32() || (!rd.IsPC() && !rn.IsPC()))) {
1557      VIXL_ASSERT(imm > 0);
1558      // Try to break the constant into two modified immediates.
1559      // For T32 also try to break the constant into one imm12 and one modified
1560      // immediate. Count the trailing zeroes, rounded down to an even number.
1561      int trailing_zeroes = CountTrailingZeros(imm) & ~1u;
1562      uint32_t mask = ((trailing_zeroes < 4) && IsUsingT32())
1563                          ? 0xfff
1564                          : (0xff << trailing_zeroes);
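      // Illustrative example: for imm == 0x00ab0cd0, trailing_zeroes is 4 and
      // mask is 0xff0, so (if both halves are encodable) the split emits
      // roughly:
      //   add rd, rn, #0xcd0
      //   add rd, rd, #0xab0000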
1565      if (GenerateSplitInstruction(instruction, cond, rd, rn, imm, mask)) {
1566        return;
1567      }
1568      InstructionCondSizeRROp asmcb = NULL;
1569      switch (type) {
1570        case kAdd:
1571          asmcb = &Assembler::sub;
1572          break;
1573        case kSub:
1574          asmcb = &Assembler::add;
1575          break;
1576        default:
1577          VIXL_UNREACHABLE();
1578      }
1579      if (GenerateSplitInstruction(asmcb, cond, rd, rn, -imm, mask)) {
1580        return;
1581      }
1582    }
1583
1584    UseScratchRegisterScope temps(this);
1585    // Allow using the destination as a scratch register if possible.
1586    if (!rd.Is(rn)) temps.Include(rd);
1587    if (rn.IsPC()) {
1588      // If we're reading the PC, we need to do it in the first instruction,
1589      // otherwise we'll read the wrong value. We rely on this to handle the
1590      // long-range PC-relative MemOperands which can result from user-managed
1591      // literals.
1592
1593      // Only handle negative offsets. The correct way to handle positive
1594      // offsets isn't clear; does the user want the offset from the start of
1595      // the macro, or from the end (to allow a certain amount of space)?
1596      bool offset_is_negative_or_zero = (imm <= 0);
1597      switch (type) {
1598        case kAdd:
1599        case kAdds:
1600          offset_is_negative_or_zero = (imm <= 0);
1601          break;
1602        case kSub:
1603        case kSubs:
1604          offset_is_negative_or_zero = (imm >= 0);
1605          break;
1606        case kAdc:
1607        case kAdcs:
1608          offset_is_negative_or_zero = (imm < 0);
1609          break;
1610        case kSbc:
1611        case kSbcs:
1612          offset_is_negative_or_zero = (imm > 0);
1613          break;
1614        default:
1615          break;
1616      }
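      // For example (illustrative), Sub(r0, pc, 0x12345) becomes roughly:
      //   mov scratch, pc
      //   sub r0, scratch, #0x12345
      // where the second instruction may itself be expanded further (for
      // instance split into two immediates).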
1617      if (offset_is_negative_or_zero) {
1618        {
1619          rn = temps.Acquire();
1620          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1621          mov(cond, rn, pc);
1622        }
1623        // Recurse rather than falling through, to try to get the immediate into
1624        // a single instruction.
1625        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1626        (this->*instruction)(cond, size, rd, rn, operand);
1627        return;
1628      }
1629    } else {
1630      Register scratch = temps.Acquire();
1631      // TODO: The scope length was measured empirically. We should analyse the
1632      // worst-case size and add targeted tests.
1633      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1634      mov(cond, scratch, operand.GetImmediate());
1635      (this->*instruction)(cond, size, rd, rn, scratch);
1636      return;
1637    }
1638  }
1639  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1640}
1641
1642
1643void MacroAssembler::Delegate(InstructionType type,
1644                              InstructionRL instruction,
1645                              Register rn,
1646                              Label* label) {
1647  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1648
1649  CONTEXT_SCOPE;
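  // cbz/cbnz exist only in T32, only accept a low register, and can only
  // branch forwards over a small range. When the assembler cannot encode the
  // branch directly, it is synthesised by inverting the test around an
  // unconditional branch. For example (illustrative):
  //   Cbnz(r0, &label)  ->  cbz r0, &done
  //                         b &label
  //                       done: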
1650  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1651  if (IsUsingA32()) {
1652    if (type == kCbz) {
1653      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1654    } else {
1655      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1656    }
1657  } else if (rn.IsLow()) {
1658    switch (type) {
1659      case kCbnz: {
1660        Label done;
1661        cbz(rn, &done);
1662        b(label);
1663        Bind(&done);
1664        return;
1665      }
1666      case kCbz: {
1667        Label done;
1668        cbnz(rn, &done);
1669        b(label);
1670        Bind(&done);
1671        return;
1672      }
1673      default:
1674        break;
1675    }
1676  }
1677  Assembler::Delegate(type, instruction, rn, label);
1678}
1679
1680
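// Returns true if every byte of imm is either 0x00 or 0xff, i.e. if imm (once
// replicated to 64 bits) is representable by the vmov.i64 byte-mask encoding.
// For example (illustrative): IsI64BitPattern(0xff0000ffu) is true, whereas
// IsI64BitPattern(0xff0001ffu) is false because of the 0x01 byte.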
1681template <typename T>
1682static inline bool IsI64BitPattern(T imm) {
1683  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1684    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1685  }
1686  return true;
1687}
1688
1689
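// Returns true if all bytes of imm are identical, so that a vmov of imm can
// be narrowed to a vmov.i8 of its low byte. For example (illustrative):
// IsI8BitPattern(0xabababab) is true, IsI8BitPattern(0xabab00ab) is false.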
1690template <typename T>
1691static inline bool IsI8BitPattern(T imm) {
1692  uint8_t imm8 = imm & 0xff;
1693  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1694    imm >>= 8;
1695    if ((imm & 0xff) != imm8) return false;
1696  }
1697  return true;
1698}
1699
1700
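// Returns true if imm32 matches one of the byte patterns listed below, i.e.
// if 'vmov.i32 <Qd|Dd>, #imm32' can instead be emitted as
// 'vmvn.i32 <Qd|Dd>, #~imm32'. For example (illustrative):
// CanBeInverted(0xffab0000) is true, and the caller can then emit
// 'vmvn.i32 d0, #0x0054ffff' rather than materialising 0xffab0000.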
1701static inline bool CanBeInverted(uint32_t imm32) {
1702  uint32_t fill8 = 0;
1703
1704  if ((imm32 & 0xffffff00) == 0xffffff00) {
1705    //    11111111 11111111 11111111 abcdefgh
1706    return true;
1707  }
1708  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1709    fill8 = imm32 & 0xff;
1710    imm32 >>= 8;
1711    if ((imm32 >> 8) == 0xffff) {
1712      //    11111111 11111111 abcdefgh 00000000
1713      // or 11111111 11111111 abcdefgh 11111111
1714      return true;
1715    }
1716    if ((imm32 & 0xff) == fill8) {
1717      imm32 >>= 8;
1718      if ((imm32 >> 8) == 0xff) {
1719        //    11111111 abcdefgh 00000000 00000000
1720        // or 11111111 abcdefgh 11111111 11111111
1721        return true;
1722      }
1723      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1724        //    abcdefgh 11111111 11111111 11111111
1725        return true;
1726      }
1727    }
1728  }
1729  return false;
1730}
1731
1732
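// Replicates imm across the wider type RES. For example (illustrative):
// replicate<uint64_t>(UINT32_C(0xff0000ff)) == UINT64_C(0xff0000ffff0000ff).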
1733template <typename RES, typename T>
1734static inline RES replicate(T imm) {
1735  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1736              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1737  RES res = imm;
1738  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1739    res = (res << (sizeof(T) * 8)) | imm;
1740  }
1741  return res;
1742}
1743
1744
1745void MacroAssembler::Delegate(InstructionType type,
1746                              InstructionCondDtSSop instruction,
1747                              Condition cond,
1748                              DataType dt,
1749                              SRegister rd,
1750                              const SOperand& operand) {
1751  CONTEXT_SCOPE;
1752  if (type == kVmov) {
1753    if (operand.IsImmediate() && dt.Is(F32)) {
1754      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1755      if (neon_imm.CanConvert<float>()) {
1756        // movw ip, imm16
1757        // movt ip, imm16
1758        // vmov s0, ip
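        // For example (illustrative), for a value with no single-instruction
        // encoding such as 0.1f (0x3dcccccd):
        //   mov ip, #0x3dcccccd
        //   vmov s0, ip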
1759        UseScratchRegisterScope temps(this);
1760        Register scratch = temps.Acquire();
1761        float f = neon_imm.GetImmediate<float>();
1762        // TODO: The scope length was measured empirically. We should
1763        // analyse the worst-case size and add targeted tests.
1765        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1766        mov(cond, scratch, FloatToRawbits(f));
1767        vmov(cond, rd, scratch);
1768        return;
1769      }
1770    }
1771  }
1772  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1773}
1774
1775
1776void MacroAssembler::Delegate(InstructionType type,
1777                              InstructionCondDtDDop instruction,
1778                              Condition cond,
1779                              DataType dt,
1780                              DRegister rd,
1781                              const DOperand& operand) {
1782  CONTEXT_SCOPE;
1783  if (type == kVmov) {
1784    if (operand.IsImmediate()) {
1785      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1786      switch (dt.GetValue()) {
1787        case I32:
1788          if (neon_imm.CanConvert<uint32_t>()) {
1789            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1790            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1791            if (IsI8BitPattern(imm)) {
1792              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1793              vmov(cond, I8, rd, imm & 0xff);
1794              return;
1795            }
1796            // vmov.i32 d0, 0xff0000ff will translate into
1797            // vmov.i64 d0, 0xff0000ffff0000ff
1798            if (IsI64BitPattern(imm)) {
1799              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1800              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1801              return;
1802            }
1803            // vmov.i32 d0, 0xffab0000 will translate into
1804            // vmvn.i32 d0, 0x0054ffff
1805            if (cond.Is(al) && CanBeInverted(imm)) {
1806              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1807              vmvn(I32, rd, ~imm);
1808              return;
1809            }
1810          }
1811          break;
1812        case I16:
1813          if (neon_imm.CanConvert<uint16_t>()) {
1814            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1815            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1816            if (IsI8BitPattern(imm)) {
1817              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1818              vmov(cond, I8, rd, imm & 0xff);
1819              return;
1820            }
1821          }
1822          break;
1823        case I64:
1824          if (neon_imm.CanConvert<uint64_t>()) {
1825            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1826            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1827            if (IsI8BitPattern(imm)) {
1828              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1829              vmov(cond, I8, rd, imm & 0xff);
1830              return;
1831            }
1832            // mov ip, lo(imm64)
1833            // vdup d0, ip
1834            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to
1835            // be preserved.
1836            {
1837              UseScratchRegisterScope temps(this);
1838              Register scratch = temps.Acquire();
1839              {
1840                // TODO: The scope length was measured empirically. We
1841                // should analyse the worst-case size and add targeted tests.
1843                CodeBufferCheckScope scope(this,
1844                                           2 * kMaxInstructionSizeInBytes);
1845                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1846              }
1847              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1848              vdup(cond, Untyped32, rd, scratch);
1849            }
1850            // mov ip, hi(imm64)
1851            // vmov d0[1], ip
1852            {
1853              UseScratchRegisterScope temps(this);
1854              Register scratch = temps.Acquire();
1855              {
1856                // TODO: The scope length was measured empirically. We
1857                // should analyse the worst-case size and add targeted tests.
1859                CodeBufferCheckScope scope(this,
1860                                           2 * kMaxInstructionSizeInBytes);
1861                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1862              }
1863              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1864              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1865            }
1866            return;
1867          }
1868          break;
1869        default:
1870          break;
1871      }
1872      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1873      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1874        // mov ip, imm32
1875        // vdup.16 or vdup.32 d0, ip (depending on dt)
1876        UseScratchRegisterScope temps(this);
1877        Register scratch = temps.Acquire();
1878        {
1879          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1880          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1881        }
1882        DataTypeValue vdup_dt = Untyped32;
1883        switch (dt.GetValue()) {
1884          case I16:
1885            vdup_dt = Untyped16;
1886            break;
1887          case I32:
1888            vdup_dt = Untyped32;
1889            break;
1890          default:
1891            VIXL_UNREACHABLE();
1892        }
1893        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1894        vdup(cond, vdup_dt, rd, scratch);
1895        return;
1896      }
1897      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1898        float f = neon_imm.GetImmediate<float>();
1899        // Punt to vmov.i32
1900        // TODO: The scope length was guessed based on the double case below. We
1901        // should analyse the worst-case size and add targeted tests.
1902        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1903        vmov(cond, I32, rd, FloatToRawbits(f));
1904        return;
1905      }
1906      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1907        // Punt to vmov.i64
1908        double d = neon_imm.GetImmediate<double>();
1909        // TODO: The scope length was measured empirically. We should
1910        // analyse the worst-case size and add targeted tests.
1912        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1913        vmov(cond, I64, rd, DoubleToRawbits(d));
1914        return;
1915      }
1916    }
1917  }
1918  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1919}
1920
1921
1922void MacroAssembler::Delegate(InstructionType type,
1923                              InstructionCondDtQQop instruction,
1924                              Condition cond,
1925                              DataType dt,
1926                              QRegister rd,
1927                              const QOperand& operand) {
1928  CONTEXT_SCOPE;
1929  if (type == kVmov) {
1930    if (operand.IsImmediate()) {
1931      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1932      switch (dt.GetValue()) {
1933        case I32:
1934          if (neon_imm.CanConvert<uint32_t>()) {
1935            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1936            // vmov.i32 q0, 0xabababab will translate into vmov.i8 q0, 0xab
1937            if (IsI8BitPattern(imm)) {
1938              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1939              vmov(cond, I8, rd, imm & 0xff);
1940              return;
1941            }
1942            // vmov.i32 q0, 0xff0000ff will translate into
1943            // vmov.i64 q0, 0xff0000ffff0000ff
1944            if (IsI64BitPattern(imm)) {
1945              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1946              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1947              return;
1948            }
1949            // vmov.i32 q0, 0xffab0000 will translate into
1950            // vmvn.i32 q0, 0x0054ffff
1951            if (CanBeInverted(imm)) {
1952              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1953              vmvn(cond, I32, rd, ~imm);
1954              return;
1955            }
1956          }
1957          break;
1958        case I16:
1959          if (neon_imm.CanConvert<uint16_t>()) {
1960            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1961            // vmov.i16 q0, 0xabab will translate into vmov.i8 q0, 0xab
1962            if (IsI8BitPattern(imm)) {
1963              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1964              vmov(cond, I8, rd, imm & 0xff);
1965              return;
1966            }
1967          }
1968          break;
1969        case I64:
1970          if (neon_imm.CanConvert<uint64_t>()) {
1971            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1972            // vmov.i64 q0, -1 will translate into vmov.i8 q0, 0xff
1973            if (IsI8BitPattern(imm)) {
1974              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1975              vmov(cond, I8, rd, imm & 0xff);
1976              return;
1977            }
1978            // mov ip, lo(imm64)
1979            // vdup q0, ip
1980            // vdup is preferred to 'vmov d0[0]' as the other lanes of q0
1981            // do not need to be preserved.
1982            {
1983              UseScratchRegisterScope temps(this);
1984              Register scratch = temps.Acquire();
1985              {
1986                CodeBufferCheckScope scope(this,
1987                                           2 * kMaxInstructionSizeInBytes);
1988                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1989              }
1990              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1991              vdup(cond, Untyped32, rd, scratch);
1992            }
1993            // mov ip, hi(imm64)
1994            // vmov.i32 d0[1], ip
1995            // vmov d1, d0
1996            {
1997              UseScratchRegisterScope temps(this);
1998              Register scratch = temps.Acquire();
1999              {
2000                CodeBufferCheckScope scope(this,
2001                                           2 * kMaxInstructionSizeInBytes);
2002                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
2003              }
2004              {
2005                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2006                vmov(cond,
2007                     Untyped32,
2008                     DRegisterLane(rd.GetLowDRegister(), 1),
2009                     scratch);
2010              }
2011              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2012              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
2013            }
2014            return;
2015          }
2016          break;
2017        default:
2018          break;
2019      }
2020      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
2021      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
2022        // mov ip, imm32
2023        // vdup.16 or vdup.32 q0, ip (depending on dt)
2024        UseScratchRegisterScope temps(this);
2025        Register scratch = temps.Acquire();
2026        {
2027          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2028          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
2029        }
2030        DataTypeValue vdup_dt = Untyped32;
2031        switch (dt.GetValue()) {
2032          case I16:
2033            vdup_dt = Untyped16;
2034            break;
2035          case I32:
2036            vdup_dt = Untyped32;
2037            break;
2038          default:
2039            VIXL_UNREACHABLE();
2040        }
2041        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2042        vdup(cond, vdup_dt, rd, scratch);
2043        return;
2044      }
2045      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
2046        // Punt to vmov.i32
2047        float f = neon_imm.GetImmediate<float>();
2048        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2049        vmov(cond, I32, rd, FloatToRawbits(f));
2050        return;
2051      }
2052      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
2053        // Use vmov to create the double in the low D register, then duplicate
2054        // it into the high D register.
2055        double d = neon_imm.GetImmediate<double>();
2056        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
2057        vmov(cond, F64, rd.GetLowDRegister(), d);
2058        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
2059        return;
2060      }
2061    }
2062  }
2063  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2064}
2065
2066
2067void MacroAssembler::Delegate(InstructionType type,
2068                              InstructionCondRL instruction,
2069                              Condition cond,
2070                              Register rt,
2071                              Label* label) {
2072  VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) ||
2073              (type == kLdrsh));
2074
2075  CONTEXT_SCOPE;
2076
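  // For a label that is already bound, the label's address is computed into a
  // scratch register (see MemOperandComputationHelper) and the value is then
  // loaded with an ordinary base-plus-offset access. This is only a sketch;
  // the exact sequence depends on the distance to the label.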
2077  if (label->IsBound()) {
2078    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2079    UseScratchRegisterScope temps(this);
2080    temps.Include(rt);
2081    Register scratch = temps.Acquire();
2082    uint32_t mask = GetOffsetMask(type, Offset);
2083    switch (type) {
2084      case kLdrb:
2085        ldrb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2086        return;
2087      case kLdrh:
2088        ldrh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2089        return;
2090      case kLdrsb:
2091        ldrsb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2092        return;
2093      case kLdrsh:
2094        ldrsh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2095        return;
2096      default:
2097        VIXL_UNREACHABLE();
2098    }
2099    return;
2100  }
2101
2102  Assembler::Delegate(type, instruction, cond, rt, label);
2103}
2104
2105
2106void MacroAssembler::Delegate(InstructionType type,
2107                              InstructionCondRRL instruction,
2108                              Condition cond,
2109                              Register rt,
2110                              Register rt2,
2111                              Label* label) {
2112  VIXL_ASSERT(type == kLdrd);
2113
2114  CONTEXT_SCOPE;
2115
2116  if (label->IsBound()) {
2117    CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
2118    UseScratchRegisterScope temps(this);
2119    temps.Include(rt, rt2);
2120    Register scratch = temps.Acquire();
2121    uint32_t mask = GetOffsetMask(type, Offset);
2122    ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, label, mask));
2123    return;
2124  }
2125
2126  Assembler::Delegate(type, instruction, cond, rt, rt2, label);
2127}
2128
2129
2130void MacroAssembler::Delegate(InstructionType type,
2131                              InstructionCondSizeRMop instruction,
2132                              Condition cond,
2133                              EncodingSize size,
2134                              Register rd,
2135                              const MemOperand& operand) {
2136  CONTEXT_SCOPE;
2137  VIXL_ASSERT(size.IsBest());
2138  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
2139              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
2140              (type == kStrb) || (type == kStrh));
2141  if (operand.IsImmediate()) {
2142    const Register& rn = operand.GetBaseRegister();
2143    AddrMode addrmode = operand.GetAddrMode();
2144    int32_t offset = operand.GetOffsetImmediate();
2145    uint32_t mask = GetOffsetMask(type, addrmode);
2146    bool negative;
2147    // Try to maximize the offset used by the MemOperand (load_store_offset).
2148    // Add or subtract the part which can't be used by the MemOperand
2149    // (add_sub_offset).
2150    int32_t add_sub_offset;
2151    int32_t load_store_offset;
2152    load_store_offset = offset & mask;
2153    if (offset >= 0) {
2154      negative = false;
2155      add_sub_offset = offset & ~mask;
2156    } else {
2157      negative = true;
2158      add_sub_offset = -offset & ~mask;
2159      if (load_store_offset > 0) add_sub_offset += mask + 1;
2160    }
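    // Illustrative example, assuming a word load with a 12-bit offset mask:
    // for offset == -0x1001, load_store_offset is 0xfff and add_sub_offset is
    // 0x2000, so the code below computes rn - 0x2000 + 0xfff == rn - 0x1001.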
2161    switch (addrmode) {
2162      case PreIndex:
2163        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2164        if (!rn.Is(rd)) {
2165          // Pre-Indexed case:
2166          // ldr r0, [r1, 12345]! will translate into
2167          //   add r1, r1, 12345
2168          //   ldr r0, [r1]
2169          {
2170            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2171            if (negative) {
2172              sub(cond, rn, rn, add_sub_offset);
2173            } else {
2174              add(cond, rn, rn, add_sub_offset);
2175            }
2176          }
2177          {
2178            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2179            (this->*instruction)(cond,
2180                                 size,
2181                                 rd,
2182                                 MemOperand(rn, load_store_offset, PreIndex));
2183          }
2184          return;
2185        }
2186        break;
2187      case Offset: {
2188        UseScratchRegisterScope temps(this);
2189        // Allow using the destination as a scratch register if possible.
2190        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2191            !rd.Is(rn)) {
2192          temps.Include(rd);
2193        }
2194        Register scratch = temps.Acquire();
2195        // Offset case:
2196        // ldr r0, [r1, 12345] will translate into
2197        //   add r0, r1, 12345
2198        //   ldr r0, [r0]
2199        {
2200          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2201          if (negative) {
2202            sub(cond, scratch, rn, add_sub_offset);
2203          } else {
2204            add(cond, scratch, rn, add_sub_offset);
2205          }
2206        }
2207        {
2208          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2209          (this->*instruction)(cond,
2210                               size,
2211                               rd,
2212                               MemOperand(scratch, load_store_offset));
2213        }
2214        return;
2215      }
2216      case PostIndex:
2217        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2218        if (!rn.Is(rd)) {
2219          // Post-indexed case:
2220          // ldr r0, [r1], imm32 will translate into
2221          //   ldr r0, [r1]
2222          //   movw ip, imm32 & 0xffff
2223          //   movt ip, imm32 >> 16
2224          //   add r1, r1, ip
2225          {
2226            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2227            (this->*instruction)(cond,
2228                                 size,
2229                                 rd,
2230                                 MemOperand(rn, load_store_offset, PostIndex));
2231          }
2232          {
2233            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2234            if (negative) {
2235              sub(cond, rn, rn, add_sub_offset);
2236            } else {
2237              add(cond, rn, rn, add_sub_offset);
2238            }
2239          }
2240          return;
2241        }
2242        break;
2243    }
2244  } else if (operand.IsPlainRegister()) {
2245    const Register& rn = operand.GetBaseRegister();
2246    AddrMode addrmode = operand.GetAddrMode();
2247    const Register& rm = operand.GetOffsetRegister();
2248    if (rm.IsPC()) {
2249      VIXL_ABORT_WITH_MSG(
2250          "The MacroAssembler does not convert loads and stores with a PC "
2251          "offset register.\n");
2252    }
2253    if (rn.IsPC()) {
2254      if (addrmode == Offset) {
2255        if (IsUsingT32()) {
2256          VIXL_ABORT_WITH_MSG(
2257              "The MacroAssembler does not convert loads and stores with a PC "
2258              "base register for T32.\n");
2259        }
2260      } else {
2261        VIXL_ABORT_WITH_MSG(
2262            "The MacroAssembler does not convert loads and stores with a PC "
2263            "base register in pre-index or post-index mode.\n");
2264      }
2265    }
2266    switch (addrmode) {
2267      case PreIndex:
2268        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2269        if (!rn.Is(rd)) {
2270          // Pre-Indexed case:
2271          // ldr r0, [r1, r2]! will translate into
2272          //   add r1, r1, r2
2273          //   ldr r0, [r1]
2274          {
2275            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2276            if (operand.GetSign().IsPlus()) {
2277              add(cond, rn, rn, rm);
2278            } else {
2279              sub(cond, rn, rn, rm);
2280            }
2281          }
2282          {
2283            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2284            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2285          }
2286          return;
2287        }
2288        break;
2289      case Offset: {
2290        UseScratchRegisterScope temps(this);
2291        // Allow using the destination as a scratch register if this is not
2292        // a store. Avoid using PC as a temporary as this has side-effects.
2294        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2295            !rd.IsPC()) {
2296          temps.Include(rd);
2297        }
2298        Register scratch = temps.Acquire();
2299        // Offset case:
2300        // ldr r0, [r1, r2] will translate into
2301        //   add r0, r1, r2
2302        //   ldr r0, [r0]
2303        {
2304          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2305          if (operand.GetSign().IsPlus()) {
2306            add(cond, scratch, rn, rm);
2307          } else {
2308            sub(cond, scratch, rn, rm);
2309          }
2310        }
2311        {
2312          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2313          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
2314        }
2315        return;
2316      }
2317      case PostIndex:
2318        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2319        if (!rn.Is(rd)) {
2320          // Post-indexed case:
2321          // ldr r0, [r1], r2 will translate into
2322          //   ldr r0, [r1]
2323          //   add r1, r1, r2
2324          {
2325            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2326            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2327          }
2328          {
2329            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2330            if (operand.GetSign().IsPlus()) {
2331              add(cond, rn, rn, rm);
2332            } else {
2333              sub(cond, rn, rn, rm);
2334            }
2335          }
2336          return;
2337        }
2338        break;
2339    }
2340  }
2341  Assembler::Delegate(type, instruction, cond, size, rd, operand);
2342}
2343
2344
2345void MacroAssembler::Delegate(InstructionType type,
2346                              InstructionCondRRMop instruction,
2347                              Condition cond,
2348                              Register rt,
2349                              Register rt2,
2350                              const MemOperand& operand) {
2351  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
2352      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
2353      (type == kStrexb) || (type == kStrexh)) {
2354    UnimplementedDelegate(type);
2355    return;
2356  }
2357
2358  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
2359
2360  CONTEXT_SCOPE;
2361
2362  // TODO: Should we allow these cases?
2363  if (IsUsingA32()) {
2364    // The first register needs to be even.
2365    if ((rt.GetCode() & 1) != 0) {
2366      UnimplementedDelegate(type);
2367      return;
2368    }
2369    // Registers need to be adjacent.
2370    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
2371      UnimplementedDelegate(type);
2372      return;
2373    }
2374    // LDRD lr, pc, [...] is not allowed (rt2 would be pc).
2375    if (rt.Is(lr)) {
2376      UnimplementedDelegate(type);
2377      return;
2378    }
2379  }
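  // For example (illustrative), the A32 checks above reject Ldrd(r1, r2, ...)
  // (odd first register) and Ldrd(r0, r2, ...) (non-adjacent pair); only pairs
  // such as (r0, r1) or (r2, r3) are expanded below.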
2380
2381  if (operand.IsImmediate()) {
2382    const Register& rn = operand.GetBaseRegister();
2383    AddrMode addrmode = operand.GetAddrMode();
2384    int32_t offset = operand.GetOffsetImmediate();
2385    switch (addrmode) {
2386      case PreIndex: {
2387        // Allow using the destinations as scratch registers if possible.
2388        UseScratchRegisterScope temps(this);
2389        if (type == kLdrd) {
2390          if (!rt.Is(rn)) temps.Include(rt);
2391          if (!rt2.Is(rn)) temps.Include(rt2);
2392        }
2393
2394        // Pre-Indexed case:
2395        // ldrd r0, r1, [r2, 12345]! will translate into
2396        //   add r2, 12345
2397        //   ldrd r0, r1, [r2]
2398        {
2399          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2400          add(cond, rn, rn, offset);
2401        }
2402        {
2403          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2404          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2405        }
2406        return;
2407      }
2408      case Offset: {
2409        UseScratchRegisterScope temps(this);
2410        // Allow using the destinations as scratch registers if possible.
2411        if (type == kLdrd) {
2412          if (!rt.Is(rn)) temps.Include(rt);
2413          if (!rt2.Is(rn)) temps.Include(rt2);
2414        }
2415        Register scratch = temps.Acquire();
2416        // Offset case:
2417        // ldrd r0, r1, [r2, 12345] will translate into
2418        //   add r0, r2, 12345
2419        //   ldrd r0, r1, [r0]
2420        {
2421          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2422          add(cond, scratch, rn, offset);
2423        }
2424        {
2425          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2426          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2427        }
2428        return;
2429      }
2430      case PostIndex:
2431        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2432        if (!rn.Is(rt) && !rn.Is(rt2)) {
2433          // Post-indexed case:
2434          // ldrd r0, r1, [r2], imm32 will translate into
2435          //   ldrd r0, r1, [r2]
2436          //   movw ip, imm32 & 0xffff
2437          //   movt ip, imm32 >> 16
2438          //   add r2, ip
2439          {
2440            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2441            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2442          }
2443          {
2444            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2445            add(cond, rn, rn, offset);
2446          }
2447          return;
2448        }
2449        break;
2450    }
2451  }
2452  if (operand.IsPlainRegister()) {
2453    const Register& rn = operand.GetBaseRegister();
2454    const Register& rm = operand.GetOffsetRegister();
2455    AddrMode addrmode = operand.GetAddrMode();
2456    switch (addrmode) {
2457      case PreIndex:
2458        // ldrd r0, r1, [r2, r3]! will translate into
2459        //   add r2, r3
2460        //   ldrd r0, r1, [r2]
2461        {
2462          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2463          if (operand.GetSign().IsPlus()) {
2464            add(cond, rn, rn, rm);
2465          } else {
2466            sub(cond, rn, rn, rm);
2467          }
2468        }
2469        {
2470          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2471          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2472        }
2473        return;
2474      case PostIndex:
2475        // ldrd r0, r1, [r2], r3 will translate into
2476        //   ldrd r0, r1, [r2]
2477        //   add r2, r3
2478        {
2479          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2480          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2481        }
2482        {
2483          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2484          if (operand.GetSign().IsPlus()) {
2485            add(cond, rn, rn, rm);
2486          } else {
2487            sub(cond, rn, rn, rm);
2488          }
2489        }
2490        return;
2491      case Offset: {
2492        UseScratchRegisterScope temps(this);
2493        // Allow using the destinations as scratch registers if possible.
2494        if (type == kLdrd) {
2495          if (!rt.Is(rn)) temps.Include(rt);
2496          if (!rt2.Is(rn)) temps.Include(rt2);
2497        }
2498        Register scratch = temps.Acquire();
2499        // Offset case:
2500        // ldrd r0, r1, [r2, r3] will translate into
2501        //   add r0, r2, r3
2502        //   ldrd r0, r1, [r0]
2503        {
2504          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2505          if (operand.GetSign().IsPlus()) {
2506            add(cond, scratch, rn, rm);
2507          } else {
2508            sub(cond, scratch, rn, rm);
2509          }
2510        }
2511        {
2512          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2513          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2514        }
2515        return;
2516      }
2517    }
2518  }
2519  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2520}
2521
2522
2523void MacroAssembler::Delegate(InstructionType type,
2524                              InstructionCondDtSMop instruction,
2525                              Condition cond,
2526                              DataType dt,
2527                              SRegister rd,
2528                              const MemOperand& operand) {
2529  CONTEXT_SCOPE;
2530  if (operand.IsImmediate()) {
2531    const Register& rn = operand.GetBaseRegister();
2532    AddrMode addrmode = operand.GetAddrMode();
2533    int32_t offset = operand.GetOffsetImmediate();
2534    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2535                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2536    if (rn.IsPC()) {
2537      VIXL_ABORT_WITH_MSG(
2538          "The MacroAssembler does not convert vldr or vstr with a PC base "
2539          "register.\n");
2540    }
2541    switch (addrmode) {
2542      case PreIndex:
2543        // Pre-Indexed case:
2544        // vldr.32 s0, [r1, 12345]! will translate into
2545        //   add r1, 12345
2546        //   vldr.32 s0, [r1]
2547        if (offset != 0) {
2548          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2549          add(cond, rn, rn, offset);
2550        }
2551        {
2552          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2553          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2554        }
2555        return;
2556      case Offset: {
2557        UseScratchRegisterScope temps(this);
2558        Register scratch = temps.Acquire();
2559        // Offset case:
2560        // vldr.32 s0, [r1, 12345] will translate into
2561        //   add ip, r1, 12345
2562        //   vldr.32 s0, [ip]
2563        {
2564          VIXL_ASSERT(offset != 0);
2565          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2566          add(cond, scratch, rn, offset);
2567        }
2568        {
2569          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2570          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2571        }
2572        return;
2573      }
2574      case PostIndex:
2575        // Post-indexed case:
2576        // vldr.32 s0, [r1], imm32 will translate into
2577        //   vldr.32 s0, [r1]
2578        //   movw ip, imm32 & 0xffff
2579        //   movt ip, imm32 >> 16
2580        //   add r1, ip
2581        {
2582          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2583          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2584        }
2585        if (offset != 0) {
2586          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2587          add(cond, rn, rn, offset);
2588        }
2589        return;
2590    }
2591  }
2592  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2593}
2594
2595
2596void MacroAssembler::Delegate(InstructionType type,
2597                              InstructionCondDtDMop instruction,
2598                              Condition cond,
2599                              DataType dt,
2600                              DRegister rd,
2601                              const MemOperand& operand) {
2602  CONTEXT_SCOPE;
2603  if (operand.IsImmediate()) {
2604    const Register& rn = operand.GetBaseRegister();
2605    AddrMode addrmode = operand.GetAddrMode();
2606    int32_t offset = operand.GetOffsetImmediate();
2607    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2608                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2609    if (rn.IsPC()) {
2610      VIXL_ABORT_WITH_MSG(
2611          "The MacroAssembler does not convert vldr or vstr with a PC base "
2612          "register.\n");
2613    }
2614    switch (addrmode) {
2615      case PreIndex:
2616        // Pre-Indexed case:
2617        // vldr.64 d0, [r1, 12345]! will translate into
2618        //   add r1, 12345
2619        //   vldr.64 d0, [r1]
2620        if (offset != 0) {
2621          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2622          add(cond, rn, rn, offset);
2623        }
2624        {
2625          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2626          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2627        }
2628        return;
2629      case Offset: {
2630        UseScratchRegisterScope temps(this);
2631        Register scratch = temps.Acquire();
2632        // Offset case:
2633        // vldr.64 d0, [r1, 12345] will translate into
2634        //   add ip, r1, 12345
2635        //   vldr.64 d0, [ip]
2636        {
2637          VIXL_ASSERT(offset != 0);
2638          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2639          add(cond, scratch, rn, offset);
2640        }
2641        {
2642          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2643          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2644        }
2645        return;
2646      }
2647      case PostIndex:
2648        // Post-indexed case:
2649        // vldr.64 d0, [r1], imm32 will translate into
2650        //   vldr.64 d0, [r1]
2651        //   movw ip, imm32 & 0xffff
2652        //   movt ip, imm32 >> 16
2653        //   add r1, ip
2654        {
2655          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2656          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2657        }
2658        if (offset != 0) {
2659          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2660          add(cond, rn, rn, offset);
2661        }
2662        return;
2663    }
2664  }
2665  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2666}
2667
2668
2669void MacroAssembler::Delegate(InstructionType type,
2670                              InstructionCondMsrOp instruction,
2671                              Condition cond,
2672                              MaskedSpecialRegister spec_reg,
2673                              const Operand& operand) {
2674  USE(type);
2675  VIXL_ASSERT(type == kMsr);
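  // When the immediate form is not available, the value is materialised into
  // a scratch register and written with the register form of msr. For example
  // (illustrative):
  //   Msr(APSR_nzcvq, 0xf0000000)  ->  mov ip, #0xf0000000
  //                                    msr APSR_nzcvq, ip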
2676  if (operand.IsImmediate()) {
2677    UseScratchRegisterScope temps(this);
2678    Register scratch = temps.Acquire();
2679    {
2680      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2681      mov(cond, scratch, operand);
2682    }
2683    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2684    msr(cond, spec_reg, scratch);
2685    return;
2686  }
2687  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2688}
2689
2690
2691void MacroAssembler::Delegate(InstructionType type,
2692                              InstructionCondDtDL instruction,
2693                              Condition cond,
2694                              DataType dt,
2695                              DRegister rd,
2696                              Label* label) {
2697  VIXL_ASSERT(type == kVldr);
2698
2699  CONTEXT_SCOPE;
2700
2701  if (label->IsBound()) {
2702    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2703    UseScratchRegisterScope temps(this);
2704    Register scratch = temps.Acquire();
2705    uint32_t mask = GetOffsetMask(type, Offset);
2706    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2707    return;
2708  }
2709
2710  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2711}
2712
2713
2714void MacroAssembler::Delegate(InstructionType type,
2715                              InstructionCondDtSL instruction,
2716                              Condition cond,
2717                              DataType dt,
2718                              SRegister rd,
2719                              Label* label) {
2720  VIXL_ASSERT(type == kVldr);
2721
2722  CONTEXT_SCOPE;
2723
2724  if (label->IsBound()) {
2725    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2726    UseScratchRegisterScope temps(this);
2727    Register scratch = temps.Acquire();
2728    uint32_t mask = GetOffsetMask(type, Offset);
2729    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2730    return;
2731  }
2732
2733  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2734}
2735
2736
2737#undef CONTEXT_SCOPE
2738#undef TOSTRING
2739#undef STRINGIFY
2740
2741// Start of generated code.
2742// End of generated code.
2743}  // namespace aarch32
2744}  // namespace vixl
2745