macro-assembler-aarch32.cc revision 7f2a44c6dce08e942080f14af19f83a202162104
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
41  available_ = masm->GetScratchRegisterList();
42  old_available_ = available_->GetList();
43  available_vfp_ = masm->GetScratchVRegisterList();
44  old_available_vfp_ = available_vfp_->GetList();
45}
46
47
48void UseScratchRegisterScope::Close() {
49  if (available_ != NULL) {
50    available_->SetList(old_available_);
51    available_ = NULL;
52  }
53  if (available_vfp_ != NULL) {
54    available_vfp_->SetList(old_available_vfp_);
55    available_vfp_ = NULL;
56  }
57}
58
59
60bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
61  VIXL_ASSERT(available_ != NULL);
62  VIXL_ASSERT(reg.IsValid());
63  return available_->Includes(reg);
64}
65
66
67bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
68  VIXL_ASSERT(available_vfp_ != NULL);
69  VIXL_ASSERT(reg.IsValid());
70  return available_vfp_->IncludesAllOf(reg);
71}
72
73
74Register UseScratchRegisterScope::Acquire() {
75  VIXL_ASSERT(available_ != NULL);
76  VIXL_CHECK(!available_->IsEmpty());
77  Register reg = available_->GetFirstAvailableRegister();
78  available_->Remove(reg);
79  return reg;
80}
81
82
83VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
84  switch (size_in_bits) {
85    case kSRegSizeInBits:
86      return AcquireS();
87    case kDRegSizeInBits:
88      return AcquireD();
89    case kQRegSizeInBits:
90      return AcquireQ();
91    default:
92      VIXL_UNREACHABLE();
93      return NoVReg;
94  }
95}
96
97
98QRegister UseScratchRegisterScope::AcquireQ() {
99  VIXL_ASSERT(available_vfp_ != NULL);
100  VIXL_CHECK(!available_vfp_->IsEmpty());
101  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
102  available_vfp_->Remove(reg);
103  return reg;
104}
105
106
107DRegister UseScratchRegisterScope::AcquireD() {
108  VIXL_ASSERT(available_vfp_ != NULL);
109  VIXL_CHECK(!available_vfp_->IsEmpty());
110  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
111  available_vfp_->Remove(reg);
112  return reg;
113}
114
115
116SRegister UseScratchRegisterScope::AcquireS() {
117  VIXL_ASSERT(available_vfp_ != NULL);
118  VIXL_CHECK(!available_vfp_->IsEmpty());
119  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
120  available_vfp_->Remove(reg);
121  return reg;
122}
123
124
125void UseScratchRegisterScope::Release(const Register& reg) {
126  VIXL_ASSERT(available_ != NULL);
127  VIXL_ASSERT(reg.IsValid());
128  VIXL_ASSERT(!available_->Includes(reg));
129  available_->Combine(reg);
130}
131
132
133void UseScratchRegisterScope::Release(const VRegister& reg) {
134  VIXL_ASSERT(available_vfp_ != NULL);
135  VIXL_ASSERT(reg.IsValid());
136  VIXL_ASSERT(!available_vfp_->IncludesAliasOf(reg));
137  available_vfp_->Combine(reg);
138}
139
140
141void UseScratchRegisterScope::Include(const RegisterList& list) {
142  VIXL_ASSERT(available_ != NULL);
143  RegisterList excluded_registers(sp, lr, pc);
144  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
145  available_->SetList(available_->GetList() | mask);
146}
147
148
149void UseScratchRegisterScope::Include(const VRegisterList& list) {
150  VIXL_ASSERT(available_vfp_ != NULL);
151  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const RegisterList& list) {
156  VIXL_ASSERT(available_ != NULL);
157  available_->SetList(available_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
162  VIXL_ASSERT(available_vfp_ != NULL);
163  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
164}
165
166
167void UseScratchRegisterScope::ExcludeAll() {
168  if (available_ != NULL) {
169    available_->SetList(0);
170  }
171  if (available_vfp_ != NULL) {
172    available_vfp_->SetList(0);
173  }
174}
175
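// A minimal usage sketch for this scope (the helper function is hypothetical):
// registers borrowed with Acquire() are handed back automatically when the
// scope is closed, because the destructor restores the saved lists.
//
//   void EmitClearWord(MacroAssembler* masm, Register base) {
//     UseScratchRegisterScope temps(masm);
//     Register scratch = temps.Acquire();  // Typically ip on the default list.
//     masm->Mov(scratch, 0);
//     masm->Str(scratch, MemOperand(base));
//   }  // `scratch` becomes available again here.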
176
177void VeneerPoolManager::AddLabel(Label* label) {
178  if (!label->IsInVeneerPool()) {
179    label->SetVeneerPoolManager(this);
180    labels_.push_back(label);
181  }
182  Label::ForwardReference& back = label->GetBackForwardRef();
183  back.SetIsBranch();
184  label->UpdateCheckpoint();
185  Label::Offset tmp = label->GetCheckpoint();
186  if (checkpoint_ > tmp) {
187    checkpoint_ = tmp;
188    masm_->ComputeCheckpoint();
189  }
190}
191
192
193void VeneerPoolManager::RemoveLabel(Label* label) {
194  label->ClearVeneerPoolManager();
195  if (label->GetCheckpoint() == checkpoint_) {
196    // We have to compute checkpoint again.
197    checkpoint_ = Label::kMaxOffset;
198    for (std::list<Label*>::iterator it = labels_.begin();
199         it != labels_.end();) {
200      if (*it == label) {
201        it = labels_.erase(it);
202      } else {
203        checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
204        ++it;
205      }
206    }
207    masm_->ComputeCheckpoint();
208  } else {
209    // We only have to remove the label from the list.
210    for (std::list<Label*>::iterator it = labels_.begin();; ++it) {
211      VIXL_ASSERT(it != labels_.end());
212      if (*it == label) {
213        labels_.erase(it);
214        break;
215      }
216    }
217  }
218}
219
220
221void VeneerPoolManager::Emit(Label::Offset target) {
222  checkpoint_ = Label::kMaxOffset;
223  // Sort the labels by their checkpoints so that no veneer
224  // can go out of range.
225  labels_.sort(Label::CompareLabels);
226  // To avoid too many veneers, generate veneers which will be necessary soon.
227  static const size_t kVeneerEmissionMargin = 1 * KBytes;
228  // To avoid too many veneers, reuse the generated veneers for other uses
229  // which are not too far away.
230  static const size_t kVeneerEmittedMargin = 2 * KBytes;
231  Label::Offset emitted_target = target + kVeneerEmittedMargin;
232  target += kVeneerEmissionMargin;
233  // Reset the checkpoint. It will be computed again in the loop.
234  checkpoint_ = Label::kMaxOffset;
235  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();) {
236    // The labels are sorted. As soon as a veneer is not needed, we can stop.
237    if ((*it)->GetCheckpoint() > target) {
238      checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
239      break;
240    }
241    // Define the veneer.
242    Label veneer;
243    masm_->Bind(&veneer);
244    Label::Offset label_checkpoint = Label::kMaxOffset;
245    // Check all uses of this label.
246    for (Label::ForwardRefList::iterator ref = (*it)->GetFirstForwardRef();
247         ref != (*it)->GetEndForwardRef();) {
248      if (ref->IsBranch()) {
249        if (ref->GetCheckpoint() <= emitted_target) {
250          // Use the veneer.
251          masm_->EncodeLabelFor(*ref, &veneer);
252          ref = (*it)->Erase(ref);
253        } else {
254          // Don't use the veneer => update checkpoint.
255          label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
256          ++ref;
257        }
258      } else {
259        ++ref;
260      }
261    }
262    // Even if we no longer have use of this label, we can keep it in the list
263    // as the next "B" would add it back.
264    (*it)->SetCheckpoint(label_checkpoint);
265    checkpoint_ = std::min(checkpoint_, label_checkpoint);
266    // Generate the veneer.
267    masm_->B(*it);
268    ++it;
269  }
270#ifdef VIXL_DEBUG
271  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();
272       ++it) {
273    VIXL_ASSERT((*it)->GetCheckpoint() >= checkpoint_);
274  }
275#endif
276  masm_->ComputeCheckpoint();
277}
278
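// For example, a call to Emit(target) above generates a veneer for every label
// whose checkpoint falls within target + 1KB (kVeneerEmissionMargin), and
// redirects to that veneer every branch to the label whose own checkpoint
// falls within target + 2KB (kVeneerEmittedMargin); branches further away keep
// the original label and only contribute to the recomputed checkpoint.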
279
280void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
281  EmitOption option = kBranchRequired;
282  Label after_pools;
283  if (target >= veneer_pool_manager_.GetCheckpoint()) {
284#ifdef VIXL_DEBUG
285    // Here, we can't use an AssemblerAccurateScope as it would call
286    // PerformEnsureEmit in an infinite loop.
287    bool save_assembler_state = AllowAssembler();
288    SetAllowAssembler(true);
289#endif
290    b(&after_pools);
291#ifdef VIXL_DEBUG
292    SetAllowAssembler(false);
293#endif
294    veneer_pool_manager_.Emit(target);
295    option = kNoBranchRequired;
296#ifdef VIXL_DEBUG
297    SetAllowAssembler(save_assembler_state);
298#endif
299  }
300  // Check if the macro-assembler's internal literal pool should be emitted
301  // to avoid any overflow. If we already generated the veneers, we can
302  // emit the pool (the branch is already done).
303  VIXL_ASSERT(GetCursorOffset() <= literal_pool_manager_.GetCheckpoint());
304  if ((target > literal_pool_manager_.GetCheckpoint()) ||
305      (option == kNoBranchRequired)) {
306    // We will generate the literal pool. Generate all the veneers which
307    // would become out of range.
308    size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize();
309    VIXL_ASSERT(IsInt32(literal_pool_size));
310    Label::Offset veneers_target =
311        target + static_cast<Label::Offset>(literal_pool_size);
312    VIXL_ASSERT(veneers_target >= 0);
313    if (veneers_target >= veneer_pool_manager_.GetCheckpoint()) {
314      veneer_pool_manager_.Emit(veneers_target);
315    }
316    EmitLiteralPool(option);
317  }
318  BindHelper(&after_pools);
319  if (GetBuffer()->IsManaged()) {
320    bool grow_requested;
321    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
322    if (grow_requested) ComputeCheckpoint();
323  }
324}
325
326
327void MacroAssembler::ComputeCheckpoint() {
328  checkpoint_ = veneer_pool_manager_.GetCheckpoint();
329  if (literal_pool_manager_.GetCheckpoint() != Label::kMaxOffset) {
330    size_t veneer_max_size = veneer_pool_manager_.GetMaxSize();
331    VIXL_ASSERT(IsInt32(veneer_max_size));
332    // We must be able to generate the pool and a branch over the pool.
333    Label::Offset tmp = literal_pool_manager_.GetCheckpoint() -
334                        static_cast<Label::Offset>(veneer_max_size +
335                                                   kMaxInstructionSizeInBytes);
336    VIXL_ASSERT(tmp >= 0);
337    checkpoint_ = std::min(checkpoint_, tmp);
338  }
339  size_t buffer_size = GetBuffer()->GetCapacity();
340  VIXL_ASSERT(IsInt32(buffer_size));
341  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
342  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
343}
344
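// In other words, the checkpoint computed above is roughly:
//
//   checkpoint_ = min(veneer_pool_checkpoint,
//                     literal_pool_checkpoint -
//                         (max_veneer_pool_size + kMaxInstructionSizeInBytes),
//                     buffer_capacity)
//
// where the literal pool term only applies while that pool has a checkpoint.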
345
346void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
347  // 32-bit table A32:
348  // adr ip, table
349  // add ip, ip, r1, lsl 2
350  // ldr ip, [ip]
351  // jmp: add pc, pc, ip, lsl 2
352  // table:
353  // .int (case_0 - (jmp + 8)) >> 2
354  // .int (case_1 - (jmp + 8)) >> 2
355  // .int (case_2 - (jmp + 8)) >> 2
356
357  // 16-bit table T32:
358  // adr ip, table
359  // jmp: tbh ip, r1
360  // table:
361  // .short (case_0 - (jmp + 4)) >> 1
362  // .short (case_1 - (jmp + 4)) >> 1
363  // .short (case_2 - (jmp + 4)) >> 1
364  // case_0:
365  //   ...
366  //   b end_switch
367  // case_1:
368  //   ...
369  //   b end_switch
370  // ...
371  // end_switch:
372  Label jump_table;
373  UseScratchRegisterScope temps(this);
374  Register scratch = temps.Acquire();
375  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
376
377  // Jump to default if reg is not in [0, table->GetLength()).
378  Cmp(reg, table->GetLength());
379  B(ge, table->GetDefaultLabel());
380
381  Adr(scratch, &jump_table);
382  if (IsUsingA32()) {
383    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
384    switch (table->GetOffsetShift()) {
385      case 0:
386        Ldrb(scratch, MemOperand(scratch));
387        break;
388      case 1:
389        Ldrh(scratch, MemOperand(scratch));
390        break;
391      case 2:
392        Ldr(scratch, MemOperand(scratch));
393        break;
394      default:
395        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
396    }
397    // Emit whatever needs to be emitted if we want to
398    // correctly record the position of the branch instruction.
399    uint32_t branch_location = GetCursorOffset();
400    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
401    AssemblerAccurateScope scope(this,
402                                 table_size + kA32InstructionSizeInBytes,
403                                 CodeBufferCheckScope::kMaximumSize);
404    add(pc, pc, Operand(scratch, LSL, 2));
405    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
406    bind(&jump_table);
407    GenerateSwitchTable(table, table_size);
408  } else {
409    // Thumb mode - We have tbb and tbh to do this for 8- or 16-bit offsets.
410    // But for 32-bit offsets, we use the same sequence as for A32.
411    if (table->GetOffsetShift() == 2) {
412      // 32bit offsets
413      Add(scratch, scratch, Operand(reg, LSL, 2));
414      Ldr(scratch, MemOperand(scratch));
415      // Cannot use add pc, pc, r lsl 1 as this is unpredictable in T32,
416      // so do the shift beforehand.
417      Lsl(scratch, scratch, 1);
418      // Emit whatever needs to be emitted if we want to
419      // correctly record the position of the branch instruction.
420      uint32_t branch_location = GetCursorOffset();
421      table->SetBranchLocation(branch_location +
422                               GetArchitectureStatePCOffset());
423      AssemblerAccurateScope scope(this,
424                                   table_size + kMaxInstructionSizeInBytes,
425                                   CodeBufferCheckScope::kMaximumSize);
426      add(pc, pc, scratch);
427      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
428      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
429      bind(&jump_table);
430      GenerateSwitchTable(table, table_size);
431    } else {
432      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
433                  (table->GetOffsetShift() == 1));
434      // Emit whatever needs to be emitted if we want to
435      // correctly record the position of the branch instruction.
436      uint32_t branch_location = GetCursorOffset();
437      table->SetBranchLocation(branch_location +
438                               GetArchitectureStatePCOffset());
439      AssemblerAccurateScope scope(this,
440                                   table_size + kMaxInstructionSizeInBytes,
441                                   CodeBufferCheckScope::kMaximumSize);
442      if (table->GetOffsetShift() == 0) {
443        // 8bit offsets
444        tbb(scratch, reg);
445      } else {
446        // 16bit offsets
447        tbh(scratch, reg);
448      }
449      // tbb/tbh is a 32bit instruction
450      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
451      bind(&jump_table);
452      GenerateSwitchTable(table, table_size);
453    }
454  }
455}
456
457
458void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
459  table->BindTable(GetCursorOffset());
460  for (int i = 0; i < table_size / 4; i++) {
461    GetBuffer()->Emit32(0);
462  }
463}
464
465
466// switch/case/default : case
467// case_index is assumed to be < table->GetLength()
468// which is checked in JumpTable::Link and Table::SetPresenceBit
469void MacroAssembler::Case(JumpTableBase* table, int case_index) {
470  table->Link(this, case_index, GetCursorOffset());
471  table->SetPresenceBitForCase(case_index);
472}
473
474// switch/case/default : default
475void MacroAssembler::Default(JumpTableBase* table) {
476  Bind(table->GetDefaultLabel());
477}
478
479// switch/case/default : break
480void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
481
482// switch/case/default : finalize
483// Manage the default path, mostly. All empty offsets in the jump table
484// will point to default.
485// All values not in [0, table->GetLength()) are already pointing here anyway.
486void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
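// A usage sketch for the switch helpers above, assuming `table` is a concrete
// JumpTableBase implementation sized for two cases (its exact type is not
// defined in this file):
//
//   masm.Switch(r0, &table);  // Dispatch on r0 in [0, table.GetLength()).
//   masm.Case(&table, 0);
//   masm.Mov(r1, 100);
//   masm.Break(&table);
//   masm.Case(&table, 1);
//   masm.Mov(r1, 101);
//   masm.Break(&table);
//   masm.Default(&table);
//   masm.Mov(r1, 0);
//   masm.EndSwitch(&table);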
487
488void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
489                                                Register tmp,
490                                                uint32_t imm) {
491  if (IsUintN(16, imm)) {
492    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
493    mov(cond, tmp, imm & 0xffff);
494    return;
495  }
496  if (IsUsingT32()) {
497    if (ImmediateT32::IsImmediateT32(~imm)) {
498      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
499      mvn(cond, tmp, ~imm);
500      return;
501    }
502  } else {
503    if (ImmediateA32::IsImmediateA32(~imm)) {
504      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
505      mvn(cond, tmp, ~imm);
506      return;
507    }
508  }
509  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
510  mov(cond, tmp, imm & 0xffff);
511  movt(cond, tmp, imm >> 16);
512}
513
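// For example, with imm = 0x12345678 (too wide for a single mov, and with a
// bitwise NOT that is not encodable either), the fall-through path above
// emits:
//   mov tmp, #0x5678
//   movt tmp, #0x1234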
514
515void MacroAssembler::PadToMinimumBranchRange(Label* label) {
516  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
517  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
518    uint32_t location = last_reference->GetLocation();
519    if (location + k16BitT32InstructionSizeInBytes ==
520        static_cast<uint32_t>(GetCursorOffset())) {
521      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
522      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
523        VIXL_ASSERT(!InITBlock());
524        // A Cbz or a Cbnz can't jump immediately after the instruction. If the
525        // target is immediately after the Cbz or Cbnz, we insert a nop to
526        // avoid that.
527        EmitT32_16(k16BitT32NopOpcode);
528      }
529    }
530  }
531}
532
533
534HARDFLOAT void PrintfTrampolineRRRR(
535    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
536  printf(format, a, b, c, d);
537}
538
539
540HARDFLOAT void PrintfTrampolineRRRD(
541    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
542  printf(format, a, b, c, d);
543}
544
545
546HARDFLOAT void PrintfTrampolineRRDR(
547    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
548  printf(format, a, b, c, d);
549}
550
551
552HARDFLOAT void PrintfTrampolineRRDD(
553    const char* format, uint32_t a, uint32_t b, double c, double d) {
554  printf(format, a, b, c, d);
555}
556
557
558HARDFLOAT void PrintfTrampolineRDRR(
559    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
560  printf(format, a, b, c, d);
561}
562
563
564HARDFLOAT void PrintfTrampolineRDRD(
565    const char* format, uint32_t a, double b, uint32_t c, double d) {
566  printf(format, a, b, c, d);
567}
568
569
570HARDFLOAT void PrintfTrampolineRDDR(
571    const char* format, uint32_t a, double b, double c, uint32_t d) {
572  printf(format, a, b, c, d);
573}
574
575
576HARDFLOAT void PrintfTrampolineRDDD(
577    const char* format, uint32_t a, double b, double c, double d) {
578  printf(format, a, b, c, d);
579}
580
581
582HARDFLOAT void PrintfTrampolineDRRR(
583    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
584  printf(format, a, b, c, d);
585}
586
587
588HARDFLOAT void PrintfTrampolineDRRD(
589    const char* format, double a, uint32_t b, uint32_t c, double d) {
590  printf(format, a, b, c, d);
591}
592
593
594HARDFLOAT void PrintfTrampolineDRDR(
595    const char* format, double a, uint32_t b, double c, uint32_t d) {
596  printf(format, a, b, c, d);
597}
598
599
600HARDFLOAT void PrintfTrampolineDRDD(
601    const char* format, double a, uint32_t b, double c, double d) {
602  printf(format, a, b, c, d);
603}
604
605
606HARDFLOAT void PrintfTrampolineDDRR(
607    const char* format, double a, double b, uint32_t c, uint32_t d) {
608  printf(format, a, b, c, d);
609}
610
611
612HARDFLOAT void PrintfTrampolineDDRD(
613    const char* format, double a, double b, uint32_t c, double d) {
614  printf(format, a, b, c, d);
615}
616
617
618HARDFLOAT void PrintfTrampolineDDDR(
619    const char* format, double a, double b, double c, uint32_t d) {
620  printf(format, a, b, c, d);
621}
622
623
624HARDFLOAT void PrintfTrampolineDDDD(
625    const char* format, double a, double b, double c, double d) {
626  printf(format, a, b, c, d);
627}
628
629
630void MacroAssembler::Printf(const char* format,
631                            CPURegister reg1,
632                            CPURegister reg2,
633                            CPURegister reg3,
634                            CPURegister reg4) {
635  if (generate_simulator_code_) {
636    PushRegister(reg4);
637    PushRegister(reg3);
638    PushRegister(reg2);
639    PushRegister(reg1);
640    Push(RegisterList(r0, r1));
641    StringLiteral* format_literal =
642        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
643    Adr(r0, format_literal);
644    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
645                    (reg2.GetType() << 4) | reg1.GetType();
646    Mov(r1, args);
647    Hvc(kPrintfCode);
648    Pop(RegisterList(r0, r1));
649    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
650               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
651    Drop(size);
652  } else {
653    // Generating code for a native platform => 32-bit environment.
654    // Preserve the caller-saved core registers (r0-r3, r12, lr) and r5.
655    const uint32_t saved_registers_mask =
656        kCallerSavedRegistersMask | (1 << r5.GetCode());
657    Push(RegisterList(saved_registers_mask));
658    // Push VFP registers.
659    Vpush(Untyped64, DRegisterList(d0, 8));
660    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
661    // Find a register which has been saved and which doesn't need to be
662    // printed.
663    RegisterList available_registers(kCallerSavedRegistersMask);
664    if (reg1.GetType() == CPURegister::kRRegister) {
665      available_registers.Remove(Register(reg1.GetCode()));
666    }
667    if (reg2.GetType() == CPURegister::kRRegister) {
668      available_registers.Remove(Register(reg2.GetCode()));
669    }
670    if (reg3.GetType() == CPURegister::kRRegister) {
671      available_registers.Remove(Register(reg3.GetCode()));
672    }
673    if (reg4.GetType() == CPURegister::kRRegister) {
674      available_registers.Remove(Register(reg4.GetCode()));
675    }
676    Register tmp = available_registers.GetFirstAvailableRegister();
677    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
678    // Push the flags.
679    Mrs(tmp, APSR);
680    Push(tmp);
681    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
682    Push(tmp);
683    // Push the registers to print on the stack.
684    PushRegister(reg4);
685    PushRegister(reg3);
686    PushRegister(reg2);
687    PushRegister(reg1);
688    int core_count = 1;
689    int vfp_count = 0;
690    uint32_t printf_type = 0;
691    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
692    // Reg4 may stay on the stack if all the registers to print are core
693    // registers.
694    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
695    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
696    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
697    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
698    // Ensure that the stack is aligned on 8 bytes.
699    And(r5, sp, 0x7);
700    if (core_count == 5) {
701      // One 32-bit argument (reg4) has been left on the stack => align the
702      // stack
703      // before the argument.
704      Pop(r0);
705      Sub(sp, sp, r5);
706      Push(r0);
707    } else {
708      Sub(sp, sp, r5);
709    }
710    // Select the right trampoline depending on the arguments.
711    uintptr_t address;
712    switch (printf_type) {
713      case 0:
714        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
715        break;
716      case 1:
717        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
718        break;
719      case 2:
720        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
721        break;
722      case 3:
723        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
724        break;
725      case 4:
726        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
727        break;
728      case 5:
729        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
730        break;
731      case 6:
732        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
733        break;
734      case 7:
735        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
736        break;
737      case 8:
738        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
739        break;
740      case 9:
741        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
742        break;
743      case 10:
744        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
745        break;
746      case 11:
747        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
748        break;
749      case 12:
750        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
751        break;
752      case 13:
753        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
754        break;
755      case 14:
756        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
757        break;
758      case 15:
759        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
760        break;
761      default:
762        VIXL_UNREACHABLE();
763        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
764        break;
765    }
766    StringLiteral* format_literal =
767        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
768    Adr(r0, format_literal);
769    Mov(ip, Operand::From(address));
770    Blx(ip);
771    // If register reg4 was left on the stack => skip it.
772    if (core_count == 5) Drop(kRegSizeInBytes);
773    // Restore the stack as it was before alignment.
774    Add(sp, sp, r5);
775    // Restore the flags.
776    Pop(tmp);
777    Vmsr(FPSCR, tmp);
778    Pop(tmp);
779    Msr(APSR_nzcvqg, tmp);
780    // Restore the registers.
781    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
782    Vpop(Untyped64, DRegisterList(d0, 8));
783    Pop(RegisterList(saved_registers_mask));
784  }
785}
786
787
788void MacroAssembler::PushRegister(CPURegister reg) {
789  switch (reg.GetType()) {
790    case CPURegister::kNoRegister:
791      break;
792    case CPURegister::kRRegister:
793      Push(Register(reg.GetCode()));
794      break;
795    case CPURegister::kSRegister:
796      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
797      break;
798    case CPURegister::kDRegister:
799      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
800      break;
801    case CPURegister::kQRegister:
802      VIXL_UNIMPLEMENTED();
803      break;
804  }
805}
806
807
808void MacroAssembler::PreparePrintfArgument(CPURegister reg,
809                                           int* core_count,
810                                           int* vfp_count,
811                                           uint32_t* printf_type) {
812  switch (reg.GetType()) {
813    case CPURegister::kNoRegister:
814      break;
815    case CPURegister::kRRegister:
816      VIXL_ASSERT(*core_count <= 4);
817      if (*core_count < 4) Pop(Register(*core_count));
818      *core_count += 1;
819      break;
820    case CPURegister::kSRegister:
821      VIXL_ASSERT(*vfp_count < 4);
822      *printf_type |= 1 << (*core_count + *vfp_count - 1);
823      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
824      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
825      *vfp_count += 1;
826      break;
827    case CPURegister::kDRegister:
828      VIXL_ASSERT(*vfp_count < 4);
829      *printf_type |= 1 << (*core_count + *vfp_count - 1);
830      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
831      *vfp_count += 1;
832      break;
833    case CPURegister::kQRegister:
834      VIXL_UNIMPLEMENTED();
835      break;
836  }
837}
838
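// As an example of the encoding built above: for a call such as
// Printf("%d %f\n", r1, d2) with the last two arguments left unset, reg1 is a
// core register (no bit set) and reg2 is a D register (bit 1 set), so
// printf_type == 2 and Printf() selects PrintfTrampolineRDRR.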
839
840void MacroAssembler::Delegate(InstructionType type,
841                              InstructionCondROp instruction,
842                              Condition cond,
843                              Register rn,
844                              const Operand& operand) {
845  // movt, sxtb16, teq, uxtb16
846  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
847              (type == kUxtb16));
848
849  if (type == kMovt) {
850    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
851  }
852
853  // This delegate only supports teq with immediates.
854  CONTEXT_SCOPE;
855  if ((type == kTeq) && operand.IsImmediate()) {
856    UseScratchRegisterScope temps(this);
857    Register scratch = temps.Acquire();
858    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
859    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
860    teq(cond, rn, scratch);
861    return;
862  }
863  Assembler::Delegate(type, instruction, cond, rn, operand);
864}
865
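// For reference, a sketch of what the delegate above emits for
// Teq(r0, 0x12345678), an immediate that no teq encoding accepts (the scratch
// register is typically ip):
//   mov ip, #0x5678
//   movt ip, #0x1234
//   teq r0, ip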
866
867void MacroAssembler::Delegate(InstructionType type,
868                              InstructionCondSizeROp instruction,
869                              Condition cond,
870                              EncodingSize size,
871                              Register rn,
872                              const Operand& operand) {
873  // cmn cmp mov movs mvn mvns sxtb sxth tst uxtb uxth
874  CONTEXT_SCOPE;
875  VIXL_ASSERT(size.IsBest());
876  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
877              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
878              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
879              (type == kUxtb) || (type == kUxth));
880  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
881    VIXL_ASSERT((type != kMov) || (type != kMovs));
882    InstructionCondRROp shiftop = NULL;
883    switch (operand.GetShift().GetType()) {
884      case LSL:
885        shiftop = &Assembler::lsl;
886        break;
887      case LSR:
888        shiftop = &Assembler::lsr;
889        break;
890      case ASR:
891        shiftop = &Assembler::asr;
892        break;
893      case RRX:
894        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
895        VIXL_UNREACHABLE();
896        break;
897      case ROR:
898        shiftop = &Assembler::ror;
899        break;
900      default:
901        VIXL_UNREACHABLE();
902    }
903    if (shiftop != NULL) {
904      UseScratchRegisterScope temps(this);
905      Register scratch = temps.Acquire();
906      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
907      (this->*shiftop)(cond,
908                       scratch,
909                       operand.GetBaseRegister(),
910                       operand.GetShiftRegister());
911      return (this->*instruction)(cond, size, rn, scratch);
912    }
913  }
914  if (operand.IsImmediate()) {
915    uint32_t imm = operand.GetImmediate();
916    switch (type) {
917      case kMov:
918      case kMovs:
919        if (!rn.IsPC()) {
920          // Immediate is too large, but not using PC, so handle with mov{t}.
921          HandleOutOfBoundsImmediate(cond, rn, imm);
922          if (type == kMovs) {
923            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
924            tst(cond, rn, rn);
925          }
926          return;
927        } else if (type == kMov) {
928          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
929          // Immediate is too large and using PC, so handle using a temporary
930          // register.
931          UseScratchRegisterScope temps(this);
932          Register scratch = temps.Acquire();
933          HandleOutOfBoundsImmediate(al, scratch, imm);
934          EnsureEmitFor(kMaxInstructionSizeInBytes);
935          return bx(cond, scratch);
936        }
937        break;
938      case kCmn:
939      case kCmp:
940        if (IsUsingA32() || !rn.IsPC()) {
941          UseScratchRegisterScope temps(this);
942          Register scratch = temps.Acquire();
943          HandleOutOfBoundsImmediate(cond, scratch, imm);
944          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
945          return (this->*instruction)(cond, size, rn, scratch);
946        }
947        break;
948      case kMvn:
949      case kMvns:
950        if (!rn.IsPC()) {
951          UseScratchRegisterScope temps(this);
952          Register scratch = temps.Acquire();
953          HandleOutOfBoundsImmediate(cond, scratch, imm);
954          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
955          return (this->*instruction)(cond, size, rn, scratch);
956        }
957        break;
958      case kTst:
959        if (IsUsingA32() || !rn.IsPC()) {
960          UseScratchRegisterScope temps(this);
961          Register scratch = temps.Acquire();
962          HandleOutOfBoundsImmediate(cond, scratch, imm);
963          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
964          return (this->*instruction)(cond, size, rn, scratch);
965        }
966        break;
967      default:  // kSxtb, kSxth, kUxtb, kUxth
968        break;
969    }
970  }
971  Assembler::Delegate(type, instruction, cond, size, rn, operand);
972}
973
974
975void MacroAssembler::Delegate(InstructionType type,
976                              InstructionCondRROp instruction,
977                              Condition cond,
978                              Register rd,
979                              Register rn,
980                              const Operand& operand) {
981  // orn orns pkhbt pkhtb rsc rscs sxtab sxtab16 sxtah uxtab uxtab16 uxtah
982
983  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
984      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
985      (type == kPkhbt) || (type == kPkhtb)) {
986    return UnimplementedDelegate(type);
987  }
988
989  // This delegate only handles the following instructions.
990  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
991              (type == kRscs));
992  CONTEXT_SCOPE;
993  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
994    InstructionCondRROp shiftop = NULL;
995    switch (operand.GetShift().GetType()) {
996      case LSL:
997        shiftop = &Assembler::lsl;
998        break;
999      case LSR:
1000        shiftop = &Assembler::lsr;
1001        break;
1002      case ASR:
1003        shiftop = &Assembler::asr;
1004        break;
1005      case RRX:
1006        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1007        VIXL_UNREACHABLE();
1008        break;
1009      case ROR:
1010        shiftop = &Assembler::ror;
1011        break;
1012      default:
1013        VIXL_UNREACHABLE();
1014    }
1015    if (shiftop != NULL) {
1016      UseScratchRegisterScope temps(this);
1017      Register rm = operand.GetBaseRegister();
1018      Register rs = operand.GetShiftRegister();
1019      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1020      // a scratch register.
1021      if (!rd.Is(rn)) temps.Include(rd);
1022      if (!rm.Is(rn)) temps.Include(rm);
1023      if (!rs.Is(rn)) temps.Include(rs);
1024      Register scratch = temps.Acquire();
1025      // TODO: The scope length was measured empirically. We should analyse the
1026      // worst-case size and add targeted tests.
1027      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1028      (this->*shiftop)(cond, scratch, rm, rs);
1029      return (this->*instruction)(cond, rd, rn, scratch);
1030    }
1031  }
1032  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1033    // The RegisterShiftedRegister case should have been handled above.
1034    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1035    UseScratchRegisterScope temps(this);
1036    Register negated_rn;
1037    if (operand.IsImmediate() || !operand.GetBaseRegister().Is(rn)) {
1038      // In this case, we can invert `rn` in place instead of using a
1039      // temporary register.
1040      negated_rn = rn;
1041    } else {
1042      if (!rd.Is(rn)) temps.Include(rd);
1043      negated_rn = temps.Acquire();
1044    }
1045    {
1046      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1047      mvn(cond, negated_rn, rn);
1048    }
1049    if (type == kRsc) {
1050      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1051      return adc(cond, rd, negated_rn, operand);
1052    }
1053    // TODO: We shouldn't have to specify how much space the next instruction
1054    // needs.
1055    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1056    return adcs(cond, rd, negated_rn, operand);
1057  }
1058  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1059    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
1060    //  mvn r0, r2
1061    //  orr r0, r1, r0
1062    Register scratch;
1063    UseScratchRegisterScope temps(this);
1064    // If different from `rn`, we can make use of source and destination
1065    // registers as a scratch register.
1066    if (!rd.Is(rn)) temps.Include(rd);
1067    if (!operand.IsImmediate() && !operand.GetBaseRegister().Is(rn)) {
1068      temps.Include(operand.GetBaseRegister());
1069    }
1070    if (operand.IsRegisterShiftedRegister() &&
1071        !operand.GetShiftRegister().Is(rn)) {
1072      temps.Include(operand.GetShiftRegister());
1073    }
1074    scratch = temps.Acquire();
1075    {
1076      // TODO: We shouldn't have to specify how much space the next instruction
1077      // needs.
1078      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1079      mvn(cond, scratch, operand);
1080    }
1081    if (type == kOrns) {
1082      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1083      return orrs(cond, rd, rn, scratch);
1084    }
1085    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1086    return orr(cond, rd, rn, scratch);
1087  }
1088  if (operand.IsImmediate()) {
1089    int32_t imm = operand.GetSignedImmediate();
1090    if (ImmediateT32::IsImmediateT32(~imm)) {
1091      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1092      if (IsUsingT32()) {
1093        switch (type) {
1094          case kOrn:
1095            return orr(cond, rd, rn, ~imm);
1096          case kOrns:
1097            return orrs(cond, rd, rn, ~imm);
1098          default:
1099            break;
1100        }
1101      }
1102    }
1103    UseScratchRegisterScope temps(this);
1104    // Allow using the destination as a scratch register if possible.
1105    if (!rd.Is(rn)) temps.Include(rd);
1106    Register scratch = temps.Acquire();
1107    HandleOutOfBoundsImmediate(cond, scratch, imm);
1108    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1109    return (this->*instruction)(cond, rd, rn, scratch);
1110  }
1111  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1112}
1113
1114
1115void MacroAssembler::Delegate(InstructionType type,
1116                              InstructionCondSizeRROp instruction,
1117                              Condition cond,
1118                              EncodingSize size,
1119                              Register rd,
1120                              Register rn,
1121                              const Operand& operand) {
1122  // adc adcs add adds and_ ands asr asrs bic bics eor eors lsl lsls lsr lsrs
1123  // orr orrs ror rors rsb rsbs sbc sbcs sub subs
1124
1125  VIXL_ASSERT(
1126      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1127      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1128      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1129      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1130      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1131      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1132      (type == kSub) || (type == kSubs));
1133
1134  CONTEXT_SCOPE;
1135  VIXL_ASSERT(size.IsBest());
1136  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1137    InstructionCondRROp shiftop = NULL;
1138    switch (operand.GetShift().GetType()) {
1139      case LSL:
1140        shiftop = &Assembler::lsl;
1141        break;
1142      case LSR:
1143        shiftop = &Assembler::lsr;
1144        break;
1145      case ASR:
1146        shiftop = &Assembler::asr;
1147        break;
1148      case RRX:
1149        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1150        VIXL_UNREACHABLE();
1151        break;
1152      case ROR:
1153        shiftop = &Assembler::ror;
1154        break;
1155      default:
1156        VIXL_UNREACHABLE();
1157    }
1158    if (shiftop != NULL) {
1159      UseScratchRegisterScope temps(this);
1160      Register rm = operand.GetBaseRegister();
1161      Register rs = operand.GetShiftRegister();
1162      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1163      // a scratch register.
1164      if (!rd.Is(rn)) temps.Include(rd);
1165      if (!rm.Is(rn)) temps.Include(rm);
1166      if (!rs.Is(rn)) temps.Include(rs);
1167      Register scratch = temps.Acquire();
1168      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1169      (this->*shiftop)(cond, scratch, rm, rs);
1170      return (this->*instruction)(cond, size, rd, rn, scratch);
1171    }
1172  }
1173  if (operand.IsImmediate()) {
1174    int32_t imm = operand.GetSignedImmediate();
1175    if (ImmediateT32::IsImmediateT32(~imm)) {
1176      if (IsUsingT32()) {
1177        switch (type) {
1178          case kOrr:
1179            return orn(cond, rd, rn, ~imm);
1180          case kOrrs:
1181            return orns(cond, rd, rn, ~imm);
1182          default:
1183            break;
1184        }
1185      }
1186    }
1187    if (imm < 0) {
1188      InstructionCondSizeRROp asmcb = NULL;
1189      // Add and sub are equivalent using an arithmetic negation:
1190      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1191      // Add and sub with carry are equivalent using a bitwise NOT:
1192      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
1193      switch (type) {
1194        case kAdd:
1195          asmcb = &Assembler::sub;
1196          imm = -imm;
1197          break;
1198        case kAdds:
1199          asmcb = &Assembler::subs;
1200          imm = -imm;
1201          break;
1202        case kSub:
1203          asmcb = &Assembler::add;
1204          imm = -imm;
1205          break;
1206        case kSubs:
1207          asmcb = &Assembler::adds;
1208          imm = -imm;
1209          break;
1210        case kAdc:
1211          asmcb = &Assembler::sbc;
1212          imm = ~imm;
1213          break;
1214        case kAdcs:
1215          asmcb = &Assembler::sbcs;
1216          imm = ~imm;
1217          break;
1218        case kSbc:
1219          asmcb = &Assembler::adc;
1220          imm = ~imm;
1221          break;
1222        case kSbcs:
1223          asmcb = &Assembler::adcs;
1224          imm = ~imm;
1225          break;
1226        default:
1227          break;
1228      }
1229      if (asmcb != NULL) {
1230        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1231        return (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1232      }
1233    }
1234    UseScratchRegisterScope temps(this);
1235    // Allow using the destination as a scratch register if possible.
1236    if (!rd.Is(rn)) temps.Include(rd);
1237    Register scratch = temps.Acquire();
1238    // TODO: The scope length was measured empirically. We should analyse the
1239    // worst-case size and add targeted tests.
1240    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1241    mov(cond, scratch, operand.GetImmediate());
1242    return (this->*instruction)(cond, size, rd, rn, scratch);
1243  }
1244  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1245}
1246
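// Two worked examples of the negative-immediate rewrites above:
//   Add(r0, r1, -4) is emitted as sub r0, r1, #4 (the immediate is negated);
//   Adc(r0, r1, -2) is emitted as sbc r0, r1, #1 (bitwise NOT, ~(-2) == 1).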
1247
1248void MacroAssembler::Delegate(InstructionType type,
1249                              InstructionRL instruction,
1250                              Register rn,
1251                              Label* label) {
1252  // cbz cbnz
1253  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1254
1255  CONTEXT_SCOPE;
1256  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1257  if (IsUsingA32()) {
1258    if (type == kCbz) {
1259      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1260    } else {
1261      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1262    }
1263  } else if (rn.IsLow()) {
1264    switch (type) {
1265      case kCbnz: {
1266        Label done;
1267        cbz(rn, &done);
1268        b(label);
1269        Bind(&done);
1270        return;
1271      }
1272      case kCbz: {
1273        Label done;
1274        cbnz(rn, &done);
1275        b(label);
1276        Bind(&done);
1277        return;
1278      }
1279      default:
1280        break;
1281    }
1282  }
1283  Assembler::Delegate(type, instruction, rn, label);
1284}
1285
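// For reference, the delegate above rewrites an out-of-range Cbnz(r0, &target)
// in T32 as the following sketch:
//   cbz r0, done    ; skip the long branch when r0 is zero
//   b target        ; full-range branch, may later go through a veneer
// done: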
1286
1287template <typename T>
1288static inline bool IsI64BitPattern(T imm) {
1289  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1290    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1291  }
1292  return true;
1293}
1294
1295
1296template <typename T>
1297static inline bool IsI8BitPattern(T imm) {
1298  uint8_t imm8 = imm & 0xff;
1299  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1300    imm >>= 8;
1301    if ((imm & 0xff) != imm8) return false;
1302  }
1303  return true;
1304}
1305
1306
1307static inline bool CanBeInverted(uint32_t imm32) {
1308  uint32_t fill8 = 0;
1309
1310  if ((imm32 & 0xffffff00) == 0xffffff00) {
1311    //    11111111 11111111 11111111 abcdefgh
1312    return true;
1313  }
1314  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1315    fill8 = imm32 & 0xff;
1316    imm32 >>= 8;
1317    if ((imm32 >> 8) == 0xffff) {
1318      //    11111111 11111111 abcdefgh 00000000
1319      // or 11111111 11111111 abcdefgh 11111111
1320      return true;
1321    }
1322    if ((imm32 & 0xff) == fill8) {
1323      imm32 >>= 8;
1324      if ((imm32 >> 8) == 0xff) {
1325        //    11111111 abcdefgh 00000000 00000000
1326        // or 11111111 abcdefgh 11111111 11111111
1327        return true;
1328      }
1329      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1330        //    abcdefgh 11111111 11111111 11111111
1331        return true;
1332      }
1333    }
1334  }
1335  return false;
1336}
1337
1338
1339template <typename RES, typename T>
1340static inline RES replicate(T imm) {
1341  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1342              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1343  RES res = imm;
1344  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1345    res = (res << (sizeof(T) * 8)) | imm;
1346  }
1347  return res;
1348}
1349
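// Worked examples for the helpers above:
//   IsI8BitPattern(0xabababab)  -> true (every byte is 0xab)
//   IsI64BitPattern(0xff0000ff) -> true (every byte is 0x00 or 0xff)
//   CanBeInverted(0xffab0000)   -> true (~0xffab0000 == 0x0054ffff)
//   replicate<uint64_t>(0xff0000ffu) == UINT64_C(0xff0000ffff0000ff)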
1350
1351void MacroAssembler::Delegate(InstructionType type,
1352                              InstructionCondDtSSop instruction,
1353                              Condition cond,
1354                              DataType dt,
1355                              SRegister rd,
1356                              const SOperand& operand) {
1357  CONTEXT_SCOPE;
1358  if (type == kVmov) {
1359    if (operand.IsImmediate() && dt.Is(F32)) {
1360      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1361      if (neon_imm.CanConvert<float>()) {
1362        // movw ip, imm16
1363        // movt ip, imm16
1364        // vmov s0, ip
1365        UseScratchRegisterScope temps(this);
1366        Register scratch = temps.Acquire();
1367        float f = neon_imm.GetImmediate<float>();
1368        // TODO: The scope length was measured empirically. We should analyse
1369        // the
1370        // worst-case size and add targeted tests.
1371        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1372        mov(cond, scratch, FloatToRawbits(f));
1373        return vmov(cond, rd, scratch);
1374      }
1375    }
1376  }
1377  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1378}
1379
1380
1381void MacroAssembler::Delegate(InstructionType type,
1382                              InstructionCondDtDDop instruction,
1383                              Condition cond,
1384                              DataType dt,
1385                              DRegister rd,
1386                              const DOperand& operand) {
1387  CONTEXT_SCOPE;
1388  if (type == kVmov) {
1389    if (operand.IsImmediate()) {
1390      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1391      switch (dt.GetValue()) {
1392        case I32:
1393          if (neon_imm.CanConvert<uint32_t>()) {
1394            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1395            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1396            if (IsI8BitPattern(imm)) {
1397              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1398              return vmov(cond, I8, rd, imm & 0xff);
1399            }
1400            // vmov.i32 d0, 0xff0000ff will translate into
1401            // vmov.i64 d0, 0xff0000ffff0000ff
1402            if (IsI64BitPattern(imm)) {
1403              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1404              return vmov(cond, I64, rd, replicate<uint64_t>(imm));
1405            }
1406            // vmov.i32 d0, 0xffab0000 will translate into
1407            // vmvn.i32 d0, 0x0054ffff
1408            if (cond.Is(al) && CanBeInverted(imm)) {
1409              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1410              return vmvn(I32, rd, ~imm);
1411            }
1412          }
1413          break;
1414        case I16:
1415          if (neon_imm.CanConvert<uint16_t>()) {
1416            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1417            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1418            if (IsI8BitPattern(imm)) {
1419              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1420              return vmov(cond, I8, rd, imm & 0xff);
1421            }
1422          }
1423          break;
1424        case I64:
1425          if (neon_imm.CanConvert<uint64_t>()) {
1426            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1427            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1428            if (IsI8BitPattern(imm)) {
1429              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1430              return vmov(cond, I8, rd, imm & 0xff);
1431            }
1432            // mov ip, lo(imm64)
1433            // vdup d0, ip
1434            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1435            // preserved.
1436            {
1437              UseScratchRegisterScope temps(this);
1438              Register scratch = temps.Acquire();
1439              {
1440                // TODO: The scope length was measured empirically. We should
1441                // analyse the
1442                // worst-case size and add targeted tests.
1443                CodeBufferCheckScope scope(this,
1444                                           2 * kMaxInstructionSizeInBytes);
1445                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1446              }
1447              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1448              vdup(cond, Untyped32, rd, scratch);
1449            }
1450            // mov ip, hi(imm64)
1451            // vmov d0[1], ip
1452            {
1453              UseScratchRegisterScope temps(this);
1454              Register scratch = temps.Acquire();
1455              {
1456                // TODO: The scope length was measured empirically. We should
1457                // analyse the
1458                // worst-case size and add targeted tests.
1459                CodeBufferCheckScope scope(this,
1460                                           2 * kMaxInstructionSizeInBytes);
1461                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1462              }
1463              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1464              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1465            }
1466            return;
1467          }
1468          break;
1469        default:
1470          break;
1471      }
1472      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1473          neon_imm.CanConvert<uint32_t>()) {
1474        // mov ip, imm32
1475        // vdup.8 d0, ip
1476        UseScratchRegisterScope temps(this);
1477        Register scratch = temps.Acquire();
1478        {
1479          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1480          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1481        }
1482        DataTypeValue vdup_dt = Untyped32;
1483        switch (dt.GetValue()) {
1484          case I8:
1485            vdup_dt = Untyped8;
1486            break;
1487          case I16:
1488            vdup_dt = Untyped16;
1489            break;
1490          case I32:
1491            vdup_dt = Untyped32;
1492            break;
1493          default:
1494            VIXL_UNREACHABLE();
1495        }
1496        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1497        return vdup(cond, vdup_dt, rd, scratch);
1498      }
1499      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1500        float f = neon_imm.GetImmediate<float>();
1501        // Punt to vmov.i32
1502        // TODO: The scope length was guessed based on the double case below. We
1503        // should analyse the worst-case size and add targeted tests.
1504        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1505        return vmov(cond, I32, rd, FloatToRawbits(f));
1506      }
1507      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1508        // Punt to vmov.i64
1509        double d = neon_imm.GetImmediate<double>();
1510        // TODO: The scope length was measured empirically. We should analyse
1511        // the
1512        // worst-case size and add targeted tests.
1513        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1514        return vmov(cond, I64, rd, DoubleToRawbits(d));
1515      }
1516    }
1517  }
1518  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1519}
1520
1521
1522void MacroAssembler::Delegate(InstructionType type,
1523                              InstructionCondDtQQop instruction,
1524                              Condition cond,
1525                              DataType dt,
1526                              QRegister rd,
1527                              const QOperand& operand) {
1528  CONTEXT_SCOPE;
1529  if (type == kVmov) {
1530    if (operand.IsImmediate()) {
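      // The assembler could not encode this immediate directly, so synthesise
      // it below using an equivalent encoding or a scratch register.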
1531      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1532      switch (dt.GetValue()) {
1533        case I32:
1534          if (neon_imm.CanConvert<uint32_t>()) {
1535            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1536            // vmov.i32 q0, 0xabababab will translate into vmov.i8 q0, 0xab
1537            if (IsI8BitPattern(imm)) {
1538              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1539              return vmov(cond, I8, rd, imm & 0xff);
1540            }
1541            // vmov.i32 q0, 0xff0000ff will translate into
1542            // vmov.i64 q0, 0xff0000ffff0000ff
1543            if (IsI64BitPattern(imm)) {
1544              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1545              return vmov(cond, I64, rd, replicate<uint64_t>(imm));
1546            }
1547            // vmov.i32 q0, 0xffab0000 will translate into
1548            // vmvn.i32 q0, 0x0054ffff
1549            if (CanBeInverted(imm)) {
1550              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1551              return vmvn(cond, I32, rd, ~imm);
1552            }
1553          }
1554          break;
1555        case I16:
1556          if (neon_imm.CanConvert<uint16_t>()) {
1557            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1558            // vmov.i16 q0, 0xabab will translate into vmov.i8 q0, 0xab
1559            if (IsI8BitPattern(imm)) {
1560              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1561              return vmov(cond, I8, rd, imm & 0xff);
1562            }
1563          }
1564          break;
1565        case I64:
1566          if (neon_imm.CanConvert<uint64_t>()) {
1567            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1568            // vmov.i64 q0, -1 will translate into vmov.i8 q0, 0xff
1569            if (IsI8BitPattern(imm)) {
1570              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1571              return vmov(cond, I8, rd, imm & 0xff);
1572            }
1573            // mov ip, lo(imm64)
1574            // vdup q0, ip
1575            // vdup is preferred to 'vmov d0[0]' as the other lanes of q0
1576            // do not need to be preserved.
1577            {
1578              UseScratchRegisterScope temps(this);
1579              Register scratch = temps.Acquire();
1580              {
1581                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1582                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1583              }
1584              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1585              vdup(cond, Untyped32, rd, scratch);
1586            }
1587            // mov ip, hi(imm64)
1588            // vmov.32 d0[1], ip
1589            // vmov d1, d0
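            // Copying the low D register into the high one replicates the
            // 64-bit immediate across the whole Q register.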
1590            {
1591              UseScratchRegisterScope temps(this);
1592              Register scratch = temps.Acquire();
1593              {
1594                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1595                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1596              }
1597              {
1598                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1599                vmov(cond,
1600                     Untyped32,
1601                     DRegisterLane(rd.GetLowDRegister(), 1),
1602                     scratch);
1603              }
1604              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1605              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1606            }
1607            return;
1608          }
1609          break;
1610        default:
1611          break;
1612      }
1613      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1614          neon_imm.CanConvert<uint32_t>()) {
1615        // mov ip, imm32
1616        // vdup.8 q0, ip
1617        UseScratchRegisterScope temps(this);
1618        Register scratch = temps.Acquire();
1619        {
1620          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1621          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1622        }
1623        DataTypeValue vdup_dt = Untyped32;
1624        switch (dt.GetValue()) {
1625          case I8:
1626            vdup_dt = Untyped8;
1627            break;
1628          case I16:
1629            vdup_dt = Untyped16;
1630            break;
1631          case I32:
1632            vdup_dt = Untyped32;
1633            break;
1634          default:
1635            VIXL_UNREACHABLE();
1636        }
1637        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1638        return vdup(cond, vdup_dt, rd, scratch);
1639      }
1640      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1641        // Punt to vmov.i32
1642        float f = neon_imm.GetImmediate<float>();
1643        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1644        return vmov(cond, I32, rd, FloatToRawbits(f));
1645      }
1646      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1647        // Punt to vmov.i64
1648        double d = neon_imm.GetImmediate<double>();
1649        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1650        return vmov(cond, I64, rd, DoubleToRawbits(d));
1651      }
1652    }
1653  }
1654  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1655}
1656
1657
1658void MacroAssembler::Delegate(InstructionType type,
1659                              InstructionCondSizeRMop instruction,
1660                              Condition cond,
1661                              EncodingSize size,
1662                              Register rd,
1663                              const MemOperand& operand) {
1664  // ldr ldrb ldrh ldrsb ldrsh str strb strh
1665  CONTEXT_SCOPE;
1666  VIXL_ASSERT(size.IsBest());
1667  if (operand.IsImmediate()) {
1668    const Register& rn = operand.GetBaseRegister();
1669    AddrMode addrmode = operand.GetAddrMode();
1670    int32_t offset = operand.GetOffsetImmediate();
1671    bool ok = true;
1672    uint32_t mask = 0;
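    // 'mask' is the largest unsigned immediate offset that the instruction
    // can encode for the current ISA and addressing mode.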
1673    switch (type) {
1674      case kLdr:
1675      case kLdrb:
1676      case kStr:
1677      case kStrb:
1678        if (IsUsingA32() || (addrmode == Offset)) {
1679          mask = 0xfff;
1680        } else {
1681          mask = 0xff;
1682        }
1683        break;
1684      case kLdrsb:
1685      case kLdrh:
1686      case kLdrsh:
1687      case kStrh:
1688        if (IsUsingT32() && (addrmode == Offset)) {
1689          mask = 0xfff;
1690        } else {
1691          mask = 0xff;
1692        }
1693        break;
1694      default:
1695        ok = false;
1696        break;
1697    }
1698    if (ok) {
1699      bool negative;
1700      // Try to maximize the offset used by the MemOperand (load_store_offset).
1701      // Add or subtract the part which can't be used by the MemOperand
1702      // (add_sub_offset).
1703      int32_t add_sub_offset;
1704      int32_t load_store_offset;
1705      load_store_offset = offset & mask;
1706      if (offset >= 0) {
1707        negative = false;
1708        add_sub_offset = offset & ~mask;
1709      } else {
1710        negative = true;
1711        add_sub_offset = -offset & ~mask;
1712        if (load_store_offset > 0) add_sub_offset += mask + 1;
1713      }
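      // For example, with mask == 0xfff, an offset of -0x1234 is split into
      // add_sub_offset == 0x2000 (subtracted from the base) and
      // load_store_offset == 0xdcc, since -0x2000 + 0xdcc == -0x1234.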
1714      switch (addrmode) {
1715        case PreIndex:
1716          // Pre-Indexed case:
1717          // ldr r0, [r1, 12345]! will translate into
1718          //   add r1, r1, 12345
1719          //   ldr r0, [r1]
1720          {
1721            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1722            if (negative) {
1723              sub(cond, rn, rn, add_sub_offset);
1724            } else {
1725              add(cond, rn, rn, add_sub_offset);
1726            }
1727          }
1728          {
1729            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1730            (this->*instruction)(cond,
1731                                 size,
1732                                 rd,
1733                                 MemOperand(rn, load_store_offset, PreIndex));
1734          }
1735          return;
1736        case Offset: {
1737          UseScratchRegisterScope temps(this);
1738          // Allow using the destination as a scratch register if possible.
1739          if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1740              !rd.Is(rn)) {
1741            temps.Include(rd);
1742          }
1743          Register scratch = temps.Acquire();
1744          // Offset case:
1745          // ldr r0, [r1, 12345] will translate into
1746          //   add r0, r1, 12345
1747          //   ldr r0, [r0]
1748          {
1749            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1750            if (negative) {
1751              sub(cond, scratch, rn, add_sub_offset);
1752            } else {
1753              add(cond, scratch, rn, add_sub_offset);
1754            }
1755          }
1756          {
1757            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1758            (this->*instruction)(cond,
1759                                 size,
1760                                 rd,
1761                                 MemOperand(scratch, load_store_offset));
1762          }
1763          return;
1764        }
1765        case PostIndex:
1766          // Avoid the unpredictable case 'ldr r0, [r0], imm'
1767          if (!rn.Is(rd)) {
1768            // Post-indexed case:
1769            // ldr r0, [r1], imm32 will translate into
1770            //   ldr r0, [r1]
1771            //   movw ip, imm32 & 0xffff
1772            //   movt ip, imm32 >> 16
1773            //   add r1, r1, ip
1774            {
1775              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1776              (this->*instruction)(cond,
1777                                   size,
1778                                   rd,
1779                                   MemOperand(rn,
1780                                              load_store_offset,
1781                                              PostIndex));
1782            }
1783            {
1784              CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1785              if (negative) {
1786                sub(cond, rn, rn, add_sub_offset);
1787              } else {
1788                add(cond, rn, rn, add_sub_offset);
1789              }
1790            }
1791            return;
1792          }
1793          break;
1794      }
1795    }
1796  }
1797  if (operand.IsPlainRegister()) {
1798    const Register& rn = operand.GetBaseRegister();
1799    AddrMode addrmode = operand.GetAddrMode();
1800    const Register& rm = operand.GetOffsetRegister();
1801    switch (addrmode) {
1802      case PreIndex:
1803        // Pre-Indexed case:
1804        // ldr r0, [r1, r2]! will translate into
1805        //   add r1, r1, r2
1806        //   ldr r0, [r1]
1807        {
1808          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1809          if (operand.GetSign().IsPlus()) {
1810            add(cond, rn, rn, rm);
1811          } else {
1812            sub(cond, rn, rn, rm);
1813          }
1814        }
1815        {
1816          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1817          (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1818        }
1819        return;
1820      case Offset: {
1821        UseScratchRegisterScope temps(this);
1822        // Allow using the destination as a scratch register if possible.
1823        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1824            !rd.Is(rn)) {
1825          temps.Include(rd);
1826        }
1827        Register scratch = temps.Acquire();
1828        // Offset case:
1829        // ldr r0, [r1, r2] will translate into
1830        //   add r0, r1, r2
1831        //   ldr r0, [r0]
1832        {
1833          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1834          if (operand.GetSign().IsPlus()) {
1835            add(cond, scratch, rn, rm);
1836          } else {
1837            sub(cond, scratch, rn, rm);
1838          }
1839        }
1840        {
1841          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1842          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1843        }
1844        return;
1845      }
1846      case PostIndex:
1847        // Avoid the unpredictable case 'ldr r0, [r0], r1'
1848        if (!rn.Is(rd)) {
1849          // Post-indexed case:
1850          // ldr r0, [r1], r2 will translate into
1851          //   ldr r0, [r1]
1852          //   add r1, r1, r2
1853          {
1854            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1855            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1856          }
1857          {
1858            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1859            if (operand.GetSign().IsPlus()) {
1860              add(cond, rn, rn, rm);
1861            } else {
1862              sub(cond, rn, rn, rm);
1863            }
1864          }
1865          return;
1866        }
1867        break;
1868    }
1869  }
1870  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1871}
1872
1873
1874void MacroAssembler::Delegate(InstructionType type,
1875                              InstructionCondRRMop instruction,
1876                              Condition cond,
1877                              Register rt,
1878                              Register rt2,
1879                              const MemOperand& operand) {
1880  // ldaexd, ldrd, ldrexd, stlex, stlexb, stlexh, strd, strex, strexb, strexh
1881
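  // Only ldrd and strd are rewritten below; the exclusive and acquire/release
  // forms are reported as unimplemented.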
1882  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
1883      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
1884      (type == kStrexb) || (type == kStrexh)) {
1885    return UnimplementedDelegate(type);
1886  }
1887
1888  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
1889
1890  CONTEXT_SCOPE;
1891
1892  // TODO: Should we allow these cases?
1893  if (IsUsingA32()) {
1894    // The first register needs to be even.
1895    if ((rt.GetCode() & 1) != 0) {
1896      UnimplementedDelegate(type);
1897      return;
1898    }
1899    // Registers need to be adjacent.
1900    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
1901      UnimplementedDelegate(type);
1902      return;
1903    }
1904    // LDRD lr, pc, [...] is not allowed (rt2 would be pc).
1905    if (rt.Is(lr)) {
1906      UnimplementedDelegate(type);
1907      return;
1908    }
1909  }
1910
1911  if (operand.IsImmediate()) {
1912    const Register& rn = operand.GetBaseRegister();
1913    AddrMode addrmode = operand.GetAddrMode();
1914    int32_t offset = operand.GetOffsetImmediate();
1915    switch (addrmode) {
1916      case PreIndex: {
1917        // Allow using the destinations as scratch registers if possible.
1918        UseScratchRegisterScope temps(this);
1919        if (type == kLdrd) {
1920          if (!rt.Is(rn)) temps.Include(rt);
1921          if (!rt2.Is(rn)) temps.Include(rt2);
1922        }
1923
1924        // Pre-Indexed case:
1925        // ldrd r0, r1, [r2, 12345]! will translate into
1926        //   add r2, 12345
1927        //   ldrd r0, r1, [r2]
1928        {
1929          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1930          add(cond, rn, rn, offset);
1931        }
1932        {
1933          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1934          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
1935        }
1936        return;
1937      }
1938      case Offset: {
1939        UseScratchRegisterScope temps(this);
1940        // Allow using the destinations as scratch registers if possible.
1941        if (type == kLdrd) {
1942          if (!rt.Is(rn)) temps.Include(rt);
1943          if (!rt2.Is(rn)) temps.Include(rt2);
1944        }
1945        Register scratch = temps.Acquire();
1946        // Offset case:
1947        // ldrd r0, r1, [r2, 12345] will translate into
1948        //   add r0, r2, 12345
1949        //   ldrd r0, r1, [r0]
1950        {
1951          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1952          add(cond, scratch, rn, offset);
1953        }
1954        {
1955          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1956          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
1957        }
1958        return;
1959      }
1960      case PostIndex:
1961        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
1962        if (!rn.Is(rt) && !rn.Is(rt2)) {
1963          // Post-indexed case:
1964          // ldrd r0, r1, [r2], imm32 will translate into
1965          //   ldrd r0, r1, [r2]
1966          //   movw ip, imm32 & 0xffff
1967          //   movt ip, imm32 >> 16
1968          //   add r2, ip
1969          {
1970            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1971            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
1972          }
1973          {
1974            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1975            add(cond, rn, rn, offset);
1976          }
1977          return;
1978        }
1979        break;
1980    }
1981  }
1982  if (operand.IsPlainRegister()) {
1983    const Register& rn = operand.GetBaseRegister();
1984    const Register& rm = operand.GetOffsetRegister();
1985    AddrMode addrmode = operand.GetAddrMode();
1986    switch (addrmode) {
1987      case PreIndex:
1988        // ldrd r0, r1, [r2, r3]! will translate into
1989        //   add r2, r3
1990        //   ldrd r0, r1, [r2]
1991        {
1992          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1993          if (operand.GetSign().IsPlus()) {
1994            add(cond, rn, rn, rm);
1995          } else {
1996            sub(cond, rn, rn, rm);
1997          }
1998        }
1999        {
2000          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2001          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2002        }
2003        return;
2004      case PostIndex:
2005        // ldrd r0, r1, [r2], r3 will translate into
2006        //   ldrd r0, r1, [r2]
2007        //   add r2, r3
2008        {
2009          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2010          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2011        }
2012        {
2013          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2014          if (operand.GetSign().IsPlus()) {
2015            add(cond, rn, rn, rm);
2016          } else {
2017            sub(cond, rn, rn, rm);
2018          }
2019        }
2020        return;
2021      case Offset: {
2022        UseScratchRegisterScope temps(this);
2023        // Allow using the destinations as scratch registers if possible.
2024        if (type == kLdrd) {
2025          if (!rt.Is(rn)) temps.Include(rt);
2026          if (!rt2.Is(rn)) temps.Include(rt2);
2027        }
2028        Register scratch = temps.Acquire();
2029        // Offset case:
2030        // ldrd r0, r1, [r2, r3] will translate into
2031        //   add r0, r2, r3
2032        //   ldrd r0, r1, [r0]
2033        {
2034          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2035          if (operand.GetSign().IsPlus()) {
2036            add(cond, scratch, rn, rm);
2037          } else {
2038            sub(cond, scratch, rn, rm);
2039          }
2040        }
2041        {
2042          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2043          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2044        }
2045        return;
2046      }
2047    }
2048  }
2049  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2050}
2051
2052
2053void MacroAssembler::Delegate(InstructionType type,
2054                              InstructionCondDtSMop instruction,
2055                              Condition cond,
2056                              DataType dt,
2057                              SRegister rd,
2058                              const MemOperand& operand) {
2059  // vldr.32 vstr.32
2060  CONTEXT_SCOPE;
2061  if (operand.IsImmediate()) {
2062    const Register& rn = operand.GetBaseRegister();
2063    AddrMode addrmode = operand.GetAddrMode();
2064    int32_t offset = operand.GetOffsetImmediate();
2065    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2066                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2067    if (rn.IsPC()) {
2068      VIXL_ABORT_WITH_MSG(
2069          "The MacroAssembler does not convert vldr or vstr with a PC base "
2070          "register.\n");
2071    }
2072    switch (addrmode) {
2073      case PreIndex:
2074        // Pre-Indexed case:
2075        // vldr.32 s0, [r1, 12345]! will translate into
2076        //   add r1, 12345
2077        //   vldr.32 s0, [r1]
2078        if (offset != 0) {
2079          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2080          add(cond, rn, rn, offset);
2081        }
2082        {
2083          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2084          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2085        }
2086        return;
2087      case Offset: {
2088        UseScratchRegisterScope temps(this);
2089        Register scratch = temps.Acquire();
2090        // Offset case:
2091        // vldr.32 s0, [r1, 12345] will translate into
2092        //   add ip, r1, 12345
2093        //   vldr.32 s0, [ip]
2094        {
2095          VIXL_ASSERT(offset != 0);
2096          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2097          add(cond, scratch, rn, offset);
2098        }
2099        {
2100          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2101          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2102        }
2103        return;
2104      }
2105      case PostIndex:
2106        // Post-indexed case:
2107        // vldr.32 s0, [r1], imm32 will translate into
2108        //   vldr.32 s0, [r1]
2109        //   movw ip, imm32 & 0xffff
2110        //   movt ip, imm32 >> 16
2111        //   add r1, ip
2112        {
2113          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2114          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2115        }
2116        if (offset != 0) {
2117          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2118          add(cond, rn, rn, offset);
2119        }
2120        return;
2121    }
2122  }
2123  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2124}
2125
2126
2127void MacroAssembler::Delegate(InstructionType type,
2128                              InstructionCondDtDMop instruction,
2129                              Condition cond,
2130                              DataType dt,
2131                              DRegister rd,
2132                              const MemOperand& operand) {
2133  // vldr.64 vstr.64
2134  CONTEXT_SCOPE;
2135  if (operand.IsImmediate()) {
2136    const Register& rn = operand.GetBaseRegister();
2137    AddrMode addrmode = operand.GetAddrMode();
2138    int32_t offset = operand.GetOffsetImmediate();
2139    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2140                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2141    if (rn.IsPC()) {
2142      VIXL_ABORT_WITH_MSG(
2143          "The MacroAssembler does not convert vldr or vstr with a PC base "
2144          "register.\n");
2145    }
2146    switch (addrmode) {
2147      case PreIndex:
2148        // Pre-Indexed case:
2149        // vldr.64 d0, [r1, 12345]! will translate into
2150        //   add r1, 12345
2151        //   vldr.64 d0, [r1]
2152        if (offset != 0) {
2153          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2154          add(cond, rn, rn, offset);
2155        }
2156        {
2157          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2158          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2159        }
2160        return;
2161      case Offset: {
2162        UseScratchRegisterScope temps(this);
2163        Register scratch = temps.Acquire();
2164        // Offset case:
2165        // vldr.64 d0, [r1, 12345] will translate into
2166        //   add ip, r1, 12345
2167        //   vldr.64 d0, [ip]
2168        {
2169          VIXL_ASSERT(offset != 0);
2170          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2171          add(cond, scratch, rn, offset);
2172        }
2173        {
2174          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2175          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2176        }
2177        return;
2178      }
2179      case PostIndex:
2180        // Post-indexed case:
2181        // vldr.64 d0, [r1], imm32 will translate into
2182        //   vldr.64 d0, [r1]
2183        //   movw ip, imm32 & 0xffff
2184        //   movt ip, imm32 >> 16
2185        //   add r1, ip
2186        {
2187          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2188          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2189        }
2190        if (offset != 0) {
2191          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2192          add(cond, rn, rn, offset);
2193        }
2194        return;
2195    }
2196  }
2197  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2198}
2199
2200
2201void MacroAssembler::Delegate(InstructionType type,
2202                              InstructionCondMsrOp instruction,
2203                              Condition cond,
2204                              MaskedSpecialRegister spec_reg,
2205                              const Operand& operand) {
2206  USE(type);
2207  VIXL_ASSERT(type == kMsr);
2208  if (operand.IsImmediate()) {
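    // Materialise the immediate in a scratch register, then use the register
    // form of msr.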
2209    UseScratchRegisterScope temps(this);
2210    Register scratch = temps.Acquire();
2211    {
2212      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2213      mov(cond, scratch, operand);
2214    }
2215    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2216    msr(cond, spec_reg, scratch);
2217    return;
2218  }
2219  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2220}
2221
2222#undef CONTEXT_SCOPE
2223#undef TOSTRING
2224#undef STRINGIFY
2225
2226// Start of generated code.
2227// End of generated code.
2228}  // namespace aarch32
2229}  // namespace vixl
2230