macro-assembler-aarch32.cc revision 283bbdf1908649c90069ff80dfca45de4f675de4
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
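// CONTEXT_SCOPE opens a ContextScope tagging the MacroAssembler with the
// current source location (file:line); the delegates below use it so that
// diagnostics can point back to the macro being expanded.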
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
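// Open attaches the scope to `masm` and snapshots the contents of its scratch
// register lists so that Close() can restore them.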
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
41  available_ = masm->GetScratchRegisterList();
42  old_available_ = available_->GetList();
43  available_vfp_ = masm->GetScratchVRegisterList();
44  old_available_vfp_ = available_vfp_->GetList();
45}
46
47
48void UseScratchRegisterScope::Close() {
49  if (available_ != NULL) {
50    available_->SetList(old_available_);
51    available_ = NULL;
52  }
53  if (available_vfp_ != NULL) {
54    available_vfp_->SetList(old_available_vfp_);
55    available_vfp_ = NULL;
56  }
57}
58
59
60bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
61  VIXL_ASSERT(available_ != NULL);
62  VIXL_ASSERT(reg.IsValid());
63  return available_->Includes(reg);
64}
65
66
67bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
68  VIXL_ASSERT(available_vfp_ != NULL);
69  VIXL_ASSERT(reg.IsValid());
70  return available_vfp_->IncludesAllOf(reg);
71}
72
73
74Register UseScratchRegisterScope::Acquire() {
75  VIXL_ASSERT(available_ != NULL);
76  VIXL_CHECK(!available_->IsEmpty());
77  Register reg = available_->GetFirstAvailableRegister();
78  available_->Remove(reg);
79  return reg;
80}
81
82
83VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
84  switch (size_in_bits) {
85    case kSRegSizeInBits:
86      return AcquireS();
87    case kDRegSizeInBits:
88      return AcquireD();
89    case kQRegSizeInBits:
90      return AcquireQ();
91    default:
92      VIXL_UNREACHABLE();
93      return NoVReg;
94  }
95}
96
97
98QRegister UseScratchRegisterScope::AcquireQ() {
99  VIXL_ASSERT(available_vfp_ != NULL);
100  VIXL_CHECK(!available_vfp_->IsEmpty());
101  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
102  available_vfp_->Remove(reg);
103  return reg;
104}
105
106
107DRegister UseScratchRegisterScope::AcquireD() {
108  VIXL_ASSERT(available_vfp_ != NULL);
109  VIXL_CHECK(!available_vfp_->IsEmpty());
110  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
111  available_vfp_->Remove(reg);
112  return reg;
113}
114
115
116SRegister UseScratchRegisterScope::AcquireS() {
117  VIXL_ASSERT(available_vfp_ != NULL);
118  VIXL_CHECK(!available_vfp_->IsEmpty());
119  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
120  available_vfp_->Remove(reg);
121  return reg;
122}
123
124
125void UseScratchRegisterScope::Release(const Register& reg) {
126  VIXL_ASSERT(available_ != NULL);
127  VIXL_ASSERT(reg.IsValid());
128  VIXL_ASSERT(!available_->Includes(reg));
129  available_->Combine(reg);
130}
131
132
133void UseScratchRegisterScope::Release(const VRegister& reg) {
134  VIXL_ASSERT(available_vfp_ != NULL);
135  VIXL_ASSERT(reg.IsValid());
136  VIXL_ASSERT(!available_vfp_->IncludesAliasOf(reg));
137  available_vfp_->Combine(reg);
138}
139
140
141void UseScratchRegisterScope::Include(const RegisterList& list) {
142  VIXL_ASSERT(available_ != NULL);
143  RegisterList excluded_registers(sp, lr, pc);
144  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
145  available_->SetList(available_->GetList() | mask);
146}
147
148
149void UseScratchRegisterScope::Include(const VRegisterList& list) {
150  VIXL_ASSERT(available_vfp_ != NULL);
151  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const RegisterList& list) {
156  VIXL_ASSERT(available_ != NULL);
157  available_->SetList(available_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
162  VIXL_ASSERT(available_vfp_ != NULL);
163  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
164}
165
166
167void UseScratchRegisterScope::ExcludeAll() {
168  if (available_ != NULL) {
169    available_->SetList(0);
170  }
171  if (available_vfp_ != NULL) {
172    available_vfp_->SetList(0);
173  }
174}
175
176
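// Add `label` to the pool (if it is not already in one), mark its most recent
// forward reference as a branch, and lower the pool checkpoint if this label
// needs a veneer earlier than any label seen so far.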
177void VeneerPoolManager::AddLabel(Label* label) {
178  if (!label->IsInVeneerPool()) {
179    label->SetVeneerPoolManager(this);
180    labels_.push_back(label);
181  }
182  Label::ForwardReference& back = label->GetBackForwardRef();
183  back.SetIsBranch();
184  label->UpdateCheckpoint();
185  Label::Offset tmp = label->GetCheckpoint();
186  if (checkpoint_ > tmp) {
187    checkpoint_ = tmp;
188    masm_->ComputeCheckpoint();
189  }
190}
191
192
193void VeneerPoolManager::RemoveLabel(Label* label) {
194  label->ClearVeneerPoolManager();
195  if (label->GetCheckpoint() == checkpoint_) {
196    // We have to compute the checkpoint again.
197    checkpoint_ = Label::kMaxOffset;
198    for (std::list<Label*>::iterator it = labels_.begin();
199         it != labels_.end();) {
200      if (*it == label) {
201        it = labels_.erase(it);
202      } else {
203        checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
204        ++it;
205      }
206    }
207    masm_->ComputeCheckpoint();
208  } else {
209    // We only have to remove the label from the list.
210    for (std::list<Label*>::iterator it = labels_.begin();; ++it) {
211      VIXL_ASSERT(it != labels_.end());
212      if (*it == label) {
213        labels_.erase(it);
214        break;
215      }
216    }
217  }
218}
219
220
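// Emit a veneer (an unconditional branch) for every label whose checkpoint
// falls before `target` plus a margin, retarget the branches which are close
// enough to use the veneer, and recompute the pool checkpoint from the
// remaining references.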
221void VeneerPoolManager::Emit(Label::Offset target) {
222  checkpoint_ = Label::kMaxOffset;
223  // Sort the labels by their checkpoints so that no veneer branch goes out
224  // of range.
225  labels_.sort(Label::CompareLabels);
226  // To avoid too many veneers, generate veneers which will be necessary soon.
227  static const size_t kVeneerEmissionMargin = 1 * KBytes;
228  // To avoid too many veneers, also reuse the generated veneers for other
229  // uses which are not too far away.
230  static const size_t kVeneerEmittedMargin = 2 * KBytes;
231  Label::Offset emitted_target = target + kVeneerEmittedMargin;
232  target += kVeneerEmissionMargin;
233  // Reset the checkpoint. It will be computed again in the loop.
234  checkpoint_ = Label::kMaxOffset;
235  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();) {
236    // The labels are sorted. As soon as a veneer is not needed, we can stop.
237    if ((*it)->GetCheckpoint() > target) {
238      checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
239      break;
240    }
241    // Define the veneer.
242    Label veneer;
243    masm_->Bind(&veneer);
244    Label::Offset label_checkpoint = Label::kMaxOffset;
245    // Check all uses of this label.
246    for (Label::ForwardRefList::iterator ref = (*it)->GetFirstForwardRef();
247         ref != (*it)->GetEndForwardRef();) {
248      if (ref->IsBranch()) {
249        if (ref->GetCheckpoint() <= emitted_target) {
250          // Use the veneer.
251          masm_->EncodeLabelFor(*ref, &veneer);
252          ref = (*it)->Erase(ref);
253        } else {
254          // Don't use the veneer => update checkpoint.
255          label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
256          ++ref;
257        }
258      } else {
259        ++ref;
260      }
261    }
262    // Even if this label no longer has any uses, we can keep it in the list
263    // as the next "B" to it would add it back.
264    (*it)->SetCheckpoint(label_checkpoint);
265    checkpoint_ = std::min(checkpoint_, label_checkpoint);
266    // Generate the veneer.
267    masm_->B(*it);
268    ++it;
269  }
270#ifdef VIXL_DEBUG
271  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();
272       ++it) {
273    VIXL_ASSERT((*it)->GetCheckpoint() >= checkpoint_);
274  }
275#endif
276  masm_->ComputeCheckpoint();
277}
278
279
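// Emit the veneer pool and/or the literal pool if generating code up to
// `target` would let them go out of range, then make sure the buffer can hold
// `size` more bytes (growing it if it is managed by VIXL).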
280void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
281  EmitOption option = kBranchRequired;
282  Label after_pools;
283  if (target >= veneer_pool_manager_.GetCheckpoint()) {
284#ifdef VIXL_DEBUG
285    // Here, we can't use an AssemblerAccurateScope as it would call
286    // PerformEnsureEmit in an infinite loop.
287    bool save_assembler_state = AllowAssembler();
288    SetAllowAssembler(true);
289#endif
290    b(&after_pools);
291#ifdef VIXL_DEBUG
292    SetAllowAssembler(false);
293#endif
294    veneer_pool_manager_.Emit(target);
295    option = kNoBranchRequired;
296#ifdef VIXL_DEBUG
297    SetAllowAssembler(save_assembler_state);
298#endif
299  }
300  // Check if the macro-assembler's internal literal pool should be emitted
301  // to avoid any overflow. If we already generated the veneers, we can
302  // emit the pool (the branch is already done).
303  VIXL_ASSERT(GetCursorOffset() <= literal_pool_manager_.GetCheckpoint());
304  if ((target > literal_pool_manager_.GetCheckpoint()) ||
305      (option == kNoBranchRequired)) {
306    // We will generate the literal pool. Generate all the veneers which
307    // would become out of range.
308    size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize();
309    VIXL_ASSERT(IsInt32(literal_pool_size));
310    Label::Offset veneers_target =
311        target + static_cast<Label::Offset>(literal_pool_size);
312    VIXL_ASSERT(veneers_target >= 0);
313    if (veneers_target >= veneer_pool_manager_.GetCheckpoint()) {
314      veneer_pool_manager_.Emit(veneers_target);
315    }
316    EmitLiteralPool(option);
317  }
318  BindHelper(&after_pools);
319  if (GetBuffer()->IsManaged()) {
320    bool grow_requested;
321    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
322    if (grow_requested) ComputeCheckpoint();
323  }
324}
325
326
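// Recompute `checkpoint_` as the nearest position at which something must be
// done: the veneer pool checkpoint, the literal pool checkpoint minus room
// for a worst-case veneer pool and a branch over the literal pool, or the end
// of the current buffer.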
327void MacroAssembler::ComputeCheckpoint() {
328  checkpoint_ = veneer_pool_manager_.GetCheckpoint();
329  if (literal_pool_manager_.GetCheckpoint() != Label::kMaxOffset) {
330    size_t veneer_max_size = veneer_pool_manager_.GetMaxSize();
331    VIXL_ASSERT(IsInt32(veneer_max_size));
332    // We must be able to generate the pool and a branch over the pool.
333    Label::Offset tmp = literal_pool_manager_.GetCheckpoint() -
334                        static_cast<Label::Offset>(veneer_max_size +
335                                                   kMaxInstructionSizeInBytes);
336    VIXL_ASSERT(tmp >= 0);
337    checkpoint_ = std::min(checkpoint_, tmp);
338  }
339  size_t buffer_size = GetBuffer()->GetCapacity();
340  VIXL_ASSERT(IsInt32(buffer_size));
341  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
342  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
343}
344
345
346void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
347  // 32-bit table A32:
348  // adr ip, table
349  // add ip, ip, r1, lsl 2
350  // ldr ip, [ip]
351  // jmp: add pc, pc, ip, lsl 2
352  // table:
353  // .int (case_0 - (jmp + 8)) >> 2
354  // .int (case_1 - (jmp + 8)) >> 2
355  // .int (case_2 - (jmp + 8)) >> 2
356
357  // 16-bit table T32:
358  // adr ip, table
359  // jmp: tbh ip, r1
360  // table:
361  // .short (case_0 - (jmp + 4)) >> 1
362  // .short (case_1 - (jmp + 4)) >> 1
363  // .short (case_2 - (jmp + 4)) >> 1
364  // case_0:
365  //   ...
366  //   b end_switch
367  // case_1:
368  //   ...
369  //   b end_switch
370  // ...
371  // end_switch:
372  Label jump_table;
373  UseScratchRegisterScope temps(this);
374  Register scratch = temps.Acquire();
375  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
376
377  // Jump to the default case if reg is not in [0, table->GetLength()[
378  Cmp(reg, table->GetLength());
379  B(ge, table->GetDefaultLabel());
380
381  Adr(scratch, &jump_table);
382  if (IsUsingA32()) {
383    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
384    switch (table->GetOffsetShift()) {
385      case 0:
386        Ldrb(scratch, MemOperand(scratch));
387        break;
388      case 1:
389        Ldrh(scratch, MemOperand(scratch));
390        break;
391      case 2:
392        Ldr(scratch, MemOperand(scratch));
393        break;
394      default:
395        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
396    }
397    // Emit whatever needs to be emitted if we want to
398    // correctly record the position of the branch instruction
399    uint32_t branch_location = GetCursorOffset();
400    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
401    AssemblerAccurateScope scope(this,
402                                 table_size + kA32InstructionSizeInBytes,
403                                 CodeBufferCheckScope::kMaximumSize);
404    add(pc, pc, Operand(scratch, LSL, 2));
405    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
406    bind(&jump_table);
407    GenerateSwitchTable(table, table_size);
408  } else {
409    // Thumb mode - We have tbb and tbh to do this for 8-bit and 16-bit
410    // offsets, but for 32-bit offsets we use the same code sequence as A32.
411    if (table->GetOffsetShift() == 2) {
412      // 32bit offsets
413      Add(scratch, scratch, Operand(reg, LSL, 2));
414      Ldr(scratch, MemOperand(scratch));
415      // Cannot use add pc, pc, r lsl 1 as this is unpredictable in T32,
416      // so let's do the shift before
417      Lsl(scratch, scratch, 1);
418      // Emit whatever needs to be emitted if we want to
419      // correctly record the position of the branch instruction
420      uint32_t branch_location = GetCursorOffset();
421      table->SetBranchLocation(branch_location +
422                               GetArchitectureStatePCOffset());
423      AssemblerAccurateScope scope(this,
424                                   table_size + kMaxInstructionSizeInBytes,
425                                   CodeBufferCheckScope::kMaximumSize);
426      add(pc, pc, scratch);
427      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
428      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
429      bind(&jump_table);
430      GenerateSwitchTable(table, table_size);
431    } else {
432      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
433                  (table->GetOffsetShift() == 1));
434      // Emit whatever needs to be emitted if we want to
435      // correctly record the position of the branch instruction
436      uint32_t branch_location = GetCursorOffset();
437      table->SetBranchLocation(branch_location +
438                               GetArchitectureStatePCOffset());
439      AssemblerAccurateScope scope(this,
440                                   table_size + kMaxInstructionSizeInBytes,
441                                   CodeBufferCheckScope::kMaximumSize);
442      if (table->GetOffsetShift() == 0) {
443        // 8bit offsets
444        tbb(scratch, reg);
445      } else {
446        // 16bit offsets
447        tbh(scratch, reg);
448      }
449      // tbb/tbh is a 32bit instruction
450      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
451      bind(&jump_table);
452      GenerateSwitchTable(table, table_size);
453    }
454  }
455}
456
457
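// Bind the table to the current location and reserve zero-initialised space
// for it; the per-case offsets are filled in later, when the cases are bound
// (see Case() and EndSwitch()).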
458void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
459  table->BindTable(GetCursorOffset());
460  for (int i = 0; i < table_size / 4; i++) {
461    GetBuffer()->Emit32(0);
462  }
463}
464
465
466// switch/case/default : case
467// case_index is assumed to be < table->GetLength()
468// which is checked in JumpTable::Link and Table::SetPresenceBit
469void MacroAssembler::Case(JumpTableBase* table, int case_index) {
470  table->Link(this, case_index, GetCursorOffset());
471  table->SetPresenceBitForCase(case_index);
472}
473
474// switch/case/default : default
475void MacroAssembler::Default(JumpTableBase* table) {
476  Bind(table->GetDefaultLabel());
477}
478
479// switch/case/default : break
480void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
481
482// switch/case/default : finalize
483// Manage the default path, mostly. All empty offsets in the jumptable
484// will point to default.
485// All values not in [0, table->GetLength()[ are already pointing here anyway.
486void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
487
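// Materialise a 32-bit immediate which could not be encoded directly: use a
// single mov for 16-bit values, a single mvn when the inverted value is
// encodable, and a mov/movt pair otherwise.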
488void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
489                                                Register tmp,
490                                                uint32_t imm) {
491  if (IsUintN(16, imm)) {
492    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
493    mov(cond, tmp, imm & 0xffff);
494    return;
495  }
496  if (IsUsingT32()) {
497    if (ImmediateT32::IsImmediateT32(~imm)) {
498      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
499      mvn(cond, tmp, ~imm);
500      return;
501    }
502  } else {
503    if (ImmediateA32::IsImmediateA32(~imm)) {
504      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
505      mvn(cond, tmp, ~imm);
506      return;
507    }
508  }
509  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
510  mov(cond, tmp, imm & 0xffff);
511  movt(cond, tmp, imm >> 16);
512}
513
514
515void MacroAssembler::PadToMinimumBranchRange(Label* label) {
516  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
517  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
518    uint32_t location = last_reference->GetLocation();
519    if (location + k16BitT32InstructionSizeInBytes ==
520        static_cast<uint32_t>(GetCursorOffset())) {
521      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
522      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
523        VIXL_ASSERT(!InITBlock());
524        // A Cbz or a Cbnz can't jump immediately after the instruction. If the
525        // target is immediately after the Cbz or Cbnz, we insert a nop to
526        // avoid that.
527        EmitT32_16(k16BitT32NopOpcode);
528      }
529    }
530  }
531}
532
533
534HARDFLOAT void PrintfTrampolineRRRR(
535    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
536  printf(format, a, b, c, d);
537}
538
539
540HARDFLOAT void PrintfTrampolineRRRD(
541    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
542  printf(format, a, b, c, d);
543}
544
545
546HARDFLOAT void PrintfTrampolineRRDR(
547    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
548  printf(format, a, b, c, d);
549}
550
551
552HARDFLOAT void PrintfTrampolineRRDD(
553    const char* format, uint32_t a, uint32_t b, double c, double d) {
554  printf(format, a, b, c, d);
555}
556
557
558HARDFLOAT void PrintfTrampolineRDRR(
559    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
560  printf(format, a, b, c, d);
561}
562
563
564HARDFLOAT void PrintfTrampolineRDRD(
565    const char* format, uint32_t a, double b, uint32_t c, double d) {
566  printf(format, a, b, c, d);
567}
568
569
570HARDFLOAT void PrintfTrampolineRDDR(
571    const char* format, uint32_t a, double b, double c, uint32_t d) {
572  printf(format, a, b, c, d);
573}
574
575
576HARDFLOAT void PrintfTrampolineRDDD(
577    const char* format, uint32_t a, double b, double c, double d) {
578  printf(format, a, b, c, d);
579}
580
581
582HARDFLOAT void PrintfTrampolineDRRR(
583    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
584  printf(format, a, b, c, d);
585}
586
587
588HARDFLOAT void PrintfTrampolineDRRD(
589    const char* format, double a, uint32_t b, uint32_t c, double d) {
590  printf(format, a, b, c, d);
591}
592
593
594HARDFLOAT void PrintfTrampolineDRDR(
595    const char* format, double a, uint32_t b, double c, uint32_t d) {
596  printf(format, a, b, c, d);
597}
598
599
600HARDFLOAT void PrintfTrampolineDRDD(
601    const char* format, double a, uint32_t b, double c, double d) {
602  printf(format, a, b, c, d);
603}
604
605
606HARDFLOAT void PrintfTrampolineDDRR(
607    const char* format, double a, double b, uint32_t c, uint32_t d) {
608  printf(format, a, b, c, d);
609}
610
611
612HARDFLOAT void PrintfTrampolineDDRD(
613    const char* format, double a, double b, uint32_t c, double d) {
614  printf(format, a, b, c, d);
615}
616
617
618HARDFLOAT void PrintfTrampolineDDDR(
619    const char* format, double a, double b, double c, uint32_t d) {
620  printf(format, a, b, c, d);
621}
622
623
624HARDFLOAT void PrintfTrampolineDDDD(
625    const char* format, double a, double b, double c, double d) {
626  printf(format, a, b, c, d);
627}
628
629
630void MacroAssembler::Printf(const char* format,
631                            CPURegister reg1,
632                            CPURegister reg2,
633                            CPURegister reg3,
634                            CPURegister reg4) {
635  if (generate_simulator_code_) {
636    PushRegister(reg4);
637    PushRegister(reg3);
638    PushRegister(reg2);
639    PushRegister(reg1);
640    Push(RegisterList(r0, r1));
641    StringLiteral* format_literal =
642        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
643    Adr(r0, format_literal);
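    // Pack the type of each argument into one nibble of r1 so that the
    // handler of kPrintfCode can tell core and VFP arguments apart.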
644    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
645                    (reg2.GetType() << 4) | reg1.GetType();
646    Mov(r1, args);
647    Hvc(kPrintfCode);
648    Pop(RegisterList(r0, r1));
649    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
650               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
651    Drop(size);
652  } else {
653    // Generating code for a native platform => 32-bit environment.
654    // Preserve the caller-saved core registers (r0-r3, r12, lr) and r5,
    // which is used below to re-align the stack.
655    const uint32_t saved_registers_mask =
656        kCallerSavedRegistersMask | (1 << r5.GetCode());
657    Push(RegisterList(saved_registers_mask));
658    // Push VFP registers.
659    Vpush(Untyped64, DRegisterList(d0, 8));
660    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
661    // Find a register which has been saved and which does not need to be
662    // printed; it will be used as a temporary for the flags below.
663    RegisterList available_registers(kCallerSavedRegistersMask);
664    if (reg1.GetType() == CPURegister::kRRegister) {
665      available_registers.Remove(Register(reg1.GetCode()));
666    }
667    if (reg2.GetType() == CPURegister::kRRegister) {
668      available_registers.Remove(Register(reg2.GetCode()));
669    }
670    if (reg3.GetType() == CPURegister::kRRegister) {
671      available_registers.Remove(Register(reg3.GetCode()));
672    }
673    if (reg4.GetType() == CPURegister::kRRegister) {
674      available_registers.Remove(Register(reg4.GetCode()));
675    }
676    Register tmp = available_registers.GetFirstAvailableRegister();
677    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
678    // Push the flags.
679    Mrs(tmp, APSR);
680    Push(tmp);
681    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
682    Push(tmp);
683    // Push the registers to print on the stack.
684    PushRegister(reg4);
685    PushRegister(reg3);
686    PushRegister(reg2);
687    PushRegister(reg1);
688    int core_count = 1;
689    int vfp_count = 0;
690    uint32_t printf_type = 0;
691    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
692    // Reg4 may stay on the stack if all the registers to print are core
693    // registers.
694    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
695    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
696    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
697    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
698    // Ensure that the stack is aligned on 8 bytes.
699    And(r5, sp, 0x7);
700    if (core_count == 5) {
701      // One 32-bit argument (reg4) has been left on the stack => align the
702      // stack before the argument.
704      Pop(r0);
705      Sub(sp, sp, r5);
706      Push(r0);
707    } else {
708      Sub(sp, sp, r5);
709    }
710    // Select the right trampoline depending on the arguments.
711    uintptr_t address;
712    switch (printf_type) {
713      case 0:
714        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
715        break;
716      case 1:
717        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
718        break;
719      case 2:
720        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
721        break;
722      case 3:
723        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
724        break;
725      case 4:
726        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
727        break;
728      case 5:
729        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
730        break;
731      case 6:
732        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
733        break;
734      case 7:
735        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
736        break;
737      case 8:
738        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
739        break;
740      case 9:
741        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
742        break;
743      case 10:
744        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
745        break;
746      case 11:
747        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
748        break;
749      case 12:
750        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
751        break;
752      case 13:
753        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
754        break;
755      case 14:
756        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
757        break;
758      case 15:
759        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
760        break;
761      default:
762        VIXL_UNREACHABLE();
763        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
764        break;
765    }
766    StringLiteral* format_literal =
767        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
768    Adr(r0, format_literal);
769    Mov(ip, Operand::From(address));
770    Blx(ip);
771    // If register reg4 was left on the stack => skip it.
772    if (core_count == 5) Drop(kRegSizeInBytes);
773    // Restore the stack as it was before alignment.
774    Add(sp, sp, r5);
775    // Restore the flags.
776    Pop(tmp);
777    Vmsr(FPSCR, tmp);
778    Pop(tmp);
779    Msr(APSR_nzcvqg, tmp);
780    // Restore the registers.
781    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
782    Vpop(Untyped64, DRegisterList(d0, 8));
783    Pop(RegisterList(saved_registers_mask));
784  }
785}
786
787
788void MacroAssembler::PushRegister(CPURegister reg) {
789  switch (reg.GetType()) {
790    case CPURegister::kNoRegister:
791      break;
792    case CPURegister::kRRegister:
793      Push(Register(reg.GetCode()));
794      break;
795    case CPURegister::kSRegister:
796      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
797      break;
798    case CPURegister::kDRegister:
799      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
800      break;
801    case CPURegister::kQRegister:
802      VIXL_UNIMPLEMENTED();
803      break;
804  }
805}
806
807
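// Pop the next argument into the registers expected by the printf
// trampolines: core values go to r1-r3 (r0 holds the format string) and VFP
// values go to d0-d3, with floats widened to double as required for varargs.
// Each bit set in *printf_type marks the corresponding argument as a double,
// which is what selects the trampoline in Printf().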
808void MacroAssembler::PreparePrintfArgument(CPURegister reg,
809                                           int* core_count,
810                                           int* vfp_count,
811                                           uint32_t* printf_type) {
812  switch (reg.GetType()) {
813    case CPURegister::kNoRegister:
814      break;
815    case CPURegister::kRRegister:
816      VIXL_ASSERT(*core_count <= 4);
817      if (*core_count < 4) Pop(Register(*core_count));
818      *core_count += 1;
819      break;
820    case CPURegister::kSRegister:
821      VIXL_ASSERT(*vfp_count < 4);
822      *printf_type |= 1 << (*core_count + *vfp_count - 1);
823      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
824      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
825      *vfp_count += 1;
826      break;
827    case CPURegister::kDRegister:
828      VIXL_ASSERT(*vfp_count < 4);
829      *printf_type |= 1 << (*core_count + *vfp_count - 1);
830      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
831      *vfp_count += 1;
832      break;
833    case CPURegister::kQRegister:
834      VIXL_UNIMPLEMENTED();
835      break;
836  }
837}
838
839
840void MacroAssembler::Delegate(InstructionType type,
841                              InstructionCondROp instruction,
842                              Condition cond,
843                              Register rn,
844                              const Operand& operand) {
845  // movt, sxtb16, teq, uxtb16
846  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
847              (type == kUxtb16));
848
849  if (type == kMovt) {
850    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
851  }
852
853  // This delegate only supports teq with immediates.
854  CONTEXT_SCOPE;
855  if ((type == kTeq) && operand.IsImmediate()) {
856    UseScratchRegisterScope temps(this);
857    Register scratch = temps.Acquire();
858    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
859    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
860    teq(cond, rn, scratch);
861    return;
862  }
863  Assembler::Delegate(type, instruction, cond, rn, operand);
864}
865
866
867void MacroAssembler::Delegate(InstructionType type,
868                              InstructionCondSizeROp instruction,
869                              Condition cond,
870                              EncodingSize size,
871                              Register rn,
872                              const Operand& operand) {
873  // cmn cmp mov movs mvn mvns sxtb sxth tst uxtb uxth
874  CONTEXT_SCOPE;
875  VIXL_ASSERT(size.IsBest());
876  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
877              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
878              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
879              (type == kUxtb) || (type == kUxth));
880  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
881    VIXL_ASSERT((type != kMov) || (type != kMovs));
882    InstructionCondRROp shiftop = NULL;
883    switch (operand.GetShift().GetType()) {
884      case LSL:
885        shiftop = &Assembler::lsl;
886        break;
887      case LSR:
888        shiftop = &Assembler::lsr;
889        break;
890      case ASR:
891        shiftop = &Assembler::asr;
892        break;
893      case RRX:
894        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
895        VIXL_UNREACHABLE();
896        break;
897      case ROR:
898        shiftop = &Assembler::ror;
899        break;
900      default:
901        VIXL_UNREACHABLE();
902    }
903    if (shiftop != NULL) {
904      UseScratchRegisterScope temps(this);
905      Register scratch = temps.Acquire();
906      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
907      (this->*shiftop)(cond,
908                       scratch,
909                       operand.GetBaseRegister(),
910                       operand.GetShiftRegister());
911      (this->*instruction)(cond, size, rn, scratch);
912      return;
913    }
914  }
915  if (operand.IsImmediate()) {
916    uint32_t imm = operand.GetImmediate();
917    switch (type) {
918      case kMov:
919      case kMovs:
920        if (!rn.IsPC()) {
921          // Immediate is too large, but not using PC, so handle with mov{t}.
922          HandleOutOfBoundsImmediate(cond, rn, imm);
923          if (type == kMovs) {
924            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
925            tst(cond, rn, rn);
926          }
927          return;
928        } else if (type == kMov) {
929          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
930          // Immediate is too large and using PC, so handle using a temporary
931          // register.
932          UseScratchRegisterScope temps(this);
933          Register scratch = temps.Acquire();
934          HandleOutOfBoundsImmediate(al, scratch, imm);
935          EnsureEmitFor(kMaxInstructionSizeInBytes);
936          bx(cond, scratch);
937          return;
938        }
939        break;
940      case kCmn:
941      case kCmp:
942        if (IsUsingA32() || !rn.IsPC()) {
943          UseScratchRegisterScope temps(this);
944          Register scratch = temps.Acquire();
945          HandleOutOfBoundsImmediate(cond, scratch, imm);
946          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
947          (this->*instruction)(cond, size, rn, scratch);
948          return;
949        }
950        break;
951      case kMvn:
952      case kMvns:
953        if (!rn.IsPC()) {
954          UseScratchRegisterScope temps(this);
955          Register scratch = temps.Acquire();
956          HandleOutOfBoundsImmediate(cond, scratch, imm);
957          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
958          (this->*instruction)(cond, size, rn, scratch);
959          return;
960        }
961        break;
962      case kTst:
963        if (IsUsingA32() || !rn.IsPC()) {
964          UseScratchRegisterScope temps(this);
965          Register scratch = temps.Acquire();
966          HandleOutOfBoundsImmediate(cond, scratch, imm);
967          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
968          (this->*instruction)(cond, size, rn, scratch);
969          return;
970        }
971        break;
972      default:  // kSxtb, Sxth, Uxtb, Uxth
973        break;
974    }
975  }
976  Assembler::Delegate(type, instruction, cond, size, rn, operand);
977}
978
979
980void MacroAssembler::Delegate(InstructionType type,
981                              InstructionCondRROp instruction,
982                              Condition cond,
983                              Register rd,
984                              Register rn,
985                              const Operand& operand) {
986  // orn orns pkhbt pkhtb rsc rscs sxtab sxtab16 sxtah uxtab uxtab16 uxtah
987
988  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
989      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
990      (type == kPkhbt) || (type == kPkhtb)) {
991    UnimplementedDelegate(type);
992    return;
993  }
994
995  // This delegate only handles the following instructions.
996  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
997              (type == kRscs));
998  CONTEXT_SCOPE;
999  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1000    InstructionCondRROp shiftop = NULL;
1001    switch (operand.GetShift().GetType()) {
1002      case LSL:
1003        shiftop = &Assembler::lsl;
1004        break;
1005      case LSR:
1006        shiftop = &Assembler::lsr;
1007        break;
1008      case ASR:
1009        shiftop = &Assembler::asr;
1010        break;
1011      case RRX:
1012        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1013        VIXL_UNREACHABLE();
1014        break;
1015      case ROR:
1016        shiftop = &Assembler::ror;
1017        break;
1018      default:
1019        VIXL_UNREACHABLE();
1020    }
1021    if (shiftop != NULL) {
1022      UseScratchRegisterScope temps(this);
1023      Register rm = operand.GetBaseRegister();
1024      Register rs = operand.GetShiftRegister();
1025      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1026      // a scratch register.
1027      if (!rd.Is(rn)) temps.Include(rd);
1028      if (!rm.Is(rn)) temps.Include(rm);
1029      if (!rs.Is(rn)) temps.Include(rs);
1030      Register scratch = temps.Acquire();
1031      // TODO: The scope length was measured empirically. We should analyse the
1032      // worst-case size and add targeted tests.
1033      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1034      (this->*shiftop)(cond, scratch, rm, rs);
1035      (this->*instruction)(cond, rd, rn, scratch);
1036      return;
1037    }
1038  }
1039  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1040    // The RegisterShiftedRegister case should have been handled above.
1041    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
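    // T32 has no rsc: rsc rd, rn, op computes op - rn - !C, which is equal to
    // op + ~rn + C, so we can invert rn and use adc/adcs instead.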
1042    UseScratchRegisterScope temps(this);
1043    Register negated_rn;
1044    if (operand.IsImmediate() || !operand.GetBaseRegister().Is(rn)) {
1045      // In this case, we can just negate `rn` instead of using a temporary
1046      // register.
1047      negated_rn = rn;
1048    } else {
1049      if (!rd.Is(rn)) temps.Include(rd);
1050      negated_rn = temps.Acquire();
1051    }
1052    {
1053      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1054      mvn(cond, negated_rn, rn);
1055    }
1056    if (type == kRsc) {
1057      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1058      adc(cond, rd, negated_rn, operand);
1059      return;
1060    }
1061    // TODO: We shouldn't have to specify how much space the next instruction
1062    // needs.
1063    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1064    adcs(cond, rd, negated_rn, operand);
1065    return;
1066  }
1067  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1068    // TODO: orn r0, r1, imm -> orr r0, r1, ~imm if doable
1069    //  mvn r0, r2
1070    //  orr r0, r1, r0
1071    Register scratch;
1072    UseScratchRegisterScope temps(this);
1073    // If different from `rn`, we can make use of source and destination
1074    // registers as a scratch register.
1075    if (!rd.Is(rn)) temps.Include(rd);
1076    if (!operand.IsImmediate() && !operand.GetBaseRegister().Is(rn)) {
1077      temps.Include(operand.GetBaseRegister());
1078    }
1079    if (operand.IsRegisterShiftedRegister() &&
1080        !operand.GetShiftRegister().Is(rn)) {
1081      temps.Include(operand.GetShiftRegister());
1082    }
1083    scratch = temps.Acquire();
1084    {
1085      // TODO: We shouldn't have to specify how much space the next instruction
1086      // needs.
1087      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1088      mvn(cond, scratch, operand);
1089    }
1090    if (type == kOrns) {
1091      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1092      orrs(cond, rd, rn, scratch);
1093      return;
1094    }
1095    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1096    orr(cond, rd, rn, scratch);
1097    return;
1098  }
1099  if (operand.IsImmediate()) {
1100    int32_t imm = operand.GetSignedImmediate();
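    // orn rd, rn, #imm is equivalent to orr rd, rn, #~imm, so if the inverted
    // immediate is encodable we can use the plain orr/orrs forms.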
1101    if (ImmediateT32::IsImmediateT32(~imm)) {
1102      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1103      if (IsUsingT32()) {
1104        switch (type) {
1105          case kOrn:
1106            orr(cond, rd, rn, ~imm);
1107            return;
1108          case kOrns:
1109            orrs(cond, rd, rn, ~imm);
1110            return;
1111          default:
1112            break;
1113        }
1114      }
1115    }
1116    UseScratchRegisterScope temps(this);
1117    // Allow using the destination as a scratch register if possible.
1118    if (!rd.Is(rn)) temps.Include(rd);
1119    Register scratch = temps.Acquire();
1120    HandleOutOfBoundsImmediate(cond, scratch, imm);
1121    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1122    (this->*instruction)(cond, rd, rn, scratch);
1123    return;
1124  }
1125  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1126}
1127
1128
1129void MacroAssembler::Delegate(InstructionType type,
1130                              InstructionCondSizeRROp instruction,
1131                              Condition cond,
1132                              EncodingSize size,
1133                              Register rd,
1134                              Register rn,
1135                              const Operand& operand) {
1136  // adc adcs add adds and_ ands asr asrs bic bics eor eors lsl lsls lsr lsrs
1137  // orr orrs ror rors rsb rsbs sbc sbcs sub subs
1138
1139  VIXL_ASSERT(
1140      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1141      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1142      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1143      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1144      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1145      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1146      (type == kSub) || (type == kSubs));
1147
1148  CONTEXT_SCOPE;
1149  VIXL_ASSERT(size.IsBest());
1150  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1151    InstructionCondRROp shiftop = NULL;
1152    switch (operand.GetShift().GetType()) {
1153      case LSL:
1154        shiftop = &Assembler::lsl;
1155        break;
1156      case LSR:
1157        shiftop = &Assembler::lsr;
1158        break;
1159      case ASR:
1160        shiftop = &Assembler::asr;
1161        break;
1162      case RRX:
1163        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1164        VIXL_UNREACHABLE();
1165        break;
1166      case ROR:
1167        shiftop = &Assembler::ror;
1168        break;
1169      default:
1170        VIXL_UNREACHABLE();
1171    }
1172    if (shiftop != NULL) {
1173      UseScratchRegisterScope temps(this);
1174      Register rm = operand.GetBaseRegister();
1175      Register rs = operand.GetShiftRegister();
1176      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1177      // a scratch register.
1178      if (!rd.Is(rn)) temps.Include(rd);
1179      if (!rm.Is(rn)) temps.Include(rm);
1180      if (!rs.Is(rn)) temps.Include(rs);
1181      Register scratch = temps.Acquire();
1182      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1183      (this->*shiftop)(cond, scratch, rm, rs);
1184      (this->*instruction)(cond, size, rd, rn, scratch);
1185      return;
1186    }
1187  }
1188  if (operand.IsImmediate()) {
1189    int32_t imm = operand.GetSignedImmediate();
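    // orr rd, rn, #imm is equivalent to orn rd, rn, #~imm, so an immediate
    // which is not encodable directly may still fit as an inverted T32
    // immediate.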
1190    if (ImmediateT32::IsImmediateT32(~imm)) {
1191      if (IsUsingT32()) {
1192        switch (type) {
1193          case kOrr:
1194            orn(cond, rd, rn, ~imm);
1195            return;
1196          case kOrrs:
1197            orns(cond, rd, rn, ~imm);
1198            return;
1199          default:
1200            break;
1201        }
1202      }
1203    }
1204    if (imm < 0) {
1205      InstructionCondSizeRROp asmcb = NULL;
1206      // Add and sub are equivalent using an arithmetic negation:
1207      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1208      // Add and sub with carry are equivalent using a bitwise NOT:
1209      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
1210      switch (type) {
1211        case kAdd:
1212          asmcb = &Assembler::sub;
1213          imm = -imm;
1214          break;
1215        case kAdds:
1216          asmcb = &Assembler::subs;
1217          imm = -imm;
1218          break;
1219        case kSub:
1220          asmcb = &Assembler::add;
1221          imm = -imm;
1222          break;
1223        case kSubs:
1224          asmcb = &Assembler::adds;
1225          imm = -imm;
1226          break;
1227        case kAdc:
1228          asmcb = &Assembler::sbc;
1229          imm = ~imm;
1230          break;
1231        case kAdcs:
1232          asmcb = &Assembler::sbcs;
1233          imm = ~imm;
1234          break;
1235        case kSbc:
1236          asmcb = &Assembler::adc;
1237          imm = ~imm;
1238          break;
1239        case kSbcs:
1240          asmcb = &Assembler::adcs;
1241          imm = ~imm;
1242          break;
1243        default:
1244          break;
1245      }
1246      if (asmcb != NULL) {
1247        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1248        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1249        return;
1250      }
1251    }
1252    UseScratchRegisterScope temps(this);
1253    // Allow using the destination as a scratch register if possible.
1254    if (!rd.Is(rn)) temps.Include(rd);
1255    Register scratch = temps.Acquire();
1256    // TODO: The scope length was measured empirically. We should analyse the
1257    // worst-case size and add targeted tests.
1258    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1259    mov(cond, scratch, operand.GetImmediate());
1260    (this->*instruction)(cond, size, rd, rn, scratch);
1261    return;
1262  }
1263  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1264}
1265
1266
1267void MacroAssembler::Delegate(InstructionType type,
1268                              InstructionRL instruction,
1269                              Register rn,
1270                              Label* label) {
1271  // cbz cbnz
1272  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1273
1274  CONTEXT_SCOPE;
1275  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1276  if (IsUsingA32()) {
1277    if (type == kCbz) {
1278      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1279    } else {
1280      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1281    }
1282  } else if (rn.IsLow()) {
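    // cbz/cbnz can only branch forwards over a small range, so when the label
    // cannot be reached directly, branch over an unconditional b using the
    // inverted test.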
1283    switch (type) {
1284      case kCbnz: {
1285        Label done;
1286        cbz(rn, &done);
1287        b(label);
1288        Bind(&done);
1289        return;
1290      }
1291      case kCbz: {
1292        Label done;
1293        cbnz(rn, &done);
1294        b(label);
1295        Bind(&done);
1296        return;
1297      }
1298      default:
1299        break;
1300    }
1301  }
1302  Assembler::Delegate(type, instruction, rn, label);
1303}
1304
1305
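// Return true if every byte of `imm` is either 0x00 or 0xff, i.e. if the
// value can be encoded as a vmov.i64 byte mask.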
1306template <typename T>
1307static inline bool IsI64BitPattern(T imm) {
1308  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1309    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1310  }
1311  return true;
1312}
1313
1314
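// Return true if every byte of `imm` is identical, i.e. if the value is a
// replication of its lowest byte and can be encoded as a vmov.i8 immediate.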
1315template <typename T>
1316static inline bool IsI8BitPattern(T imm) {
1317  uint8_t imm8 = imm & 0xff;
1318  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1319    imm >>= 8;
1320    if ((imm & 0xff) != imm8) return false;
1321  }
1322  return true;
1323}
1324
1325
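// Return true if ~imm32 fits one of the vmov.i32 immediate forms (a single
// significant byte, optionally followed by a run of 0xff bytes), so that
// "vmov.i32 rd, #imm32" can be emitted as "vmvn.i32 rd, #~imm32".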
1326static inline bool CanBeInverted(uint32_t imm32) {
1327  uint32_t fill8 = 0;
1328
1329  if ((imm32 & 0xffffff00) == 0xffffff00) {
1330    //    11111111 11111111 11111111 abcdefgh
1331    return true;
1332  }
1333  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1334    fill8 = imm32 & 0xff;
1335    imm32 >>= 8;
1336    if ((imm32 >> 8) == 0xffff) {
1337      //    11111111 11111111 abcdefgh 00000000
1338      // or 11111111 11111111 abcdefgh 11111111
1339      return true;
1340    }
1341    if ((imm32 & 0xff) == fill8) {
1342      imm32 >>= 8;
1343      if ((imm32 >> 8) == 0xff) {
1344        //    11111111 abcdefgh 00000000 00000000
1345        // or 11111111 abcdefgh 11111111 11111111
1346        return true;
1347      }
1348      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1349        //    abcdefgh 11111111 11111111 11111111
1350        return true;
1351      }
1352    }
1353  }
1354  return false;
1355}
1356
1357
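// Replicate `imm` across all the T-sized lanes of the wider type RES, e.g.
// replicate<uint64_t>(uint32_t(0xff0000ff)) == UINT64_C(0xff0000ffff0000ff).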
1358template <typename RES, typename T>
1359static inline RES replicate(T imm) {
1360  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1361              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1362  RES res = imm;
1363  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1364    res = (res << (sizeof(T) * 8)) | imm;
1365  }
1366  return res;
1367}
1368
1369
1370void MacroAssembler::Delegate(InstructionType type,
1371                              InstructionCondDtSSop instruction,
1372                              Condition cond,
1373                              DataType dt,
1374                              SRegister rd,
1375                              const SOperand& operand) {
1376  CONTEXT_SCOPE;
1377  if (type == kVmov) {
1378    if (operand.IsImmediate() && dt.Is(F32)) {
1379      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1380      if (neon_imm.CanConvert<float>()) {
1381        // movw ip, imm16
1382        // movt ip, imm16
1383        // vmov s0, ip
1384        UseScratchRegisterScope temps(this);
1385        Register scratch = temps.Acquire();
1386        float f = neon_imm.GetImmediate<float>();
1387        // TODO: The scope length was measured empirically. We should
1388        // analyse the worst-case size and add targeted tests.
1390        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1391        mov(cond, scratch, FloatToRawbits(f));
1392        vmov(cond, rd, scratch);
1393        return;
1394      }
1395    }
1396  }
1397  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1398}
1399
1400
1401void MacroAssembler::Delegate(InstructionType type,
1402                              InstructionCondDtDDop instruction,
1403                              Condition cond,
1404                              DataType dt,
1405                              DRegister rd,
1406                              const DOperand& operand) {
1407  CONTEXT_SCOPE;
1408  if (type == kVmov) {
1409    if (operand.IsImmediate()) {
1410      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1411      switch (dt.GetValue()) {
1412        case I32:
1413          if (neon_imm.CanConvert<uint32_t>()) {
1414            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1415            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1416            if (IsI8BitPattern(imm)) {
1417              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1418              vmov(cond, I8, rd, imm & 0xff);
1419              return;
1420            }
1421            // vmov.i32 d0, 0xff0000ff will translate into
1422            // vmov.i64 d0, 0xff0000ffff0000ff
1423            if (IsI64BitPattern(imm)) {
1424              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1425              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1426              return;
1427            }
1428            // vmov.i32 d0, 0xffab0000 will translate into
1429            // vmvn.i32 d0, 0x0054ffff
1430            if (cond.Is(al) && CanBeInverted(imm)) {
1431              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1432              vmvn(I32, rd, ~imm);
1433              return;
1434            }
1435          }
1436          break;
1437        case I16:
1438          if (neon_imm.CanConvert<uint16_t>()) {
1439            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1440            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1441            if (IsI8BitPattern(imm)) {
1442              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1443              vmov(cond, I8, rd, imm & 0xff);
1444              return;
1445            }
1446          }
1447          break;
1448        case I64:
1449          if (neon_imm.CanConvert<uint64_t>()) {
1450            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1451            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1452            if (IsI8BitPattern(imm)) {
1453              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1454              vmov(cond, I8, rd, imm & 0xff);
1455              return;
1456            }
1457            // mov ip, lo(imm64)
1458            // vdup d0, ip
1459            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1460            // preserved
1461            {
1462              UseScratchRegisterScope temps(this);
1463              Register scratch = temps.Acquire();
1464              {
1465                // TODO: The scope length was measured empirically. We
1466                // should analyse the worst-case size and add targeted tests.
1468                CodeBufferCheckScope scope(this,
1469                                           2 * kMaxInstructionSizeInBytes);
1470                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1471              }
1472              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1473              vdup(cond, Untyped32, rd, scratch);
1474            }
1475            // mov ip, hi(imm64)
1476            // vmov d0[1], ip
1477            {
1478              UseScratchRegisterScope temps(this);
1479              Register scratch = temps.Acquire();
1480              {
1481                // TODO: The scope length was measured empirically. We
1482                // should analyse the worst-case size and add targeted tests.
1484                CodeBufferCheckScope scope(this,
1485                                           2 * kMaxInstructionSizeInBytes);
1486                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1487              }
1488              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1489              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1490            }
1491            return;
1492          }
1493          break;
1494        default:
1495          break;
1496      }
1497      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1498          neon_imm.CanConvert<uint32_t>()) {
1499        // mov ip, imm32
1500        // vdup.8 d0, ip
1501        UseScratchRegisterScope temps(this);
1502        Register scratch = temps.Acquire();
1503        {
1504          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1505          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1506        }
1507        DataTypeValue vdup_dt = Untyped32;
1508        switch (dt.GetValue()) {
1509          case I8:
1510            vdup_dt = Untyped8;
1511            break;
1512          case I16:
1513            vdup_dt = Untyped16;
1514            break;
1515          case I32:
1516            vdup_dt = Untyped32;
1517            break;
1518          default:
1519            VIXL_UNREACHABLE();
1520        }
1521        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1522        vdup(cond, vdup_dt, rd, scratch);
1523        return;
1524      }
1525      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1526        float f = neon_imm.GetImmediate<float>();
1527        // Punt to vmov.i32
1528        // TODO: The scope length was guessed based on the double case below. We
1529        // should analyse the worst-case size and add targeted tests.
1530        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1531        vmov(cond, I32, rd, FloatToRawbits(f));
1532        return;
1533      }
1534      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1535        // Punt to vmov.i64
1536        double d = neon_imm.GetImmediate<double>();
1537        // TODO: The scope length was measured empirically. We should
1538        // analyse the worst-case size and add targeted tests.
1540        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1541        vmov(cond, I64, rd, DoubleToRawbits(d));
1542        return;
1543      }
1544    }
1545  }
1546  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1547}
1548
1549
1550void MacroAssembler::Delegate(InstructionType type,
1551                              InstructionCondDtQQop instruction,
1552                              Condition cond,
1553                              DataType dt,
1554                              QRegister rd,
1555                              const QOperand& operand) {
1556  CONTEXT_SCOPE;
1557  if (type == kVmov) {
1558    if (operand.IsImmediate()) {
1559      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1560      switch (dt.GetValue()) {
1561        case I32:
1562          if (neon_imm.CanConvert<uint32_t>()) {
1563            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1564            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1565            if (IsI8BitPattern(imm)) {
1566              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1567              vmov(cond, I8, rd, imm & 0xff);
1568              return;
1569            }
1570            // vmov.i32 d0, 0xff0000ff will translate into
1571            // vmov.i64 d0, 0xff0000ffff0000ff
1572            if (IsI64BitPattern(imm)) {
1573              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1574              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1575              return;
1576            }
1577            // vmov.i32 d0, 0xffab0000 will translate into
1578            // vmvn.i32 d0, 0x0054ffff
1579            if (CanBeInverted(imm)) {
1580              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1581              vmvn(cond, I32, rd, ~imm);
1582              return;
1583            }
1584          }
1585          break;
1586        case I16:
1587          if (neon_imm.CanConvert<uint16_t>()) {
1588            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1589            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1590            if (IsI8BitPattern(imm)) {
1591              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1592              vmov(cond, I8, rd, imm & 0xff);
1593              return;
1594            }
1595          }
1596          break;
1597        case I64:
1598          if (neon_imm.CanConvert<uint64_t>()) {
1599            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1600            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1601            if (IsI8BitPattern(imm)) {
1602              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1603              vmov(cond, I8, rd, imm & 0xff);
1604              return;
1605            }
1606            // mov ip, lo(imm64)
1607            // vdup q0, ip
1608            // vdup is preferred to 'vmov d0[0]' as d0[1-3] don't need to be
1609            // preserved
1610            {
1611              UseScratchRegisterScope temps(this);
1612              Register scratch = temps.Acquire();
1613              {
1614                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1615                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1616              }
1617              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1618              vdup(cond, Untyped32, rd, scratch);
1619            }
1620            // mov ip, hi(imm64)
1621            // vmov.i32 d0[1], ip
1622            // vmov d1, d0
1623            {
1624              UseScratchRegisterScope temps(this);
1625              Register scratch = temps.Acquire();
1626              {
1627                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1628                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1629              }
1630              {
1631                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1632                vmov(cond,
1633                     Untyped32,
1634                     DRegisterLane(rd.GetLowDRegister(), 1),
1635                     scratch);
1636              }
1637              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1638              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1639            }
1640            return;
1641          }
1642          break;
1643        default:
1644          break;
1645      }
1646      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1647          neon_imm.CanConvert<uint32_t>()) {
1648        // mov ip, imm32
1649        // vdup.8 q0, ip
1650        UseScratchRegisterScope temps(this);
1651        Register scratch = temps.Acquire();
1652        {
1653          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1654          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1655        }
1656        DataTypeValue vdup_dt = Untyped32;
1657        switch (dt.GetValue()) {
1658          case I8:
1659            vdup_dt = Untyped8;
1660            break;
1661          case I16:
1662            vdup_dt = Untyped16;
1663            break;
1664          case I32:
1665            vdup_dt = Untyped32;
1666            break;
1667          default:
1668            VIXL_UNREACHABLE();
1669        }
1670        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1671        vdup(cond, vdup_dt, rd, scratch);
1672        return;
1673      }
1674      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1675        // Punt to vmov.i32
1676        float f = neon_imm.GetImmediate<float>();
1677        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1678        vmov(cond, I32, rd, FloatToRawbits(f));
1679        return;
1680      }
1681      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1682        // Punt to vmov.i64
1683        double d = neon_imm.GetImmediate<double>();
1684        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1685        vmov(cond, I64, rd, DoubleToRawbits(d));
1686        return;
1687      }
1688    }
1689  }
1690  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1691}
1692
1693
1694void MacroAssembler::Delegate(InstructionType type,
1695                              InstructionCondSizeRMop instruction,
1696                              Condition cond,
1697                              EncodingSize size,
1698                              Register rd,
1699                              const MemOperand& operand) {
1700  // ldr ldrb ldrh ldrsb ldrsh str strb strh
1701  CONTEXT_SCOPE;
1702  VIXL_ASSERT(size.IsBest());
1703  if (operand.IsImmediate()) {
1704    const Register& rn = operand.GetBaseRegister();
1705    AddrMode addrmode = operand.GetAddrMode();
1706    int32_t offset = operand.GetOffsetImmediate();
1707    bool ok = true;
1708    uint32_t mask = 0;
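    // The mask selected below is the largest immediate offset which can be
    // encoded directly by the load/store: 0xfff for encodings with a 12-bit
    // offset field, 0xff for encodings with an 8-bit offset field.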
1709    switch (type) {
1710      case kLdr:
1711      case kLdrb:
1712      case kStr:
1713      case kStrb:
1714        if (IsUsingA32() || (addrmode == Offset)) {
1715          mask = 0xfff;
1716        } else {
1717          mask = 0xff;
1718        }
1719        break;
1720      case kLdrsb:
1721      case kLdrh:
1722      case kLdrsh:
1723      case kStrh:
1724        if (IsUsingT32() && (addrmode == Offset)) {
1725          mask = 0xfff;
1726        } else {
1727          mask = 0xff;
1728        }
1729        break;
1730      default:
1731        ok = false;
1732        break;
1733    }
1734    if (ok) {
1735      bool negative;
1736      // Try to maximize the offset used by the MemOperand (load_store_offset).
1737      // Add or subtract the part which can't be used by the MemOperand
1738      // (add_sub_offset).
1739      int32_t add_sub_offset;
1740      int32_t load_store_offset;
1741      load_store_offset = offset & mask;
1742      if (offset >= 0) {
1743        negative = false;
1744        add_sub_offset = offset & ~mask;
1745      } else {
1746        negative = true;
1747        add_sub_offset = -offset & ~mask;
1748        if (load_store_offset > 0) add_sub_offset += mask + 1;
1749      }
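      // For example, with mask 0xff and offset 0x1234, load_store_offset is
      // 0x34 and add_sub_offset is 0x1200; with offset -0x1234,
      // load_store_offset is 0xcc and add_sub_offset is 0x1300, so that
      // -0x1300 + 0xcc == -0x1234.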
1750      switch (addrmode) {
1751        case PreIndex:
1752          // Pre-Indexed case:
1753          // ldr r0, [r1, 12345]! will translate into
1754          //   add r1, r1, 12345
1755          //   ldr r0, [r1]
1756          {
1757            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1758            if (negative) {
1759              sub(cond, rn, rn, add_sub_offset);
1760            } else {
1761              add(cond, rn, rn, add_sub_offset);
1762            }
1763          }
1764          {
1765            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1766            (this->*instruction)(cond,
1767                                 size,
1768                                 rd,
1769                                 MemOperand(rn, load_store_offset, PreIndex));
1770          }
1771          return;
1772        case Offset: {
1773          UseScratchRegisterScope temps(this);
1774          // Allow using the destination as a scratch register if possible.
1775          if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1776              !rd.Is(rn)) {
1777            temps.Include(rd);
1778          }
1779          Register scratch = temps.Acquire();
1780          // Offset case:
1781          // ldr r0, [r1, 12345] will translate into
1782          //   add r0, r1, 12345
1783          //   ldr r0, [r0]
1784          {
1785            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1786            if (negative) {
1787              sub(cond, scratch, rn, add_sub_offset);
1788            } else {
1789              add(cond, scratch, rn, add_sub_offset);
1790            }
1791          }
1792          {
1793            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1794            (this->*instruction)(cond,
1795                                 size,
1796                                 rd,
1797                                 MemOperand(scratch, load_store_offset));
1798          }
1799          return;
1800        }
1801        case PostIndex:
1802          // Avoid the unpredictable case 'ldr r0, [r0], imm'
1803          if (!rn.Is(rd)) {
1804            // Post-indexed case:
1805            // ldr r0, [r1], imm32 will translate into
1806            //   ldr r0, [r1]
1807            //   movw ip, imm32 & 0xffff
1808            //   movt ip, imm32 >> 16
1809            //   add r1, r1, ip
1810            {
1811              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1812              (this->*instruction)(cond,
1813                                   size,
1814                                   rd,
1815                                   MemOperand(rn,
1816                                              load_store_offset,
1817                                              PostIndex));
1818            }
1819            {
1820              CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1821              if (negative) {
1822                sub(cond, rn, rn, add_sub_offset);
1823              } else {
1824                add(cond, rn, rn, add_sub_offset);
1825              }
1826            }
1827            return;
1828          }
1829          break;
1830      }
1831    }
1832  }
1833  if (operand.IsPlainRegister()) {
1834    const Register& rn = operand.GetBaseRegister();
1835    AddrMode addrmode = operand.GetAddrMode();
1836    const Register& rm = operand.GetOffsetRegister();
1837    switch (addrmode) {
1838      case PreIndex:
1839        // Pre-Indexed case:
1840        // ldr r0, [r1, r2]! will translate into
1841        //   add r1, r1, r2
1842        //   ldr r0, [r1]
1843        {
1844          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1845          if (operand.GetSign().IsPlus()) {
1846            add(cond, rn, rn, rm);
1847          } else {
1848            sub(cond, rn, rn, rm);
1849          }
1850        }
1851        {
1852          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1853          (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1854        }
1855        return;
1856      case Offset: {
1857        UseScratchRegisterScope temps(this);
1858        // Allow using the destination as a scratch register if possible.
1859        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1860            !rd.Is(rn)) {
1861          temps.Include(rd);
1862        }
1863        Register scratch = temps.Acquire();
1864        // Offset case:
1865        // ldr r0, [r1, r2] will translate into
1866        //   add r0, r1, r2
1867        //   ldr r0, [r0]
1868        {
1869          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1870          if (operand.GetSign().IsPlus()) {
1871            add(cond, scratch, rn, rm);
1872          } else {
1873            sub(cond, scratch, rn, rm);
1874          }
1875        }
1876        {
1877          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1878          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1879        }
1880        return;
1881      }
1882      case PostIndex:
1883        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1884        if (!rn.Is(rd)) {
1885          // Post-indexed case:
1886          // ldr r0, [r1], r2 will translate into
1887          //   ldr r0, [r1]
1888          //   add r1, r1, r2
1889          {
1890            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1891            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1892          }
1893          {
1894            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1895            if (operand.GetSign().IsPlus()) {
1896              add(cond, rn, rn, rm);
1897            } else {
1898              sub(cond, rn, rn, rm);
1899            }
1900          }
1901          return;
1902        }
1903        break;
1904    }
1905  }
1906  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1907}
1908
1909
1910void MacroAssembler::Delegate(InstructionType type,
1911                              InstructionCondRRMop instruction,
1912                              Condition cond,
1913                              Register rt,
1914                              Register rt2,
1915                              const MemOperand& operand) {
1916  // ldaexd, ldrd, ldrexd, stlex, stlexb, stlexh, strd, strex, strexb, strexh
1917
1918  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
1919      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
1920      (type == kStrexb) || (type == kStrexh)) {
1921    UnimplementedDelegate(type);
1922    return;
1923  }
1924
1925  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
1926
1927  CONTEXT_SCOPE;
1928
1929  // TODO: Should we allow these cases?
1930  if (IsUsingA32()) {
1931    // The first register needs to be even.
1932    if ((rt.GetCode() & 1) != 0) {
1933      UnimplementedDelegate(type);
1934      return;
1935    }
1936    // Registers need to be adjacent.
1937    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
1938      UnimplementedDelegate(type);
1939      return;
1940    }
1941    // LDRD lr, pc, [...] is not allowed.
1942    if (rt.Is(lr)) {
1943      UnimplementedDelegate(type);
1944      return;
1945    }
1946  }
1947
1948  if (operand.IsImmediate()) {
1949    const Register& rn = operand.GetBaseRegister();
1950    AddrMode addrmode = operand.GetAddrMode();
1951    int32_t offset = operand.GetOffsetImmediate();
1952    switch (addrmode) {
1953      case PreIndex: {
1954        // Allow using the destinations as scratch registers if possible.
1955        UseScratchRegisterScope temps(this);
1956        if (type == kLdrd) {
1957          if (!rt.Is(rn)) temps.Include(rt);
1958          if (!rt2.Is(rn)) temps.Include(rt2);
1959        }
1960
1961        // Pre-Indexed case:
1962        // ldrd r0, r1, [r2, 12345]! will translate into
1963        //   add r2, 12345
1964        //   ldrd r0, r1, [r2]
1965        {
1966          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1967          add(cond, rn, rn, offset);
1968        }
1969        {
1970          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1971          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
1972        }
1973        return;
1974      }
1975      case Offset: {
1976        UseScratchRegisterScope temps(this);
1977        // Allow using the destinations as scratch registers if possible.
1978        if (type == kLdrd) {
1979          if (!rt.Is(rn)) temps.Include(rt);
1980          if (!rt2.Is(rn)) temps.Include(rt2);
1981        }
1982        Register scratch = temps.Acquire();
1983        // Offset case:
1984        // ldrd r0, r1, [r2, 12345] will translate into
1985        //   add r0, r2, 12345
1986        //   ldrd r0, r1, [r0]
1987        {
1988          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1989          add(cond, scratch, rn, offset);
1990        }
1991        {
1992          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1993          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
1994        }
1995        return;
1996      }
1997      case PostIndex:
1998        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
1999        if (!rn.Is(rt) && !rn.Is(rt2)) {
2000          // Post-indexed case:
2001          // ldrd r0, r1, [r2], imm32 will translate into
2002          //   ldrd r0, r1, [r2]
2003          //   movw ip, imm32 & 0xffff
2004          //   movt ip, imm32 >> 16
2005          //   add r2, ip
2006          {
2007            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2008            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2009          }
2010          {
2011            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2012            add(cond, rn, rn, offset);
2013          }
2014          return;
2015        }
2016        break;
2017    }
2018  }
2019  if (operand.IsPlainRegister()) {
2020    const Register& rn = operand.GetBaseRegister();
2021    const Register& rm = operand.GetOffsetRegister();
2022    AddrMode addrmode = operand.GetAddrMode();
2023    switch (addrmode) {
2024      case PreIndex:
2025        // ldrd r0, r1, [r2, r3]! will translate into
2026        //   add r2, r3
2027        //   ldrd r0, r1, [r2]
2028        {
2029          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2030          if (operand.GetSign().IsPlus()) {
2031            add(cond, rn, rn, rm);
2032          } else {
2033            sub(cond, rn, rn, rm);
2034          }
2035        }
2036        {
2037          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2038          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2039        }
2040        return;
2041      case PostIndex:
2042        // ldrd r0, r1, [r2], r3 will translate into
2043        //   ldrd r0, r1, [r2]
2044        //   add r2, r3
2045        {
2046          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2047          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2048        }
2049        {
2050          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2051          if (operand.GetSign().IsPlus()) {
2052            add(cond, rn, rn, rm);
2053          } else {
2054            sub(cond, rn, rn, rm);
2055          }
2056        }
2057        return;
2058      case Offset: {
2059        UseScratchRegisterScope temps(this);
2060        // Allow using the destinations as scratch registers if possible.
2061        if (type == kLdrd) {
2062          if (!rt.Is(rn)) temps.Include(rt);
2063          if (!rt2.Is(rn)) temps.Include(rt2);
2064        }
2065        Register scratch = temps.Acquire();
2066        // Offset case:
2067        // ldrd r0, r1, [r2, r3] will translate into
2068        //   add r0, r2, r3
2069        //   ldrd r0, r1, [r0]
2070        {
2071          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2072          if (operand.GetSign().IsPlus()) {
2073            add(cond, scratch, rn, rm);
2074          } else {
2075            sub(cond, scratch, rn, rm);
2076          }
2077        }
2078        {
2079          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2080          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2081        }
2082        return;
2083      }
2084    }
2085  }
2086  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2087}
2088
2089
2090void MacroAssembler::Delegate(InstructionType type,
2091                              InstructionCondDtSMop instruction,
2092                              Condition cond,
2093                              DataType dt,
2094                              SRegister rd,
2095                              const MemOperand& operand) {
2096  // vldr.32 vstr.32
2097  CONTEXT_SCOPE;
2098  if (operand.IsImmediate()) {
2099    const Register& rn = operand.GetBaseRegister();
2100    AddrMode addrmode = operand.GetAddrMode();
2101    int32_t offset = operand.GetOffsetImmediate();
2102    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2103                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2104    if (rn.IsPC()) {
2105      VIXL_ABORT_WITH_MSG(
2106          "The MacroAssembler does not convert vldr or vstr with a PC base "
2107          "register.\n");
2108    }
2109    switch (addrmode) {
2110      case PreIndex:
2111        // Pre-Indexed case:
2112        // vldr.32 s0, [r1, 12345]! will translate into
2113        //   add r1, 12345
2114        //   vldr.32 s0, [r1]
2115        if (offset != 0) {
2116          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2117          add(cond, rn, rn, offset);
2118        }
2119        {
2120          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2121          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2122        }
2123        return;
2124      case Offset: {
2125        UseScratchRegisterScope temps(this);
2126        Register scratch = temps.Acquire();
2127        // Offset case:
2128        // vldr.32 s0, [r1, 12345] will translate into
2129        //   add ip, r1, 12345
2130        //   vldr.32 s0, [ip]
2131        {
2132          VIXL_ASSERT(offset != 0);
2133          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2134          add(cond, scratch, rn, offset);
2135        }
2136        {
2137          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2138          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2139        }
2140        return;
2141      }
2142      case PostIndex:
2143        // Post-indexed case:
2144        // vldr.32 s0, [r1], imm32 will translate into
2145        //   vldr.32 s0, [r1]
2146        //   movw ip, imm32 & 0xffff
2147        //   movt ip, imm32 >> 16
2148        //   add r1, ip
2149        {
2150          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2151          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2152        }
2153        if (offset != 0) {
2154          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2155          add(cond, rn, rn, offset);
2156        }
2157        return;
2158    }
2159  }
2160  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2161}
2162
2163
2164void MacroAssembler::Delegate(InstructionType type,
2165                              InstructionCondDtDMop instruction,
2166                              Condition cond,
2167                              DataType dt,
2168                              DRegister rd,
2169                              const MemOperand& operand) {
2170  // vldr.64 vstr.64
2171  CONTEXT_SCOPE;
2172  if (operand.IsImmediate()) {
2173    const Register& rn = operand.GetBaseRegister();
2174    AddrMode addrmode = operand.GetAddrMode();
2175    int32_t offset = operand.GetOffsetImmediate();
2176    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2177                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2178    if (rn.IsPC()) {
2179      VIXL_ABORT_WITH_MSG(
2180          "The MacroAssembler does not convert vldr or vstr with a PC base "
2181          "register.\n");
2182    }
2183    switch (addrmode) {
2184      case PreIndex:
2185        // Pre-Indexed case:
2186        // vldr.64 d0, [r1, 12345]! will translate into
2187        //   add r1, 12345
2188        //   vldr.64 d0, [r1]
2189        if (offset != 0) {
2190          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2191          add(cond, rn, rn, offset);
2192        }
2193        {
2194          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2195          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2196        }
2197        return;
2198      case Offset: {
2199        UseScratchRegisterScope temps(this);
2200        Register scratch = temps.Acquire();
2201        // Offset case:
2202        // vldr.64 d0, [r1, 12345] will translate into
2203        //   add ip, r1, 12345
2204        //   vldr.64 d0, [ip]
2205        {
2206          VIXL_ASSERT(offset != 0);
2207          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2208          add(cond, scratch, rn, offset);
2209        }
2210        {
2211          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2212          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2213        }
2214        return;
2215      }
2216      case PostIndex:
2217        // Post-indexed case:
2218        // vldr.64 d0, [r1], imm32 will translate into
2219        //   vldr.64 d0, [r1]
2220        //   movw ip, imm32 & 0xffff
2221        //   movt ip, imm32 >> 16
2222        //   add r1, ip
2223        {
2224          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2225          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2226        }
2227        if (offset != 0) {
2228          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2229          add(cond, rn, rn, offset);
2230        }
2231        return;
2232    }
2233  }
2234  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2235}
2236
2237
2238void MacroAssembler::Delegate(InstructionType type,
2239                              InstructionCondMsrOp instruction,
2240                              Condition cond,
2241                              MaskedSpecialRegister spec_reg,
2242                              const Operand& operand) {
2243  USE(type);
2244  VIXL_ASSERT(type == kMsr);
2245  if (operand.IsImmediate()) {
2246    UseScratchRegisterScope temps(this);
2247    Register scratch = temps.Acquire();
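    // msr spec_reg, imm will translate into
    //   mov ip, imm
    //   msr spec_reg, ip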
2248    {
2249      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2250      mov(cond, scratch, operand);
2251    }
2252    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2253    msr(cond, spec_reg, scratch);
2254    return;
2255  }
2256  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2257}
2258
2259#undef CONTEXT_SCOPE
2260#undef TOSTRING
2261#undef STRINGIFY
2262
2263// Start of generated code.
2264// End of generated code.
2265}  // namespace aarch32
2266}  // namespace vixl
2267