// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cctype>

#include "macro-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {


void Pool::Release() {
  if (--monitor_ == 0) {
    // Ensure the pool has not been blocked for too long.
    VIXL_ASSERT(masm_->GetCursorOffset() < checkpoint_);
  }
}


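// Record the position by which this pool must next be emitted. The
// MacroAssembler keeps the minimum over all pools, so checking a single
// value is enough to know whether some pool may be due.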
void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) {
  masm_->checkpoint_ = std::min(masm_->checkpoint_, checkpoint);
  checkpoint_ = checkpoint;
}


LiteralPool::LiteralPool(MacroAssembler* masm)
    : Pool(masm),
      size_(0),
      first_use_(-1),
      recommended_checkpoint_(kNoCheckpointRequired) {}


LiteralPool::~LiteralPool() {
  VIXL_ASSERT(IsEmpty());
  VIXL_ASSERT(!IsBlocked());
  for (std::vector<RawLiteral*>::iterator it = deleted_on_destruction_.begin();
       it != deleted_on_destruction_.end();
       it++) {
    delete *it;
  }
}


void LiteralPool::Reset() {
  std::vector<RawLiteral *>::iterator it, end;
  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
    RawLiteral* literal = *it;
    if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
      delete literal;
    }
  }
  entries_.clear();
  size_ = 0;
  first_use_ = -1;
  Pool::Reset();
  recommended_checkpoint_ = kNoCheckpointRequired;
}


void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty() || IsBlocked()) return;

  ptrdiff_t distance = masm_->GetCursorOffset() + amount - first_use_;
  if (distance >= kRecommendedLiteralPoolRange) {
    Emit(option);
  }
}


void LiteralPool::CheckEmitForBranch(size_t range) {
  if (IsEmpty() || IsBlocked()) return;
  if (GetMaxSize() >= range) Emit();
}

// We use a subclass to access the protected `ExactAssemblyScope` constructor
// giving us control over the pools. This allows us to use this scope within
// code emitting pools without creating a circular dependency.
// We keep the constructor private to restrict usage of this helper class.
class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
 private:
  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm, size_t size)
      : ExactAssemblyScope(masm,
                           size,
                           ExactAssemblyScope::kExactSize,
                           ExactAssemblyScope::kIgnorePools) {}

  friend void LiteralPool::Emit(LiteralPool::EmitOption);
  friend void VeneerPool::Emit(VeneerPool::EmitOption, size_t);
};


void LiteralPool::Emit(EmitOption option) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  size_t pool_size = GetSize();
  size_t emit_size = pool_size;
  if (option == kBranchRequired) emit_size += kInstructionSize;
  Label end_of_pool;

  VIXL_ASSERT(emit_size % kInstructionSize == 0);
  {
    CodeBufferCheckScope guard(masm_,
                               emit_size,
                               CodeBufferCheckScope::kCheck,
                               CodeBufferCheckScope::kExactSize);
#ifdef VIXL_DEBUG
    // Also explicitly disallow usage of the `MacroAssembler` here.
    masm_->SetAllowMacroInstructions(false);
#endif
    if (option == kBranchRequired) {
      ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
      masm_->b(&end_of_pool);
    }

    {
      // Marker indicating the size of the literal pool in 32-bit words.
      VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
      ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
      masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes));
    }

    // Now populate the literal pool.
    std::vector<RawLiteral *>::iterator it, end;
    for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
      VIXL_ASSERT((*it)->IsUsed());
      masm_->place(*it);
    }

    if (option == kBranchRequired) masm_->bind(&end_of_pool);
#ifdef VIXL_DEBUG
    masm_->SetAllowMacroInstructions(true);
#endif
  }

  Reset();
}


void LiteralPool::AddEntry(RawLiteral* literal) {
  // A literal must be registered immediately before its first use. Here we
  // cannot control that it is its first use, but we check no code has been
  // emitted since its last use.
  VIXL_ASSERT(masm_->GetCursorOffset() == literal->GetLastUse());

  UpdateFirstUse(masm_->GetCursorOffset());
  VIXL_ASSERT(masm_->GetCursorOffset() >= first_use_);
  entries_.push_back(literal);
  size_ += literal->GetSize();
}


void LiteralPool::UpdateFirstUse(ptrdiff_t use_position) {
  first_use_ = std::min(first_use_, use_position);
  if (first_use_ == -1) {
    first_use_ = use_position;
    SetNextRecommendedCheckpoint(GetNextRecommendedCheckpoint());
    SetNextCheckpoint(first_use_ + Instruction::kLoadLiteralRange);
  } else {
    VIXL_ASSERT(use_position > first_use_);
  }
}


void VeneerPool::Reset() {
  Pool::Reset();
  unresolved_branches_.Reset();
}


void VeneerPool::Release() {
  if (--monitor_ == 0) {
    VIXL_ASSERT(IsEmpty() ||
                masm_->GetCursorOffset() <
                    unresolved_branches_.GetFirstLimit());
  }
}


void VeneerPool::RegisterUnresolvedBranch(ptrdiff_t branch_pos,
                                          Label* label,
                                          ImmBranchType branch_type) {
  VIXL_ASSERT(!label->IsBound());
  BranchInfo branch_info = BranchInfo(branch_pos, label, branch_type);
  unresolved_branches_.insert(branch_info);
  UpdateNextCheckPoint();
  // TODO: In debug mode register the label with the assembler to make sure it
  // is bound with masm Bind and not asm bind.
}


void VeneerPool::DeleteUnresolvedBranchInfoForLabel(Label* label) {
  if (IsEmpty()) {
    VIXL_ASSERT(checkpoint_ == kNoCheckpointRequired);
    return;
  }

  if (label->IsLinked()) {
    Label::LabelLinksIterator links_it(label);
    for (; !links_it.Done(); links_it.Advance()) {
      ptrdiff_t link_offset = *links_it.Current();
      Instruction* link = masm_->GetInstructionAt(link_offset);

      // ADR instructions are not handled.
      if (BranchTypeUsesVeneers(link->GetBranchType())) {
        BranchInfo branch_info(link_offset, label, link->GetBranchType());
        unresolved_branches_.erase(branch_info);
      }
    }
  }

  UpdateNextCheckPoint();
}


bool VeneerPool::ShouldEmitVeneer(int64_t first_unreacheable_pc,
                                  size_t amount) {
  ptrdiff_t offset =
      kPoolNonVeneerCodeSize + amount + GetMaxSize() + GetOtherPoolsMaxSize();
  return (masm_->GetCursorOffset() + offset) > first_unreacheable_pc;
}


void VeneerPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty()) return;

  VIXL_ASSERT(masm_->GetCursorOffset() + kPoolNonVeneerCodeSize <
              unresolved_branches_.GetFirstLimit());

  if (IsBlocked()) return;

  if (ShouldEmitVeneers(amount)) {
    Emit(option, amount);
  } else {
    UpdateNextCheckPoint();
  }
}


void VeneerPool::Emit(EmitOption option, size_t amount) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  Label end;
  if (option == kBranchRequired) {
    ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
    masm_->b(&end);
  }

  // We want to avoid generating veneer pools too often, so generate veneers for
  // branches that don't immediately require a veneer but will soon go out of
  // range.
  static const size_t kVeneerEmissionMargin = 1 * KBytes;

  for (BranchInfoSetIterator it(&unresolved_branches_); !it.Done();) {
    BranchInfo* branch_info = it.Current();
    if (ShouldEmitVeneer(branch_info->first_unreacheable_pc_,
                         amount + kVeneerEmissionMargin)) {
      CodeBufferCheckScope scope(masm_,
                                 kVeneerCodeSize,
                                 CodeBufferCheckScope::kCheck,
                                 CodeBufferCheckScope::kExactSize);
      ptrdiff_t branch_pos = branch_info->pc_offset_;
      Instruction* branch = masm_->GetInstructionAt(branch_pos);
      Label* label = branch_info->label_;

      // Patch the branch to point to the current position, and emit a branch
      // to the label.
      Instruction* veneer = masm_->GetCursorAddress<Instruction*>();
      branch->SetImmPCOffsetTarget(veneer);
      {
        ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
        masm_->b(label);
      }

      // Update the label. The branch patched does not point to it any longer.
      label->DeleteLink(branch_pos);

      it.DeleteCurrentAndAdvance();
    } else {
      it.AdvanceToNextType();
    }
  }

  UpdateNextCheckPoint();

  masm_->bind(&end);
}


MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
    : Assembler(pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      fptmp_list_(d31),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
  checkpoint_ = GetNextCheckPoint();
#ifndef VIXL_DEBUG
  USE(allow_macro_instructions_);
#endif
}


MacroAssembler::MacroAssembler(size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      fptmp_list_(d31),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
  checkpoint_ = GetNextCheckPoint();
}


MacroAssembler::MacroAssembler(byte* buffer,
                               size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(buffer, capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      fptmp_list_(d31),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
  checkpoint_ = GetNextCheckPoint();
}


MacroAssembler::~MacroAssembler() {}


void MacroAssembler::Reset() {
  Assembler::Reset();

  VIXL_ASSERT(!literal_pool_.IsBlocked());
  literal_pool_.Reset();
  veneer_pool_.Reset();

  checkpoint_ = GetNextCheckPoint();
}


void MacroAssembler::FinalizeCode(FinalizeOption option) {
  if (!literal_pool_.IsEmpty()) {
    // The user may decide to emit more code after Finalize; emit a branch if
    // that's the case.
    literal_pool_.Emit(option == kUnreachable ? Pool::kNoBranchRequired
                                              : Pool::kBranchRequired);
  }
  VIXL_ASSERT(veneer_pool_.IsEmpty());

  Assembler::FinalizeCode();
}


void MacroAssembler::CheckEmitFor(size_t amount) {
  CheckEmitPoolsFor(amount);
  GetBuffer()->EnsureSpaceFor(amount);
}


void MacroAssembler::CheckEmitPoolsFor(size_t amount) {
  literal_pool_.CheckEmitFor(amount);
  veneer_pool_.CheckEmitFor(amount);
  checkpoint_ = GetNextCheckPoint();
}


int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
                                        const Register& rd,
                                        uint64_t imm) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
  // The worst case for size is mov 64-bit immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(masm);

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move inverted (movn).
  //  3. 64-bit move inverted.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half words.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero and move-inverted initial
  // values.
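  //
  // For example (illustrative): 0x0000123456780000 cannot be encoded in a
  // single instruction, so it is materialised with
  // `movz rd, #0x5678, lsl #16` followed by `movk rd, #0x1234, lsl #32`.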

  // Try to move the immediate in one instruction, and if that fails, switch to
  // using multiple instructions.
  if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
    return 1;
  } else {
    int instruction_count = 0;
    unsigned reg_size = rd.GetSizeInBits();

    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero or move-inverted is generated for the first non-zero or
    // non-0xffff immX, and a move-keep for subsequent non-zero immX.

    uint64_t ignored_halfword = 0;
    bool invert_move = false;
    // If the number of 0xffff halfwords is greater than the number of 0x0000
    // halfwords, it's more efficient to use move-inverted.
    if (CountClearHalfWords(~imm, reg_size) >
        CountClearHalfWords(imm, reg_size)) {
      ignored_halfword = 0xffff;
      invert_move = true;
    }

    // Mov instructions can't move values into the stack pointer, so set up a
    // temporary register, if needed.
    UseScratchRegisterScope temps;
    Register temp;
    if (emit_code) {
      temps.Open(masm);
      temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
    }

    // Iterate through the halfwords. Use movn/movz for the first non-ignored
    // halfword, and movk for subsequent halfwords.
    VIXL_ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (reg_size / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
      if (imm16 != ignored_halfword) {
        if (!first_mov_done) {
          if (invert_move) {
            if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
            instruction_count++;
          } else {
            if (emit_code) masm->movz(temp, imm16, 16 * i);
            instruction_count++;
          }
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          if (emit_code) masm->movk(temp, imm16, 16 * i);
          instruction_count++;
        }
      }
    }

    VIXL_ASSERT(first_mov_done);

    // Move the temporary if the original destination register was the stack
    // pointer.
    if (rd.IsSP()) {
      if (emit_code) masm->mov(rd, temp);
      instruction_count++;
    }
    return instruction_count;
  }
}


bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm,
                                                 const Register& dst,
                                                 int64_t imm) {
  bool emit_code = masm != NULL;
  unsigned n, imm_s, imm_r;
  int reg_size = dst.GetSizeInBits();

  if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
    // Immediate can be represented in a move zero instruction. Movz can't write
    // to the stack pointer.
    if (emit_code) {
      masm->movz(dst, imm);
    }
    return true;
  } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
    // Immediate can be represented in a move negative instruction. Movn can't
    // write to the stack pointer.
    if (emit_code) {
      masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
    }
    return true;
  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
    // Immediate can be represented in a logical orr instruction.
    VIXL_ASSERT(!dst.IsZero());
    if (emit_code) {
      masm->LogicalImmediate(dst,
                             AppropriateZeroRegFor(dst),
                             n,
                             imm_s,
                             imm_r,
                             ORR);
    }
    return true;
  }
  return false;
}


void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
  VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
              ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
  if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
    B(static_cast<Condition>(type), label);
  } else {
    switch (type) {
      case always:
        B(label);
        break;
      case never:
        break;
      case reg_zero:
        Cbz(reg, label);
        break;
      case reg_not_zero:
        Cbnz(reg, label);
        break;
      case reg_bit_clear:
        Tbz(reg, bit, label);
        break;
      case reg_bit_set:
        Tbnz(reg, bit, label);
        break;
      default:
        VIXL_UNREACHABLE();
    }
  }
}


void MacroAssembler::B(Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(UncondBranchType) >
              Instruction::kLoadLiteralRange);
  SingleEmissionCheckScope guard(this);
  b(label);
}


void MacroAssembler::B(Label* label, Condition cond) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CondBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    b(&done, InvertCondition(cond));
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CondBranchType);
    }
    b(label, cond);
  }
}


void MacroAssembler::Cbnz(const Register& rt, Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    cbz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbnz(rt, label);
  }
}


void MacroAssembler::Cbz(const Register& rt, Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    cbnz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbz(rt, label);
  }
}


void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
  // can become impossible because we emit the literal pool first.
  literal_pool_.CheckEmitForBranch(
      Instruction::GetImmBranchForwardRange(TestBranchType));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
    Label done;
    tbz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbnz(rt, bit_pos, label);
  }
}


void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
  // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
  // can become impossible because we emit the literal pool first.
  literal_pool_.CheckEmitForBranch(
      Instruction::GetImmBranchForwardRange(TestBranchType));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
    Label done;
    tbnz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbz(rt, bit_pos, label);
  }
}


void MacroAssembler::Bind(Label* label) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  bind(label);
}


// Bind a label to a specified offset from the start of the buffer.
void MacroAssembler::BindToOffset(Label* label, ptrdiff_t offset) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  Assembler::BindToOffset(label, offset);
}


void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, AND);
}


void MacroAssembler::Ands(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ANDS);
}


void MacroAssembler::Tst(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Ands(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BIC);
}


void MacroAssembler::Bics(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BICS);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}


void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  // The worst case for size is logical immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to do the operation
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(this);
  UseScratchRegisterScope temps(this);

  if (operand.IsImmediate()) {
    uint64_t immediate = operand.GetImmediate();
    unsigned reg_size = rd.GetSizeInBits();

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
    }

    // Ignore the top 32 bits of an immediate if we're moving to a W register.
    if (rd.Is32Bits()) {
      // Check that the top 32 bits are consistent.
      VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
                  ((immediate >> kWRegSize) == 0xffffffff));
      immediate &= kWRegMask;
    }

    VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:
          VIXL_FALLTHROUGH();
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == UINT64_C(0xffffffffffffffff))) ||
               (rd.Is32Bits() && (immediate == UINT64_C(0x00000000ffffffff)))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = temps.AcquireSameSizeAs(rn);

      // If the left-hand input is the stack pointer, we can't pre-shift the
      // immediate, as the encoding won't allow the subsequent post shift.
      PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift;
      Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);

      if (rd.Is(sp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register so we use the temp register as an intermediate again.
        Logical(temp, rn, imm_operand, op);
        Mov(sp, temp);
      } else {
        Logical(rd, rn, imm_operand, op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    VIXL_ASSERT(operand.GetShiftAmount() <= 4);
    VIXL_ASSERT(
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));

    temps.Exclude(operand.GetRegister());
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    VIXL_ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}


void MacroAssembler::Mov(const Register& rd,
                         const Operand& operand,
                         DiscardMoveMode discard_mode) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mov immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.GetImmediate());
  } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd,
              operand.GetRegister(),
              operand.GetShift(),
              operand.GetShiftAmount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
  } else {
    Mov(rd, operand.GetRegister(), discard_mode);
  }
}


void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint16(imm));
  int byte1 = (imm & 0xff);
  int byte2 = ((imm >> 8) & 0xff);
  if (byte1 == byte2) {
    movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
  } else if (byte1 == 0) {
    movi(vd, byte2, LSL, 8);
  } else if (byte2 == 0) {
    movi(vd, byte1);
  } else if (byte1 == 0xff) {
    mvni(vd, ~byte2 & 0xff, LSL, 8);
  } else if (byte2 == 0xff) {
    mvni(vd, ~byte1 & 0xff);
  } else {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    movz(temp, imm);
    dup(vd, temp);
  }
}


void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint32(imm));

  uint8_t bytes[sizeof(imm)];
  memcpy(bytes, &imm, sizeof(imm));

  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 4; ++i) {
      if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
        all0orff = false;
        break;
      }
    }

    if (all0orff == true) {
      movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
      return;
    }
  }

  // Of the 4 bytes, only one byte is non-zero.
  for (int i = 0; i < 4; i++) {
    if ((imm & (0xff << (i * 8))) == imm) {
      movi(vd, bytes[i], LSL, i * 8);
      return;
    }
  }

  // Of the 4 bytes, only one byte is not 0xff.
  for (int i = 0; i < 4; i++) {
    uint32_t mask = ~(0xff << (i * 8));
    if ((imm & mask) == mask) {
      mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
      return;
    }
  }

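  // The remaining special cases use the MSL ("shifting ones") form of movi
  // and mvni, where the 8-bit immediate is shifted left and the vacated low
  // bits are filled with ones.
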
  // Immediate is of the form 0x00MMFFFF.
  if ((imm & 0xff00ffff) == 0x0000ffff) {
    movi(vd, bytes[2], MSL, 16);
    return;
  }

  // Immediate is of the form 0x0000MMFF.
  if ((imm & 0xffff00ff) == 0x000000ff) {
    movi(vd, bytes[1], MSL, 8);
    return;
  }

  // Immediate is of the form 0xFFMM0000.
  if ((imm & 0xff00ffff) == 0xff000000) {
    mvni(vd, ~bytes[2] & 0xff, MSL, 16);
    return;
  }
  // Immediate is of the form 0xFFFFMM00.
  if ((imm & 0xffff00ff) == 0xffff0000) {
    mvni(vd, ~bytes[1] & 0xff, MSL, 8);
    return;
  }

  // Top and bottom 16-bits are equal.
  if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
    Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    Mov(temp, imm);
    dup(vd, temp);
  }
}


void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
  // All bytes are either 0x00 or 0xff.
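  // (The 64-bit immediate form of `movi` encodes one bit per byte, so any
  // such value can be encoded directly.)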
  {
    bool all0orff = true;
    for (int i = 0; i < 8; ++i) {
      int byteval = (imm >> (i * 8)) & 0xff;
      if (byteval != 0 && byteval != 0xff) {
        all0orff = false;
        break;
      }
    }
    if (all0orff == true) {
      movi(vd, imm);
      return;
    }
  }

  // Top and bottom 32-bits are equal.
  if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
    Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Mov(temp, imm);
    if (vd.Is1D()) {
      mov(vd.D(), 0, temp);
    } else {
      dup(vd.V2D(), temp);
    }
  }
}


void MacroAssembler::Movi(const VRegister& vd,
                          uint64_t imm,
                          Shift shift,
                          int shift_amount) {
  VIXL_ASSERT(allow_macro_instructions_);
  MacroEmissionCheckScope guard(this);
  if (shift_amount != 0 || shift != LSL) {
    movi(vd, imm, shift, shift_amount);
  } else if (vd.Is8B() || vd.Is16B()) {
    // 8-bit immediate.
    VIXL_ASSERT(IsUint8(imm));
    movi(vd, imm);
  } else if (vd.Is4H() || vd.Is8H()) {
    // 16-bit immediate.
    Movi16bitHelper(vd, imm);
  } else if (vd.Is2S() || vd.Is4S()) {
    // 32-bit immediate.
    Movi32bitHelper(vd, imm);
  } else {
    // 64-bit immediate.
    Movi64bitHelper(vd, imm);
  }
}


void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
  // TODO: Move 128-bit values in a more efficient way.
  VIXL_ASSERT(vd.Is128Bits());
  UseScratchRegisterScope temps(this);
  Movi(vd.V2D(), lo);
  Register temp = temps.AcquireX();
  Mov(temp, hi);
  Ins(vd.V2D(), 1, temp);
}


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mvn immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.GetImmediate());
  } else if (operand.IsExtendedRegister()) {
    UseScratchRegisterScope temps(this);
    temps.Exclude(operand.GetRegister());

    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    Register temp = temps.AcquireSameSizeAs(rd);
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    mvn(rd, Operand(temp));
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  MoveImmediateHelper(this, rd, imm);
}


void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
  }
}


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
  }
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  VIXL_ASSERT((cond != al) && (cond != nv));
  // The worst case for size is ccmp immediate:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction for ccmp
  MacroEmissionCheckScope guard(this);

  if ((operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)) ||
      (operand.IsImmediate() &&
       IsImmConditionalCompare(operand.GetImmediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    UseScratchRegisterScope temps(this);
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    ConditionalCompare(rn, temp, nzcv, cond, op);
  }
}


void MacroAssembler::CselHelper(MacroAssembler* masm,
                                const Register& rd,
                                Operand left,
                                Operand right,
                                Condition cond,
                                bool* should_synthesise_left,
                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);

  VIXL_ASSERT(!emit_code || masm->allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  VIXL_ASSERT(!rd.IsZero() && !rd.IsSP());
  VIXL_ASSERT(left.IsImmediate() || !left.GetRegister().IsSP());
  VIXL_ASSERT(right.IsImmediate() || !right.GetRegister().IsSP());

  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  // The worst case for size occurs when the inputs are two non encodable
  // constants:
  //  * up to 4 instructions to materialise the left constant
  //  * up to 4 instructions to materialise the right constant
  //  * 1 instruction for csel
  EmissionCheckScope guard(masm, 9 * kInstructionSize);
  UseScratchRegisterScope temps;
  if (masm != NULL) {
    temps.Open(masm);
  }

  // Try to handle cases where both inputs are immediates.
  bool left_is_immediate = left.IsImmediate() || left.IsZero();
  bool right_is_immediate = right.IsImmediate() || right.IsZero();
  if (left_is_immediate && right_is_immediate &&
      CselSubHelperTwoImmediates(masm,
                                 rd,
                                 left.GetEquivalentImmediate(),
                                 right.GetEquivalentImmediate(),
                                 cond,
                                 should_synthesise_left,
                                 should_synthesise_right)) {
    return;
  }

  // Handle cases where one of the two inputs is -1, 0, or 1.
  bool left_is_small_immediate =
      left_is_immediate && ((-1 <= left.GetEquivalentImmediate()) &&
                            (left.GetEquivalentImmediate() <= 1));
  bool right_is_small_immediate =
      right_is_immediate && ((-1 <= right.GetEquivalentImmediate()) &&
                             (right.GetEquivalentImmediate() <= 1));
  if (right_is_small_immediate || left_is_small_immediate) {
    bool swapped_inputs = false;
    if (!right_is_small_immediate) {
      std::swap(left, right);
      cond = InvertCondition(cond);
      swapped_inputs = true;
    }
    CselSubHelperRightSmallImmediate(masm,
                                     &temps,
                                     rd,
                                     left,
                                     right,
                                     cond,
                                     swapped_inputs ? should_synthesise_right
                                                    : should_synthesise_left);
    return;
  }

  // Otherwise both inputs need to be available in registers. Synthesise them
  // if necessary and emit the `csel`.
  if (!left.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, left);
      left = temp;
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (!right.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, right);
      right = temp;
    }
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
  }
  if (emit_code) {
    VIXL_ASSERT(left.IsPlainRegister() && right.IsPlainRegister());
    if (left.GetRegister().Is(right.GetRegister())) {
      masm->Mov(rd, left.GetRegister());
    } else {
      masm->csel(rd, left.GetRegister(), right.GetRegister(), cond);
    }
  }
}


bool MacroAssembler::CselSubHelperTwoImmediates(MacroAssembler* masm,
                                                const Register& rd,
                                                int64_t left,
                                                int64_t right,
                                                Condition cond,
                                                bool* should_synthesise_left,
                                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);
  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  if (left == right) {
    if (emit_code) masm->Mov(rd, left);
    return true;
  } else if (left == -right) {
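    // For example, selecting between +7 and -7 becomes `Mov(rd, -7)` followed
    // by `Cneg(rd, rd, cond)`, which negates the value when the condition
    // holds.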
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
    if (emit_code) {
      masm->Mov(rd, right);
      masm->Cneg(rd, rd, cond);
    }
    return true;
  }

  if (CselSubHelperTwoOrderedImmediates(masm, rd, left, right, cond)) {
    return true;
  } else {
    std::swap(left, right);
    if (CselSubHelperTwoOrderedImmediates(masm,
                                          rd,
                                          left,
                                          right,
                                          InvertCondition(cond))) {
      return true;
    }
  }

  // TODO: Handle more situations. For example handle `csel rd, #5, #6, cond`
  // with `cinc`.
  return false;
}


bool MacroAssembler::CselSubHelperTwoOrderedImmediates(MacroAssembler* masm,
                                                       const Register& rd,
                                                       int64_t left,
                                                       int64_t right,
                                                       Condition cond) {
  bool emit_code = (masm != NULL);

  if ((left == 1) && (right == 0)) {
    if (emit_code) masm->cset(rd, cond);
    return true;
  } else if ((left == -1) && (right == 0)) {
    if (emit_code) masm->csetm(rd, cond);
    return true;
  }
  return false;
}


void MacroAssembler::CselSubHelperRightSmallImmediate(
    MacroAssembler* masm,
    UseScratchRegisterScope* temps,
    const Register& rd,
    const Operand& left,
    const Operand& right,
    Condition cond,
    bool* should_synthesise_left) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT((right.IsImmediate() || right.IsZero()) &&
              (-1 <= right.GetEquivalentImmediate()) &&
              (right.GetEquivalentImmediate() <= 1));
  Register left_register;

  if (left.IsPlainRegister()) {
    left_register = left.GetRegister();
  } else {
    if (emit_code) {
      left_register = temps->AcquireSameSizeAs(rd);
      masm->Mov(left_register, left);
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (emit_code) {
    int64_t imm = right.GetEquivalentImmediate();
    Register zr = AppropriateZeroRegFor(rd);
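    // With the zero register as the second source, `csel`, `csinc` and
    // `csinv` produce 0, 1 and -1 respectively when the condition fails, so
    // the small right-hand immediate never needs to be materialised.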
    if (imm == 0) {
      masm->csel(rd, left_register, zr, cond);
    } else if (imm == 1) {
      masm->csinc(rd, left_register, zr, cond);
    } else {
      VIXL_ASSERT(imm == -1);
      masm->csinv(rd, left_register, zr, cond);
    }
  }
}


void MacroAssembler::Add(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
      IsImmAddSub(-operand.GetImmediate())) {
    AddSubMacro(rd, rn, -operand.GetImmediate(), S, SUB);
  } else {
    AddSubMacro(rd, rn, operand, S, ADD);
  }
}


void MacroAssembler::Adds(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  Add(rd, rn, operand, SetFlags);
}


void MacroAssembler::Sub(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
      IsImmAddSub(-operand.GetImmediate())) {
    AddSubMacro(rd, rn, -operand.GetImmediate(), S, ADD);
  } else {
    AddSubMacro(rd, rn, operand, S, SUB);
  }
}


void MacroAssembler::Subs(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  Sub(rd, rn, operand, SetFlags);
}


void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Adds(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Subs(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Fcmp(const FPRegister& fn,
                          double value,
                          FPTrapFlags trap) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is:
  //  * 1 to materialise the constant, using literal pool if necessary
  //  * 1 instruction for fcmp{e}
  MacroEmissionCheckScope guard(this);
  if (value != 0.0) {
    UseScratchRegisterScope temps(this);
    FPRegister tmp = temps.AcquireSameSizeAs(fn);
    Fmov(tmp, value);
    FPCompareMacro(fn, tmp, trap);
  } else {
    FPCompareMacro(fn, value, trap);
  }
}


void MacroAssembler::Fcmpe(const FPRegister& fn, double value) {
  Fcmp(fn, value, EnableTrap);
}


void MacroAssembler::Fmov(VRegister vd, double imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  // Floating point immediates are loaded through the literal pool.
  MacroEmissionCheckScope guard(this);

  if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
    Fmov(vd, static_cast<float>(imm));
    return;
  }

  VIXL_ASSERT(vd.Is1D() || vd.Is2D());
  if (IsImmFP64(imm)) {
    fmov(vd, imm);
  } else {
    uint64_t rawbits = DoubleToRawbits(imm);
    if (vd.IsScalar()) {
      if (rawbits == 0) {
        fmov(vd, xzr);
      } else {
        ldr(vd,
            new Literal<double>(imm,
                                &literal_pool_,
                                RawLiteral::kDeletedOnPlacementByPool));
      }
    } else {
      // TODO: consider NEON support for load literal.
      Movi(vd, rawbits);
    }
  }
}


void MacroAssembler::Fmov(VRegister vd, float imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  // Floating point immediates are loaded through the literal pool.
  MacroEmissionCheckScope guard(this);

  if (vd.Is1D() || vd.Is2D()) {
    Fmov(vd, static_cast<double>(imm));
    return;
  }

  VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
  if (IsImmFP32(imm)) {
    fmov(vd, imm);
  } else {
    uint32_t rawbits = FloatToRawbits(imm);
    if (vd.IsScalar()) {
      if (rawbits == 0) {
        fmov(vd, wzr);
      } else {
        ldr(vd,
            new Literal<float>(imm,
                               &literal_pool_,
                               RawLiteral::kDeletedOnPlacementByPool));
      }
    } else {
      // TODO: consider NEON support for load literal.
      Movi(vd, rawbits);
    }
  }
}

void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    Mov(rd, -operand.GetImmediate());
  } else {
    Sub(rd, AppropriateZeroRegFor(rd), operand);
  }
}


void MacroAssembler::Negs(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Subs(rd, AppropriateZeroRegFor(rd), operand);
}


bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
                                              int64_t imm) {
  return OneInstrMoveImmediateHelper(this, dst, imm);
}


Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
                                                  int64_t imm,
                                                  PreShiftImmMode mode) {
  int reg_size = dst.GetSizeInBits();

  // Encode the immediate in a single move instruction, if possible.
  if (TryOneInstrMoveImmediate(dst, imm)) {
    // The move was successful; nothing to do here.
  } else {
    // Pre-shift the immediate to the least-significant bits of the register.
    int shift_low = CountTrailingZeros(imm, reg_size);
    if (mode == kLimitShiftForSP) {
      // When applied to the stack pointer, the subsequent arithmetic operation
      // can use the extend form to shift left by a maximum of four bits. Right
      // shifts are not allowed, so we filter them out later before the new
      // immediate is tested.
      shift_low = std::min(shift_low, 4);
    }
    int64_t imm_low = imm >> shift_low;

    // Pre-shift the immediate to the most-significant bits of the register,
    // inserting set bits in the least-significant bits.
    int shift_high = CountLeadingZeros(imm, reg_size);
    int64_t imm_high = (imm << shift_high) | ((INT64_C(1) << shift_high) - 1);

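    // For example (illustrative): 0xaaaa0 cannot be moved in one instruction,
    // but 0x5555 can, so with kAnyShift the caller receives
    // `Operand(dst, LSL, 5)` rather than a multi-instruction sequence.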
    if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
      // The new immediate has been moved into the destination's low bits:
      // return a new leftward-shifting operand.
      return Operand(dst, LSL, shift_low);
    } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
      // The new immediate has been moved into the destination's high bits:
      // return a new rightward-shifting operand.
      return Operand(dst, LSR, shift_high);
    } else {
      Mov(dst, imm);
    }
  }
  return Operand(dst);
}


void MacroAssembler::Move(const GenericOperand& dst,
                          const GenericOperand& src) {
  if (dst.Equals(src)) {
    return;
  }

  VIXL_ASSERT(dst.IsValid() && src.IsValid());

  // The sizes of the operands must match exactly.
  VIXL_ASSERT(dst.GetSizeInBits() == src.GetSizeInBits());
  VIXL_ASSERT(dst.GetSizeInBits() <= kXRegSize);
  int operand_size = static_cast<int>(dst.GetSizeInBits());

  if (dst.IsCPURegister() && src.IsCPURegister()) {
    CPURegister dst_reg = dst.GetCPURegister();
    CPURegister src_reg = src.GetCPURegister();
    if (dst_reg.IsRegister() && src_reg.IsRegister()) {
      Mov(Register(dst_reg), Register(src_reg));
    } else if (dst_reg.IsVRegister() && src_reg.IsVRegister()) {
      Fmov(VRegister(dst_reg), VRegister(src_reg));
    } else {
      if (dst_reg.IsRegister()) {
        Fmov(Register(dst_reg), VRegister(src_reg));
      } else {
        Fmov(VRegister(dst_reg), Register(src_reg));
      }
    }
    return;
  }

  if (dst.IsMemOperand() && src.IsMemOperand()) {
    UseScratchRegisterScope temps(this);
    CPURegister temp = temps.AcquireCPURegisterOfSize(operand_size);
    Ldr(temp, src.GetMemOperand());
    Str(temp, dst.GetMemOperand());
    return;
  }

  if (dst.IsCPURegister()) {
    Ldr(dst.GetCPURegister(), src.GetMemOperand());
  } else {
    Str(src.GetCPURegister(), dst.GetMemOperand());
  }
}
1653
1654
1655void MacroAssembler::ComputeAddress(const Register& dst,
1656                                    const MemOperand& mem_op) {
1657  // We cannot handle pre-indexing or post-indexing.
1658  VIXL_ASSERT(mem_op.GetAddrMode() == Offset);
1659  Register base = mem_op.GetBaseRegister();
1660  if (mem_op.IsImmediateOffset()) {
1661    Add(dst, base, mem_op.GetOffset());
1662  } else {
1663    VIXL_ASSERT(mem_op.IsRegisterOffset());
1664    Register reg_offset = mem_op.GetRegisterOffset();
1665    Shift shift = mem_op.GetShift();
1666    Extend extend = mem_op.GetExtend();
1667    if (shift == NO_SHIFT) {
1668      VIXL_ASSERT(extend != NO_EXTEND);
1669      Add(dst, base, Operand(reg_offset, extend, mem_op.GetShiftAmount()));
1670    } else {
1671      VIXL_ASSERT(extend == NO_EXTEND);
1672      Add(dst, base, Operand(reg_offset, shift, mem_op.GetShiftAmount()));
1673    }
1674  }
1675}
1676
1677
1678void MacroAssembler::AddSubMacro(const Register& rd,
1679                                 const Register& rn,
1680                                 const Operand& operand,
1681                                 FlagsUpdate S,
1682                                 AddSubOp op) {
1683  // Worst case is add/sub immediate:
1684  //  * up to 4 instructions to materialise the constant
1685  //  * 1 instruction for add/sub
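  //
  // For example (illustrative), `Add(x0, x1, 0x1234000)` cannot use the
  // add-immediate encoding; assuming x16 is the acquired scratch register,
  // the macro emits roughly:
  //   movz x16, #0x48d
  //   add x0, x1, x16, lsl #14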
1686  MacroEmissionCheckScope guard(this);
1687
1688  if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
1689      (S == LeaveFlags)) {
1690    // The instruction would be a nop. Avoid generating useless code.
1691    return;
1692  }
1693
1694  if ((operand.IsImmediate() && !IsImmAddSub(operand.GetImmediate())) ||
1695      (rn.IsZero() && !operand.IsShiftedRegister()) ||
1696      (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
1697    UseScratchRegisterScope temps(this);
1698    Register temp = temps.AcquireSameSizeAs(rn);
1699    if (operand.IsImmediate()) {
1700      PreShiftImmMode mode = kAnyShift;
1701
1702      // If the destination or source register is the stack pointer, we can
1703      // only pre-shift the immediate right by values supported in the add/sub
1704      // extend encoding.
1705      if (rd.IsSP()) {
1706        // If the destination is SP and flags will be set, we can't pre-shift
1707        // the immediate at all.
1708        mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
1709      } else if (rn.IsSP()) {
1710        mode = kLimitShiftForSP;
1711      }
1712
1713      Operand imm_operand =
1714          MoveImmediateForShiftedOp(temp, operand.GetImmediate(), mode);
1715      AddSub(rd, rn, imm_operand, S, op);
1716    } else {
1717      Mov(temp, operand);
1718      AddSub(rd, rn, temp, S, op);
1719    }
1720  } else {
1721    AddSub(rd, rn, operand, S, op);
1722  }
1723}
1724
1725
1726void MacroAssembler::Adc(const Register& rd,
1727                         const Register& rn,
1728                         const Operand& operand) {
1729  VIXL_ASSERT(allow_macro_instructions_);
1730  AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
1731}
1732
1733
1734void MacroAssembler::Adcs(const Register& rd,
1735                          const Register& rn,
1736                          const Operand& operand) {
1737  VIXL_ASSERT(allow_macro_instructions_);
1738  AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
1739}
1740
1741
1742void MacroAssembler::Sbc(const Register& rd,
1743                         const Register& rn,
1744                         const Operand& operand) {
1745  VIXL_ASSERT(allow_macro_instructions_);
1746  AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
1747}
1748
1749
1750void MacroAssembler::Sbcs(const Register& rd,
1751                          const Register& rn,
1752                          const Operand& operand) {
1753  VIXL_ASSERT(allow_macro_instructions_);
1754  AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
1755}
1756
1757
1758void MacroAssembler::Ngc(const Register& rd, const Operand& operand) {
1759  VIXL_ASSERT(allow_macro_instructions_);
1760  Register zr = AppropriateZeroRegFor(rd);
1761  Sbc(rd, zr, operand);
1762}
1763
1764
1765void MacroAssembler::Ngcs(const Register& rd, const Operand& operand) {
1766  VIXL_ASSERT(allow_macro_instructions_);
1767  Register zr = AppropriateZeroRegFor(rd);
1768  Sbcs(rd, zr, operand);
1769}
1770
1771
1772void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
1773                                          const Register& rn,
1774                                          const Operand& operand,
1775                                          FlagsUpdate S,
1776                                          AddSubWithCarryOp op) {
1777  VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits());
  // Worst case is adc/sbc immediate:
1779  //  * up to 4 instructions to materialise the constant
1780  //  * 1 instruction for add/sub
1781  MacroEmissionCheckScope guard(this);
1782  UseScratchRegisterScope temps(this);
1783
1784  if (operand.IsImmediate() ||
1785      (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
    // Add/sub with carry (immediate or ROR shifted register).
1787    Register temp = temps.AcquireSameSizeAs(rn);
1788    Mov(temp, operand);
1789    AddSubWithCarry(rd, rn, Operand(temp), S, op);
1790  } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
1791    // Add/sub with carry (shifted register).
1792    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits());
1793    VIXL_ASSERT(operand.GetShift() != ROR);
1794    VIXL_ASSERT(
1795        IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
1796                operand.GetShiftAmount()));
1797    temps.Exclude(operand.GetRegister());
1798    Register temp = temps.AcquireSameSizeAs(rn);
1799    EmitShift(temp,
1800              operand.GetRegister(),
1801              operand.GetShift(),
1802              operand.GetShiftAmount());
1803    AddSubWithCarry(rd, rn, Operand(temp), S, op);
1804  } else if (operand.IsExtendedRegister()) {
1805    // Add/sub with carry (extended register).
1806    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
1807    // Add/sub extended supports a shift <= 4. We want to support exactly the
1808    // same modes.
1809    VIXL_ASSERT(operand.GetShiftAmount() <= 4);
1810    VIXL_ASSERT(
1811        operand.GetRegister().Is64Bits() ||
1812        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
1813    temps.Exclude(operand.GetRegister());
1814    Register temp = temps.AcquireSameSizeAs(rn);
1815    EmitExtendShift(temp,
1816                    operand.GetRegister(),
1817                    operand.GetExtend(),
1818                    operand.GetShiftAmount());
1819    AddSubWithCarry(rd, rn, Operand(temp), S, op);
1820  } else {
1821    // The addressing mode is directly supported by the instruction.
1822    AddSubWithCarry(rd, rn, operand, S, op);
1823  }
1824}
1825
1826
1827#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                          \
1828  void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
1829    VIXL_ASSERT(allow_macro_instructions_);                            \
1830    LoadStoreMacro(REG, addr, OP);                                     \
1831  }
1832LS_MACRO_LIST(DEFINE_FUNCTION)
1833#undef DEFINE_FUNCTION
1834
1835
1836void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
1837                                    const MemOperand& addr,
1838                                    LoadStoreOp op) {
1839  // Worst case is ldr/str pre/post index:
1840  //  * 1 instruction for ldr/str
1841  //  * up to 4 instructions to materialise the constant
1842  //  * 1 instruction to update the base
1843  MacroEmissionCheckScope guard(this);
1844
1845  int64_t offset = addr.GetOffset();
1846  unsigned access_size = CalcLSDataSize(op);
1847
1848  // Check if an immediate offset fits in the immediate field of the
1849  // appropriate instruction. If not, emit two instructions to perform
1850  // the operation.
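  //
  // For example (illustrative), `Ldr(x0, MemOperand(x1, 0x123457))` has an
  // offset that fits neither encoding, so the macro moves the offset into a
  // scratch register and then emits `ldr x0, [x1, temp]`.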
1851  if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
1852      !IsImmLSUnscaled(offset)) {
1853    // Immediate offset that can't be encoded using unsigned or unscaled
1854    // addressing modes.
1855    UseScratchRegisterScope temps(this);
1856    Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
1857    Mov(temp, addr.GetOffset());
1858    LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op);
1859  } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
1860    // Post-index beyond unscaled addressing range.
1861    LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
1862    Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
1863  } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
1864    // Pre-index beyond unscaled addressing range.
1865    Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
1866    LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
1867  } else {
1868    // Encodable in one load/store instruction.
1869    LoadStore(rt, addr, op);
1870  }
1871}
1872
1873
1874#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
1875  void MacroAssembler::FN(const REGTYPE REG,        \
1876                          const REGTYPE REG2,       \
1877                          const MemOperand& addr) { \
1878    VIXL_ASSERT(allow_macro_instructions_);         \
1879    LoadStorePairMacro(REG, REG2, addr, OP);        \
1880  }
1881LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
1882#undef DEFINE_FUNCTION
1883
1884void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
1885                                        const CPURegister& rt2,
1886                                        const MemOperand& addr,
1887                                        LoadStorePairOp op) {
1888  // TODO(all): Should we support register offset for load-store-pair?
1889  VIXL_ASSERT(!addr.IsRegisterOffset());
1890  // Worst case is ldp/stp immediate:
1891  //  * 1 instruction for ldp/stp
1892  //  * up to 4 instructions to materialise the constant
1893  //  * 1 instruction to update the base
1894  MacroEmissionCheckScope guard(this);
1895
1896  int64_t offset = addr.GetOffset();
1897  unsigned access_size = CalcLSPairDataSize(op);
1898
1899  // Check if the offset fits in the immediate field of the appropriate
1900  // instruction. If not, emit two instructions to perform the operation.
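  //
  // For example (illustrative), `Ldp(x0, x1, MemOperand(x2, 1024))` is
  // outside the ldp immediate range for X registers, so the macro emits
  // `add temp, x2, #1024` followed by `ldp x0, x1, [temp]`.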
1901  if (IsImmLSPair(offset, access_size)) {
1902    // Encodable in one load/store pair instruction.
1903    LoadStorePair(rt, rt2, addr, op);
1904  } else {
1905    Register base = addr.GetBaseRegister();
1906    if (addr.IsImmediateOffset()) {
1907      UseScratchRegisterScope temps(this);
1908      Register temp = temps.AcquireSameSizeAs(base);
1909      Add(temp, base, offset);
1910      LoadStorePair(rt, rt2, MemOperand(temp), op);
1911    } else if (addr.IsPostIndex()) {
1912      LoadStorePair(rt, rt2, MemOperand(base), op);
1913      Add(base, base, offset);
1914    } else {
1915      VIXL_ASSERT(addr.IsPreIndex());
1916      Add(base, base, offset);
1917      LoadStorePair(rt, rt2, MemOperand(base), op);
1918    }
1919  }
1920}
1921
1922
1923void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
1924  MacroEmissionCheckScope guard(this);
1925
1926  // There are no pre- or post-index modes for prfm.
1927  VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
1928
1929  // The access size is implicitly 8 bytes for all prefetch operations.
1930  unsigned size = kXRegSizeInBytesLog2;
1931
1932  // Check if an immediate offset fits in the immediate field of the
1933  // appropriate instruction. If not, emit two instructions to perform
1934  // the operation.
1935  if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.GetOffset(), size) &&
1936      !IsImmLSUnscaled(addr.GetOffset())) {
1937    // Immediate offset that can't be encoded using unsigned or unscaled
1938    // addressing modes.
1939    UseScratchRegisterScope temps(this);
1940    Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
1941    Mov(temp, addr.GetOffset());
1942    Prefetch(op, MemOperand(addr.GetBaseRegister(), temp));
1943  } else {
    // Encodable immediate or register offsets only need a single instruction.
1945    Prefetch(op, addr);
1946  }
1947}
1948
1949
1950void MacroAssembler::Push(const CPURegister& src0,
1951                          const CPURegister& src1,
1952                          const CPURegister& src2,
1953                          const CPURegister& src3) {
1954  VIXL_ASSERT(allow_macro_instructions_);
1955  VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
1956  VIXL_ASSERT(src0.IsValid());
1957
1958  int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
1959  int size = src0.GetSizeInBytes();
1960
1961  PrepareForPush(count, size);
1962  PushHelper(count, size, src0, src1, src2, src3);
1963}
1964
1965
1966void MacroAssembler::Pop(const CPURegister& dst0,
1967                         const CPURegister& dst1,
1968                         const CPURegister& dst2,
1969                         const CPURegister& dst3) {
1970  // It is not valid to pop into the same register more than once in one
1971  // instruction, not even into the zero register.
1972  VIXL_ASSERT(allow_macro_instructions_);
1973  VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
1974  VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
1975  VIXL_ASSERT(dst0.IsValid());
1976
1977  int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
1978  int size = dst0.GetSizeInBytes();
1979
1980  PrepareForPop(count, size);
1981  PopHelper(count, size, dst0, dst1, dst2, dst3);
1982}
1983
1984
1985void MacroAssembler::PushCPURegList(CPURegList registers) {
1986  VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
1987  VIXL_ASSERT(!registers.Overlaps(*GetScratchFPRegisterList()));
1988  VIXL_ASSERT(allow_macro_instructions_);
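
  // For example (illustrative), with sp as the stack pointer,
  // `PushCPURegList(CPURegList(x0, x1, x2, x3))` emits:
  //   stp x0, x1, [sp, #-32]!
  //   stp x2, x3, [sp, #16]
  // Note that, unlike `Push`, the lowest-numbered register ends up at the
  // lowest address.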
1989
1990  int reg_size = registers.GetRegisterSizeInBytes();
1991  PrepareForPush(registers.GetCount(), reg_size);
1992
1993  // Bump the stack pointer and store two registers at the bottom.
1994  int size = registers.GetTotalSizeInBytes();
1995  const CPURegister& bottom_0 = registers.PopLowestIndex();
1996  const CPURegister& bottom_1 = registers.PopLowestIndex();
1997  if (bottom_0.IsValid() && bottom_1.IsValid()) {
1998    Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
1999  } else if (bottom_0.IsValid()) {
2000    Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
2001  }
2002
2003  int offset = 2 * reg_size;
2004  while (!registers.IsEmpty()) {
2005    const CPURegister& src0 = registers.PopLowestIndex();
2006    const CPURegister& src1 = registers.PopLowestIndex();
2007    if (src1.IsValid()) {
2008      Stp(src0, src1, MemOperand(StackPointer(), offset));
2009    } else {
2010      Str(src0, MemOperand(StackPointer(), offset));
2011    }
2012    offset += 2 * reg_size;
2013  }
2014}
2015
2016
2017void MacroAssembler::PopCPURegList(CPURegList registers) {
2018  VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
2019  VIXL_ASSERT(!registers.Overlaps(*GetScratchFPRegisterList()));
2020  VIXL_ASSERT(allow_macro_instructions_);
2021
2022  int reg_size = registers.GetRegisterSizeInBytes();
2023  PrepareForPop(registers.GetCount(), reg_size);
2024
2025
2026  int size = registers.GetTotalSizeInBytes();
2027  const CPURegister& bottom_0 = registers.PopLowestIndex();
2028  const CPURegister& bottom_1 = registers.PopLowestIndex();
2029
2030  int offset = 2 * reg_size;
2031  while (!registers.IsEmpty()) {
2032    const CPURegister& dst0 = registers.PopLowestIndex();
2033    const CPURegister& dst1 = registers.PopLowestIndex();
2034    if (dst1.IsValid()) {
2035      Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
2036    } else {
2037      Ldr(dst0, MemOperand(StackPointer(), offset));
2038    }
2039    offset += 2 * reg_size;
2040  }
2041
2042  // Load the two registers at the bottom and drop the stack pointer.
2043  if (bottom_0.IsValid() && bottom_1.IsValid()) {
2044    Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
2045  } else if (bottom_0.IsValid()) {
2046    Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
2047  }
2048}
2049
2050
2051void MacroAssembler::PushMultipleTimes(int count, Register src) {
2052  VIXL_ASSERT(allow_macro_instructions_);
2053  int size = src.GetSizeInBytes();
2054
2055  PrepareForPush(count, size);
  // Push up to four registers at a time if possible: when the current stack
  // pointer is sp and the register size is 32 bits, registers must be pushed
  // in blocks of four in order to maintain the 16-byte alignment of sp.
2059  while (count >= 4) {
2060    PushHelper(4, size, src, src, src, src);
2061    count -= 4;
2062  }
2063  if (count >= 2) {
2064    PushHelper(2, size, src, src, NoReg, NoReg);
2065    count -= 2;
2066  }
2067  if (count == 1) {
2068    PushHelper(1, size, src, NoReg, NoReg, NoReg);
2069    count -= 1;
2070  }
2071  VIXL_ASSERT(count == 0);
2072}
2073
2074
2075void MacroAssembler::PushHelper(int count,
2076                                int size,
2077                                const CPURegister& src0,
2078                                const CPURegister& src1,
2079                                const CPURegister& src2,
2080                                const CPURegister& src3) {
2081  // Ensure that we don't unintentionally modify scratch or debug registers.
2082  // Worst case for size is 2 stp.
2083  ExactAssemblyScope scope(this,
2084                           2 * kInstructionSize,
2085                           ExactAssemblyScope::kMaximumSize);
2086
2087  VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
2088  VIXL_ASSERT(size == src0.GetSizeInBytes());
2089
2090  // When pushing multiple registers, the store order is chosen such that
2091  // Push(a, b) is equivalent to Push(a) followed by Push(b).
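  //
  // For example (illustrative), with sp as the stack pointer, `Push(x0, x1)`
  // emits `stp x1, x0, [sp, #-16]!`, leaving x0 at the higher address, just
  // as if x0 had been pushed on its own first.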
2092  switch (count) {
2093    case 1:
2094      VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
2095      str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
2096      break;
2097    case 2:
2098      VIXL_ASSERT(src2.IsNone() && src3.IsNone());
2099      stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
2100      break;
2101    case 3:
2102      VIXL_ASSERT(src3.IsNone());
2103      stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
2104      str(src0, MemOperand(StackPointer(), 2 * size));
2105      break;
2106    case 4:
2107      // Skip over 4 * size, then fill in the gap. This allows four W registers
2108      // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
2109      // all times.
2110      stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
2111      stp(src1, src0, MemOperand(StackPointer(), 2 * size));
2112      break;
2113    default:
2114      VIXL_UNREACHABLE();
2115  }
2116}
2117
2118
2119void MacroAssembler::PopHelper(int count,
2120                               int size,
2121                               const CPURegister& dst0,
2122                               const CPURegister& dst1,
2123                               const CPURegister& dst2,
2124                               const CPURegister& dst3) {
2125  // Ensure that we don't unintentionally modify scratch or debug registers.
2126  // Worst case for size is 2 ldp.
2127  ExactAssemblyScope scope(this,
2128                           2 * kInstructionSize,
2129                           ExactAssemblyScope::kMaximumSize);
2130
2131  VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
2132  VIXL_ASSERT(size == dst0.GetSizeInBytes());
2133
2134  // When popping multiple registers, the load order is chosen such that
2135  // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
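  //
  // For example (illustrative), with sp as the stack pointer, `Pop(x0, x1)`
  // emits `ldp x0, x1, [sp], #16`, so x0 is loaded from the lower address,
  // just as if it had been popped on its own first.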
2136  switch (count) {
2137    case 1:
2138      VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
2139      ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
2140      break;
2141    case 2:
2142      VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
2143      ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
2144      break;
2145    case 3:
2146      VIXL_ASSERT(dst3.IsNone());
2147      ldr(dst2, MemOperand(StackPointer(), 2 * size));
2148      ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
2149      break;
2150    case 4:
2151      // Load the higher addresses first, then load the lower addresses and skip
2152      // the whole block in the second instruction. This allows four W registers
2153      // to be popped using sp, whilst maintaining 16-byte alignment for sp at
2154      // all times.
2155      ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
2156      ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
2157      break;
2158    default:
2159      VIXL_UNREACHABLE();
2160  }
2161}
2162
2163
2164void MacroAssembler::PrepareForPush(int count, int size) {
2165  if (sp.Is(StackPointer())) {
2166    // If the current stack pointer is sp, then it must be aligned to 16 bytes
2167    // on entry and the total size of the specified registers must also be a
2168    // multiple of 16 bytes.
2169    VIXL_ASSERT((count * size) % 16 == 0);
2170  } else {
2171    // Even if the current stack pointer is not the system stack pointer (sp),
2172    // the system stack pointer will still be modified in order to comply with
2173    // ABI rules about accessing memory below the system stack pointer.
2174    BumpSystemStackPointer(count * size);
2175  }
2176}
2177
2178
2179void MacroAssembler::PrepareForPop(int count, int size) {
2180  USE(count, size);
2181  if (sp.Is(StackPointer())) {
2182    // If the current stack pointer is sp, then it must be aligned to 16 bytes
2183    // on entry and the total size of the specified registers must also be a
2184    // multiple of 16 bytes.
2185    VIXL_ASSERT((count * size) % 16 == 0);
2186  }
2187}
2188
2189void MacroAssembler::Poke(const Register& src, const Operand& offset) {
2190  VIXL_ASSERT(allow_macro_instructions_);
2191  if (offset.IsImmediate()) {
2192    VIXL_ASSERT(offset.GetImmediate() >= 0);
2193  }
2194
2195  Str(src, MemOperand(StackPointer(), offset));
2196}
2197
2198
2199void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
2200  VIXL_ASSERT(allow_macro_instructions_);
2201  if (offset.IsImmediate()) {
2202    VIXL_ASSERT(offset.GetImmediate() >= 0);
2203  }
2204
2205  Ldr(dst, MemOperand(StackPointer(), offset));
2206}
2207
2208
2209void MacroAssembler::Claim(const Operand& size) {
2210  VIXL_ASSERT(allow_macro_instructions_);
2211
2212  if (size.IsZero()) {
2213    return;
2214  }
2215
2216  if (size.IsImmediate()) {
2217    VIXL_ASSERT(size.GetImmediate() > 0);
2218    if (sp.Is(StackPointer())) {
2219      VIXL_ASSERT((size.GetImmediate() % 16) == 0);
2220    }
2221  }
2222
2223  if (!sp.Is(StackPointer())) {
2224    BumpSystemStackPointer(size);
2225  }
2226
2227  Sub(StackPointer(), StackPointer(), size);
2228}
2229
2230
2231void MacroAssembler::Drop(const Operand& size) {
2232  VIXL_ASSERT(allow_macro_instructions_);
2233
2234  if (size.IsZero()) {
2235    return;
2236  }
2237
2238  if (size.IsImmediate()) {
2239    VIXL_ASSERT(size.GetImmediate() > 0);
2240    if (sp.Is(StackPointer())) {
2241      VIXL_ASSERT((size.GetImmediate() % 16) == 0);
2242    }
2243  }
2244
2245  Add(StackPointer(), StackPointer(), size);
2246}
2247
2248
2249void MacroAssembler::PushCalleeSavedRegisters() {
2250  // Ensure that the macro-assembler doesn't use any scratch registers.
2251  // 10 stp will be emitted.
  // TODO(all): Should we use GetCalleeSaved and SavedFP?
2253  ExactAssemblyScope scope(this, 10 * kInstructionSize);
2254
2255  // This method must not be called unless the current stack pointer is sp.
2256  VIXL_ASSERT(sp.Is(StackPointer()));
2257
2258  MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);
2259
2260  stp(x29, x30, tos);
2261  stp(x27, x28, tos);
2262  stp(x25, x26, tos);
2263  stp(x23, x24, tos);
2264  stp(x21, x22, tos);
2265  stp(x19, x20, tos);
2266
2267  stp(d14, d15, tos);
2268  stp(d12, d13, tos);
2269  stp(d10, d11, tos);
2270  stp(d8, d9, tos);
2271}
2272
2273
2274void MacroAssembler::PopCalleeSavedRegisters() {
2275  // Ensure that the macro-assembler doesn't use any scratch registers.
2276  // 10 ldp will be emitted.
  // TODO(all): Should we use GetCalleeSaved and SavedFP?
2278  ExactAssemblyScope scope(this, 10 * kInstructionSize);
2279
2280  // This method must not be called unless the current stack pointer is sp.
2281  VIXL_ASSERT(sp.Is(StackPointer()));
2282
2283  MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);
2284
2285  ldp(d8, d9, tos);
2286  ldp(d10, d11, tos);
2287  ldp(d12, d13, tos);
2288  ldp(d14, d15, tos);
2289
2290  ldp(x19, x20, tos);
2291  ldp(x21, x22, tos);
2292  ldp(x23, x24, tos);
2293  ldp(x25, x26, tos);
2294  ldp(x27, x28, tos);
2295  ldp(x29, x30, tos);
2296}
2297
2298void MacroAssembler::LoadCPURegList(CPURegList registers,
2299                                    const MemOperand& src) {
2300  LoadStoreCPURegListHelper(kLoad, registers, src);
2301}
2302
2303void MacroAssembler::StoreCPURegList(CPURegList registers,
2304                                     const MemOperand& dst) {
2305  LoadStoreCPURegListHelper(kStore, registers, dst);
2306}
2307
2308
2309void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
2310                                               CPURegList registers,
2311                                               const MemOperand& mem) {
2312  // We do not handle pre-indexing or post-indexing.
2313  VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
2314  VIXL_ASSERT(!registers.Overlaps(tmp_list_));
2315  VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
2316  VIXL_ASSERT(!registers.IncludesAliasOf(sp));
2317
2318  UseScratchRegisterScope temps(this);
2319
2320  MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, mem, &temps);
2321  const int reg_size = registers.GetRegisterSizeInBytes();
2322
2323  VIXL_ASSERT(IsPowerOf2(reg_size));
2324
  // Since we are operating on register pairs, we would like each access to be
  // aligned on double the register size. However, we only insert a single
  // (unpaired) access to fix up the alignment when the number of registers is
  // odd: an odd count requires one unpaired access anyway, whereas for an even
  // count the fix-up would cost an extra instruction. Note that the alignment
  // of the base pointer is unknown here, but we assume that it is more likely
  // to be aligned.
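  //
  // For example (illustrative), loading {x0, x1, x2} from MemOperand(x3, 8)
  // emits `ldr x0, [x3, #8]` to reach a pair-aligned offset, followed by
  // `ldp x1, x2, [x3, #16]`.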
2330  if (((loc.GetOffset() & (2 * reg_size - 1)) != 0) &&
2331      ((registers.GetCount() % 2) != 0)) {
2332    if (op == kStore) {
2333      Str(registers.PopLowestIndex(), loc);
2334    } else {
2335      VIXL_ASSERT(op == kLoad);
2336      Ldr(registers.PopLowestIndex(), loc);
2337    }
2338    loc.AddOffset(reg_size);
2339  }
2340  while (registers.GetCount() >= 2) {
2341    const CPURegister& dst0 = registers.PopLowestIndex();
2342    const CPURegister& dst1 = registers.PopLowestIndex();
2343    if (op == kStore) {
2344      Stp(dst0, dst1, loc);
2345    } else {
2346      VIXL_ASSERT(op == kLoad);
2347      Ldp(dst0, dst1, loc);
2348    }
2349    loc.AddOffset(2 * reg_size);
2350  }
2351  if (!registers.IsEmpty()) {
2352    if (op == kStore) {
2353      Str(registers.PopLowestIndex(), loc);
2354    } else {
2355      VIXL_ASSERT(op == kLoad);
2356      Ldr(registers.PopLowestIndex(), loc);
2357    }
2358  }
2359}
2360
2361MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
2362    const CPURegList& registers,
2363    const MemOperand& mem,
2364    UseScratchRegisterScope* scratch_scope) {
2365  // If necessary, pre-compute the base address for the accesses.
2366  if (mem.IsRegisterOffset()) {
2367    Register reg_base = scratch_scope->AcquireX();
2368    ComputeAddress(reg_base, mem);
2369    return MemOperand(reg_base);
2370
2371  } else if (mem.IsImmediateOffset()) {
2372    int reg_size = registers.GetRegisterSizeInBytes();
2373    int total_size = registers.GetTotalSizeInBytes();
2374    int64_t min_offset = mem.GetOffset();
2375    int64_t max_offset =
2376        mem.GetOffset() + std::max(0, total_size - 2 * reg_size);
2377    if ((registers.GetCount() >= 2) &&
2378        (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
2379         !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
2380      Register reg_base = scratch_scope->AcquireX();
2381      ComputeAddress(reg_base, mem);
2382      return MemOperand(reg_base);
2383    }
2384  }
2385
2386  return mem;
2387}
2388
2389void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
2390  VIXL_ASSERT(!sp.Is(StackPointer()));
2391  // TODO: Several callers rely on this not using scratch registers, so we use
2392  // the assembler directly here. However, this means that large immediate
2393  // values of 'space' cannot be handled.
2394  ExactAssemblyScope scope(this, kInstructionSize);
2395  sub(sp, StackPointer(), space);
2396}
2397
2398
2399// TODO(all): Fix printf for NEON registers, and resolve whether we should be
2400// using FPRegister or VRegister here.
2401
2402// This is the main Printf implementation. All callee-saved registers are
2403// preserved, but NZCV and the caller-saved registers may be clobbered.
2404void MacroAssembler::PrintfNoPreserve(const char* format,
2405                                      const CPURegister& arg0,
2406                                      const CPURegister& arg1,
2407                                      const CPURegister& arg2,
2408                                      const CPURegister& arg3) {
2409  // We cannot handle a caller-saved stack pointer. It doesn't make much sense
2410  // in most cases anyway, so this restriction shouldn't be too serious.
2411  VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));
2412
2413  // The provided arguments, and their proper PCS registers.
2414  CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
2415  CPURegister pcs[kPrintfMaxArgCount];
2416
2417  int arg_count = kPrintfMaxArgCount;
2418
2419  // The PCS varargs registers for printf. Note that x0 is used for the printf
2420  // format string.
2421  static const CPURegList kPCSVarargs =
2422      CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count);
2423  static const CPURegList kPCSVarargsFP =
2424      CPURegList(CPURegister::kVRegister, kDRegSize, 0, arg_count - 1);
2425
2426  // We can use caller-saved registers as scratch values, except for the
2427  // arguments and the PCS registers where they might need to go.
2428  UseScratchRegisterScope temps(this);
2429  temps.Include(kCallerSaved);
2430  temps.Include(kCallerSavedV);
2431  temps.Exclude(kPCSVarargs);
2432  temps.Exclude(kPCSVarargsFP);
2433  temps.Exclude(arg0, arg1, arg2, arg3);
2434
2435  // Copies of the arg lists that we can iterate through.
2436  CPURegList pcs_varargs = kPCSVarargs;
2437  CPURegList pcs_varargs_fp = kPCSVarargsFP;
2438
2439  // Place the arguments. There are lots of clever tricks and optimizations we
2440  // could use here, but Printf is a debug tool so instead we just try to keep
2441  // it simple: Move each input that isn't already in the right place to a
2442  // scratch register, then move everything back.
2443  for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
2444    // Work out the proper PCS register for this argument.
2445    if (args[i].IsRegister()) {
2446      pcs[i] = pcs_varargs.PopLowestIndex().X();
2447      // We might only need a W register here. We need to know the size of the
2448      // argument so we can properly encode it for the simulator call.
2449      if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
2450    } else if (args[i].IsVRegister()) {
2451      // In C, floats are always cast to doubles for varargs calls.
2452      pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
2453    } else {
2454      VIXL_ASSERT(args[i].IsNone());
2455      arg_count = i;
2456      break;
2457    }
2458
2459    // If the argument is already in the right place, leave it where it is.
2460    if (args[i].Aliases(pcs[i])) continue;
2461
2462    // Otherwise, if the argument is in a PCS argument register, allocate an
2463    // appropriate scratch register and then move it out of the way.
2464    if (kPCSVarargs.IncludesAliasOf(args[i]) ||
2465        kPCSVarargsFP.IncludesAliasOf(args[i])) {
2466      if (args[i].IsRegister()) {
2467        Register old_arg = Register(args[i]);
2468        Register new_arg = temps.AcquireSameSizeAs(old_arg);
2469        Mov(new_arg, old_arg);
2470        args[i] = new_arg;
2471      } else {
2472        FPRegister old_arg = FPRegister(args[i]);
2473        FPRegister new_arg = temps.AcquireSameSizeAs(old_arg);
2474        Fmov(new_arg, old_arg);
2475        args[i] = new_arg;
2476      }
2477    }
2478  }
2479
2480  // Do a second pass to move values into their final positions and perform any
2481  // conversions that may be required.
2482  for (int i = 0; i < arg_count; i++) {
2483    VIXL_ASSERT(pcs[i].GetType() == args[i].GetType());
2484    if (pcs[i].IsRegister()) {
2485      Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
2486    } else {
2487      VIXL_ASSERT(pcs[i].IsVRegister());
2488      if (pcs[i].GetSizeInBits() == args[i].GetSizeInBits()) {
2489        Fmov(FPRegister(pcs[i]), FPRegister(args[i]));
2490      } else {
2491        Fcvt(FPRegister(pcs[i]), FPRegister(args[i]));
2492      }
2493    }
2494  }
2495
2496  // Load the format string into x0, as per the procedure-call standard.
2497  //
2498  // To make the code as portable as possible, the format string is encoded
2499  // directly in the instruction stream. It might be cleaner to encode it in a
2500  // literal pool, but since Printf is usually used for debugging, it is
2501  // beneficial for it to be minimally dependent on other features.
2502  temps.Exclude(x0);
2503  Label format_address;
2504  Adr(x0, &format_address);
2505
2506  // Emit the format string directly in the instruction stream.
2507  {
2508    BlockPoolsScope scope(this);
2509    // Data emitted:
2510    //   branch
2511    //   strlen(format) + 1 (includes null termination)
2512    //   padding to next instruction
2513    //   unreachable
2514    EmissionCheckScope guard(this,
2515                             AlignUp(strlen(format) + 1, kInstructionSize) +
2516                                 2 * kInstructionSize);
2517    Label after_data;
2518    B(&after_data);
2519    Bind(&format_address);
2520    EmitString(format);
2521    Unreachable();
2522    Bind(&after_data);
2523  }
2524
2525  // We don't pass any arguments on the stack, but we still need to align the C
2526  // stack pointer to a 16-byte boundary for PCS compliance.
2527  if (!sp.Is(StackPointer())) {
2528    Bic(sp, StackPointer(), 0xf);
2529  }
2530
2531  // Actually call printf. This part needs special handling for the simulator,
2532  // since the system printf function will use a different instruction set and
2533  // the procedure-call standard will not be compatible.
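  //
  // For example (illustrative), a call with one W-sized and one D-sized
  // argument results in simulator data along the lines of:
  //   hlt(kPrintfOpcode)
  //   dc32(2)
  //   dc32(kPrintfArgW | (kPrintfArgD << kPrintfArgPatternBits))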
2534  if (generate_simulator_code_) {
2535    ExactAssemblyScope scope(this, kPrintfLength);
2536    hlt(kPrintfOpcode);
2537    dc32(arg_count);  // kPrintfArgCountOffset
2538
2539    // Determine the argument pattern.
2540    uint32_t arg_pattern_list = 0;
2541    for (int i = 0; i < arg_count; i++) {
2542      uint32_t arg_pattern;
2543      if (pcs[i].IsRegister()) {
2544        arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
2545      } else {
2546        VIXL_ASSERT(pcs[i].Is64Bits());
2547        arg_pattern = kPrintfArgD;
2548      }
2549      VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits));
2550      arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
2551    }
2552    dc32(arg_pattern_list);  // kPrintfArgPatternListOffset
2553  } else {
2554    Register tmp = temps.AcquireX();
2555    Mov(tmp, reinterpret_cast<uintptr_t>(printf));
2556    Blr(tmp);
2557  }
2558}
2559
2560
2561void MacroAssembler::Printf(const char* format,
2562                            CPURegister arg0,
2563                            CPURegister arg1,
2564                            CPURegister arg2,
2565                            CPURegister arg3) {
2566  // We can only print sp if it is the current stack pointer.
2567  if (!sp.Is(StackPointer())) {
2568    VIXL_ASSERT(!sp.Aliases(arg0));
2569    VIXL_ASSERT(!sp.Aliases(arg1));
2570    VIXL_ASSERT(!sp.Aliases(arg2));
2571    VIXL_ASSERT(!sp.Aliases(arg3));
2572  }
2573
2574  // Make sure that the macro assembler doesn't try to use any of our arguments
2575  // as scratch registers.
2576  UseScratchRegisterScope exclude_all(this);
2577  exclude_all.ExcludeAll();
2578
2579  // Preserve all caller-saved registers as well as NZCV.
2580  // If sp is the stack pointer, PushCPURegList asserts that the size of each
2581  // list is a multiple of 16 bytes.
2582  PushCPURegList(kCallerSaved);
2583  PushCPURegList(kCallerSavedV);
2584
2585  {
2586    UseScratchRegisterScope temps(this);
2587    // We can use caller-saved registers as scratch values (except for argN).
2588    temps.Include(kCallerSaved);
2589    temps.Include(kCallerSavedV);
2590    temps.Exclude(arg0, arg1, arg2, arg3);
2591
2592    // If any of the arguments are the current stack pointer, allocate a new
2593    // register for them, and adjust the value to compensate for pushing the
2594    // caller-saved registers.
2595    bool arg0_sp = StackPointer().Aliases(arg0);
2596    bool arg1_sp = StackPointer().Aliases(arg1);
2597    bool arg2_sp = StackPointer().Aliases(arg2);
2598    bool arg3_sp = StackPointer().Aliases(arg3);
2599    if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
2600      // Allocate a register to hold the original stack pointer value, to pass
2601      // to PrintfNoPreserve as an argument.
2602      Register arg_sp = temps.AcquireX();
2603      Add(arg_sp,
2604          StackPointer(),
2605          kCallerSaved.GetTotalSizeInBytes() +
2606              kCallerSavedV.GetTotalSizeInBytes());
2607      if (arg0_sp) arg0 = Register(arg_sp.GetCode(), arg0.GetSizeInBits());
2608      if (arg1_sp) arg1 = Register(arg_sp.GetCode(), arg1.GetSizeInBits());
2609      if (arg2_sp) arg2 = Register(arg_sp.GetCode(), arg2.GetSizeInBits());
2610      if (arg3_sp) arg3 = Register(arg_sp.GetCode(), arg3.GetSizeInBits());
2611    }
2612
2613    // Preserve NZCV.
2614    Register tmp = temps.AcquireX();
2615    Mrs(tmp, NZCV);
2616    Push(tmp, xzr);
2617    temps.Release(tmp);
2618
2619    PrintfNoPreserve(format, arg0, arg1, arg2, arg3);
2620
2621    // Restore NZCV.
2622    tmp = temps.AcquireX();
2623    Pop(xzr, tmp);
2624    Msr(NZCV, tmp);
2625    temps.Release(tmp);
2626  }
2627
2628  PopCPURegList(kCallerSavedV);
2629  PopCPURegList(kCallerSaved);
2630}
2631
2632void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
2633  VIXL_ASSERT(allow_macro_instructions_);
2634
2635  if (generate_simulator_code_) {
2636    // The arguments to the trace pseudo instruction need to be contiguous in
2637    // memory, so make sure we don't try to emit a literal pool.
2638    ExactAssemblyScope scope(this, kTraceLength);
2639
2640    Label start;
2641    bind(&start);
2642
2643    // Refer to simulator-aarch64.h for a description of the marker and its
2644    // arguments.
2645    hlt(kTraceOpcode);
2646
2647    VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
2648    dc32(parameters);
2649
2650    VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
2651    dc32(command);
2652  } else {
2653    // Emit nothing on real hardware.
2654    USE(parameters, command);
2655  }
2656}
2657
2658
2659void MacroAssembler::Log(TraceParameters parameters) {
2660  VIXL_ASSERT(allow_macro_instructions_);
2661
2662  if (generate_simulator_code_) {
2663    // The arguments to the log pseudo instruction need to be contiguous in
2664    // memory, so make sure we don't try to emit a literal pool.
2665    ExactAssemblyScope scope(this, kLogLength);
2666
2667    Label start;
2668    bind(&start);
2669
2670    // Refer to simulator-aarch64.h for a description of the marker and its
2671    // arguments.
2672    hlt(kLogOpcode);
2673
2674    VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
2675    dc32(parameters);
2676  } else {
2677    // Emit nothing on real hardware.
2678    USE(parameters);
2679  }
2680}
2681
2682
2683void MacroAssembler::EnableInstrumentation() {
2684  VIXL_ASSERT(!isprint(InstrumentStateEnable));
2685  ExactAssemblyScope scope(this, kInstructionSize);
2686  movn(xzr, InstrumentStateEnable);
2687}
2688
2689
2690void MacroAssembler::DisableInstrumentation() {
2691  VIXL_ASSERT(!isprint(InstrumentStateDisable));
2692  ExactAssemblyScope scope(this, kInstructionSize);
2693  movn(xzr, InstrumentStateDisable);
2694}
2695
2696
2697void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
2698  VIXL_ASSERT(strlen(marker_name) == 2);
2699
2700  // We allow only printable characters in the marker names. Unprintable
2701  // characters are reserved for controlling features of the instrumentation.
2702  VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));
2703
2704  ExactAssemblyScope scope(this, kInstructionSize);
2705  movn(xzr, (marker_name[1] << 8) | marker_name[0]);
2706}
2707
2708
2709void UseScratchRegisterScope::Open(MacroAssembler* masm) {
2710  VIXL_ASSERT(masm_ == NULL);
2711  VIXL_ASSERT(masm != NULL);
2712  masm_ = masm;
2713
2714  CPURegList* available = masm->GetScratchRegisterList();
2715  CPURegList* available_fp = masm->GetScratchFPRegisterList();
2716  old_available_ = available->GetList();
2717  old_availablefp_ = available_fp->GetList();
2718  VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
2719  VIXL_ASSERT(available_fp->GetType() == CPURegister::kVRegister);
2720
2721  parent_ = masm->GetCurrentScratchRegisterScope();
2722  masm->SetCurrentScratchRegisterScope(this);
2723}
2724
2725
2726void UseScratchRegisterScope::Close() {
2727  if (masm_ != NULL) {
2728    // Ensure that scopes nest perfectly, and do not outlive their parents.
2729    // This is a run-time check because the order of destruction of objects in
2730    // the _same_ scope is implementation-defined, and is likely to change in
2731    // optimised builds.
2732    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
2733    masm_->SetCurrentScratchRegisterScope(parent_);
2734
2735    masm_->GetScratchRegisterList()->SetList(old_available_);
2736    masm_->GetScratchFPRegisterList()->SetList(old_availablefp_);
2737
2738    masm_ = NULL;
2739  }
2740}
2741
2742
2743bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
2744  return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
2745         masm_->GetScratchFPRegisterList()->IncludesAliasOf(reg);
2746}
2747
2748
2749Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
2750  int code = AcquireNextAvailable(masm_->GetScratchRegisterList()).GetCode();
2751  return Register(code, size_in_bits);
2752}
2753
2754
2755FPRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
2756  int code = AcquireNextAvailable(masm_->GetScratchFPRegisterList()).GetCode();
2757  return FPRegister(code, size_in_bits);
2758}
2759
2760
2761void UseScratchRegisterScope::Release(const CPURegister& reg) {
2762  VIXL_ASSERT(masm_ != NULL);
2763  if (reg.IsRegister()) {
2764    ReleaseByCode(masm_->GetScratchRegisterList(), reg.GetCode());
2765  } else if (reg.IsVRegister()) {
2766    ReleaseByCode(masm_->GetScratchFPRegisterList(), reg.GetCode());
2767  } else {
2768    VIXL_ASSERT(reg.IsNone());
2769  }
2770}
2771
2772
2773void UseScratchRegisterScope::Include(const CPURegList& list) {
2774  VIXL_ASSERT(masm_ != NULL);
2775  if (list.GetType() == CPURegister::kRegister) {
    // Make sure that neither sp nor xzr is included in the list.
2777    IncludeByRegList(masm_->GetScratchRegisterList(),
2778                     list.GetList() & ~(xzr.GetBit() | sp.GetBit()));
2779  } else {
2780    VIXL_ASSERT(list.GetType() == CPURegister::kVRegister);
2781    IncludeByRegList(masm_->GetScratchFPRegisterList(), list.GetList());
2782  }
2783}
2784
2785
2786void UseScratchRegisterScope::Include(const Register& reg1,
2787                                      const Register& reg2,
2788                                      const Register& reg3,
2789                                      const Register& reg4) {
2790  VIXL_ASSERT(masm_ != NULL);
2791  RegList include =
2792      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  // Make sure that neither sp nor xzr is included in the list.
2794  include &= ~(xzr.GetBit() | sp.GetBit());
2795
2796  IncludeByRegList(masm_->GetScratchRegisterList(), include);
2797}
2798
2799
2800void UseScratchRegisterScope::Include(const FPRegister& reg1,
2801                                      const FPRegister& reg2,
2802                                      const FPRegister& reg3,
2803                                      const FPRegister& reg4) {
2804  RegList include =
2805      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
2806  IncludeByRegList(masm_->GetScratchFPRegisterList(), include);
2807}
2808
2809
2810void UseScratchRegisterScope::Exclude(const CPURegList& list) {
2811  if (list.GetType() == CPURegister::kRegister) {
2812    ExcludeByRegList(masm_->GetScratchRegisterList(), list.GetList());
2813  } else {
2814    VIXL_ASSERT(list.GetType() == CPURegister::kVRegister);
2815    ExcludeByRegList(masm_->GetScratchFPRegisterList(), list.GetList());
2816  }
2817}
2818
2819
2820void UseScratchRegisterScope::Exclude(const Register& reg1,
2821                                      const Register& reg2,
2822                                      const Register& reg3,
2823                                      const Register& reg4) {
2824  RegList exclude =
2825      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
2826  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
2827}
2828
2829
2830void UseScratchRegisterScope::Exclude(const FPRegister& reg1,
2831                                      const FPRegister& reg2,
2832                                      const FPRegister& reg3,
2833                                      const FPRegister& reg4) {
2834  RegList excludefp =
2835      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
2836  ExcludeByRegList(masm_->GetScratchFPRegisterList(), excludefp);
2837}
2838
2839
2840void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
2841                                      const CPURegister& reg2,
2842                                      const CPURegister& reg3,
2843                                      const CPURegister& reg4) {
2844  RegList exclude = 0;
2845  RegList excludefp = 0;
2846
2847  const CPURegister regs[] = {reg1, reg2, reg3, reg4};
2848
2849  for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) {
2850    if (regs[i].IsRegister()) {
2851      exclude |= regs[i].GetBit();
2852    } else if (regs[i].IsFPRegister()) {
2853      excludefp |= regs[i].GetBit();
2854    } else {
2855      VIXL_ASSERT(regs[i].IsNone());
2856    }
2857  }
2858
2859  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
2860  ExcludeByRegList(masm_->GetScratchFPRegisterList(), excludefp);
2861}
2862
2863
2864void UseScratchRegisterScope::ExcludeAll() {
2865  ExcludeByRegList(masm_->GetScratchRegisterList(),
2866                   masm_->GetScratchRegisterList()->GetList());
2867  ExcludeByRegList(masm_->GetScratchFPRegisterList(),
2868                   masm_->GetScratchFPRegisterList()->GetList());
2869}
2870
2871
2872CPURegister UseScratchRegisterScope::AcquireNextAvailable(
2873    CPURegList* available) {
2874  VIXL_CHECK(!available->IsEmpty());
2875  CPURegister result = available->PopLowestIndex();
2876  VIXL_ASSERT(!AreAliased(result, xzr, sp));
2877  return result;
2878}
2879
2880
2881void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
2882  ReleaseByRegList(available, static_cast<RegList>(1) << code);
2883}
2884
2885
2886void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
2887                                               RegList regs) {
2888  available->SetList(available->GetList() | regs);
2889}
2890
2891
2892void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
2893                                               RegList regs) {
2894  available->SetList(available->GetList() | regs);
2895}
2896
2897
2898void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
2899                                               RegList exclude) {
2900  available->SetList(available->GetList() & ~exclude);
2901}
2902
2903}  // namespace aarch64
2904}  // namespace vixl
2905