macro-assembler-a64.cc revision 578645f14e122d2b87d907e298cda7e7d0babf1f
// Copyright 2013, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "a64/macro-assembler-a64.h"

namespace vixl {

void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, (S == SetFlags) ? ANDS : AND);
}


void MacroAssembler::Tst(const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  And(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, (S == SetFlags) ? BICS : BIC);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}


void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  if (operand.IsImmediate()) {
    int64_t immediate = operand.immediate();
    unsigned reg_size = rd.size();
    ASSERT(rd.Is64Bits() || is_uint32(immediate));

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
      if (rd.Is32Bits()) {
        immediate &= kWRegMask;
      }
    }

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:  // Fall through.
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == -1L)) ||
               (rd.Is32Bits() && (immediate == 0xffffffffL))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = AppropriateTempFor(rn);
      Mov(temp, immediate);
      if (rd.Is(sp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register so we use the temp register as an intermediate again.
        Logical(temp, rn, Operand(temp), op);
        Mov(sp, temp);
      } else {
        Logical(rd, rn, Operand(temp), op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    ASSERT(operand.reg().size() <= rd.size());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    ASSERT(operand.shift_amount() <= 4);
    ASSERT(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}
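
// Illustrative expansion for an immediate that is not a valid bitmask
// immediate ("temp" stands for whatever register AppropriateTempFor returns):
//   And(x0, x1, 0x123456789);
// becomes, in effect:
//   mov temp, #0x123456789    // itself synthesized by Mov(rd, imm)
//   and x0, x1, temp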


void MacroAssembler::Mov(const Register& rd, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.immediate());
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd, operand.reg(), operand.extend(),
                    operand.shift_amount());
  } else {
    // Otherwise, emit a register move only if the registers are distinct, or
    // if they are not X registers. Note that mov(w0, w0) is not a no-op
    // because it clears the top word of x0.
    // If the sp is an operand, add #0 is emitted, otherwise, orr #0.
    if (!rd.Is(operand.reg()) || !rd.Is64Bits()) {
      mov(rd, operand.reg());
    }
  }
}
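
// For example, Mov(w0, Operand(w1, LSL, 4)) emits the shift instruction
// "lsl w0, w1, #4" (an alias of ubfm) rather than an equivalent orr, purely
// so that the disassembly reads naturally.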


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.immediate());
  } else if (operand.IsExtendedRegister()) {
    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    Register temp = AppropriateTempFor(rd, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    mvn(rd, Operand(temp));
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  ASSERT(allow_macro_instructions_);
  ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move negative (movn).
  //  3. 64-bit move negative.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit halfwords.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero initial values only.

  unsigned reg_size = rd.size();
  unsigned n, imm_s, imm_r;
  if (IsImmMovz(imm, reg_size) && !rd.IsSP()) {
    // Immediate can be represented in a move zero instruction.
    movz(rd, imm);
  } else if (IsImmMovn(imm, reg_size) && !rd.IsSP()) {
    // Immediate can be represented in a move negative instruction. Movn can't
    // write to the stack pointer.
    movn(rd, rd.Is64Bits() ? ~imm : (~imm & kWRegMask));
  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
    // Immediate can be represented in a logical orr instruction.
    ASSERT(!rd.IsZero());
    LogicalImmediate(rd, AppropriateZeroRegFor(rd), n, imm_s, imm_r, ORR);
  } else {
    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero is generated for the first non-zero immX, and a move-keep
    // for subsequent non-zero immX.

    // Use a temporary register when moving to the stack pointer.
    Register temp = rd.IsSP() ? AppropriateTempFor(rd) : rd;

    ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (temp.size() / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffffL;
      if (imm16 != 0) {
        if (!first_mov_done) {
          // Move the first non-zero 16-bit chunk into the destination register.
          movz(temp, imm16, 16 * i);
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          movk(temp, imm16, 16 * i);
        }
      }
    }

    if (rd.IsSP()) {
      mov(rd, temp);
    }

    ASSERT(first_mov_done);
  }
}
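
// Worked example for the generic case: Mov(x0, 0x0000123400005678) is not
// movz-, movn- or orr-encodable, so it is synthesized as:
//   movz x0, #0x5678              // first non-zero 16-bit chunk
//   movk x0, #0x1234, lsl #32     // merge the next non-zero chunk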


// The movz instruction can generate immediates containing an arbitrary 16-bit
// value, with remaining bits clear, eg. 0x00001234, 0x0000123400000000.
bool MacroAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
  if (reg_size == kXRegSize) {
    if (((imm & 0xffffffffffff0000UL) == 0UL) ||
        ((imm & 0xffffffff0000ffffUL) == 0UL) ||
        ((imm & 0xffff0000ffffffffUL) == 0UL) ||
        ((imm & 0x0000ffffffffffffUL) == 0UL)) {
      return true;
    }
  } else {
    ASSERT(reg_size == kWRegSize);
    imm &= kWRegMask;
    if (((imm & 0xffff0000) == 0) ||
        ((imm & 0x0000ffff) == 0)) {
      return true;
    }
  }
  return false;
}


// The movn instruction can generate immediates containing an arbitrary 16-bit
// value, with remaining bits set, eg. 0xffff1234, 0xffff1234ffffffff.
bool MacroAssembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
  return IsImmMovz(~imm, reg_size);
}
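
// For example, IsImmMovn(0xffffffffffff1234, kXRegSize) is true because the
// inverted value, 0x000000000000edcb, is movz-encodable.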


void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  ASSERT(allow_macro_instructions_);
  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
}


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  ASSERT(allow_macro_instructions_);
  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  ASSERT((cond != al) && (cond != nv));
  if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
      (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp(NoReg);
    if (operand.IsImmediate()) {
      temp = AppropriateTempFor(rn);
      Mov(temp, operand.immediate());
    } else if (operand.IsShiftedRegister()) {
      ASSERT(operand.shift() != ROR);
      ASSERT(is_uintn(rn.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                      operand.shift_amount()));
      temp = AppropriateTempFor(rn, operand.reg());
      EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    } else {
      ASSERT(operand.IsExtendedRegister());
      ASSERT(operand.reg().size() <= rn.size());
      // Add/sub extended supports a shift <= 4. We want to support exactly the
      // same modes.
      ASSERT(operand.shift_amount() <= 4);
      ASSERT(operand.reg().Is64Bits() ||
             ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
      temp = AppropriateTempFor(rn, operand.reg());
      EmitExtendShift(temp, operand.reg(), operand.extend(),
                      operand.shift_amount());
    }
    ConditionalCompare(rn, Operand(temp), nzcv, cond, op);
  }
}
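
// Illustrative expansion: the ccmp/ccmn immediate field is only five bits
// wide, so Ccmp(x0, 64, NoFlag, eq) must go through a temporary:
//   mov  temp, #0x40
//   ccmp x0, temp, #<nzcv>, eq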


void MacroAssembler::Add(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.immediate() < 0)) {
    AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
  } else {
    AddSubMacro(rd, rn, operand, S, ADD);
  }
}


void MacroAssembler::Sub(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.immediate() < 0)) {
    AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
  } else {
    AddSubMacro(rd, rn, operand, S, SUB);
  }
}
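
// Negative immediates are folded into the opposite operation: for example,
// Sub(x0, x1, -8) is emitted as if it were Add(x0, x1, 8), and vice versa.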


void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  Add(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  Sub(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Neg(const Register& rd,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    Mov(rd, -operand.immediate());
  } else {
    Sub(rd, AppropriateZeroRegFor(rd), operand, S);
  }
}


void MacroAssembler::AddSubMacro(const Register& rd,
                                 const Register& rn,
                                 const Operand& operand,
                                 FlagsUpdate S,
                                 AddSubOp op) {
  if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) ||
      (rn.IsZero() && !operand.IsShiftedRegister())                ||
      (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    Register temp = AppropriateTempFor(rn);
    Mov(temp, operand);
    AddSub(rd, rn, temp, S, op);
  } else {
    AddSub(rd, rn, operand, S, op);
  }
}
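
// Illustrative expansion: add/sub immediates are limited to 12 bits,
// optionally shifted left by 12, so Add(x0, x1, 0x123456) becomes:
//   mov temp, #0x123456    // temp as chosen by AppropriateTempFor(x1)
//   add x0, x1, temp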


void MacroAssembler::Adc(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, S, ADC);
}


void MacroAssembler::Sbc(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, S, SBC);
}


void MacroAssembler::Ngc(const Register& rd,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  Register zr = AppropriateZeroRegFor(rd);
  Sbc(rd, zr, operand, S);
}


void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
                                          const Register& rn,
                                          const Operand& operand,
                                          FlagsUpdate S,
                                          AddSubWithCarryOp op) {
  ASSERT(rd.size() == rn.size());

  if (operand.IsImmediate() ||
      (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    // Add/sub with carry (immediate or ROR shifted register).
    Register temp = AppropriateTempFor(rn);
    Mov(temp, operand);
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Add/sub with carry (shifted register).
    ASSERT(operand.reg().size() == rd.size());
    ASSERT(operand.shift() != ROR);
    ASSERT(is_uintn(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                    operand.shift_amount()));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsExtendedRegister()) {
    // Add/sub with carry (extended register).
    ASSERT(operand.reg().size() <= rd.size());
    // Add/sub extended supports a shift <= 4. We want to support exactly the
    // same modes.
    ASSERT(operand.shift_amount() <= 4);
    ASSERT(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else {
    // The addressing mode is directly supported by the instruction.
    AddSubWithCarry(rd, rn, operand, S, op);
  }
}


#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                         \
void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) {  \
  LoadStoreMacro(REG, addr, OP);                                      \
}
LS_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION
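
// The LS_MACRO_LIST expansion above defines the user-facing load/store
// wrappers (Ldr, Str, Ldrb, Strb, and so on; see LS_MACRO_LIST in the header
// for the full set). Each wrapper simply forwards to LoadStoreMacro below.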

void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                    const MemOperand& addr,
                                    LoadStoreOp op) {
  int64_t offset = addr.offset();
  LSDataSize size = CalcLSDataSize(op);

  // Check if an immediate offset fits in the immediate field of the
  // appropriate instruction. If not, emit two instructions to perform
  // the operation.
  if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) &&
      !IsImmLSUnscaled(offset)) {
    // Immediate offset that can't be encoded using unsigned or unscaled
    // addressing modes.
    Register temp = AppropriateTempFor(addr.base());
    Mov(temp, addr.offset());
    LoadStore(rt, MemOperand(addr.base(), temp), op);
  } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
    // Post-index beyond unscaled addressing range.
    LoadStore(rt, MemOperand(addr.base()), op);
    Add(addr.base(), addr.base(), Operand(offset));
  } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
    // Pre-index beyond unscaled addressing range.
    Add(addr.base(), addr.base(), Operand(offset));
    LoadStore(rt, MemOperand(addr.base()), op);
  } else {
    // Encodable in one load/store instruction.
    LoadStore(rt, addr, op);
  }
}
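
// Illustrative expansion for an offset that fits neither the scaled nor the
// unscaled immediate field:
//   Ldr(x0, MemOperand(x1, 0x123456));
// becomes:
//   mov temp, #0x123456
//   ldr x0, [x1, temp]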


void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                          const CPURegister& src2, const CPURegister& src3) {
  ASSERT(allow_macro_instructions_);
  ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  ASSERT(src0.IsValid());

  int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
  int size = src0.SizeInBytes();

  PrepareForPush(count, size);
  PushHelper(count, size, src0, src1, src2, src3);
}


void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
                         const CPURegister& dst2, const CPURegister& dst3) {
  // It is not valid to pop into the same register more than once in one
  // instruction, not even into the zero register.
  ASSERT(allow_macro_instructions_);
  ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
  ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  ASSERT(dst0.IsValid());

  int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
  int size = dst0.SizeInBytes();

  PrepareForPop(count, size);
  PopHelper(count, size, dst0, dst1, dst2, dst3);
}


void MacroAssembler::PushCPURegList(CPURegList registers) {
  int size = registers.RegisterSizeInBytes();

  PrepareForPush(registers.Count(), size);
  // Push up to four registers at a time because if the current stack pointer is
  // sp and reg_size is 32, registers must be pushed in blocks of four in order
  // to maintain the 16-byte alignment for sp.
  ASSERT(allow_macro_instructions_);
  while (!registers.IsEmpty()) {
    int count_before = registers.Count();
    const CPURegister& src0 = registers.PopHighestIndex();
    const CPURegister& src1 = registers.PopHighestIndex();
    const CPURegister& src2 = registers.PopHighestIndex();
    const CPURegister& src3 = registers.PopHighestIndex();
    int count = count_before - registers.Count();
    PushHelper(count, size, src0, src1, src2, src3);
  }
}


void MacroAssembler::PopCPURegList(CPURegList registers) {
  int size = registers.RegisterSizeInBytes();

  PrepareForPop(registers.Count(), size);
  // Pop up to four registers at a time because if the current stack pointer is
  // sp and reg_size is 32, registers must be popped in blocks of four in order
  // to maintain the 16-byte alignment for sp.
  ASSERT(allow_macro_instructions_);
  while (!registers.IsEmpty()) {
    int count_before = registers.Count();
    const CPURegister& dst0 = registers.PopLowestIndex();
    const CPURegister& dst1 = registers.PopLowestIndex();
    const CPURegister& dst2 = registers.PopLowestIndex();
    const CPURegister& dst3 = registers.PopLowestIndex();
    int count = count_before - registers.Count();
    PopHelper(count, size, dst0, dst1, dst2, dst3);
  }
}


void MacroAssembler::PushMultipleTimes(int count, Register src) {
  ASSERT(allow_macro_instructions_);
  int size = src.SizeInBytes();

  PrepareForPush(count, size);
  // Push up to four registers at a time if possible because if the current
  // stack pointer is sp and the register size is 32, registers must be pushed
  // in blocks of four in order to maintain the 16-byte alignment for sp.
  while (count >= 4) {
    PushHelper(4, size, src, src, src, src);
    count -= 4;
  }
  if (count >= 2) {
    PushHelper(2, size, src, src, NoReg, NoReg);
    count -= 2;
  }
  if (count == 1) {
    PushHelper(1, size, src, NoReg, NoReg, NoReg);
    count -= 1;
  }
  ASSERT(count == 0);
}


void MacroAssembler::PushHelper(int count, int size,
                                const CPURegister& src0,
                                const CPURegister& src1,
                                const CPURegister& src2,
                                const CPURegister& src3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  ASSERT(size == src0.SizeInBytes());

  // When pushing multiple registers, the store order is chosen such that
  // Push(a, b) is equivalent to Push(a) followed by Push(b).
  switch (count) {
    case 1:
      ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
      str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
      break;
    case 2:
      ASSERT(src2.IsNone() && src3.IsNone());
      stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
      break;
    case 3:
      ASSERT(src3.IsNone());
      stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
      str(src0, MemOperand(StackPointer(), 2 * size));
      break;
    case 4:
      // Skip over 4 * size, then fill in the gap. This allows four W registers
      // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
      // all times.
      stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
      stp(src1, src0, MemOperand(StackPointer(), 2 * size));
      break;
    default:
      UNREACHABLE();
  }
}
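
// For example (with sp as the current stack pointer), Push(x0, x1) emits
//   stp x1, x0, [sp, #-16]!
// so that x1 ends up at the lower address, exactly as if Push(x0) had been
// followed by Push(x1).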


void MacroAssembler::PopHelper(int count, int size,
                               const CPURegister& dst0,
                               const CPURegister& dst1,
                               const CPURegister& dst2,
                               const CPURegister& dst3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  ASSERT(size == dst0.SizeInBytes());

  // When popping multiple registers, the load order is chosen such that
  // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
  switch (count) {
    case 1:
      ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
      ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
      break;
    case 2:
      ASSERT(dst2.IsNone() && dst3.IsNone());
      ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
      break;
    case 3:
      ASSERT(dst3.IsNone());
      ldr(dst2, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
      break;
    case 4:
      // Load the higher addresses first, then load the lower addresses and skip
      // the whole block in the second instruction. This allows four W registers
      // to be popped using sp, whilst maintaining 16-byte alignment for sp at
      // all times.
      ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
      break;
    default:
      UNREACHABLE();
  }
}


void MacroAssembler::PrepareForPush(int count, int size) {
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    ASSERT((count * size) % 16 == 0);
  } else {
    // Even if the current stack pointer is not the system stack pointer (sp),
    // the system stack pointer will still be modified in order to comply with
    // ABI rules about accessing memory below the system stack pointer.
    BumpSystemStackPointer(count * size);
  }
}


void MacroAssembler::PrepareForPop(int count, int size) {
  USE(count);
  USE(size);
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    ASSERT((count * size) % 16 == 0);
  }
}


void MacroAssembler::Poke(const Register& src, const Operand& offset) {
  ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    ASSERT(offset.immediate() >= 0);
  }

  Str(src, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
  ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    ASSERT(offset.immediate() >= 0);
  }

  Ldr(dst, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Claim(const Operand& size) {
  ASSERT(allow_macro_instructions_);
  if (size.IsImmediate()) {
    ASSERT(size.immediate() >= 0);
    if (sp.Is(StackPointer())) {
      ASSERT((size.immediate() % 16) == 0);
    }
  }

  if (!sp.Is(StackPointer())) {
    BumpSystemStackPointer(size);
  }

  Sub(StackPointer(), StackPointer(), size);
}


void MacroAssembler::Drop(const Operand& size) {
  ASSERT(allow_macro_instructions_);
  if (size.IsImmediate()) {
    ASSERT(size.immediate() >= 0);
    if (sp.Is(StackPointer())) {
      ASSERT((size.immediate() % 16) == 0);
    }
  }

  Add(StackPointer(), StackPointer(), size);
}


void MacroAssembler::PushCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  // This method must not be called unless the current stack pointer is sp.
  ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, -2 * kXRegSizeInBytes, PreIndex);

  stp(d14, d15, tos);
  stp(d12, d13, tos);
  stp(d10, d11, tos);
  stp(d8, d9, tos);

  stp(x29, x30, tos);
  stp(x27, x28, tos);
  stp(x25, x26, tos);
  stp(x23, x24, tos);
  stp(x21, x22, tos);
  stp(x19, x20, tos);
}


void MacroAssembler::PopCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  // This method must not be called unless the current stack pointer is sp.
  ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);

  ldp(x19, x20, tos);
  ldp(x21, x22, tos);
  ldp(x23, x24, tos);
  ldp(x25, x26, tos);
  ldp(x27, x28, tos);
  ldp(x29, x30, tos);

  ldp(d8, d9, tos);
  ldp(d10, d11, tos);
  ldp(d12, d13, tos);
  ldp(d14, d15, tos);
}


void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
  ASSERT(!sp.Is(StackPointer()));
  // TODO: Several callers rely on this not using scratch registers, so we use
  // the assembler directly here. However, this means that large immediate
  // values of 'space' cannot be handled.
  InstructionAccurateScope scope(this);
  sub(sp, StackPointer(), space);
}


// This is the main Printf implementation. All callee-saved registers are
// preserved, but NZCV and the caller-saved registers may be clobbered.
void MacroAssembler::PrintfNoPreserve(const char * format,
                                      const CPURegister& arg0,
                                      const CPURegister& arg1,
                                      const CPURegister& arg2,
                                      const CPURegister& arg3) {
  // We cannot handle a caller-saved stack pointer. It doesn't make much sense
  // in most cases anyway, so this restriction shouldn't be too serious.
  ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));

  // We cannot print Tmp0() or Tmp1() as they're used internally by the macro
  // assembler. We cannot print the stack pointer because it is typically used
  // to preserve caller-saved registers (using other Printf variants which
  // depend on this helper).
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg0));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg1));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg2));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg3));

  static const int kMaxArgCount = 4;
  // Assume that we have the maximum number of arguments until we know
  // otherwise.
  int arg_count = kMaxArgCount;

  // The provided arguments.
  CPURegister args[kMaxArgCount] = {arg0, arg1, arg2, arg3};

  // The PCS registers where the arguments need to end up.
  CPURegister pcs[kMaxArgCount];

  // Promote FP arguments to doubles, and integer arguments to X registers.
  // Note that FP and integer arguments cannot be mixed, but we'll check
  // AreSameSizeAndType once we've processed these promotions.
  for (int i = 0; i < kMaxArgCount; i++) {
    if (args[i].IsRegister()) {
      // Note that we use x1 onwards, because x0 will hold the format string.
      pcs[i] = Register::XRegFromCode(i + 1);
      // For simplicity, we handle all integer arguments as X registers. An X
      // register argument takes the same space as a W register argument in the
      // PCS anyway. The only limitation is that we must explicitly clear the
      // top word for W register arguments as the callee will expect it to be
      // clear.
      if (!args[i].Is64Bits()) {
        const Register& as_x = args[i].X();
        And(as_x, as_x, 0x00000000ffffffff);
        args[i] = as_x;
      }
    } else if (args[i].IsFPRegister()) {
      pcs[i] = FPRegister::DRegFromCode(i);
      // C and C++ varargs functions (such as printf) implicitly promote float
      // arguments to doubles.
      if (!args[i].Is64Bits()) {
        FPRegister s(args[i]);
        const FPRegister& as_d = args[i].D();
        Fcvt(as_d, s);
        args[i] = as_d;
      }
    } else {
      // This is the first empty (NoCPUReg) argument, so use it to set the
      // argument count and bail out.
      arg_count = i;
      break;
    }
  }
  ASSERT((arg_count >= 0) && (arg_count <= kMaxArgCount));
  // Check that every remaining argument is NoCPUReg.
  for (int i = arg_count; i < kMaxArgCount; i++) {
    ASSERT(args[i].IsNone());
  }
  ASSERT((arg_count == 0) || AreSameSizeAndType(args[0], args[1],
                                                args[2], args[3],
                                                pcs[0], pcs[1],
                                                pcs[2], pcs[3]));

  // Move the arguments into the appropriate PCS registers.
  //
  // Arranging an arbitrary list of registers into x1-x4 (or d0-d3) is
  // surprisingly complicated.
  //
  //  * For even numbers of registers, we push the arguments and then pop them
  //    into their final registers. This maintains 16-byte stack alignment in
  //    case sp is the stack pointer, since we're only handling X or D registers
  //    at this point.
  //
  //  * For odd numbers of registers, we push and pop all but one register in
  //    the same way, but the left-over register is moved directly, since we
  //    can always safely move one register without clobbering any source.
  if (arg_count >= 4) {
    Push(args[3], args[2], args[1], args[0]);
  } else if (arg_count >= 2) {
    Push(args[1], args[0]);
  }

  if ((arg_count % 2) != 0) {
    // Move the left-over register directly.
    const CPURegister& leftover_arg = args[arg_count - 1];
    const CPURegister& leftover_pcs = pcs[arg_count - 1];
    if (leftover_arg.IsRegister()) {
      Mov(Register(leftover_pcs), Register(leftover_arg));
    } else {
      Fmov(FPRegister(leftover_pcs), FPRegister(leftover_arg));
    }
  }

  if (arg_count >= 4) {
    Pop(pcs[0], pcs[1], pcs[2], pcs[3]);
  } else if (arg_count >= 2) {
    Pop(pcs[0], pcs[1]);
  }

  // Load the format string into x0, as per the procedure-call standard.
  //
  // To make the code as portable as possible, the format string is encoded
  // directly in the instruction stream. It might be cleaner to encode it in a
  // literal pool, but since Printf is usually used for debugging, it is
  // beneficial for it to be minimally dependent on other features.
  Label format_address;
  Adr(x0, &format_address);

  // Emit the format string directly in the instruction stream.
  { BlockLiteralPoolScope scope(this);
    Label after_data;
    B(&after_data);
    Bind(&format_address);
    EmitStringData(format);
    Unreachable();
    Bind(&after_data);
  }

  // We don't pass any arguments on the stack, but we still need to align the C
  // stack pointer to a 16-byte boundary for PCS compliance.
  if (!sp.Is(StackPointer())) {
    Bic(sp, StackPointer(), 0xf);
  }

  // Actually call printf. This part needs special handling for the simulator,
  // since the system printf function will use a different instruction set and
  // the procedure-call standard will not be compatible.
#ifdef USE_SIMULATOR
  { InstructionAccurateScope scope(this, kPrintfLength / kInstructionSize);
    hlt(kPrintfOpcode);
    dc32(pcs[0].type());
  }
#else
  Mov(Tmp0(), reinterpret_cast<uintptr_t>(printf));
  Blr(Tmp0());
#endif
}


void MacroAssembler::Printf(const char * format,
                            const CPURegister& arg0,
                            const CPURegister& arg1,
                            const CPURegister& arg2,
                            const CPURegister& arg3) {
  // Preserve all caller-saved registers as well as NZCV.
  // If sp is the stack pointer, PushCPURegList asserts that the size of each
  // list is a multiple of 16 bytes.
  PushCPURegList(kCallerSaved);
  PushCPURegList(kCallerSavedFP);
  // Use Tmp0() as a scratch register. It is not accepted by Printf so it will
  // never overlap an argument register.
  Mrs(Tmp0(), NZCV);
  Push(Tmp0(), xzr);

  PrintfNoPreserve(format, arg0, arg1, arg2, arg3);

  Pop(xzr, Tmp0());
  Msr(NZCV, Tmp0());
  PopCPURegList(kCallerSavedFP);
  PopCPURegList(kCallerSaved);
}
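
// Typical usage from generated code (illustrative; "masm." stands for a
// MacroAssembler instance, and PRIx64 requires <inttypes.h>):
//   masm.Printf("x0 = 0x%" PRIx64 "\n", x0);
// Unlike PrintfNoPreserve, this variant preserves the caller-saved registers
// and NZCV around the call, at the cost of extra pushes and pops.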

void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
  ASSERT(allow_macro_instructions_);

#ifdef USE_SIMULATOR
  // The arguments to the trace pseudo instruction need to be contiguous in
  // memory, so make sure we don't try to emit a literal pool.
  InstructionAccurateScope scope(this, kTraceLength / kInstructionSize);

  Label start;
  bind(&start);

  // Refer to instructions-a64.h for a description of the marker and its
  // arguments.
  hlt(kTraceOpcode);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
  dc32(parameters);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
  dc32(command);
#else
  // Emit nothing on real hardware.
  USE(parameters);
  USE(command);
#endif
}


void MacroAssembler::Log(TraceParameters parameters) {
  ASSERT(allow_macro_instructions_);

#ifdef USE_SIMULATOR
  // The arguments to the log pseudo instruction need to be contiguous in
  // memory, so make sure we don't try to emit a literal pool.
  InstructionAccurateScope scope(this, kLogLength / kInstructionSize);

  Label start;
  bind(&start);

  // Refer to instructions-a64.h for a description of the marker and its
  // arguments.
  hlt(kLogOpcode);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
  dc32(parameters);
#else
  // Emit nothing on real hardware.
  USE(parameters);
#endif
}


void MacroAssembler::EnableInstrumentation() {
  ASSERT(!isprint(InstrumentStateEnable));
  InstructionAccurateScope scope(this, 1);
  movn(xzr, InstrumentStateEnable);
}


void MacroAssembler::DisableInstrumentation() {
  ASSERT(!isprint(InstrumentStateDisable));
  InstructionAccurateScope scope(this, 1);
  movn(xzr, InstrumentStateDisable);
}


void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
  ASSERT(strlen(marker_name) == 2);

  // We allow only printable characters in the marker names. Unprintable
  // characters are reserved for controlling features of the instrumentation.
  ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));

  InstructionAccurateScope scope(this, 1);
  movn(xzr, (marker_name[1] << 8) | marker_name[0]);
}

}  // namespace vixl