//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations.  This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "AArch64 pseudo instruction expansion pass";
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
} // end anonymous namespace

/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.addOperand(MO);
    else
      DefMI.addOperand(MO);
  }
}

/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
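/// For example (illustrative), getChunk(0x1122334455667788, 2) yields 0x3344,
/// chunk 0 being the least significant 16 bits.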
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}

/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
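/// For example (illustrative), replicateChunk(0x0000111122223333, 0, 2) copies
/// chunk 0 (0x3333) into chunk 2, yielding 0x0000333322223333.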
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Replicate the source chunk to the destination position.
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Clear the destination chunk.
  Imm &= ~(0xFFFFULL << ShiftAmt);
  // Insert the replicated chunk.
  return Imm | Chunk;
}

/// \brief Helper function which tries to materialize a 64-bit value with an
/// ORR + MOVK instruction sequence.
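///
/// For example (illustrative), UImm = 0x00FFFF0000FF00FF with
/// OrrImm = 0x00FFFF0000FFFF00 and ChunkIdx = 0 expands to:
///
///   orr  x0, xzr, #0x00ffff0000ffff00
///   movk x0, #0x00ff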
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
                       MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator &MBBI,
                       const AArch64InstrInfo *TII, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ChunkIdx * 16;

  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    // Create the MOVK instruction.
    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
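/// For example (illustrative), 0x3FFC qualifies, since 0x3FFC3FFC3FFC3FFC is a
/// valid bitmask immediate, while 0x1234 does not.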
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;

  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}

/// \brief Check for identical 16-bit chunks within the constant and if so
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
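///
/// For example (illustrative), 0x3FFC567812343FFC has two 0x3FFC chunks and
/// expands to:
///
///   orr  x0, xzr, #0x3ffc3ffc3ffc3ffc
///   movk x0, #0x1234, lsl #16
///   movk x0, #0x5678, lsl #32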
///
static bool tryToReplicateChunks(uint64_t UImm, MachineInstr &MI,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI,
                                 const AArch64InstrInfo *TII) {
  typedef DenseMap<uint64_t, unsigned> CountMap;
  CountMap Counts;

  // Scan the constant and count how often every chunk occurs.
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[getChunk(UImm, Idx)];

  // Traverse the chunks to find one which occurs more than once.
  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
       Chunk != End; ++Chunk) {
    const uint64_t ChunkVal = Chunk->first;
    const unsigned Count = Chunk->second;

    uint64_t Encoding = 0;

    // We are looking for chunks which have two or three instances and can be
    // materialized with an ORR instruction.
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
      continue;

    const bool CountThree = Count == 3;
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();

    unsigned ShiftAmt = 0;
    uint64_t Imm16 = 0;
    // Find the first chunk not materialized with the ORR instruction.
    for (; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the first MOVK instruction.
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg,
                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    // In case we have three instances, the whole constant is now materialized
    // and we can exit.
    if (CountThree) {
      transferImpOps(MI, MIB, MIB1);
      MI.eraseFromParent();
      return true;
    }

    // Find the remaining chunk which needs to be materialized.
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the second MOVK instruction.
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB2);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
/// starts a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
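/// For example (illustrative), the sign-extended chunk 0xFFFFFFFFFFFFFC00 is a
/// start chunk: 54 leading ones plus 10 trailing zeros.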
static bool isStartChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
}

/// \brief Check whether this chunk matches the pattern '0...1...'. This pattern
/// ends a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
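/// For example (illustrative), the sign-extended chunk 0x0000000000000003 is an
/// end chunk: 62 leading zeros plus 2 trailing ones.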
static bool isEndChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
}

/// \brief Clear or set all bits in the chunk at the given index.
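/// For example (illustrative), updateImm(0x123456789ABCDEF0, 1, true) clears
/// chunk 1 and yields 0x123456780000DEF0.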
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}

/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction.
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assume S is a chunk which starts the sequence (1...0...) and E is a chunk
/// which ends the sequence (0...1...). Then we are looking for constants which
/// contain at least one S and E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
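///
/// For example (illustrative), 0x0003FFFF1234FC00 has a sequence of ones
/// interrupted by the 0x1234 chunk and expands to:
///
///   orr  x0, xzr, #0x0003fffffffffc00
///   movk x0, #0x1234, lsl #16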
///
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    // Sign extend the 16-bit chunk to 64 bits.
    const int64_t Chunk = SignExtend64<16>(getChunk(UImm, Idx));

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking at a chunk which is part of the
      // contiguous sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .addOperand(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction.
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  // Try a single ORR-immediate with the zero register (the MOV
  // bitmask-immediate alias).
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .addOperand(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  // ORR x0, xzr, |A|X|A|X|
  // MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction, i.e. which have fewer than three all-zero or all-one
  // chunks.
  //
  // Ignore 32-bit constants here; they can always be materialized with a
  // MOVZ/MOVN + MOVK pair. Since a 32-bit constant at this point can't be
  // materialized with a single ORR either, the best sequence we could achieve
  // is an ORR + MOVK pair, which is no better. Thus we fall back to the
  // default code below, which in the best case creates a single MOVZ/MOVN
  // instruction (when one chunk is all zero or all one).
  //
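  // For example (illustrative), 0xFF001234FF00FF00 matches |X|A|X|B| with
  // elements 1 and 3 identical and expands to:
  //
  //   orr  x0, xzr, #0xff00ff00ff00ff00
  //   movk x0, #0x1234, lsl #32
  //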
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and if so materialize
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
  // will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToReplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
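  //
  // For example (illustrative), 0x0000123400005678 expands to:
  //
  //   movz x0, #0x1234, lsl #32
  //   movk x0, #0x5678
  //
  // and the 32-bit value 0xFFFF1234 expands to a single movn w0, #0xedcb.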
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all-one chunks
  // than all-zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = ((63 - LZ) / 16) * 16;
    LastShift = (TZ / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;
  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift != LastShift) {
    Shift -= 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}


/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true.  Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
    }
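    // For example (illustrative), ADDWrr w0, w1, w2 becomes the
    // shifted-register form ADDWrs w0, w1, w2 with an LSL #0 shift, which
    // encodes the same operation.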
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .addOperand(MI.getOperand(1))
            .addOperand(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }


  case AArch64::FCVTSHpseudo: {
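    // Read the source through its H (16-bit) subregister; the full source
    // register is added as an implicit use below so its liveness is
    // preserved.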
    MachineOperand Src = MI.getOperand(1);
    Src.setImplicit();
    unsigned SrcH =
        TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub);
    auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr))
                   .addOperand(MI.getOperand(0))
                   .addReg(SrcH, RegState::Undef)
                   .addOperand(Src);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::LOADgot: {
    // Expand into ADRP + LDR.
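    //
    // For a global @var this emits, e.g. (illustrative):
    //
    //   adrp x0, :got:var
    //   ldr  x0, [x0, :got_lo12:var]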
    unsigned DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg);

    if (MO1.isGlobal()) {
      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (MO1.isSymbol()) {
      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
      MIB2.addExternalSymbol(MO1.getSymbolName(),
                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else {
      assert(MO1.isCPI() &&
             "Only expect globals, external symbols, or constant pools");
      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGE);
      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGEOFF |
                                    AArch64II::MO_NC);
    }

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
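    //
    // For a symbol @var this emits, e.g. (illustrative):
    //
    //   adrp x0, var
    //   add  x0, x0, :lo12:var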
    unsigned DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .addOperand(MI.getOperand(1));

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg)
            .addOperand(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR:
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
        .addReg(AArch64::LR);
    MI.eraseFromParent();
    return true;
  }
  return false;
}

/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions.  Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Grab the next iterator first, since expandMI may erase the current
    // instruction.
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}