1//
2//                        The Subzero Code Generator
3//
4// This file is distributed under the University of Illinois Open Source
5// License. See LICENSE.TXT for details.
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
11/// entirely of the lowering sequence for each high-level instruction.
12///
13//===----------------------------------------------------------------------===//
14
15#include "IceTargetLoweringMIPS32.h"
16
17#include "IceCfg.h"
18#include "IceCfgNode.h"
19#include "IceClFlags.h"
20#include "IceDefs.h"
21#include "IceELFObjectWriter.h"
22#include "IceGlobalInits.h"
23#include "IceInstMIPS32.h"
24#include "IceInstVarIter.h"
25#include "IceLiveness.h"
26#include "IceOperand.h"
27#include "IcePhiLoweringImpl.h"
28#include "IceRegistersMIPS32.h"
29#include "IceTargetLoweringMIPS32.def"
30#include "IceUtils.h"
31#include "llvm/Support/MathExtras.h"
32
33namespace MIPS32 {
34std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
35  return ::Ice::MIPS32::TargetMIPS32::create(Func);
36}
37
38std::unique_ptr<::Ice::TargetDataLowering>
39createTargetDataLowering(::Ice::GlobalContext *Ctx) {
40  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
41}
42
43std::unique_ptr<::Ice::TargetHeaderLowering>
44createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
45  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
46}
47
48void staticInit(::Ice::GlobalContext *Ctx) {
49  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
50}
51
52bool shouldBePooled(const ::Ice::Constant *C) {
53  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
54}
55
56::Ice::Type getPointerType() {
57  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
58}
59
60} // end of namespace MIPS32
61
62namespace Ice {
63namespace MIPS32 {
64
65using llvm::isInt;
66
67namespace {
68
69// The maximum number of arguments to pass in GPR registers.
70constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;
71
72std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
73std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;
74
75constexpr uint32_t MIPS32_MAX_FP_ARG = 2;
76
77std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
78std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;
79
80const char *getRegClassName(RegClass C) {
81  auto ClassNum = static_cast<RegClassMIPS32>(C);
82  assert(ClassNum < RCMIPS32_NUM);
83  switch (ClassNum) {
84  default:
85    assert(C < RC_Target);
86    return regClassString(C);
87    // Add handling of new register classes below.
88  }
89}
90
91// Stack alignment
92constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;
93
94// Value is in bytes. Return Value adjusted to the next highest multiple of the
95// stack alignment required for the given type.
96uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
97  size_t typeAlignInBytes = typeWidthInBytes(Ty);
98  // Vectors are stored on stack with the same alignment as that of int type
99  if (isVectorType(Ty))
100    typeAlignInBytes = typeWidthInBytes(IceType_i64);
101  return Utils::applyAlignment(Value, typeAlignInBytes);
102}
103
104// Value is in bytes. Return Value adjusted to the next highest multiple of the
105// stack alignment.
106uint32_t applyStackAlignment(uint32_t Value) {
107  return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
108}
109
110} // end of anonymous namespace
111
// Constructs the MIPS32 lowering for Func. Sandboxing support is enabled
// only when the global sandboxing mode is Native Client (ST_NaCl).
TargetMIPS32::TargetMIPS32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}
114
115void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
116                                       size_t SpillAreaPaddingBytes,
117                                       size_t SpillAreaSizeBytes,
118                                       size_t GlobalsAndSubsequentPaddingSize) {
119  const VariablesMetadata *VMetadata = Func->getVMetadata();
120  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
121  size_t NextStackOffset = SpillAreaPaddingBytes;
122  CfgVector<size_t> LocalsSize(Func->getNumNodes());
123  const bool SimpleCoalescing = !callsReturnsTwice();
124  for (Variable *Var : SortedSpilledVariables) {
125    size_t Increment = typeWidthInBytesOnStack(Var->getType());
126    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
127      if (VMetadata->isMultiBlock(Var)) {
128        GlobalsSpaceUsed += Increment;
129        NextStackOffset = GlobalsSpaceUsed;
130      } else {
131        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
132        LocalsSize[NodeIndex] += Increment;
133        NextStackOffset = SpillAreaPaddingBytes +
134                          GlobalsAndSubsequentPaddingSize +
135                          LocalsSize[NodeIndex];
136      }
137    } else {
138      NextStackOffset += Increment;
139    }
140    Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
141  }
142}
143
144void TargetMIPS32::staticInit(GlobalContext *Ctx) {
145  (void)Ctx;
146  RegNumT::setLimit(RegMIPS32::Reg_NUM);
147  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
148  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
149  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
150  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
151  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
152  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
153#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
154          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
155  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
156  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
157  Float32Registers[RegMIPS32::val] = isFP32;                                   \
158  Float64Registers[RegMIPS32::val] = isFP64;                                   \
159  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
160  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
161  for (SizeT RegAlias : alias_init) {                                          \
162    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
163           "Duplicate alias for " #val);                                       \
164    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
165  }                                                                            \
166  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
167  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
168  REGMIPS32_TABLE;
169#undef X
170
171  // TODO(mohit.bhakkad): Change these inits once we provide argument related
172  // field in register tables
173  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
174    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);
175
176  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
177    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);
178
179  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
180    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
181    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
182  }
183
184  TypeToRegisterSet[IceType_void] = InvalidRegisters;
185  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
186  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
187  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
188  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
189  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
190  TypeToRegisterSet[IceType_f32] = Float32Registers;
191  TypeToRegisterSet[IceType_f64] = Float64Registers;
192  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
193  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
194  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
195  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
196  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
197  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
198  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
199
200  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
201    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
202
203  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
204                          llvm::array_lengthof(TypeToRegisterSet),
205                          RegMIPS32::getRegName, getRegClassName);
206}
207
208void TargetMIPS32::unsetIfNonLeafFunc() {
209  for (CfgNode *Node : Func->getNodes()) {
210    for (Inst &Instr : Node->getInsts()) {
211      if (llvm::isa<InstCall>(&Instr)) {
212        // Unset MaybeLeafFunc if call instruction exists.
213        MaybeLeafFunc = false;
214        return;
215      }
216    }
217  }
218}
219
// Returns the stack alignment in bytes used by this target — the fixed
// MIPS32_STACK_ALIGNMENT_BYTES (16) constant.
uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}
223
224uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
225  TargetMIPS32::CallingConv CC;
226  RegNumT DummyReg;
227  size_t OutArgsSizeBytes = 0;
228  Variable *Dest = Call->getDest();
229  bool PartialOnStack = false;
230  if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
231    CC.discardReg(RegMIPS32::Reg_A0);
232    // Next vector is partially on stack
233    PartialOnStack = true;
234  }
235  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
236    Operand *Arg = legalizeUndef(Call->getArg(i));
237    const Type Ty = Arg->getType();
238    RegNumT RegNum;
239    if (CC.argInReg(Ty, i, &RegNum)) {
240      // If PartialOnStack is true and if this is a vector type then last two
241      // elements are on stack
242      if (PartialOnStack && isVectorType(Ty)) {
243        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
244        OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
245      }
246      continue;
247    }
248    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
249    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
250  }
251  // Add size of argument save area
252  constexpr int BytesPerStackArg = 4;
253  OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
254  return applyStackAlignment(OutArgsSizeBytes);
255}
256
257namespace {
258inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
259  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
260    return Integer->getValue();
261  return Intrinsics::MemoryOrderInvalid;
262}
263}
264
265void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
266  constexpr bool NoTailCall = false;
267  constexpr bool IsTargetHelperCall = true;
268  Variable *Dest = Instr->getDest();
269  const Type DestTy = Dest ? Dest->getType() : IceType_void;
270
271  switch (Instr->getKind()) {
272  default:
273    return;
274  case Inst::Select: {
275    if (isVectorType(DestTy)) {
276      Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
277      Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
278      Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
279      Variable *T = Func->makeVariable(DestTy);
280      auto *Undef = ConstantUndef::create(Ctx, DestTy);
281      Context.insert<InstAssign>(T, Undef);
282      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
283      VarVecOn32->initVecElement(Func);
284      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
285        auto *Index = Ctx->getConstantInt32(I);
286        auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
287        Context.insert<InstExtractElement>(OpC, Cond, Index);
288        auto *OpT = Func->makeVariable(typeElementType(DestTy));
289        Context.insert<InstExtractElement>(OpT, SrcT, Index);
290        auto *OpF = Func->makeVariable(typeElementType(DestTy));
291        Context.insert<InstExtractElement>(OpF, SrcF, Index);
292        auto *Dst = Func->makeVariable(typeElementType(DestTy));
293        Variable *DestT = Func->makeVariable(DestTy);
294        Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
295        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
296        T = DestT;
297      }
298      Context.insert<InstAssign>(Dest, T);
299      Instr->setDeleted();
300    }
301    return;
302  }
303  case Inst::Fcmp: {
304    if (isVectorType(DestTy)) {
305      InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
306      Operand *Src0 = Instr->getSrc(0);
307      Operand *Src1 = Instr->getSrc(1);
308      Variable *T = Func->makeVariable(IceType_v4f32);
309      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
310      Context.insert<InstAssign>(T, Undef);
311      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
312      VarVecOn32->initVecElement(Func);
313      for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
314        auto *Index = Ctx->getConstantInt32(I);
315        auto *Op0 = Func->makeVariable(IceType_f32);
316        Context.insert<InstExtractElement>(Op0, Src0, Index);
317        auto *Op1 = Func->makeVariable(IceType_f32);
318        Context.insert<InstExtractElement>(Op1, Src1, Index);
319        auto *Dst = Func->makeVariable(IceType_f32);
320        Variable *DestT = Func->makeVariable(IceType_v4f32);
321        Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
322        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
323        T = DestT;
324      }
325      Context.insert<InstAssign>(Dest, T);
326      Instr->setDeleted();
327    }
328    return;
329  }
330  case Inst::Icmp: {
331    if (isVectorType(DestTy)) {
332      InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
333      Operand *Src0 = Instr->getSrc(0);
334      Operand *Src1 = Instr->getSrc(1);
335      const Type SrcType = Src0->getType();
336      Variable *T = Func->makeVariable(DestTy);
337      auto *Undef = ConstantUndef::create(Ctx, DestTy);
338      Context.insert<InstAssign>(T, Undef);
339      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
340      VarVecOn32->initVecElement(Func);
341      for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
342        auto *Index = Ctx->getConstantInt32(I);
343        auto *Op0 = Func->makeVariable(typeElementType(SrcType));
344        Context.insert<InstExtractElement>(Op0, Src0, Index);
345        auto *Op1 = Func->makeVariable(typeElementType(SrcType));
346        Context.insert<InstExtractElement>(Op1, Src1, Index);
347        auto *Dst = Func->makeVariable(typeElementType(DestTy));
348        Variable *DestT = Func->makeVariable(DestTy);
349        Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
350        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
351        T = DestT;
352      }
353      Context.insert<InstAssign>(Dest, T);
354      Instr->setDeleted();
355    }
356    return;
357  }
358  case Inst::Arithmetic: {
359    const InstArithmetic::OpKind Op =
360        llvm::cast<InstArithmetic>(Instr)->getOp();
361    if (isVectorType(DestTy)) {
362      scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
363      Instr->setDeleted();
364      return;
365    }
366    switch (DestTy) {
367    default:
368      return;
369    case IceType_i64: {
370      RuntimeHelper HelperID = RuntimeHelper::H_Num;
371      switch (Op) {
372      default:
373        return;
374      case InstArithmetic::Udiv:
375        HelperID = RuntimeHelper::H_udiv_i64;
376        break;
377      case InstArithmetic::Sdiv:
378        HelperID = RuntimeHelper::H_sdiv_i64;
379        break;
380      case InstArithmetic::Urem:
381        HelperID = RuntimeHelper::H_urem_i64;
382        break;
383      case InstArithmetic::Srem:
384        HelperID = RuntimeHelper::H_srem_i64;
385        break;
386      }
387
388      if (HelperID == RuntimeHelper::H_Num) {
389        return;
390      }
391
392      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
393      constexpr SizeT MaxArgs = 2;
394      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
395                                            NoTailCall, IsTargetHelperCall);
396      Call->addArg(Instr->getSrc(0));
397      Call->addArg(Instr->getSrc(1));
398      Instr->setDeleted();
399      return;
400    }
401    case IceType_f32:
402    case IceType_f64: {
403      if (Op != InstArithmetic::Frem) {
404        return;
405      }
406      constexpr SizeT MaxArgs = 2;
407      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
408          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
409                                : RuntimeHelper::H_frem_f64);
410      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
411                                            NoTailCall, IsTargetHelperCall);
412      Call->addArg(Instr->getSrc(0));
413      Call->addArg(Instr->getSrc(1));
414      Instr->setDeleted();
415      return;
416    }
417    }
418    llvm::report_fatal_error("Control flow should never have reached here.");
419  }
420  case Inst::Cast: {
421    Operand *Src0 = Instr->getSrc(0);
422    const Type SrcTy = Src0->getType();
423    auto *CastInstr = llvm::cast<InstCast>(Instr);
424    const InstCast::OpKind CastKind = CastInstr->getCastKind();
425
426    if (isVectorType(DestTy)) {
427      Variable *T = Func->makeVariable(DestTy);
428      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
429      VarVecOn32->initVecElement(Func);
430      auto *Undef = ConstantUndef::create(Ctx, DestTy);
431      Context.insert<InstAssign>(T, Undef);
432      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
433        auto *Index = Ctx->getConstantInt32(I);
434        auto *Op = Func->makeVariable(typeElementType(SrcTy));
435        Context.insert<InstExtractElement>(Op, Src0, Index);
436        auto *Dst = Func->makeVariable(typeElementType(DestTy));
437        Variable *DestT = Func->makeVariable(DestTy);
438        Context.insert<InstCast>(CastKind, Dst, Op);
439        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
440        T = DestT;
441      }
442      Context.insert<InstAssign>(Dest, T);
443      Instr->setDeleted();
444      return;
445    }
446
447    switch (CastKind) {
448    default:
449      return;
450    case InstCast::Fptosi:
451    case InstCast::Fptoui: {
452      if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
453        return;
454      }
455      const bool DestIs32 = DestTy == IceType_i32;
456      const bool DestIsSigned = CastKind == InstCast::Fptosi;
457      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
458      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
459      if (DestIsSigned) {
460        if (DestIs32) {
461          return;
462        }
463        RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
464                            : RuntimeHelper::H_fptosi_f64_i64;
465      } else {
466        RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
467                                        : RuntimeHelper::H_fptoui_f32_i64)
468                            : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
469                                        : RuntimeHelper::H_fptoui_f64_i64);
470      }
471      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
472      static constexpr SizeT MaxArgs = 1;
473      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
474                                            NoTailCall, IsTargetHelperCall);
475      Call->addArg(Src0);
476      Instr->setDeleted();
477      return;
478    }
479    case InstCast::Sitofp:
480    case InstCast::Uitofp: {
481      if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
482        return;
483      }
484      const bool SourceIs32 = SrcTy == IceType_i32;
485      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
486      const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
487      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
488      if (SourceIsSigned) {
489        if (SourceIs32) {
490          return;
491        }
492        RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
493                            : RuntimeHelper::H_sitofp_i64_f64;
494      } else {
495        RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
496                                          : RuntimeHelper::H_uitofp_i64_f32)
497                            : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
498                                          : RuntimeHelper::H_uitofp_i64_f64);
499      }
500      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
501      static constexpr SizeT MaxArgs = 1;
502      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
503                                            NoTailCall, IsTargetHelperCall);
504      Call->addArg(Src0);
505      Instr->setDeleted();
506      return;
507    }
508    case InstCast::Bitcast: {
509      if (DestTy == SrcTy) {
510        return;
511      }
512      Variable *CallDest = Dest;
513      RuntimeHelper HelperID = RuntimeHelper::H_Num;
514      switch (DestTy) {
515      default:
516        return;
517      case IceType_i8:
518        assert(SrcTy == IceType_v8i1);
519        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
520        CallDest = Func->makeVariable(IceType_i32);
521        break;
522      case IceType_i16:
523        assert(SrcTy == IceType_v16i1);
524        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
525        CallDest = Func->makeVariable(IceType_i32);
526        break;
527      case IceType_v8i1: {
528        assert(SrcTy == IceType_i8);
529        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
530        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
531        // Arguments to functions are required to be at least 32 bits wide.
532        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
533        Src0 = Src0AsI32;
534      } break;
535      case IceType_v16i1: {
536        assert(SrcTy == IceType_i16);
537        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
538        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
539        // Arguments to functions are required to be at least 32 bits wide.
540        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
541        Src0 = Src0AsI32;
542      } break;
543      }
544      constexpr SizeT MaxSrcs = 1;
545      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
546      Call->addArg(Src0);
547      Context.insert(Call);
548      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
549      // call result to the appropriate type as necessary.
550      if (CallDest->getType() != DestTy)
551        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
552      Instr->setDeleted();
553      return;
554    }
555    case InstCast::Trunc: {
556      if (DestTy == SrcTy) {
557        return;
558      }
559      if (!isVectorType(SrcTy)) {
560        return;
561      }
562      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
563      assert(typeElementType(DestTy) == IceType_i1);
564      assert(isVectorIntegerType(SrcTy));
565      return;
566    }
567    case InstCast::Sext:
568    case InstCast::Zext: {
569      if (DestTy == SrcTy) {
570        return;
571      }
572      if (!isVectorType(DestTy)) {
573        return;
574      }
575      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
576      assert(typeElementType(SrcTy) == IceType_i1);
577      assert(isVectorIntegerType(DestTy));
578      return;
579    }
580    }
581    llvm::report_fatal_error("Control flow should never have reached here.");
582  }
583  case Inst::IntrinsicCall: {
584    auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
585    Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
586    if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
587      Operand *Src0 = IntrinsicCall->getArg(0);
588      GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
589      Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
590      GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
591      bool BadIntrinsic = false;
592      const Intrinsics::FullIntrinsicInfo *FullInfo =
593          Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
594      Intrinsics::IntrinsicInfo Info = FullInfo->Info;
595
596      Variable *T = Func->makeVariable(IceType_v4f32);
597      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
598      Context.insert<InstAssign>(T, Undef);
599      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
600      VarVecOn32->initVecElement(Func);
601
602      for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
603        auto *Index = Ctx->getConstantInt32(i);
604        auto *Op = Func->makeVariable(IceType_f32);
605        Context.insert<InstExtractElement>(Op, Src0, Index);
606        auto *Res = Func->makeVariable(IceType_f32);
607        Variable *DestT = Func->makeVariable(IceType_v4f32);
608        auto *Call =
609            Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
610        Call->addArg(Op);
611        Context.insert<InstInsertElement>(DestT, T, Res, Index);
612        T = DestT;
613      }
614
615      Context.insert<InstAssign>(Dest, T);
616
617      Instr->setDeleted();
618      return;
619    }
620    switch (ID) {
621    default:
622      return;
623    case Intrinsics::AtomicLoad: {
624      if (DestTy != IceType_i64)
625        return;
626      if (!Intrinsics::isMemoryOrderValid(
627              ID, getConstantMemoryOrder(IntrinsicCall->getArg(1)))) {
628        Func->setError("Unexpected memory ordering for AtomicLoad");
629        return;
630      }
631      Operand *Addr = IntrinsicCall->getArg(0);
632      Operand *TargetHelper = Ctx->getConstantExternSym(
633          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
634      static constexpr SizeT MaxArgs = 3;
635      auto *_0 = Ctx->getConstantZero(IceType_i64);
636      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
637                                            NoTailCall, IsTargetHelperCall);
638      Call->addArg(Addr);
639      Call->addArg(_0);
640      Call->addArg(_0);
641      Context.insert<InstMIPS32Sync>();
642      Instr->setDeleted();
643      return;
644    }
645    case Intrinsics::AtomicStore: {
646      Operand *Val = IntrinsicCall->getArg(0);
647      if (Val->getType() != IceType_i64)
648        return;
649      if (!Intrinsics::isMemoryOrderValid(
650              ID, getConstantMemoryOrder(IntrinsicCall->getArg(2)))) {
651        Func->setError("Unexpected memory ordering for AtomicStore");
652        return;
653      }
654      Operand *Addr = IntrinsicCall->getArg(1);
655      Variable *NoDest = nullptr;
656      Operand *TargetHelper = Ctx->getConstantExternSym(
657          Ctx->getGlobalString("__sync_lock_test_and_set_8"));
658      Context.insert<InstMIPS32Sync>();
659      static constexpr SizeT MaxArgs = 2;
660      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
661                                            NoTailCall, IsTargetHelperCall);
662      Call->addArg(Addr);
663      Call->addArg(Val);
664      Context.insert<InstMIPS32Sync>();
665      Instr->setDeleted();
666      return;
667    }
668    case Intrinsics::AtomicCmpxchg: {
669      if (DestTy != IceType_i64)
670        return;
671      if (!Intrinsics::isMemoryOrderValid(
672              ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)),
673              getConstantMemoryOrder(IntrinsicCall->getArg(4)))) {
674        Func->setError("Unexpected memory ordering for AtomicCmpxchg");
675        return;
676      }
677      Operand *Addr = IntrinsicCall->getArg(0);
678      Operand *Oldval = IntrinsicCall->getArg(1);
679      Operand *Newval = IntrinsicCall->getArg(2);
680      Operand *TargetHelper = Ctx->getConstantExternSym(
681          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
682      Context.insert<InstMIPS32Sync>();
683      static constexpr SizeT MaxArgs = 3;
684      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
685                                            NoTailCall, IsTargetHelperCall);
686      Call->addArg(Addr);
687      Call->addArg(Oldval);
688      Call->addArg(Newval);
689      Context.insert<InstMIPS32Sync>();
690      Instr->setDeleted();
691      return;
692    }
693    case Intrinsics::AtomicRMW: {
694      if (DestTy != IceType_i64)
695        return;
696      if (!Intrinsics::isMemoryOrderValid(
697              ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)))) {
698        Func->setError("Unexpected memory ordering for AtomicRMW");
699        return;
700      }
701      auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
702          llvm::cast<ConstantInteger32>(IntrinsicCall->getArg(0))->getValue());
703      auto *Addr = IntrinsicCall->getArg(1);
704      auto *Newval = IntrinsicCall->getArg(2);
705      Operand *TargetHelper;
706      switch (Operation) {
707      case Intrinsics::AtomicAdd:
708        TargetHelper = Ctx->getConstantExternSym(
709            Ctx->getGlobalString("__sync_fetch_and_add_8"));
710        break;
711      case Intrinsics::AtomicSub:
712        TargetHelper = Ctx->getConstantExternSym(
713            Ctx->getGlobalString("__sync_fetch_and_sub_8"));
714        break;
715      case Intrinsics::AtomicOr:
716        TargetHelper = Ctx->getConstantExternSym(
717            Ctx->getGlobalString("__sync_fetch_and_or_8"));
718        break;
719      case Intrinsics::AtomicAnd:
720        TargetHelper = Ctx->getConstantExternSym(
721            Ctx->getGlobalString("__sync_fetch_and_and_8"));
722        break;
723      case Intrinsics::AtomicXor:
724        TargetHelper = Ctx->getConstantExternSym(
725            Ctx->getGlobalString("__sync_fetch_and_xor_8"));
726        break;
727      case Intrinsics::AtomicExchange:
728        TargetHelper = Ctx->getConstantExternSym(
729            Ctx->getGlobalString("__sync_lock_test_and_set_8"));
730        break;
731      default:
732        llvm::report_fatal_error("Unknown AtomicRMW operation");
733        return;
734      }
735      Context.insert<InstMIPS32Sync>();
736      static constexpr SizeT MaxArgs = 2;
737      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738                                            NoTailCall, IsTargetHelperCall);
739      Call->addArg(Addr);
740      Call->addArg(Newval);
741      Context.insert<InstMIPS32Sync>();
742      Instr->setDeleted();
743      return;
744    }
745    case Intrinsics::Ctpop: {
746      Operand *Src0 = IntrinsicCall->getArg(0);
747      Operand *TargetHelper =
748          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
749                                        ? RuntimeHelper::H_call_ctpop_i32
750                                        : RuntimeHelper::H_call_ctpop_i64);
751      static constexpr SizeT MaxArgs = 1;
752      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
753                                            NoTailCall, IsTargetHelperCall);
754      Call->addArg(Src0);
755      Instr->setDeleted();
756      return;
757    }
758    case Intrinsics::Longjmp: {
759      static constexpr SizeT MaxArgs = 2;
760      static constexpr Variable *NoDest = nullptr;
761      Operand *TargetHelper =
762          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
763      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
764                                            NoTailCall, IsTargetHelperCall);
765      Call->addArg(IntrinsicCall->getArg(0));
766      Call->addArg(IntrinsicCall->getArg(1));
767      Instr->setDeleted();
768      return;
769    }
770    case Intrinsics::Memcpy: {
771      static constexpr SizeT MaxArgs = 3;
772      static constexpr Variable *NoDest = nullptr;
773      Operand *TargetHelper =
774          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
775      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
776                                            NoTailCall, IsTargetHelperCall);
777      Call->addArg(IntrinsicCall->getArg(0));
778      Call->addArg(IntrinsicCall->getArg(1));
779      Call->addArg(IntrinsicCall->getArg(2));
780      Instr->setDeleted();
781      return;
782    }
783    case Intrinsics::Memmove: {
784      static constexpr SizeT MaxArgs = 3;
785      static constexpr Variable *NoDest = nullptr;
786      Operand *TargetHelper =
787          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
788      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
789                                            NoTailCall, IsTargetHelperCall);
790      Call->addArg(IntrinsicCall->getArg(0));
791      Call->addArg(IntrinsicCall->getArg(1));
792      Call->addArg(IntrinsicCall->getArg(2));
793      Instr->setDeleted();
794      return;
795    }
796    case Intrinsics::Memset: {
797      Operand *ValOp = IntrinsicCall->getArg(1);
798      assert(ValOp->getType() == IceType_i8);
799      Variable *ValExt = Func->makeVariable(stackSlotType());
800      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
801
802      static constexpr SizeT MaxArgs = 3;
803      static constexpr Variable *NoDest = nullptr;
804      Operand *TargetHelper =
805          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
806      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
807                                            NoTailCall, IsTargetHelperCall);
808      Call->addArg(IntrinsicCall->getArg(0));
809      Call->addArg(ValExt);
810      Call->addArg(IntrinsicCall->getArg(2));
811      Instr->setDeleted();
812      return;
813    }
814    case Intrinsics::NaClReadTP: {
815      if (SandboxingType == ST_NaCl) {
816        return;
817      }
818      static constexpr SizeT MaxArgs = 0;
819      assert(SandboxingType != ST_Nonsfi);
820      Operand *TargetHelper =
821          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
822      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
823                               IsTargetHelperCall);
824      Instr->setDeleted();
825      return;
826    }
827    case Intrinsics::Setjmp: {
828      static constexpr SizeT MaxArgs = 1;
829      Operand *TargetHelper =
830          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
831      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
832                                            NoTailCall, IsTargetHelperCall);
833      Call->addArg(IntrinsicCall->getArg(0));
834      Instr->setDeleted();
835      return;
836    }
837    }
838    llvm::report_fatal_error("Control flow should never have reached here.");
839  }
840  }
841}
842
// Scans every call in the function and records the largest stack space any
// of them needs for outgoing arguments in MaxOutArgsSizeBytes; also
// initializes CurrentAllocaOffset to that size.
void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  // Non-leaf functions always reserve the four GPR argument home slots
  // (4 bytes each) required by the MIPS O32 convention.
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      // PostIncrement advances the lowering context when it is destroyed at
      // the end of each iteration.
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
}
863
// O2 translation pipeline for MIPS32. The pass order below is significant:
// address mode optimization requires single-def variable metadata, register
// allocation requires renumbered instructions plus interval liveness, and
// branch optimization must run after all CfgNode insertion/reordering.
void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  // Compute MaxOutArgsSizeBytes before the stack is laid out.
  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Post-lowering legalization, done after the frame layout is final.
  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
974
// Om1 ("minimal optimization") pipeline for MIPS32: same skeleton as
// translateO2 but with no alloca merging, no address mode optimization, no
// branch optimization, and register allocation restricted to
// infinite-weight variables.
void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  // Compute MaxOutArgsSizeBytes before the stack is laid out.
  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Post-lowering legalization, done after the frame layout is final.
  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
1028
1029bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
1030  if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
1031    return Br->optimizeBranch(NextNode);
1032  }
1033  return false;
1034}
1035
namespace {

// Printable register names, indexed by register number. The entries are
// expanded from the REGMIPS32_TABLE X-macro, which keeps this table in sync
// with the register definitions.
const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace
1047
// Returns the printable name for RegNum. Asserts (in checked builds) that
// RegNum is a valid register number before indexing the name table.
const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}
1052
// Typed variant of getRegName. The type does not affect register naming on
// MIPS32, so Ty is ignored.
const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegMIPS32::getRegName(RegNum);
}
1057
1058Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1059  if (Ty == IceType_void)
1060    Ty = IceType_i32;
1061  if (PhysicalRegisters[Ty].empty())
1062    PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
1063  RegNum.assertIsValid();
1064  Variable *Reg = PhysicalRegisters[Ty][RegNum];
1065  if (Reg == nullptr) {
1066    Reg = Func->makeVariable(Ty);
1067    Reg->setRegNum(RegNum);
1068    PhysicalRegisters[Ty][RegNum] = Reg;
1069    // Specially mark a named physical register as an "argument" so that it is
1070    // considered live upon function entry.  Otherwise it's possible to get
1071    // liveness validation errors for saving callee-save registers.
1072    Func->addImplicitArg(Reg);
1073    // Don't bother tracking the live range of a named physical register.
1074    Reg->setIgnoreLiveness();
1075  }
1076  return Reg;
1077}
1078
// Jump table emission is not implemented for MIPS32 yet; this reports an
// unimplemented-lowering error via UnimplementedError.
void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}
1085
1086/// Provide a trivial wrapper to legalize() for this common usage.
1087Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
1088  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
1089}
1090
1091/// Legalize undef values to concrete values.
1092Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
1093  (void)RegNum;
1094  Type Ty = From->getType();
1095  if (llvm::isa<ConstantUndef>(From)) {
1096    // Lower undefs to zero.  Another option is to lower undefs to an
1097    // uninitialized register; however, using an uninitialized register
1098    // results in less predictable code.
1099    //
1100    // If in the future the implementation is changed to lower undef
1101    // values to uninitialized registers, a FakeDef will be needed:
1102    //     Context.insert(InstFakeDef::create(Func, Reg));
1103    // This is in order to ensure that the live range of Reg is not
1104    // overestimated.  If the constant being lowered is a 64 bit value,
1105    // then the result should be split and the lo and hi components will
1106    // need to go in uninitialized registers.
1107    if (isVectorType(Ty)) {
1108      Variable *Var = makeReg(Ty, RegNum);
1109      auto *Reg = llvm::cast<VariableVecOn32>(Var);
1110      Reg->initVecElement(Func);
1111      auto *Zero = getZero();
1112      for (Variable *Var : Reg->getContainers()) {
1113        _mov(Var, Zero);
1114      }
1115      return Reg;
1116    }
1117    return Ctx->getConstantZero(Ty);
1118  }
1119  return From;
1120}
1121
1122Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
1123  // There aren't any 64-bit integer registers for Mips32.
1124  assert(Type != IceType_i64);
1125  Variable *Reg = Func->makeVariable(Type);
1126  if (RegNum.hasValue())
1127    Reg->setRegNum(RegNum);
1128  else
1129    Reg->setMustHaveReg();
1130  return Reg;
1131}
1132
1133OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
1134  // It may be the case that address mode optimization already creates an
1135  // OperandMIPS32Mem, so in that case it wouldn't need another level of
1136  // transformation.
1137  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
1138    return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
1139  }
1140
1141  // If we didn't do address mode optimization, then we only have a base/offset
1142  // to work with. MIPS always requires a base register, so just use that to
1143  // hold the operand.
1144  auto *Base = llvm::cast<Variable>(
1145      legalize(Operand, Legal_Reg | Legal_Rematerializable));
1146  const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
1147  return OperandMIPS32Mem::create(
1148      Func, Ty, Base,
1149      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
1150}
1151
1152void TargetMIPS32::emitVariable(const Variable *Var) const {
1153  if (!BuildDefs::dump())
1154    return;
1155  Ostream &Str = Ctx->getStrEmit();
1156  const Type FrameSPTy = IceType_i32;
1157  if (Var->hasReg()) {
1158    Str << '$' << getRegName(Var->getRegNum(), Var->getType());
1159    return;
1160  }
1161  if (Var->mustHaveReg()) {
1162    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1163                             ") has no register assigned - function " +
1164                             Func->getFunctionName());
1165  }
1166  const int32_t Offset = Var->getStackOffset();
1167  Str << Offset;
1168  Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
1169  Str << ")";
1170}
1171
// Initializes argument-register allocation state. The used-register bit
// vectors are sized to the full register file, and each argument-register
// list is built in reverse from its initializer so that back() yields the
// next register to hand out.
TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}
1179
// In the MIPS O32 abi, FP argument registers can be used only if the first
// argument is of type float/double. The UseFPRegs flag takes care of that.
// Also FP arg registers can be used only for the first 2 arguments, so we
// require the argument number to make register allocation decisions.
//
// Returns true and sets *Reg if the argument is passed in a register.
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
                                         RegNumT *Reg) {
  // Integers and vectors always go through the GPR path.
  if (isScalarIntegerType(Ty) || isVectorType(Ty))
    return argInGPR(Ty, Reg);
  if (isScalarFloatingType(Ty)) {
    if (ArgNo == 0) {
      // A leading FP argument enables the FP-register convention.
      UseFPRegs = true;
      return argInVFP(Ty, Reg);
    }
    if (UseFPRegs && ArgNo == 1) {
      // Only the first two arguments may use FP registers.
      UseFPRegs = false;
      return argInVFP(Ty, Reg);
    }
    // Any other FP argument is passed in GPRs (or on the stack).
    return argInGPR(Ty, Reg);
  }
  llvm::report_fatal_error("argInReg: Invalid type.");
  return false;
}
1202
// Tries to allocate a GPR (or GPR pair) for an argument of type Ty. On
// success *Reg receives the register and true is returned; otherwise all
// GPRs are marked used (the argument goes on the stack) and false is
// returned.
bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    // 32-bit scalars and vectors draw from the single-register list.
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    // 64-bit values draw from the register-pair list.
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or returning v4f32) then the next
  // vector type is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice how
  // we mark all of Reg's aliases as Used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments irrespective of their base type are passed in GP
  // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd
  // is passed in $6:$7:stack:stack. If it is 1st argument then discard
  // $4:$5:$6:$7 otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}
1264
// Removes the next available GPR from Regs and marks it -- together with all
// of its aliases -- as used, so no overlapping register is handed out later.
inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
    CfgVector<RegNumT> *Regs) {
  GPRegsUsed |= RegisterAliases[Regs->back()];
  Regs->pop_back();
}
1270
1271inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
1272  if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
1273    discardNextGPRAndItsAliases(Regs);
1274}
1275
// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in a0, the second in a2-a3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" on the GPR registers.
//
// Pops registers off the back of Regs for as long as the next candidate has
// already been consumed (directly or through an alias).
void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    discardNextGPRAndItsAliases(Regs);
  }
}
1287
// Tries to allocate an FP register for an argument of type Ty (f32 or f64).
// On success it also discards the shadowing GPR slot(s): one for f32, an
// even-aligned pair for f64. Returns false when no FP register is available.
bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    // Out of FP argument registers; nothing more can go in VFP.
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In MIPS O32 abi if fun arguments are (f32, i32) then one can not use reg_a0
  // for second argument even though it's free. f32 arg goes in reg_f12, i32 arg
  // goes in reg_a1. Similarly if arguments are (f64, i32) second argument goes
  // in reg_a3 and a0, a1 are not used.
  Source = &GPRArgs;
  // Discard one GPR reg for f32(4 bytes), two for f64(4 + 4 bytes)
  if (Ty == IceType_f64) {
    // In MIPS o32 abi, when we use GPR argument pairs to store F64 values, pair
    // must be aligned at even register. Similarly when we discard GPR registers
    // when some arguments from starting 16 bytes goes in FPR, we must take care
    // of alignment. For example if fun args are (f32, f64, f32), for first f32
    // we discard a0, now for f64 argument, which will go in F14F15, we must
    // first align GPR vector to even register by discarding a1, then discard
    // two GPRs a2 and a3. Now last f32 argument will go on stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}
1334
// Pops FP registers off the back of Regs for as long as the next candidate
// has already been consumed (directly or through an alias).
void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}
1341
// Replaces each register-passed argument with a "home register" variable
// pinned to its ABI register, and inserts a prolog copy from the home
// register into the original argument variable.
void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through stack. $4 is setup by the caller and passed as
  // first argument implicitly. Callee then copies the return vector at $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      // Argument is passed on the stack; nothing to do here.
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      // A vector argument occupies up to four consecutive GPR containers.
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of the second vector argument are passed in
      // $6:$7 and the remaining two on the stack, so do not assign registers
      // to elements 2 and 3 unless this is the first argument.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: { RegisterArg->setRegNum(RegNum); } break;
      case IceType_i64: {
        // An i64 is split across a register pair.
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert a fake use of ImplicitRet_v4f32 before each return so the implicit
  // return-pointer argument is kept live throughout the function.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}
1428
// Every stack slot is addressed as a 32-bit word on MIPS32.
Type TargetMIPS32::stackSlotType() { return IceType_i32; }
1430
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or returning v4f32) then the next
  // vector type is passed in $6:$7:stack:stack; its 3rd and 4th elements are
  // loaded from the argument stack area. When PartialOnStack is true only
  // elements 2 and 3 live on the stack; otherwise all four do.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  // Recurse into the Lo/Hi halves of a split i64, Lo first (little-endian).
  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  // Only scalar, non-i64 arguments reach this point.
  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    // Stack-resident argument: just record where it lives.
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}
1495
1496void TargetMIPS32::addProlog(CfgNode *Node) {
1497  // Stack frame layout:
1498  //
1499  // +------------------------+
1500  // | 1. preserved registers |
1501  // +------------------------+
1502  // | 2. padding             |
1503  // +------------------------+
1504  // | 3. global spill area   |
1505  // +------------------------+
1506  // | 4. padding             |
1507  // +------------------------+
1508  // | 5. local spill area    |
1509  // +------------------------+
1510  // | 6. padding             |
1511  // +------------------------+
1512  // | 7. allocas             |
1513  // +------------------------+
1514  // | 8. padding             |
1515  // +------------------------+
1516  // | 9. out args            |
1517  // +------------------------+ <--- StackPointer
1518  //
1519  // The following variables record the size in bytes of the given areas:
1520  //  * PreservedRegsSizeBytes: area 1
1521  //  * SpillAreaPaddingBytes:  area 2
1522  //  * GlobalsSize:            area 3
1523  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1524  //  * LocalsSpillAreaSize:    area 5
1525  //  * SpillAreaSizeBytes:     areas 2 - 9
1526  //  * maxOutArgsSizeBytes():  area 9
1527
1528  Context.init(Node);
1529  Context.setInsertPoint(Context.getCur());
1530
1531  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1532  RegsUsed = SmallBitVector(CalleeSaves.size());
1533
1534  VarList SortedSpilledVariables;
1535
1536  size_t GlobalsSize = 0;
1537  // If there is a separate locals area, this represents that area. Otherwise
1538  // it counts any variable not counted by GlobalsSize.
1539  SpillAreaSizeBytes = 0;
1540  // If there is a separate locals area, this specifies the alignment for it.
1541  uint32_t LocalsSlotsAlignmentBytes = 0;
1542  // The entire spill locations area gets aligned to largest natural alignment
1543  // of the variables that have a spill slot.
1544  uint32_t SpillAreaAlignmentBytes = 0;
1545  // For now, we don't have target-specific variables that need special
1546  // treatment (no stack-slot-linked SpillVariable type).
1547  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1548    static constexpr bool AssignStackSlot = false;
1549    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1550    if (llvm::isa<Variable64On32>(Var)) {
1551      return DontAssignStackSlot;
1552    }
1553    return AssignStackSlot;
1554  };
1555
1556  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1557  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1558                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1559                        &LocalsSlotsAlignmentBytes, TargetVarHook);
1560  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1561  SpillAreaSizeBytes += GlobalsSize;
1562
1563  PreservedGPRs.reserve(CalleeSaves.size());
1564
1565  // Consider FP and RA as callee-save / used as needed.
1566  if (UsesFramePointer) {
1567    if (RegsUsed[RegMIPS32::Reg_FP]) {
1568      llvm::report_fatal_error("Frame pointer has been used.");
1569    }
1570    CalleeSaves[RegMIPS32::Reg_FP] = true;
1571    RegsUsed[RegMIPS32::Reg_FP] = true;
1572  }
1573  if (!MaybeLeafFunc) {
1574    CalleeSaves[RegMIPS32::Reg_RA] = true;
1575    RegsUsed[RegMIPS32::Reg_RA] = true;
1576  }
1577
1578  // Make two passes over the used registers. The first pass records all the
1579  // used registers -- and their aliases. Then, we figure out which GPR
1580  // registers should be saved.
1581  SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1582  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1583    if (CalleeSaves[i] && RegsUsed[i]) {
1584      ToPreserve |= RegisterAliases[i];
1585    }
1586  }
1587
1588  uint32_t NumCallee = 0;
1589
1590  // RegClasses is a tuple of
1591  //
1592  // <First Register in Class, Last Register in Class, Vector of Save Registers>
1593  //
1594  // We use this tuple to figure out which register we should save/restore
1595  // during
1596  // prolog/epilog.
1597  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1598  const RegClassType RegClass = RegClassType(
1599      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1600  const uint32_t FirstRegInClass = std::get<0>(RegClass);
1601  const uint32_t LastRegInClass = std::get<1>(RegClass);
1602  VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1603  for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1604    if (!ToPreserve[Reg]) {
1605      continue;
1606    }
1607    ++NumCallee;
1608    Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1609    PreservedRegsSizeBytes +=
1610        typeWidthInBytesOnStack(PhysicalRegister->getType());
1611    PreservedRegsInClass->push_back(PhysicalRegister);
1612  }
1613
1614  Ctx->statsUpdateRegistersSaved(NumCallee);
1615
1616  // Align the variables area. SpillAreaPaddingBytes is the size of the region
1617  // after the preserved registers and before the spill areas.
1618  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1619  // locals area if they are separate.
1620  assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1621  (void)MIPS32_STACK_ALIGNMENT_BYTES;
1622  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1623  uint32_t SpillAreaPaddingBytes = 0;
1624  uint32_t LocalsSlotsPaddingBytes = 0;
1625  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1626                       GlobalsSize, LocalsSlotsAlignmentBytes,
1627                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1628  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1629  uint32_t GlobalsAndSubsequentPaddingSize =
1630      GlobalsSize + LocalsSlotsPaddingBytes;
1631
1632  // Adds the out args space to the stack, and align SP if necessary.
1633  if (!NeedsStackAlignment) {
1634    SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1635  } else {
1636    SpillAreaSizeBytes = applyStackAlignment(
1637        SpillAreaSizeBytes +
1638        (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1639  }
1640
1641  // Combine fixed alloca with SpillAreaSize.
1642  SpillAreaSizeBytes += FixedAllocaSizeBytes;
1643
1644  TotalStackSizeBytes =
1645      applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
1646
1647  // Generate "addiu sp, sp, -TotalStackSizeBytes"
1648  if (TotalStackSizeBytes) {
1649    // Use the scratch register if needed to legalize the immediate.
1650    Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1651  }
1652
1653  Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1654
1655  if (!PreservedGPRs.empty()) {
1656    uint32_t StackOffset = TotalStackSizeBytes;
1657    for (Variable *Var : *PreservedRegsInClass) {
1658      Type RegType;
1659      if (RegMIPS32::isFPRReg(Var->getRegNum()))
1660        RegType = IceType_f32;
1661      else
1662        RegType = IceType_i32;
1663      auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1664      StackOffset -= typeWidthInBytesOnStack(RegType);
1665      Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1666      OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1667          Func, RegType, SP,
1668          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1669      Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1670    }
1671  }
1672
1673  Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1674
1675  // Generate "mov FP, SP" if needed.
1676  if (UsesFramePointer) {
1677    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1678    _mov(FP, SP);
1679    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1680    Context.insert<InstFakeUse>(FP);
1681  }
1682
1683  // Fill in stack offsets for stack args, and copy args into registers for
1684  // those that were register-allocated. Args are pushed right to left, so
1685  // Arg[0] is closest to the stack/frame pointer.
1686  const VarList &Args = Func->getArgs();
1687  size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1688  TargetMIPS32::CallingConv CC;
1689  uint32_t ArgNo = 0;
1690
1691  for (Variable *Arg : Args) {
1692    RegNumT DummyReg;
1693    const Type Ty = Arg->getType();
1694    bool PartialOnStack;
1695    // Skip arguments passed in registers.
1696    if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1697      // Load argument from stack:
1698      // 1. If this is first vector argument and return type is v4f32.
1699      //    In this case $4 is used to pass stack address implicitly.
1700      //    3rd and 4th element of vector argument is passed through stack.
1701      // 2. If this is second vector argument.
1702      if (ArgNo != 0 && isVectorType(Ty)) {
1703        PartialOnStack = true;
1704        finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1705                               &InArgsSizeBytes);
1706      }
1707    } else {
1708      PartialOnStack = false;
1709      finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1710                             &InArgsSizeBytes);
1711    }
1712    ++ArgNo;
1713  }
1714
1715  // Fill in stack offsets for locals.
1716  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1717                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1718  this->HasComputedFrame = true;
1719
1720  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1721    OstreamLocker _(Func->getContext());
1722    Ostream &Str = Func->getContext()->getStrDump();
1723
1724    Str << "Stack layout:\n";
1725    uint32_t SPAdjustmentPaddingSize =
1726        SpillAreaSizeBytes - LocalsSpillAreaSize -
1727        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1728        MaxOutArgsSizeBytes;
1729    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1730        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1731        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1732        << " globals spill area = " << GlobalsSize << " bytes\n"
1733        << " globals-locals spill areas intermediate padding = "
1734        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1735        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1736        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1737
1738    Str << "Stack details:\n"
1739        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1740        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1741        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1742        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1743        << " bytes\n"
1744        << " is FP based = " << 1 << "\n";
1745  }
1746  return;
1747}
1748
/// Inserts the function epilog into Node: restores the stack pointer from FP
/// (when a frame pointer is used), reloads callee-saved registers, deallocates
/// the stack frame, and -- under sandboxing only -- replaces the original ret
/// with the sandboxer's return sequence.
void TargetMIPS32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Scan backwards for a ret instruction; a node that has none (e.g. one
  // ending in a branch) needs no epilog.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstMIPS32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = reverseToForwardIterator(RI);
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
    // use of SP before the assignment of SP=FP keeps previous SP adjustments
    // from being dead-code eliminated.
    Context.insert<InstFakeUse>(SP);
    Sandboxer(this).reset_sp(FP);
  }

  VarList::reverse_iterator RIter, END;

  // Reload the registers saved in the prolog. PreservedGPRs is walked in
  // reverse so StackOffset mirrors the order the registers were stored.
  if (!PreservedGPRs.empty()) {
    uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
    for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
         RIter != END; ++RIter) {
      Type RegType;
      // FPR slots are accessed as f32 words; everything else as i32.
      if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
        RegType = IceType_f32;
      else
        RegType = IceType_i32;
      auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
      Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
      OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
          Func, RegType, SP,
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
      _lw(PhysicalRegister, MemoryLocation);
      StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
    }
  }

  // Deallocate the frame: "addiu sp, sp, TotalStackSizeBytes".
  if (TotalStackSizeBytes) {
    Sandboxer(this).addiu_sp(TotalStackSizeBytes);
  }
  // Without sandboxing the original ret instruction is kept as-is.
  if (!getFlags().getUseSandboxing())
    return;

  // Sandboxed build: emit the sandboxer's return (through RA, forwarding the
  // return value if any) and delete the original ret instruction.
  Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));

  Sandboxer(this).ret(RA, RetValue);

  RI->setDeleted();
}
1812
1813Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1814    Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1815  // Legalize will likely need a lui/ori combination, but if the top bits are
1816  // all 0 from negating the offset and subtracting, we could use that instead.
1817  const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1818  Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1819  if (ShouldSub) {
1820    Target->_addi(ScratchReg, Base, -Offset);
1821  } else {
1822    constexpr bool SignExt = true;
1823    if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1824      const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1825      const uint32_t LowerBits = Offset & 0xFFFF;
1826      Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1827      if (LowerBits)
1828        Target->_ori(ScratchReg, ScratchReg, LowerBits);
1829      Target->_addu(ScratchReg, ScratchReg, Base);
1830    } else {
1831      Target->_addiu(ScratchReg, Base, Offset);
1832    }
1833  }
1834
1835  return ScratchReg;
1836}
1837
1838void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1839    InstMIPS32MovFP64ToI64 *MovInstr) {
1840  Variable *Dest = MovInstr->getDest();
1841  Operand *Src = MovInstr->getSrc(0);
1842  const Type SrcTy = Src->getType();
1843
1844  if (Dest != nullptr && SrcTy == IceType_f64) {
1845    int32_t Offset = Dest->getStackOffset();
1846    auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1847    OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1848        Target->Func, IceType_f32, Base,
1849        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1850    OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1851    auto *SrcV = llvm::cast<Variable>(Src);
1852    Variable *SrcR;
1853    if (MovInstr->getInt64Part() == Int64_Lo) {
1854      SrcR = Target->makeReg(
1855          IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1856    } else {
1857      SrcR = Target->makeReg(
1858          IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1859    }
1860    Sandboxer(Target).sw(SrcR, Addr);
1861    if (MovInstr->isDestRedefined()) {
1862      Target->_set_dest_redefined();
1863    }
1864    MovInstr->setDeleted();
1865    return;
1866  }
1867
1868  llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1869}
1870
1871void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1872  Variable *Dest = MovInstr->getDest();
1873  assert(Dest != nullptr);
1874  const Type DestTy = Dest->getType();
1875  assert(DestTy != IceType_i64);
1876
1877  Operand *Src = MovInstr->getSrc(0);
1878  const Type SrcTy = Src->getType();
1879  (void)SrcTy;
1880  assert(SrcTy != IceType_i64);
1881
1882  bool Legalized = false;
1883  auto *SrcR = llvm::cast<Variable>(Src);
1884  if (Dest->hasReg() && SrcR->hasReg()) {
1885    // This might be a GP to/from FP move generated due to argument passing.
1886    // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1887    // different types.
1888    const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1889    const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1890    const RegNumT SRegNum = SrcR->getRegNum();
1891    const RegNumT DRegNum = Dest->getRegNum();
1892    if (IsDstGPR != IsSrcGPR) {
1893      if (IsDstGPR) {
1894        // Dest is GPR and SrcR is FPR. Use mfc1.
1895        int32_t TypeWidth = typeWidthInBytes(DestTy);
1896        if (MovInstr->getDestHi() != nullptr)
1897          TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1898        if (TypeWidth == 8) {
1899          // Split it into two mfc1 instructions
1900          Variable *SrcGPRHi = Target->makeReg(
1901              IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1902          Variable *SrcGPRLo = Target->makeReg(
1903              IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1904          Variable *DstFPRHi, *DstFPRLo;
1905          if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1906            DstFPRHi = Target->makeReg(IceType_i32,
1907                                       MovInstr->getDestHi()->getRegNum());
1908            DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1909          } else {
1910            DstFPRHi = Target->makeReg(
1911                IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1912            DstFPRLo = Target->makeReg(
1913                IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1914          }
1915          Target->_mov(DstFPRHi, SrcGPRHi);
1916          Target->_mov(DstFPRLo, SrcGPRLo);
1917          Legalized = true;
1918        } else {
1919          Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1920          Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1921          Target->_mov(DstFPR, SrcGPR);
1922          Legalized = true;
1923        }
1924      } else {
1925        // Dest is FPR and SrcR is GPR. Use mtc1.
1926        if (typeWidthInBytes(Dest->getType()) == 8) {
1927          Variable *SrcGPRHi, *SrcGPRLo;
1928          // SrcR could be $zero which is i32
1929          if (SRegNum == RegMIPS32::Reg_ZERO) {
1930            SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1931            SrcGPRLo = SrcGPRHi;
1932          } else {
1933            // Split it into two mtc1 instructions
1934            if (MovInstr->getSrcSize() == 2) {
1935              const auto FirstReg =
1936                  (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1937              const auto SecondReg =
1938                  (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1939              SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1940              SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1941            } else {
1942              SrcGPRLo = Target->makeReg(
1943                  IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1944              SrcGPRHi = Target->makeReg(
1945                  IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1946            }
1947          }
1948          Variable *DstFPRHi = Target->makeReg(
1949              IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1950          Variable *DstFPRLo = Target->makeReg(
1951              IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1952          Target->_mov(DstFPRHi, SrcGPRLo);
1953          Target->_mov(DstFPRLo, SrcGPRHi);
1954          Legalized = true;
1955        } else {
1956          Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1957          Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1958          Target->_mov(DstFPR, SrcGPR);
1959          Legalized = true;
1960        }
1961      }
1962    }
1963    if (Legalized) {
1964      if (MovInstr->isDestRedefined()) {
1965        Target->_set_dest_redefined();
1966      }
1967      MovInstr->setDeleted();
1968      return;
1969    }
1970  }
1971
1972  if (!Dest->hasReg()) {
1973    auto *SrcR = llvm::cast<Variable>(Src);
1974    assert(SrcR->hasReg());
1975    assert(!SrcR->isRematerializable());
1976    int32_t Offset = Dest->getStackOffset();
1977
1978    // This is a _mov(Mem(), Variable), i.e., a store.
1979    auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1980
1981    OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1982        Target->Func, DestTy, Base,
1983        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1984    OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1985        Target->Func, DestTy, Base,
1986        llvm::cast<ConstantInteger32>(
1987            Target->Ctx->getConstantInt32(Offset + 4)));
1988    OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1989
1990    // FP arguments are passed in GP reg if first argument is in GP. In this
1991    // case type of the SrcR is still FP thus we need to explicitly generate sw
1992    // instead of swc1.
1993    const RegNumT RegNum = SrcR->getRegNum();
1994    const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1995    if (SrcTy == IceType_f32 && IsSrcGPReg) {
1996      Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1997      Sandboxer(Target).sw(SrcGPR, Addr);
1998    } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
1999      Variable *SrcGPRHi =
2000          Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2001      Variable *SrcGPRLo = Target->makeReg(
2002          IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2003      Sandboxer(Target).sw(SrcGPRHi, Addr);
2004      OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2005      Sandboxer(Target).sw(SrcGPRLo, AddrHi);
2006    } else if (DestTy == IceType_f64 && IsSrcGPReg) {
2007      const auto FirstReg =
2008          (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2009      const auto SecondReg =
2010          (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2011      Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
2012      Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
2013      Sandboxer(Target).sw(SrcGPRLo, Addr);
2014      OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2015      Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2016    } else {
2017      Sandboxer(Target).sw(SrcR, Addr);
2018    }
2019
2020    Target->Context.insert<InstFakeDef>(Dest);
2021    Legalized = true;
2022  } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
2023    if (Var->isRematerializable()) {
2024      // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2025
2026      // ExtraOffset is only needed for stack-pointer based frames as we have
2027      // to account for spill storage.
2028      const int32_t ExtraOffset =
2029          (Var->getRegNum() == Target->getFrameOrStackReg())
2030              ? Target->getFrameFixedAllocaOffset()
2031              : 0;
2032
2033      const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2034      Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2035      Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2036      Target->_mov(Dest, T);
2037      Legalized = true;
2038    } else {
2039      if (!Var->hasReg()) {
2040        // This is a _mov(Variable, Mem()), i.e., a load.
2041        const int32_t Offset = Var->getStackOffset();
2042        auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2043        const RegNumT RegNum = Dest->getRegNum();
2044        const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2045        // If we are moving i64 to a double using stack then the address may
2046        // not be aligned to 8-byte boundary as we split i64 into Hi-Lo parts
2047        // and store them individually with 4-byte alignment. Load the Hi-Lo
2048        // parts in TmpReg and move them to the dest using mtc1.
2049        if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2050            !IsDstGPReg) {
2051          auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2052          const RegNumT RegNum = Dest->getRegNum();
2053          Variable *DestLo = Target->makeReg(
2054              IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2055          Variable *DestHi = Target->makeReg(
2056              IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2057          OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2058              Target->Func, IceType_i32, Base,
2059              llvm::cast<ConstantInteger32>(
2060                  Target->Ctx->getConstantInt32(Offset)));
2061          OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2062              Target->Func, IceType_i32, Base,
2063              llvm::cast<ConstantInteger32>(
2064                  Target->Ctx->getConstantInt32(Offset + 4)));
2065          Sandboxer(Target).lw(Reg, AddrLo);
2066          Target->_mov(DestLo, Reg);
2067          Sandboxer(Target).lw(Reg, AddrHi);
2068          Target->_mov(DestHi, Reg);
2069        } else {
2070          OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2071              Target->Func, DestTy, Base,
2072              llvm::cast<ConstantInteger32>(
2073                  Target->Ctx->getConstantInt32(Offset)));
2074          OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2075          OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2076              Target->Func, DestTy, Base,
2077              llvm::cast<ConstantInteger32>(
2078                  Target->Ctx->getConstantInt32(Offset + 4)));
2079          // FP arguments are passed in GP reg if first argument is in GP.
2080          // In this case type of the Dest is still FP thus we need to
2081          // explicitly generate lw instead of lwc1.
2082          if (DestTy == IceType_f32 && IsDstGPReg) {
2083            Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2084            Sandboxer(Target).lw(DstGPR, Addr);
2085          } else if (DestTy == IceType_f64 && IsDstGPReg) {
2086            Variable *DstGPRHi = Target->makeReg(
2087                IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2088            Variable *DstGPRLo = Target->makeReg(
2089                IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2090            Sandboxer(Target).lw(DstGPRHi, Addr);
2091            OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2092            Sandboxer(Target).lw(DstGPRLo, AddrHi);
2093          } else if (DestTy == IceType_f64 && IsDstGPReg) {
2094            const auto FirstReg =
2095                (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2096            const auto SecondReg =
2097                (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2098            Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2099            Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2100            Sandboxer(Target).lw(DstGPRLo, Addr);
2101            OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2102            Sandboxer(Target).lw(DstGPRHi, AddrHi);
2103          } else {
2104            Sandboxer(Target).lw(Dest, Addr);
2105          }
2106        }
2107        Legalized = true;
2108      }
2109    }
2110  }
2111
2112  if (Legalized) {
2113    if (MovInstr->isDestRedefined()) {
2114      Target->_set_dest_redefined();
2115    }
2116    MovInstr->setDeleted();
2117  }
2118}
2119
2120OperandMIPS32Mem *
2121TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2122  if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2123    return nullptr;
2124  }
2125  Variable *Base = Mem->getBase();
2126  auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2127  int32_t Offset = Ci32->getValue();
2128
2129  if (Base->isRematerializable()) {
2130    const int32_t ExtraOffset =
2131        (Base->getRegNum() == Target->getFrameOrStackReg())
2132            ? Target->getFrameFixedAllocaOffset()
2133            : 0;
2134    Offset += Base->getStackOffset() + ExtraOffset;
2135    Base = Target->getPhysicalRegister(Base->getRegNum());
2136  }
2137
2138  constexpr bool SignExt = true;
2139  if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2140    Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2141    Offset = 0;
2142  }
2143
2144  return OperandMIPS32Mem::create(
2145      Target->Func, Mem->getType(), Base,
2146      llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2147}
2148
2149Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2150  Variable *Reg = nullptr;
2151  if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2152        (Imm <= std::numeric_limits<int16_t>::max()))) {
2153    const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2154    const uint32_t LowerBits = Imm & 0xFFFF;
2155    Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2156    Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2157    if (LowerBits) {
2158      Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2159      Target->_ori(Reg, TReg, LowerBits);
2160    } else {
2161      Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2162    }
2163  }
2164  return Reg;
2165}
2166
2167void TargetMIPS32::postLowerLegalization() {
2168  Func->dump("Before postLowerLegalization");
2169  assert(hasComputedFrame());
2170  for (CfgNode *Node : Func->getNodes()) {
2171    Context.init(Node);
2172    PostLoweringLegalizer Legalizer(this);
2173    while (!Context.atEnd()) {
2174      PostIncrLoweringContext PostIncrement(Context);
2175      Inst *CurInstr = iteratorToInst(Context.getCur());
2176      const SizeT NumSrcs = CurInstr->getSrcSize();
2177      Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2178      Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2179      auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2180      auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2181      auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2182      Variable *Dst = CurInstr->getDest();
2183      if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2184        Legalizer.legalizeMov(MovInstr);
2185        continue;
2186      }
2187      if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2188        Legalizer.legalizeMovFp(MovInstr);
2189        continue;
2190      }
2191      if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2192        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2193          Sandboxer(this).sw(Src0V, LegalMem);
2194          CurInstr->setDeleted();
2195        }
2196        continue;
2197      }
2198      if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2199        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2200          _swc1(Src0V, LegalMem);
2201          CurInstr->setDeleted();
2202        }
2203        continue;
2204      }
2205      if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2206        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2207          _sdc1(Src0V, LegalMem);
2208          CurInstr->setDeleted();
2209        }
2210        continue;
2211      }
2212      if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2213        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2214          Sandboxer(this).lw(Dst, LegalMem);
2215          CurInstr->setDeleted();
2216        }
2217        continue;
2218      }
2219      if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2220        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2221          _lwc1(Dst, LegalMem);
2222          CurInstr->setDeleted();
2223        }
2224        continue;
2225      }
2226      if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2227        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2228          _ldc1(Dst, LegalMem);
2229          CurInstr->setDeleted();
2230        }
2231        continue;
2232      }
2233      if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2234        if (auto *LegalImm = Legalizer.legalizeImmediate(
2235                static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2236          _addu(Dst, Src0V, LegalImm);
2237          CurInstr->setDeleted();
2238        }
2239        continue;
2240      }
2241    }
2242  }
2243}
2244
2245Operand *TargetMIPS32::loOperand(Operand *Operand) {
2246  assert(Operand->getType() == IceType_i64);
2247  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2248    return Var64On32->getLo();
2249  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2250    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2251  }
2252  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2253    // Conservatively disallow memory operands with side-effects (pre/post
2254    // increment) in case of duplication.
2255    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2256    return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2257                                    Mem->getOffset(), Mem->getAddrMode());
2258  }
2259  llvm_unreachable("Unsupported operand type");
2260  return nullptr;
2261}
2262
2263Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
2264                                         uint32_t Index) {
2265  if (!isVectorType(Operand->getType())) {
2266    llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
2267    return nullptr;
2268  }
2269
2270  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2271    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2272    Variable *Base = Mem->getBase();
2273    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2274    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2275    int32_t NextOffsetVal =
2276        Offset->getValue() + (Index * typeWidthInBytes(BaseType));
2277    constexpr bool NoSignExt = false;
2278    if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
2279      Constant *_4 = Ctx->getConstantInt32(4);
2280      Variable *NewBase = Func->makeVariable(Base->getType());
2281      lowerArithmetic(
2282          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
2283      Base = NewBase;
2284    } else {
2285      Offset =
2286          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2287    }
2288    return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
2289                                    Mem->getAddrMode());
2290  }
2291
2292  if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
2293    return VarVecOn32->getContainers()[Index];
2294
2295  llvm_unreachable("Unsupported operand type");
2296  return nullptr;
2297}
2298
2299Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2300  assert(Operand->getType() == IceType_i64);
2301  if (Operand->getType() != IceType_i64)
2302    return Operand;
2303  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2304    return Var64On32->getHi();
2305  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2306    return Ctx->getConstantInt32(
2307        static_cast<uint32_t>(Const->getValue() >> 32));
2308  }
2309  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2310    // Conservatively disallow memory operands with side-effects
2311    // in case of duplication.
2312    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2313    const Type SplitType = IceType_i32;
2314    Variable *Base = Mem->getBase();
2315    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2316    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2317    int32_t NextOffsetVal = Offset->getValue() + 4;
2318    constexpr bool SignExt = false;
2319    if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
2320      // We have to make a temp variable and add 4 to either Base or Offset.
2321      // If we add 4 to Offset, this will convert a non-RegReg addressing
2322      // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2323      // RegReg addressing modes, prefer adding to base and replacing instead.
2324      // Thus we leave the old offset alone.
2325      Constant *Four = Ctx->getConstantInt32(4);
2326      Variable *NewBase = Func->makeVariable(Base->getType());
2327      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2328                                             Base, Four));
2329      Base = NewBase;
2330    } else {
2331      Offset =
2332          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2333    }
2334    return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2335                                    Mem->getAddrMode());
2336  }
2337  llvm_unreachable("Unsupported operand type");
2338  return nullptr;
2339}
2340
// Builds the bit-set of registers selected by Include and not rejected by
// Exclude. Each register is first added if any of its classes (caller-save,
// callee-save, stack pointer, frame pointer) appears in Include, and then
// cleared if any of its classes appears in Exclude, so Exclude wins when the
// two masks overlap.
SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
                                            RegSetMask Exclude) const {
  SmallBitVector Registers(RegMIPS32::Reg_NUM);

// The X macro is expanded once per register by REGMIPS32_TABLE below.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = true;                                          \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = true;                                          \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = false;                                         \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = false;                                         \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = false;                                         \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = false;

  REGMIPS32_TABLE

#undef X

  // When sandboxing is enabled, T6-T8 are withheld from allocation —
  // presumably reserved for the sandboxing instruction sequences; confirm
  // against the Sandboxer implementation.
  if (NeedSandboxing) {
    Registers[RegMIPS32::Reg_T6] = false;
    Registers[RegMIPS32::Reg_T7] = false;
    Registers[RegMIPS32::Reg_T8] = false;
  }
  return Registers;
}
2375
// Lowers an alloca instruction. Constant-size allocas are folded into the
// frame (tracked via FixedAllocaSizeBytes / CurrentAllocaOffset); variable
// size allocas round the size up to the stack alignment at runtime and move
// SP down.
void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));

  // The effective alignment is at least the target stack alignment.
  const uint32_t Alignment =
      std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  // A frame pointer is needed whenever the SP-relative offset of the alloca
  // cannot be known statically (over-alignment, unknown offset, or Om1).
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = Instr->getSizeInBytes();

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Round the requested size up to the effective alignment and account for
    // it in the fixed portion of the frame.
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    FixedAllocaSizeBytes += Value;
    // Constant size alloca.
    if (!UseFramePointer) {
      // If we don't need a Frame Pointer, this alloca has a known offset to the
      // stack pointer. We don't need adjust the stack pointer, nor assign any
      // value to Dest, as Dest is rematerializable.
      assert(Dest->isRematerializable());
      Context.insert<InstFakeDef>(Dest);
      return;
    }

    // Over-aligned allocas first round the running offset up to the
    // requested alignment.
    if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
      CurrentAllocaOffset =
          Utils::applyAlignment(CurrentAllocaOffset, Alignment);
    }
    // Dest = SP + CurrentAllocaOffset, then advance the offset past this
    // alloca's storage.
    auto *T = I32Reg();
    _addiu(T, SP, CurrentAllocaOffset);
    _mov(Dest, T);
    CurrentAllocaOffset += Value;
    return;

  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    VariableAllocaUsed = true;
    VariableAllocaAlignBytes = AlignmentParam;
    Variable *AlignAmount;
    auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    // T3 = (size + ALIGN-1) & -ALIGN: size rounded up to the stack alignment.
    _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
    _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
    _and(T3, T1, T2);
    // T4 = SP - rounded size: the new (candidate) stack top.
    _subu(T4, SP, T3);
    if (Instr->getAlignInBytes()) {
      // An explicit alignment was requested: mask the result down to a
      // multiple of AlignmentParam.
      AlignAmount =
          legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
      _and(T5, T4, AlignAmount);
      _mov(Dest, T5);
    } else {
      _mov(Dest, T4);
    }
    // Commit the new stack top. Outside Om1 the update goes through the
    // Sandboxer — presumably to apply the NaCl SP masking; confirm against
    // Sandboxer::reset_sp.
    if (OptM1)
      _mov(SP, Dest);
    else
      Sandboxer(this).reset_sp(Dest);
    return;
  }
}
2464
// Lowers a 64-bit integer arithmetic instruction by operating on the lo/hi
// 32-bit halves of the operands (Dest must split via loOperand/hiOperand).
// The statement order below is the emitted instruction order.
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
                                        Variable *Dest, Operand *Src0,
                                        Operand *Src1) {
  InstArithmetic::OpKind Op = Instr->getOp();
  auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
  auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  Variable *Src0LoR = nullptr;
  Variable *Src1LoR = nullptr;
  Variable *Src0HiR = nullptr;
  Variable *Src1HiR = nullptr;

  switch (Op) {
  case InstArithmetic::_num:
    llvm::report_fatal_error("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add: {
    // lo = lo0 + lo1; carry = (lo <u lo0); hi = hi0 + hi1 + carry.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _addu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Carry, T_Lo, Src0LoR);
    _addu(T_Hi, T_Carry, Src0HiR);
    _addu(T_Hi2, Src1HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
  case InstArithmetic::And: {
    // Bitwise ops apply independently to each half.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _and(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _and(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Sub: {
    // lo = lo0 - lo1; borrow = (lo0 <u lo1); hi = hi0 - (hi1 + borrow).
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _subu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Borrow, Src0LoR, Src1LoR);
    _addu(T_Hi, T_Borrow, Src1HiR);
    _subu(T_Hi2, Src0HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
  case InstArithmetic::Or: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _or(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _or(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Xor: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _xor(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _xor(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Mul: {
    // 64x64 -> low 64 product:
    //   {HI,LO} = lo0 * lo1 (unsigned);  dest.lo = LO;
    //   dest.hi = HI + hi0*lo1 + lo0*hi1.
    // The InstFakeDef ties T_Hi's definition to the multu that produced it.
    // TODO(rkotler): Make sure that mul has the side effect of clobbering
    // LO, HI. Check for any other LO, HI quirkiness in this section.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
    auto *T1 = I32Reg(), *T2 = I32Reg();
    auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
    _multu(T_Lo, Src0LoR, Src1LoR);
    Context.insert<InstFakeDef>(T_Hi, T_Lo);
    _mflo(T1, T_Lo);
    _mfhi(T2, T_Hi);
    _mov(DestLo, T1);
    _mul(TM1, Src0HiR, Src1LoR);
    _mul(TM2, Src0LoR, Src1HiR);
    _addu(TM3, TM1, T2);
    _addu(TM4, TM3, TM2);
    _mov(DestHi, TM4);
    return;
  }
  case InstArithmetic::Shl: {
    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    // Constant shift amount: pick a dedicated sequence per amount range.
    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      Src0LoR = legalizeToReg(loOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount == 1) {
        // Shift-by-one as an add-with-carry of the value to itself.
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _addu(T_Lo, Src0LoR, Src0LoR);
        _sltu(T1, T_Lo, Src0LoR);
        _addu(T2, T1, Src0HiR);
        _addu(T_Hi, Src0HiR, T2);
      } else if (ShiftAmount < INT32_BITS) {
        // hi = (hi << n) | (lo >> (32-n)); lo = lo << n.
        // NOTE(review): ShiftAmount == 0 also lands here and would emit
        // srl by 32 — presumably shift-by-zero is folded before lowering;
        // confirm.
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
        _sll(T2, Src0HiR, ShiftAmount);
        _or(T_Hi, T1, T2);
        _sll(T_Lo, Src0LoR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // hi = lo; lo = 0.
        _addiu(T_Lo, getZero(), 0);
        _mov(T_Hi, Src0LoR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        // hi = lo << (n-32); lo = 0.
        _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
        _addiu(T_Lo, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    // Variable shift amount (only the low word of Src1 matters).
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    // hi = (hi << amt) | ((lo >> 1) >> ~amt)  — i.e. lo >> (32-amt) without
    // relying on a 32-bit shift; lo = lo << amt.
    _sllv(T1, Src0HiR, Src1LoR);
    _not(T2, Src1LoR);
    _srl(T3, Src0LoR, 1);
    _srlv(T4, T3, T2);
    _or(T_Hi, T1, T4);
    _sllv(T_Lo, Src0LoR, Src1LoR);

    // If bit 5 of the amount is set (amt >= 32), fix up: hi = lo-shifted
    // value, lo = 0, using conditional moves.
    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Hi, T_Lo, T5);
    _movn(T1_Lo, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Lshr: {

    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    // Constant shift amount.
    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      Src0HiR = legalizeToReg(hiOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount < INT32_BITS) {
        // lo = (hi << (32-n)) | (lo >> n); hi = hi >> n.
        // NOTE(review): ShiftAmount == 0 would emit sll by 32 here —
        // presumably folded away before lowering; confirm.
        Src0LoR = legalizeToReg(loOperand(Src0));
        _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
        _srl(T2, Src0LoR, ShiftAmount);
        _or(T_Lo, T1, T2);
        _srl(T_Hi, Src0HiR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // lo = hi; hi = 0.
        _mov(T_Lo, Src0HiR);
        _addiu(T_Hi, getZero(), 0);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        // lo = hi >> (n-32); hi = 0.
        _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
        _addiu(T_Hi, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    // Variable shift amount.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    // lo = (lo >> amt) | ((hi << 1) << ~amt)  — i.e. hi << (32-amt);
    // hi = hi >> amt (logical).
    _srlv(T1, Src0LoR, Src1LoR);
    _not(T2, Src1LoR);
    _sll(T3, Src0HiR, 1);
    _sllv(T4, T3, T2);
    _or(T_Lo, T1, T4);
    _srlv(T_Hi, Src0HiR, Src1LoR);

    // amt >= 32 fixup: lo = shifted hi, hi = 0.
    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Lo, T_Hi, T5);
    _movn(T1_Hi, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Ashr: {

    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    auto *T6 = I32Reg();

    // Constant shift amount.
    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      Src0HiR = legalizeToReg(hiOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount < INT32_BITS) {
        // lo = (hi << (32-n)) | (lo >> n); hi = hi >>a n (arithmetic).
        // NOTE(review): ShiftAmount == 0 would emit sll by 32 here —
        // presumably folded away before lowering; confirm.
        Src0LoR = legalizeToReg(loOperand(Src0));
        _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
        _srl(T2, Src0LoR, ShiftAmount);
        _or(T_Lo, T1, T2);
        _sra(T_Hi, Src0HiR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // lo = hi; hi = sign-fill of hi.
        _sra(T_Hi, Src0HiR, INT32_BITS - 1);
        _mov(T_Lo, Src0HiR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        // lo = hi >>a (n-32); hi = sign-fill of hi.
        _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
        _sra(T_Hi, Src0HiR, INT32_BITS - 1);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    // Variable shift amount.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    // lo = (lo >> amt) | ((hi << 1) << ~amt); hi = hi >>a amt.
    _srlv(T1, Src0LoR, Src1LoR);
    _not(T2, Src1LoR);
    _sll(T3, Src0HiR, 1);
    _sllv(T4, T3, T2);
    _or(T_Lo, T1, T4);
    _srav(T_Hi, Src0HiR, Src1LoR);

    // amt >= 32 fixup: lo = arithmetically shifted hi, hi = sign-fill.
    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Lo, T_Hi, T5);
    _sra(T6, Src0HiR, INT32_BITS - 1);
    _movn(T1_Hi, T6, T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Fadd:
  case InstArithmetic::Fsub:
  case InstArithmetic::Fmul:
  case InstArithmetic::Fdiv:
  case InstArithmetic::Frem:
    llvm::report_fatal_error("FP instruction with i64 type");
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm::report_fatal_error("64-bit div and rem should have been prelowered");
    return;
  }
}
2749
// Lowers a scalar arithmetic instruction. i64 is delegated to
// lowerInt64Arithmetic; vectors are unsupported. Immediate forms (addiu,
// andi, ori, xori, shift-by-immediate) are selected when Src1 is a suitably
// small constant; i8/i16 operands are first widened to i32 where the
// operation's result depends on the upper bits.
void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
  Variable *Dest = Instr->getDest();

  // Rematerializable destinations need no code; keep liveness happy.
  if (Dest->isRematerializable()) {
    Context.insert<InstFakeDef>(Dest);
    return;
  }

  // We need to signal all the UnimplementedLoweringError errors before any
  // legalization into new variables, otherwise Om1 register allocation may fail
  // when it sees variables that are defined but not used.
  Type DestTy = Dest->getType();
  Operand *Src0 = legalizeUndef(Instr->getSrc(0));
  Operand *Src1 = legalizeUndef(Instr->getSrc(1));
  if (DestTy == IceType_i64) {
    lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
    return;
  }
  if (isVectorType(Dest->getType())) {
    llvm::report_fatal_error("Arithmetic: Destination type is vector");
    return;
  }

  Variable *T = makeReg(Dest->getType());
  Variable *Src0R = legalizeToReg(Src0);
  Variable *Src1R = nullptr;
  uint32_t Value = 0;
  bool IsSrc1Imm16 = false;

  // Decide whether Src1 can be encoded as a 16-bit immediate for this op:
  // add/sub use the signed addiu immediate; the logical and shift ops use an
  // unsigned 16-bit immediate. Otherwise Src1 is forced into a register.
  switch (Instr->getOp()) {
  case InstArithmetic::Add:
  case InstArithmetic::Sub: {
    auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
    if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
      IsSrc1Imm16 = true;
      Value = Const32->getValue();
    } else {
      Src1R = legalizeToReg(Src1);
    }
    break;
  }
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr: {
    auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
    if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
      IsSrc1Imm16 = true;
      Value = Const32->getValue();
    } else {
      Src1R = legalizeToReg(Src1);
    }
    break;
  }
  default:
    Src1R = legalizeToReg(Src1);
    break;
  }
  constexpr uint32_t DivideByZeroTrapCode = 7;

  switch (Instr->getOp()) {
  case InstArithmetic::_num:
    break;
  case InstArithmetic::Add: {
    // Narrow operands are sign-extended to i32 before the 32-bit add.
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      _addiu(T, T0R, Value);
    } else {
      _addu(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::And:
    if (IsSrc1Imm16) {
      _andi(T, Src0R, Value);
    } else {
      _and(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    if (IsSrc1Imm16) {
      _ori(T, Src0R, Value);
    } else {
      _or(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    if (IsSrc1Imm16) {
      _xori(T, Src0R, Value);
    } else {
      _xor(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub: {
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
      }
    }
    // Subtract-immediate is emitted as addiu with the negated value.
    // NOTE(review): isInt<16> above admits Value == -32768, whose negation
    // (+32768) does not fit the signed 16-bit addiu immediate — confirm this
    // case is excluded before reaching here.
    if (IsSrc1Imm16) {
      _addiu(T, T0R, -Value);
    } else {
      _subu(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Mul: {
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl: {
    if (IsSrc1Imm16) {
      _sll(T, Src0R, Value);
    } else {
      _sllv(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Lshr: {
    // Logical shift right: zero-extend narrow operands so the vacated upper
    // bits don't leak into the result.
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      _srl(T, T0R, Value);
    } else {
      _srlv(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Ashr: {
    // Arithmetic shift right: sign-extend narrow operands first.
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      _sra(T, T0R, Value);
    } else {
      _srav(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Udiv: {
    // Unsigned divide: quotient lands in LO (mflo).
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
    }
    _divu(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mflo(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Sdiv: {
    // Signed divide: quotient lands in LO (mflo).
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
    }
    _div(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mflo(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Urem: {
    // Unsigned remainder: remainder lands in HI (mfhi).
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
    }
    _divu(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mfhi(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Srem: {
    // Signed remainder: remainder lands in HI (mfhi).
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
    }
    _div(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mfhi(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  // FP ops pick the single- or double-precision instruction by DestTy.
  case InstArithmetic::Fadd: {
    if (DestTy == IceType_f32) {
      _add_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _add_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  }
  case InstArithmetic::Fsub:
    if (DestTy == IceType_f32) {
      _sub_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _sub_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  case InstArithmetic::Fmul:
    if (DestTy == IceType_f32) {
      _mul_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _mul_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  case InstArithmetic::Fdiv:
    if (DestTy == IceType_f32) {
      _div_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _div_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  case InstArithmetic::Frem:
    llvm::report_fatal_error("frem should have been prelowered.");
    break;
  }
  llvm::report_fatal_error("Unknown arithmetic operator");
}
3048
3049void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3050  Variable *Dest = Instr->getDest();
3051
3052  if (Dest->isRematerializable()) {
3053    Context.insert<InstFakeDef>(Dest);
3054    return;
3055  }
3056
3057  // Source type may not be same as destination
3058  if (isVectorType(Dest->getType())) {
3059    Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3060    auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3061    for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3062      auto *DCont = DstVec->getContainers()[i];
3063      auto *SCont =
3064          legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3065      auto *TReg = makeReg(IceType_i32);
3066      _mov(TReg, SCont);
3067      _mov(DCont, TReg);
3068    }
3069    return;
3070  }
3071  Operand *Src0 = Instr->getSrc(0);
3072  assert(Dest->getType() == Src0->getType());
3073  if (Dest->getType() == IceType_i64) {
3074    Src0 = legalizeUndef(Src0);
3075    Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3076    Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3077    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3078    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3079    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3080    _mov(T_Lo, Src0Lo);
3081    _mov(DestLo, T_Lo);
3082    _mov(T_Hi, Src0Hi);
3083    _mov(DestHi, T_Hi);
3084    return;
3085  }
3086  Operand *SrcR;
3087  if (Dest->hasReg()) {
3088    // If Dest already has a physical register, then legalize the Src operand
3089    // into a Variable with the same register assignment.  This especially
3090    // helps allow the use of Flex operands.
3091    SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3092  } else {
3093    // Dest could be a stack operand. Since we could potentially need
3094    // to do a Store (and store can only have Register operands),
3095    // legalize this to a register.
3096    SrcR = legalize(Src0, Legal_Reg);
3097  }
3098  _mov(Dest, SrcR);
3099}
3100
3101void TargetMIPS32::lowerBr(const InstBr *Instr) {
3102  if (Instr->isUnconditional()) {
3103    _br(Instr->getTargetUnconditional());
3104    return;
3105  }
3106  CfgNode *TargetTrue = Instr->getTargetTrue();
3107  CfgNode *TargetFalse = Instr->getTargetFalse();
3108  Operand *Boolean = Instr->getCondition();
3109  const Inst *Producer = Computations.getProducerOf(Boolean);
3110  if (Producer == nullptr) {
3111    // Since we don't know the producer of this boolean we will assume its
3112    // producer will keep it in positive logic and just emit beqz with this
3113    // Boolean as an operand.
3114    auto *BooleanR = legalizeToReg(Boolean);
3115    _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3116    return;
3117  }
3118  if (Producer->getKind() == Inst::Icmp) {
3119    const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3120    Operand *Src0 = CompareInst->getSrc(0);
3121    Operand *Src1 = CompareInst->getSrc(1);
3122    const Type Src0Ty = Src0->getType();
3123    assert(Src0Ty == Src1->getType());
3124
3125    Variable *Src0R = nullptr;
3126    Variable *Src1R = nullptr;
3127    Variable *Src0HiR = nullptr;
3128    Variable *Src1HiR = nullptr;
3129    if (Src0Ty == IceType_i64) {
3130      Src0R = legalizeToReg(loOperand(Src0));
3131      Src1R = legalizeToReg(loOperand(Src1));
3132      Src0HiR = legalizeToReg(hiOperand(Src0));
3133      Src1HiR = legalizeToReg(hiOperand(Src1));
3134    } else {
3135      auto *Src0RT = legalizeToReg(Src0);
3136      auto *Src1RT = legalizeToReg(Src1);
3137      // Sign/Zero extend the source operands
3138      if (Src0Ty != IceType_i32) {
3139        InstCast::OpKind CastKind;
3140        switch (CompareInst->getCondition()) {
3141        case InstIcmp::Eq:
3142        case InstIcmp::Ne:
3143        case InstIcmp::Sgt:
3144        case InstIcmp::Sge:
3145        case InstIcmp::Slt:
3146        case InstIcmp::Sle:
3147          CastKind = InstCast::Sext;
3148          break;
3149        default:
3150          CastKind = InstCast::Zext;
3151          break;
3152        }
3153        Src0R = makeReg(IceType_i32);
3154        Src1R = makeReg(IceType_i32);
3155        lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3156        lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3157      } else {
3158        Src0R = Src0RT;
3159        Src1R = Src1RT;
3160      }
3161    }
3162    auto *DestT = makeReg(IceType_i32);
3163
3164    switch (CompareInst->getCondition()) {
3165    default:
3166      llvm_unreachable("unexpected condition");
3167      return;
3168    case InstIcmp::Eq: {
3169      if (Src0Ty == IceType_i64) {
3170        auto *T1 = I32Reg();
3171        auto *T2 = I32Reg();
3172        auto *T3 = I32Reg();
3173        _xor(T1, Src0HiR, Src1HiR);
3174        _xor(T2, Src0R, Src1R);
3175        _or(T3, T1, T2);
3176        _mov(DestT, T3);
3177        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3178      } else {
3179        _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3180      }
3181      return;
3182    }
3183    case InstIcmp::Ne: {
3184      if (Src0Ty == IceType_i64) {
3185        auto *T1 = I32Reg();
3186        auto *T2 = I32Reg();
3187        auto *T3 = I32Reg();
3188        _xor(T1, Src0HiR, Src1HiR);
3189        _xor(T2, Src0R, Src1R);
3190        _or(T3, T1, T2);
3191        _mov(DestT, T3);
3192        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3193      } else {
3194        _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3195      }
3196      return;
3197    }
3198    case InstIcmp::Ugt: {
3199      if (Src0Ty == IceType_i64) {
3200        auto *T1 = I32Reg();
3201        auto *T2 = I32Reg();
3202        auto *T3 = I32Reg();
3203        auto *T4 = I32Reg();
3204        auto *T5 = I32Reg();
3205        _xor(T1, Src0HiR, Src1HiR);
3206        _sltu(T2, Src1HiR, Src0HiR);
3207        _xori(T3, T2, 1);
3208        _sltu(T4, Src1R, Src0R);
3209        _xori(T5, T4, 1);
3210        _movz(T3, T5, T1);
3211        _mov(DestT, T3);
3212        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3213      } else {
3214        _sltu(DestT, Src1R, Src0R);
3215        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3216      }
3217      return;
3218    }
3219    case InstIcmp::Uge: {
3220      if (Src0Ty == IceType_i64) {
3221        auto *T1 = I32Reg();
3222        auto *T2 = I32Reg();
3223        auto *T3 = I32Reg();
3224        _xor(T1, Src0HiR, Src1HiR);
3225        _sltu(T2, Src0HiR, Src1HiR);
3226        _sltu(T3, Src0R, Src1R);
3227        _movz(T2, T3, T1);
3228        _mov(DestT, T2);
3229        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3230      } else {
3231        _sltu(DestT, Src0R, Src1R);
3232        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3233      }
3234      return;
3235    }
3236    case InstIcmp::Ult: {
3237      if (Src0Ty == IceType_i64) {
3238        auto *T1 = I32Reg();
3239        auto *T2 = I32Reg();
3240        auto *T3 = I32Reg();
3241        auto *T4 = I32Reg();
3242        auto *T5 = I32Reg();
3243        _xor(T1, Src0HiR, Src1HiR);
3244        _sltu(T2, Src0HiR, Src1HiR);
3245        _xori(T3, T2, 1);
3246        _sltu(T4, Src0R, Src1R);
3247        _xori(T5, T4, 1);
3248        _movz(T3, T5, T1);
3249        _mov(DestT, T3);
3250        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3251      } else {
3252        _sltu(DestT, Src0R, Src1R);
3253        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3254      }
3255      return;
3256    }
3257    case InstIcmp::Ule: {
3258      if (Src0Ty == IceType_i64) {
3259        auto *T1 = I32Reg();
3260        auto *T2 = I32Reg();
3261        auto *T3 = I32Reg();
3262        _xor(T1, Src0HiR, Src1HiR);
3263        _sltu(T2, Src1HiR, Src0HiR);
3264        _sltu(T3, Src1R, Src0R);
3265        _movz(T2, T3, T1);
3266        _mov(DestT, T2);
3267        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3268      } else {
3269        _sltu(DestT, Src1R, Src0R);
3270        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3271      }
3272      return;
3273    }
3274    case InstIcmp::Sgt: {
3275      if (Src0Ty == IceType_i64) {
3276        auto *T1 = I32Reg();
3277        auto *T2 = I32Reg();
3278        auto *T3 = I32Reg();
3279        auto *T4 = I32Reg();
3280        auto *T5 = I32Reg();
3281        _xor(T1, Src0HiR, Src1HiR);
3282        _slt(T2, Src1HiR, Src0HiR);
3283        _xori(T3, T2, 1);
3284        _sltu(T4, Src1R, Src0R);
3285        _xori(T5, T4, 1);
3286        _movz(T3, T5, T1);
3287        _mov(DestT, T3);
3288        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3289      } else {
3290        _slt(DestT, Src1R, Src0R);
3291        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3292      }
3293      return;
3294    }
3295    case InstIcmp::Sge: {
3296      if (Src0Ty == IceType_i64) {
3297        auto *T1 = I32Reg();
3298        auto *T2 = I32Reg();
3299        auto *T3 = I32Reg();
3300        _xor(T1, Src0HiR, Src1HiR);
3301        _slt(T2, Src0HiR, Src1HiR);
3302        _sltu(T3, Src0R, Src1R);
3303        _movz(T2, T3, T1);
3304        _mov(DestT, T2);
3305        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3306      } else {
3307        _slt(DestT, Src0R, Src1R);
3308        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3309      }
3310      return;
3311    }
3312    case InstIcmp::Slt: {
3313      if (Src0Ty == IceType_i64) {
3314        auto *T1 = I32Reg();
3315        auto *T2 = I32Reg();
3316        auto *T3 = I32Reg();
3317        auto *T4 = I32Reg();
3318        auto *T5 = I32Reg();
3319        _xor(T1, Src0HiR, Src1HiR);
3320        _slt(T2, Src0HiR, Src1HiR);
3321        _xori(T3, T2, 1);
3322        _sltu(T4, Src0R, Src1R);
3323        _xori(T5, T4, 1);
3324        _movz(T3, T5, T1);
3325        _mov(DestT, T3);
3326        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3327      } else {
3328        _slt(DestT, Src0R, Src1R);
3329        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3330      }
3331      return;
3332    }
3333    case InstIcmp::Sle: {
3334      if (Src0Ty == IceType_i64) {
3335        auto *T1 = I32Reg();
3336        auto *T2 = I32Reg();
3337        auto *T3 = I32Reg();
3338        _xor(T1, Src0HiR, Src1HiR);
3339        _slt(T2, Src1HiR, Src0HiR);
3340        _sltu(T3, Src1R, Src0R);
3341        _movz(T2, T3, T1);
3342        _mov(DestT, T2);
3343        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3344      } else {
3345        _slt(DestT, Src1R, Src0R);
3346        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3347      }
3348      return;
3349    }
3350    }
3351  }
3352}
3353
// Lowers a function call: classifies each argument to a GPR, an FPR, or a
// stack slot per the calling convention, stores stack-passed arguments into
// the outgoing argument area, emits the (possibly sandboxed) call, and copies
// the returned value(s) out of $v0/$v1/$f0 -- or out of a stack buffer for
// v4f32 -- into the destination variable.
void TargetMIPS32::lowerCall(const InstCall *Instr) {
  CfgVector<Variable *> RegArgs;
  NeedsStackAlignment = true;

  //  Assign arguments to registers and stack. Also reserve stack.
  TargetMIPS32::CallingConv CC;

  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  // Stack offsets start past the 16-byte argument build area (see the
  // VariableAllocaUsed handling before the call below).
  size_t ParameterAreaSizeBytes = 16;

  // Classify each argument operand according to the location where the
  // argument is passed.

  // v4f32 is returned through stack. $4 is setup by the caller and passed as
  // first argument implicitly. Callee then copies the return vector at $4.
  SizeT ArgNum = 0;
  Variable *Dest = Instr->getDest();
  Variable *RetVecFloat = nullptr;
  if (Dest && isVectorFloatingType(Dest->getType())) {
    // Reserve $a0 ($4) for the hidden pointer to a 16-byte stack-allocated
    // return buffer; real arguments then start at position 1.
    ArgNum = 1;
    CC.discardReg(RegMIPS32::Reg_A0);
    RetVecFloat = Func->makeVariable(IceType_i32);
    auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
    constexpr SizeT Alignment = 4;
    lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
    RegArgs.emplace_back(
        legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
  }

  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    const Type Ty = Arg->getType();
    bool InReg = false;
    RegNumT Reg;

    InReg = CC.argInReg(Ty, i, &Reg);

    if (!InReg) {
      // Stack-passed argument: record its offset, aligning per type. Vectors
      // are aligned as i64 and passed as a sequence of i32 containers.
      if (isVectorType(Ty)) {
        auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        for (Variable *Elem : ArgVec->getContainers()) {
          StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
          ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        }
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
        StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
      }
      ++ArgNum;
      continue;
    }

    if (isVectorType(Ty)) {
      // Register-passed vector: the first two 32-bit containers always go in
      // GPRs; the placement of the last two depends on the argument position.
      auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
      Operand *Elem0 = ArgVec->getContainers()[0];
      Operand *Elem1 = ArgVec->getContainers()[1];
      GPRArgs.push_back(
          std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
      GPRArgs.push_back(
          std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
      Operand *Elem2 = ArgVec->getContainers()[2];
      Operand *Elem3 = ArgVec->getContainers()[3];
      // First argument is passed in $4:$5:$6:$7
      // Second and rest arguments are passed in $6:$7:stack:stack
      if (ArgNum == 0) {
        GPRArgs.push_back(
            std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
        GPRArgs.push_back(
            std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
      }
    } else if (Ty == IceType_i64) {
      // i64 occupies a GPR pair; lo/hi halves get the pair's first/second
      // register respectively.
      Operand *Lo = loOperand(Arg);
      Operand *Hi = hiOperand(Arg);
      GPRArgs.push_back(
          std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
      GPRArgs.push_back(
          std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
    } else if (isScalarIntegerType(Ty)) {
      GPRArgs.push_back(std::make_pair(Arg, Reg));
    } else {
      FPArgs.push_back(std::make_pair(Arg, Reg));
    }
    ++ArgNum;
  }

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandMIPS32Mem *Addr;
    constexpr bool SignExt = false;
    // If the offset fits in the memory operand's immediate field, address
    // relative to SP directly; otherwise materialize SP + offset into a new
    // base register first.
    if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Generate the call instruction.  Assign its result to a temporary with high
  // register allocation weight.

  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      return;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Scalar integers are returned in $v0.
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      break;
    case IceType_i64:
      // i64 is returned in the $v0:$v1 pair.
      ReturnReg = I32Reg(RegMIPS32::Reg_V0);
      ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
      break;
    case IceType_f32:
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
      break;
    case IceType_f64:
      ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      // Integer vectors come back in consecutive registers starting at $v0;
      // pin each 32-bit container accordingly.
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
      RetVec->initVecElement(Func);
      for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
        auto *Var = RetVec->getContainers()[i];
        Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
      }
      break;
    }
    case IceType_v4f32:
      // v4f32 is returned through the stack buffer allocated above
      // (RetVecFloat); the value materialized in $v0 is not the result itself.
      ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &FPArg : FPArgs) {
    RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
  }
  for (auto &GPRArg : GPRArgs) {
    RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
  }

  // Generate a FakeUse of register arguments so that they do not get dead code
  // eliminated as a result of the FakeKill of scratch registers after the call.
  // These fake-uses need to be placed here to avoid argument registers from
  // being used during the legalizeToReg() calls above.
  for (auto *RegArg : RegArgs) {
    Context.insert<InstFakeUse>(RegArg);
  }

  // If variable alloca is used the extra 16 bytes for argument build area
  // will be allocated on stack before a call.
  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);

  Inst *NewCall;

  // We don't need to define the return register if it is a vector.
  // We have inserted fake defs of it just after the call.
  if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
    Variable *RetReg = nullptr;
    NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
    Context.insert(NewCall);
  } else {
    NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
                  .jal(ReturnReg, CallTarget);
  }

  // Restore SP if it was adjusted for the argument build area above.
  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);

  // Insert a fake use of stack pointer to avoid dead code elimination of addiu
  // instruction.
  Context.insert<InstFakeUse>(SP);

  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  if (ReturnReg) {
    // For vector returns, fake-define each 32-bit container so the register
    // allocator sees them as written by the call.
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert(InstFakeDef::create(Func, Var));
      }
    }
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert<InstFakeUse>(Var);
      }
    } else {
      Context.insert<InstFakeUse>(ReturnReg);
    }
  }

  if (Dest == nullptr)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (RetVecFloat) {
      // v4f32: load the four 32-bit lanes back from the stack return buffer
      // whose address is in RetVecFloat.
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      auto *TBase = legalizeToReg(RetVecFloat);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        auto *Var = DestVecOn32->getContainers()[i];
        auto *TVar = makeReg(IceType_i32);
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_i32, TBase,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        _lw(TVar, Mem);
        _mov(Var, TVar);
      }
    } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      // Integer vector: copy container-by-container out of the return
      // registers.
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
      }
    } else if (ReturnRegHi) {
      // i64: copy lo/hi halves from $v0/$v1.
      assert(Dest->getType() == IceType_i64);
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isScalarFloatingType(Dest->getType()) ||
             isVectorType(Dest->getType()));
      _mov(Dest, ReturnReg);
    }
  }
}
3639
3640void TargetMIPS32::lowerCast(const InstCast *Instr) {
3641  InstCast::OpKind CastKind = Instr->getCastKind();
3642  Variable *Dest = Instr->getDest();
3643  Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3644  const Type DestTy = Dest->getType();
3645  const Type Src0Ty = Src0->getType();
3646  const uint32_t ShiftAmount =
3647      (Src0Ty == IceType_i1
3648           ? INT32_BITS - 1
3649           : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3650  const uint32_t Mask =
3651      (Src0Ty == IceType_i1
3652           ? 1
3653           : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
3654
3655  if (isVectorType(DestTy)) {
3656    llvm::report_fatal_error("Cast: Destination type is vector");
3657    return;
3658  }
3659  switch (CastKind) {
3660  default:
3661    Func->setError("Cast type not supported");
3662    return;
3663  case InstCast::Sext: {
3664    if (DestTy == IceType_i64) {
3665      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3666      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3667      Variable *Src0R = legalizeToReg(Src0);
3668      Variable *T1_Lo = I32Reg();
3669      Variable *T2_Lo = I32Reg();
3670      Variable *T_Hi = I32Reg();
3671      if (Src0Ty == IceType_i1) {
3672        _sll(T1_Lo, Src0R, INT32_BITS - 1);
3673        _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3674        _mov(DestHi, T2_Lo);
3675        _mov(DestLo, T2_Lo);
3676      } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3677        _sll(T1_Lo, Src0R, ShiftAmount);
3678        _sra(T2_Lo, T1_Lo, ShiftAmount);
3679        _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3680        _mov(DestHi, T_Hi);
3681        _mov(DestLo, T2_Lo);
3682      } else if (Src0Ty == IceType_i32) {
3683        _mov(T1_Lo, Src0R);
3684        _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3685        _mov(DestHi, T_Hi);
3686        _mov(DestLo, T1_Lo);
3687      }
3688    } else {
3689      Variable *Src0R = legalizeToReg(Src0);
3690      Variable *T1 = makeReg(DestTy);
3691      Variable *T2 = makeReg(DestTy);
3692      if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3693          Src0Ty == IceType_i16) {
3694        _sll(T1, Src0R, ShiftAmount);
3695        _sra(T2, T1, ShiftAmount);
3696        _mov(Dest, T2);
3697      }
3698    }
3699    break;
3700  }
3701  case InstCast::Zext: {
3702    if (DestTy == IceType_i64) {
3703      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3704      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3705      Variable *Src0R = legalizeToReg(Src0);
3706      Variable *T_Lo = I32Reg();
3707      Variable *T_Hi = I32Reg();
3708
3709      if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3710        _andi(T_Lo, Src0R, Mask);
3711      else if (Src0Ty == IceType_i32)
3712        _mov(T_Lo, Src0R);
3713      else
3714        assert(Src0Ty != IceType_i64);
3715      _mov(DestLo, T_Lo);
3716
3717      auto *Zero = getZero();
3718      _addiu(T_Hi, Zero, 0);
3719      _mov(DestHi, T_Hi);
3720    } else {
3721      Variable *Src0R = legalizeToReg(Src0);
3722      Variable *T = makeReg(DestTy);
3723      if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3724          Src0Ty == IceType_i16) {
3725        _andi(T, Src0R, Mask);
3726        _mov(Dest, T);
3727      }
3728    }
3729    break;
3730  }
3731  case InstCast::Trunc: {
3732    if (Src0Ty == IceType_i64)
3733      Src0 = loOperand(Src0);
3734    Variable *Src0R = legalizeToReg(Src0);
3735    Variable *T = makeReg(DestTy);
3736    switch (DestTy) {
3737    case IceType_i1:
3738      _andi(T, Src0R, 0x1);
3739      break;
3740    case IceType_i8:
3741      _andi(T, Src0R, 0xff);
3742      break;
3743    case IceType_i16:
3744      _andi(T, Src0R, 0xffff);
3745      break;
3746    default:
3747      _mov(T, Src0R);
3748      break;
3749    }
3750    _mov(Dest, T);
3751    break;
3752  }
3753  case InstCast::Fptrunc: {
3754    assert(Dest->getType() == IceType_f32);
3755    assert(Src0->getType() == IceType_f64);
3756    auto *DestR = legalizeToReg(Dest);
3757    auto *Src0R = legalizeToReg(Src0);
3758    _cvt_s_d(DestR, Src0R);
3759    _mov(Dest, DestR);
3760    break;
3761  }
3762  case InstCast::Fpext: {
3763    assert(Dest->getType() == IceType_f64);
3764    assert(Src0->getType() == IceType_f32);
3765    auto *DestR = legalizeToReg(Dest);
3766    auto *Src0R = legalizeToReg(Src0);
3767    _cvt_d_s(DestR, Src0R);
3768    _mov(Dest, DestR);
3769    break;
3770  }
3771  case InstCast::Fptosi:
3772  case InstCast::Fptoui: {
3773    if (llvm::isa<Variable64On32>(Dest)) {
3774      llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3775      return;
3776    }
3777    if (DestTy != IceType_i64) {
3778      if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3779        Variable *Src0R = legalizeToReg(Src0);
3780        Variable *FTmp = makeReg(IceType_f32);
3781        _trunc_w_s(FTmp, Src0R);
3782        _mov(Dest, FTmp);
3783        return;
3784      }
3785      if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3786        Variable *Src0R = legalizeToReg(Src0);
3787        Variable *FTmp = makeReg(IceType_f64);
3788        _trunc_w_d(FTmp, Src0R);
3789        _mov(Dest, FTmp);
3790        return;
3791      }
3792    }
3793    llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3794    break;
3795  }
3796  case InstCast::Sitofp:
3797  case InstCast::Uitofp: {
3798    if (llvm::isa<Variable64On32>(Dest)) {
3799      llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3800      return;
3801    }
3802    if (Src0Ty != IceType_i64) {
3803      Variable *Src0R = legalizeToReg(Src0);
3804      auto *T0R = Src0R;
3805      if (Src0Ty != IceType_i32) {
3806        T0R = makeReg(IceType_i32);
3807        if (CastKind == InstCast::Uitofp)
3808          lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3809        else
3810          lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3811      }
3812      if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3813        Variable *FTmp1 = makeReg(IceType_f32);
3814        Variable *FTmp2 = makeReg(IceType_f32);
3815        _mtc1(FTmp1, T0R);
3816        _cvt_s_w(FTmp2, FTmp1);
3817        _mov(Dest, FTmp2);
3818        return;
3819      }
3820      if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3821        Variable *FTmp1 = makeReg(IceType_f64);
3822        Variable *FTmp2 = makeReg(IceType_f64);
3823        _mtc1(FTmp1, T0R);
3824        _cvt_d_w(FTmp2, FTmp1);
3825        _mov(Dest, FTmp2);
3826        return;
3827      }
3828    }
3829    llvm::report_fatal_error("Source is i64 in i32-to-fp");
3830    break;
3831  }
3832  case InstCast::Bitcast: {
3833    Operand *Src0 = Instr->getSrc(0);
3834    if (DestTy == Src0->getType()) {
3835      auto *Assign = InstAssign::create(Func, Dest, Src0);
3836      lowerAssign(Assign);
3837      return;
3838    }
3839    if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3840      llvm::report_fatal_error(
3841          "Bitcast: vector type should have been prelowered.");
3842      return;
3843    }
3844    switch (DestTy) {
3845    case IceType_NUM:
3846    case IceType_void:
3847      llvm::report_fatal_error("Unexpected bitcast.");
3848    case IceType_i1:
3849      UnimplementedLoweringError(this, Instr);
3850      break;
3851    case IceType_i8:
3852      assert(Src0->getType() == IceType_v8i1);
3853      llvm::report_fatal_error(
3854          "i8 to v8i1 conversion should have been prelowered.");
3855      break;
3856    case IceType_i16:
3857      assert(Src0->getType() == IceType_v16i1);
3858      llvm::report_fatal_error(
3859          "i16 to v16i1 conversion should have been prelowered.");
3860      break;
3861    case IceType_i32:
3862    case IceType_f32: {
3863      Variable *Src0R = legalizeToReg(Src0);
3864      _mov(Dest, Src0R);
3865      break;
3866    }
3867    case IceType_i64: {
3868      assert(Src0->getType() == IceType_f64);
3869      Variable *Src0R = legalizeToReg(Src0);
3870      auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3871      T->initHiLo(Func);
3872      T->getHi()->setMustNotHaveReg();
3873      T->getLo()->setMustNotHaveReg();
3874      Context.insert<InstFakeDef>(T->getHi());
3875      Context.insert<InstFakeDef>(T->getLo());
3876      _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3877      _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3878      lowerAssign(InstAssign::create(Func, Dest, T));
3879      break;
3880    }
3881    case IceType_f64: {
3882      assert(Src0->getType() == IceType_i64);
3883      const uint32_t Mask = 0xFFFFFFFF;
3884      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3885        Variable *RegHi, *RegLo;
3886        const uint64_t Value = C64->getValue();
3887        uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3888        uint64_t Lower32Bits = Value & Mask;
3889        RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3890        RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3891        _mov(Dest, RegHi, RegLo);
3892      } else {
3893        auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3894        auto *RegLo = legalizeToReg(loOperand(Var64On32));
3895        auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3896        _mov(Dest, RegHi, RegLo);
3897      }
3898      break;
3899    }
3900    default:
3901      llvm::report_fatal_error("Unexpected bitcast.");
3902    }
3903    break;
3904  }
3905  }
3906}
3907
3908void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3909  Variable *Dest = Instr->getDest();
3910  const Type DestTy = Dest->getType();
3911  Operand *Src1 = Instr->getSrc(1);
3912  if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3913    const uint32_t Index = Imm->getValue();
3914    Variable *TDest = makeReg(DestTy);
3915    Variable *TReg = makeReg(DestTy);
3916    auto *Src0 = legalizeUndef(Instr->getSrc(0));
3917    auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3918    // Number of elements in each container
3919    uint32_t ElemPerCont =
3920        typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
3921    auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3922    auto *SrcE = legalizeToReg(Src);
3923    // Position of the element in the container
3924    uint32_t PosInCont = Index % ElemPerCont;
3925    if (ElemPerCont == 1) {
3926      _mov(TDest, SrcE);
3927    } else if (ElemPerCont == 2) {
3928      switch (PosInCont) {
3929      case 0:
3930        _andi(TDest, SrcE, 0xffff);
3931        break;
3932      case 1:
3933        _srl(TDest, SrcE, 16);
3934        break;
3935      default:
3936        llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3937        break;
3938      }
3939    } else if (ElemPerCont == 4) {
3940      switch (PosInCont) {
3941      case 0:
3942        _andi(TDest, SrcE, 0xff);
3943        break;
3944      case 1:
3945        _srl(TReg, SrcE, 8);
3946        _andi(TDest, TReg, 0xff);
3947        break;
3948      case 2:
3949        _srl(TReg, SrcE, 16);
3950        _andi(TDest, TReg, 0xff);
3951        break;
3952      case 3:
3953        _srl(TDest, SrcE, 24);
3954        break;
3955      default:
3956        llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3957        break;
3958      }
3959    }
3960    if (typeElementType(Src0R->getType()) == IceType_i1) {
3961      Variable *TReg1 = makeReg(DestTy);
3962      _andi(TReg1, TDest, 0x1);
3963      _mov(Dest, TReg1);
3964    } else {
3965      _mov(Dest, TDest);
3966    }
3967    return;
3968  }
3969  llvm::report_fatal_error("ExtractElement requires a constant index");
3970}
3971
3972void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3973  Variable *Dest = Instr->getDest();
3974  if (isVectorType(Dest->getType())) {
3975    llvm::report_fatal_error("Fcmp: Destination type is vector");
3976    return;
3977  }
3978
3979  auto *Src0 = Instr->getSrc(0);
3980  auto *Src1 = Instr->getSrc(1);
3981  auto *Zero = getZero();
3982
3983  InstFcmp::FCond Cond = Instr->getCondition();
3984  auto *DestR = makeReg(IceType_i32);
3985  auto *Src0R = legalizeToReg(Src0);
3986  auto *Src1R = legalizeToReg(Src1);
3987  const Type Src0Ty = Src0->getType();
3988
3989  Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3990
3991  switch (Cond) {
3992  default: {
3993    llvm::report_fatal_error("Unhandled fp comparison.");
3994    return;
3995  }
3996  case InstFcmp::False: {
3997    Context.insert<InstFakeUse>(Src0R);
3998    Context.insert<InstFakeUse>(Src1R);
3999    _addiu(DestR, Zero, 0);
4000    _mov(Dest, DestR);
4001    break;
4002  }
4003  case InstFcmp::Oeq: {
4004    if (Src0Ty == IceType_f32) {
4005      _c_eq_s(Src0R, Src1R);
4006    } else {
4007      _c_eq_d(Src0R, Src1R);
4008    }
4009    _addiu(DestR, Zero, 1);
4010    _movf(DestR, Zero, FCC0);
4011    _mov(Dest, DestR);
4012    break;
4013  }
4014  case InstFcmp::Ogt: {
4015    if (Src0Ty == IceType_f32) {
4016      _c_ule_s(Src0R, Src1R);
4017    } else {
4018      _c_ule_d(Src0R, Src1R);
4019    }
4020    _addiu(DestR, Zero, 1);
4021    _movt(DestR, Zero, FCC0);
4022    _mov(Dest, DestR);
4023    break;
4024  }
4025  case InstFcmp::Oge: {
4026    if (Src0Ty == IceType_f32) {
4027      _c_ult_s(Src0R, Src1R);
4028    } else {
4029      _c_ult_d(Src0R, Src1R);
4030    }
4031    _addiu(DestR, Zero, 1);
4032    _movt(DestR, Zero, FCC0);
4033    _mov(Dest, DestR);
4034    break;
4035  }
4036  case InstFcmp::Olt: {
4037    if (Src0Ty == IceType_f32) {
4038      _c_olt_s(Src0R, Src1R);
4039    } else {
4040      _c_olt_d(Src0R, Src1R);
4041    }
4042    _addiu(DestR, Zero, 1);
4043    _movf(DestR, Zero, FCC0);
4044    _mov(Dest, DestR);
4045    break;
4046  }
4047  case InstFcmp::Ole: {
4048    if (Src0Ty == IceType_f32) {
4049      _c_ole_s(Src0R, Src1R);
4050    } else {
4051      _c_ole_d(Src0R, Src1R);
4052    }
4053    _addiu(DestR, Zero, 1);
4054    _movf(DestR, Zero, FCC0);
4055    _mov(Dest, DestR);
4056    break;
4057  }
4058  case InstFcmp::One: {
4059    if (Src0Ty == IceType_f32) {
4060      _c_ueq_s(Src0R, Src1R);
4061    } else {
4062      _c_ueq_d(Src0R, Src1R);
4063    }
4064    _addiu(DestR, Zero, 1);
4065    _movt(DestR, Zero, FCC0);
4066    _mov(Dest, DestR);
4067    break;
4068  }
4069  case InstFcmp::Ord: {
4070    if (Src0Ty == IceType_f32) {
4071      _c_un_s(Src0R, Src1R);
4072    } else {
4073      _c_un_d(Src0R, Src1R);
4074    }
4075    _addiu(DestR, Zero, 1);
4076    _movt(DestR, Zero, FCC0);
4077    _mov(Dest, DestR);
4078    break;
4079  }
4080  case InstFcmp::Ueq: {
4081    if (Src0Ty == IceType_f32) {
4082      _c_ueq_s(Src0R, Src1R);
4083    } else {
4084      _c_ueq_d(Src0R, Src1R);
4085    }
4086    _addiu(DestR, Zero, 1);
4087    _movf(DestR, Zero, FCC0);
4088    _mov(Dest, DestR);
4089    break;
4090  }
4091  case InstFcmp::Ugt: {
4092    if (Src0Ty == IceType_f32) {
4093      _c_ole_s(Src0R, Src1R);
4094    } else {
4095      _c_ole_d(Src0R, Src1R);
4096    }
4097    _addiu(DestR, Zero, 1);
4098    _movt(DestR, Zero, FCC0);
4099    _mov(Dest, DestR);
4100    break;
4101  }
4102  case InstFcmp::Uge: {
4103    if (Src0Ty == IceType_f32) {
4104      _c_olt_s(Src0R, Src1R);
4105    } else {
4106      _c_olt_d(Src0R, Src1R);
4107    }
4108    _addiu(DestR, Zero, 1);
4109    _movt(DestR, Zero, FCC0);
4110    _mov(Dest, DestR);
4111    break;
4112  }
4113  case InstFcmp::Ult: {
4114    if (Src0Ty == IceType_f32) {
4115      _c_ult_s(Src0R, Src1R);
4116    } else {
4117      _c_ult_d(Src0R, Src1R);
4118    }
4119    _addiu(DestR, Zero, 1);
4120    _movf(DestR, Zero, FCC0);
4121    _mov(Dest, DestR);
4122    break;
4123  }
4124  case InstFcmp::Ule: {
4125    if (Src0Ty == IceType_f32) {
4126      _c_ule_s(Src0R, Src1R);
4127    } else {
4128      _c_ule_d(Src0R, Src1R);
4129    }
4130    _addiu(DestR, Zero, 1);
4131    _movf(DestR, Zero, FCC0);
4132    _mov(Dest, DestR);
4133    break;
4134  }
4135  case InstFcmp::Une: {
4136    if (Src0Ty == IceType_f32) {
4137      _c_eq_s(Src0R, Src1R);
4138    } else {
4139      _c_eq_d(Src0R, Src1R);
4140    }
4141    _addiu(DestR, Zero, 1);
4142    _movt(DestR, Zero, FCC0);
4143    _mov(Dest, DestR);
4144    break;
4145  }
4146  case InstFcmp::Uno: {
4147    if (Src0Ty == IceType_f32) {
4148      _c_un_s(Src0R, Src1R);
4149    } else {
4150      _c_un_d(Src0R, Src1R);
4151    }
4152    _addiu(DestR, Zero, 1);
4153    _movf(DestR, Zero, FCC0);
4154    _mov(Dest, DestR);
4155    break;
4156  }
4157  case InstFcmp::True: {
4158    Context.insert<InstFakeUse>(Src0R);
4159    Context.insert<InstFakeUse>(Src1R);
4160    _addiu(DestR, Zero, 1);
4161    _mov(Dest, DestR);
4162    break;
4163  }
4164  }
4165}
4166
// Lowers a 64-bit integer compare by operating on the 32-bit lo/hi halves of
// each operand, since MIPS32 has no 64-bit compare instruction.
// Eq/Ne reduce both halves with XOR and OR. The relational conditions compare
// the hi halves first; when the hi halves are equal (detected by XOR producing
// zero), MOVZ substitutes the result of an *unsigned* compare of the lo
// halves (the lo half carries no sign, so its compare is always unsigned).
void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
  Operand *Src0 = legalize(Instr->getSrc(0));
  Operand *Src1 = legalize(Instr->getSrc(1));
  Variable *Dest = Instr->getDest();
  InstIcmp::ICond Condition = Instr->getCondition();

  // Materialize all four 32-bit halves in registers.
  Variable *Src0LoR = legalizeToReg(loOperand(Src0));
  Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
  Variable *Src1LoR = legalizeToReg(loOperand(Src1));
  Variable *Src1HiR = legalizeToReg(hiOperand(Src1));

  switch (Condition) {
  default:
    llvm_unreachable("unexpected condition");
    return;
  case InstIcmp::Eq: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR); // T1 == 0 iff hi halves are equal
    _xor(T2, Src0LoR, Src1LoR); // T2 == 0 iff lo halves are equal
    _or(T3, T1, T2);            // T3 == 0 iff both halves are equal
    _sltiu(T4, T3, 1);          // T4 = (T3 == 0) ? 1 : 0
    _mov(Dest, T4);
    return;
  }
  case InstIcmp::Ne: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR); // T1 == 0 iff hi halves are equal
    _xor(T2, Src0LoR, Src1LoR); // T2 == 0 iff lo halves are equal
    _or(T3, T1, T2);            // T3 == 0 iff both halves are equal
    _sltu(T4, getZero(), T3);   // T4 = (T3 != 0) ? 1 : 0
    _mov(Dest, T4);
    return;
  }
  case InstIcmp::Sgt: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _slt(T2, Src1HiR, Src0HiR);  // signed: Src0Hi > Src1Hi
    _sltu(T3, Src1LoR, Src0LoR); // unsigned: Src0Lo > Src1Lo
    _movz(T2, T3, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Ugt: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _sltu(T2, Src1HiR, Src0HiR); // unsigned: Src0Hi > Src1Hi
    _sltu(T3, Src1LoR, Src0LoR); // unsigned: Src0Lo > Src1Lo
    _movz(T2, T3, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Sge: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _slt(T2, Src0HiR, Src1HiR);  // signed: Src0Hi < Src1Hi
    _xori(T3, T2, 1);            // T3 = (Src0Hi >= Src1Hi)
    _sltu(T4, Src0LoR, Src1LoR); // unsigned: Src0Lo < Src1Lo
    _xori(T5, T4, 1);            // T5 = (Src0Lo >= Src1Lo)
    _movz(T3, T5, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T3);
    return;
  }
  case InstIcmp::Uge: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _sltu(T2, Src0HiR, Src1HiR); // unsigned: Src0Hi < Src1Hi
    _xori(T3, T2, 1);            // T3 = (Src0Hi >= Src1Hi)
    _sltu(T4, Src0LoR, Src1LoR); // unsigned: Src0Lo < Src1Lo
    _xori(T5, T4, 1);            // T5 = (Src0Lo >= Src1Lo)
    _movz(T3, T5, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T3);
    return;
  }
  case InstIcmp::Slt: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _slt(T2, Src0HiR, Src1HiR);  // signed: Src0Hi < Src1Hi
    _sltu(T3, Src0LoR, Src1LoR); // unsigned: Src0Lo < Src1Lo
    _movz(T2, T3, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Ult: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _sltu(T2, Src0HiR, Src1HiR); // unsigned: Src0Hi < Src1Hi
    _sltu(T3, Src0LoR, Src1LoR); // unsigned: Src0Lo < Src1Lo
    _movz(T2, T3, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Sle: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _slt(T2, Src1HiR, Src0HiR);  // signed: Src0Hi > Src1Hi
    _xori(T3, T2, 1);            // T3 = (Src0Hi <= Src1Hi)
    _sltu(T4, Src1LoR, Src0LoR); // unsigned: Src0Lo > Src1Lo
    _xori(T5, T4, 1);            // T5 = (Src0Lo <= Src1Lo)
    _movz(T3, T5, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T3);
    return;
  }
  case InstIcmp::Ule: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);  // T1 == 0 iff hi halves are equal
    _sltu(T2, Src1HiR, Src0HiR); // unsigned: Src0Hi > Src1Hi
    _xori(T3, T2, 1);            // T3 = (Src0Hi <= Src1Hi)
    _sltu(T4, Src1LoR, Src0LoR); // unsigned: Src0Lo > Src1Lo
    _xori(T5, T4, 1);            // T5 = (Src0Lo <= Src1Lo)
    _movz(T3, T5, T1);           // if hi halves equal, take the lo result
    _mov(Dest, T3);
    return;
  }
  }
}
4312
4313void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4314  auto *Src0 = Instr->getSrc(0);
4315  auto *Src1 = Instr->getSrc(1);
4316  if (Src0->getType() == IceType_i64) {
4317    lower64Icmp(Instr);
4318    return;
4319  }
4320  Variable *Dest = Instr->getDest();
4321  if (isVectorType(Dest->getType())) {
4322    llvm::report_fatal_error("Icmp: Destination type is vector");
4323    return;
4324  }
4325  InstIcmp::ICond Cond = Instr->getCondition();
4326  auto *Src0R = legalizeToReg(Src0);
4327  auto *Src1R = legalizeToReg(Src1);
4328  const Type Src0Ty = Src0R->getType();
4329  const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4330  Variable *Src0RT = I32Reg();
4331  Variable *Src1RT = I32Reg();
4332
4333  if (Src0Ty != IceType_i32) {
4334    _sll(Src0RT, Src0R, ShAmt);
4335    _sll(Src1RT, Src1R, ShAmt);
4336  } else {
4337    _mov(Src0RT, Src0R);
4338    _mov(Src1RT, Src1R);
4339  }
4340
4341  switch (Cond) {
4342  case InstIcmp::Eq: {
4343    auto *DestT = I32Reg();
4344    auto *T = I32Reg();
4345    _xor(T, Src0RT, Src1RT);
4346    _sltiu(DestT, T, 1);
4347    _mov(Dest, DestT);
4348    return;
4349  }
4350  case InstIcmp::Ne: {
4351    auto *DestT = I32Reg();
4352    auto *T = I32Reg();
4353    auto *Zero = getZero();
4354    _xor(T, Src0RT, Src1RT);
4355    _sltu(DestT, Zero, T);
4356    _mov(Dest, DestT);
4357    return;
4358  }
4359  case InstIcmp::Ugt: {
4360    auto *DestT = I32Reg();
4361    _sltu(DestT, Src1RT, Src0RT);
4362    _mov(Dest, DestT);
4363    return;
4364  }
4365  case InstIcmp::Uge: {
4366    auto *DestT = I32Reg();
4367    auto *T = I32Reg();
4368    _sltu(T, Src0RT, Src1RT);
4369    _xori(DestT, T, 1);
4370    _mov(Dest, DestT);
4371    return;
4372  }
4373  case InstIcmp::Ult: {
4374    auto *DestT = I32Reg();
4375    _sltu(DestT, Src0RT, Src1RT);
4376    _mov(Dest, DestT);
4377    return;
4378  }
4379  case InstIcmp::Ule: {
4380    auto *DestT = I32Reg();
4381    auto *T = I32Reg();
4382    _sltu(T, Src1RT, Src0RT);
4383    _xori(DestT, T, 1);
4384    _mov(Dest, DestT);
4385    return;
4386  }
4387  case InstIcmp::Sgt: {
4388    auto *DestT = I32Reg();
4389    _slt(DestT, Src1RT, Src0RT);
4390    _mov(Dest, DestT);
4391    return;
4392  }
4393  case InstIcmp::Sge: {
4394    auto *DestT = I32Reg();
4395    auto *T = I32Reg();
4396    _slt(T, Src0RT, Src1RT);
4397    _xori(DestT, T, 1);
4398    _mov(Dest, DestT);
4399    return;
4400  }
4401  case InstIcmp::Slt: {
4402    auto *DestT = I32Reg();
4403    _slt(DestT, Src0RT, Src1RT);
4404    _mov(Dest, DestT);
4405    return;
4406  }
4407  case InstIcmp::Sle: {
4408    auto *DestT = I32Reg();
4409    auto *T = I32Reg();
4410    _slt(T, Src1RT, Src0RT);
4411    _xori(DestT, T, 1);
4412    _mov(Dest, DestT);
4413    return;
4414  }
4415  default:
4416    llvm_unreachable("Invalid ICmp operator");
4417    return;
4418  }
4419}
4420
// Lowers insertelement for vectors modeled as a group of i32 containers
// (VariableVecOn32): copies the source vector into a temporary vector,
// rewrites only the container holding the target element with shift/mask
// arithmetic, then assigns the temporary back to Dest. The element index
// must be a compile-time constant; anything else is a fatal error.
// NOTE(review): the dyn_cast results (Src0R, VDest, TVDest) are dereferenced
// without null checks — vector operands are assumed to be VariableVecOn32
// here; confirm against the operand legalization path.
void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();
  Operand *Src2 = Instr->getSrc(2);
  if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
    const uint32_t Index = Imm->getValue();
    // Vector to insert in
    auto *Src0 = legalizeUndef(Instr->getSrc(0));
    auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
    // Number of elements in each container
    uint32_t ElemPerCont =
        typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
    // Source Element: the i32 container that currently holds the element
    auto *Src = Src0R->getContainers()[Index / ElemPerCont];
    auto *SrcE = Src;
    // Only needed in a register when we must merge bits with the new element
    if (ElemPerCont > 1)
      SrcE = legalizeToReg(Src);
    // Dest is a vector
    auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
    VDest->initVecElement(Func);
    // Temp vector variable
    auto *TDest = makeReg(DestTy);
    auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
    TVDest->initVecElement(Func);
    // Destination element
    auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
    // Element to insert
    auto *Src1R = legalizeToReg(Instr->getSrc(1));
    auto *TReg1 = makeReg(IceType_i32);
    auto *TReg2 = makeReg(IceType_i32);
    auto *TReg3 = makeReg(IceType_i32);
    auto *TReg4 = makeReg(IceType_i32);
    auto *TReg5 = makeReg(IceType_i32);
    auto *TDReg = makeReg(IceType_i32);
    // Position of the element in the container
    uint32_t PosInCont = Index % ElemPerCont;
    // Load source vector in a temporary vector
    for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
      auto *DCont = TVDest->getContainers()[i];
      // Do not define DstE as we are going to redefine it
      if (DCont == DstE)
        continue;
      auto *SCont = Src0R->getContainers()[i];
      auto *TReg = makeReg(IceType_i32);
      _mov(TReg, SCont);
      _mov(DCont, TReg);
    }
    // Insert the element
    if (ElemPerCont == 1) {
      // Element occupies a full container: plain move, no masking needed
      _mov(DstE, Src1R);
    } else if (ElemPerCont == 2) {
      // i16 elements: two per container
      switch (PosInCont) {
      case 0:
        _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
        _srl(TReg2, SrcE, 16);
        _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      case 1:
        _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
        _sll(TReg2, SrcE, 16);
        _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      default:
        llvm::report_fatal_error("InsertElement: Invalid PosInCont");
        break;
      }
    } else if (ElemPerCont == 4) {
      // i8 elements: four per container
      switch (PosInCont) {
      case 0:
        _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
        _srl(TReg2, SrcE, 8);
        _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      case 1:
        _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
        _sll(TReg5, TReg1, 8);     // Position in the destination
        _lui(TReg2, Ctx->getConstantInt32(0xffff)); // TReg2 = 0xffff0000
        _ori(TReg3, TReg2, 0x00ff);                 // TReg3 = 0xffff00ff
        _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
        _or(TDReg, TReg5, TReg4);
        _mov(DstE, TDReg);
        break;
      case 2:
        _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
        _sll(TReg5, TReg1, 16);    // Position in the destination
        _lui(TReg2, Ctx->getConstantInt32(0xff00)); // TReg2 = 0xff000000
        _ori(TReg3, TReg2, 0xffff);                 // TReg3 = 0xff00ffff
        _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
        _or(TDReg, TReg5, TReg4);
        _mov(DstE, TDReg);
        break;
      case 3:
        _sll(TReg1, Src1R, 24); // Position in the destination
        _sll(TReg2, SrcE, 8);
        _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      default:
        llvm::report_fatal_error("InsertElement: Invalid PosInCont");
        break;
      }
    }
    // Write back temporary vector to the destination
    auto *Assign = InstAssign::create(Func, Dest, TDest);
    lowerAssign(Assign);
    return;
  }
  llvm::report_fatal_error("InsertElement requires a constant index");
}
4537
4538void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4539                                   Variable *Dest, Variable *Src0,
4540                                   Variable *Src1) {
4541  switch (Operation) {
4542  default:
4543    llvm::report_fatal_error("Unknown AtomicRMW operation");
4544  case Intrinsics::AtomicExchange:
4545    llvm::report_fatal_error("Can't handle Atomic xchg operation");
4546  case Intrinsics::AtomicAdd:
4547    _addu(Dest, Src0, Src1);
4548    break;
4549  case Intrinsics::AtomicAnd:
4550    _and(Dest, Src0, Src1);
4551    break;
4552  case Intrinsics::AtomicSub:
4553    _subu(Dest, Src0, Src1);
4554    break;
4555  case Intrinsics::AtomicOr:
4556    _or(Dest, Src0, Src1);
4557    break;
4558  case Intrinsics::AtomicXor:
4559    _xor(Dest, Src0, Src1);
4560    break;
4561  }
4562}
4563
4564void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
4565  Variable *Dest = Instr->getDest();
4566  Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4567
4568  Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
4569  switch (ID) {
4570  case Intrinsics::AtomicLoad: {
4571    assert(isScalarIntegerType(DestTy));
4572    // We require the memory address to be naturally aligned. Given that is the
4573    // case, then normal loads are atomic.
4574    if (!Intrinsics::isMemoryOrderValid(
4575            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4576      Func->setError("Unexpected memory ordering for AtomicLoad");
4577      return;
4578    }
4579    if (DestTy == IceType_i64) {
4580      llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4581      return;
4582    } else if (DestTy == IceType_i32) {
4583      auto *T1 = makeReg(DestTy);
4584      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4585      auto *Base = legalizeToReg(Instr->getArg(0));
4586      auto *Addr = formMemoryOperand(Base, DestTy);
4587      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4588      InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4589      constexpr CfgNode *NoTarget = nullptr;
4590      _sync();
4591      Context.insert(Retry);
4592      Sandboxer(this).ll(T1, Addr);
4593      _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4594      _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4595      Sandboxer(this).sc(RegAt, Addr);
4596      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4597      Context.insert(Exit);
4598      _sync();
4599      _mov(Dest, T1);
4600      Context.insert<InstFakeUse>(T1);
4601    } else {
4602      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4603      auto *Base = legalizeToReg(Instr->getArg(0));
4604      auto *T1 = makeReg(IceType_i32);
4605      auto *T2 = makeReg(IceType_i32);
4606      auto *T3 = makeReg(IceType_i32);
4607      auto *T4 = makeReg(IceType_i32);
4608      auto *T5 = makeReg(IceType_i32);
4609      auto *T6 = makeReg(IceType_i32);
4610      auto *SrcMask = makeReg(IceType_i32);
4611      auto *Tdest = makeReg(IceType_i32);
4612      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4613      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4614      InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4615      constexpr CfgNode *NoTarget = nullptr;
4616      _sync();
4617      _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4618      _andi(T2, Base, 3);        // Last two bits of the address
4619      _and(T3, Base, T1);        // Align the address
4620      _sll(T4, T2, 3);
4621      _ori(T5, getZero(), Mask);
4622      _sllv(SrcMask, T5, T4); // Source mask
4623      auto *Addr = formMemoryOperand(T3, IceType_i32);
4624      Context.insert(Retry);
4625      Sandboxer(this).ll(T6, Addr);
4626      _and(Tdest, T6, SrcMask);
4627      _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4628      _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4629      Sandboxer(this).sc(RegAt, Addr);
4630      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4631      Context.insert(Exit);
4632      auto *T7 = makeReg(IceType_i32);
4633      auto *T8 = makeReg(IceType_i32);
4634      _srlv(T7, Tdest, T4);
4635      _andi(T8, T7, Mask);
4636      _sync();
4637      _mov(Dest, T8);
4638      Context.insert<InstFakeUse>(T6);
4639      Context.insert<InstFakeUse>(SrcMask);
4640    }
4641    return;
4642  }
4643  case Intrinsics::AtomicStore: {
4644    // We require the memory address to be naturally aligned. Given that is the
4645    // case, then normal stores are atomic.
4646    if (!Intrinsics::isMemoryOrderValid(
4647            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4648      Func->setError("Unexpected memory ordering for AtomicStore");
4649      return;
4650    }
4651    auto *Val = Instr->getArg(0);
4652    auto Ty = Val->getType();
4653    if (Ty == IceType_i64) {
4654      llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4655      return;
4656    } else if (Ty == IceType_i32) {
4657      auto *Val = legalizeToReg(Instr->getArg(0));
4658      auto *Base = legalizeToReg(Instr->getArg(1));
4659      auto *Addr = formMemoryOperand(Base, Ty);
4660      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4661      constexpr CfgNode *NoTarget = nullptr;
4662      auto *T1 = makeReg(IceType_i32);
4663      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4664      _sync();
4665      Context.insert(Retry);
4666      Sandboxer(this).ll(T1, Addr);
4667      _mov(RegAt, Val);
4668      Sandboxer(this).sc(RegAt, Addr);
4669      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4670      Context.insert<InstFakeUse>(T1); // To keep LL alive
4671      _sync();
4672    } else {
4673      auto *Val = legalizeToReg(Instr->getArg(0));
4674      auto *Base = legalizeToReg(Instr->getArg(1));
4675      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4676      constexpr CfgNode *NoTarget = nullptr;
4677      auto *T1 = makeReg(IceType_i32);
4678      auto *T2 = makeReg(IceType_i32);
4679      auto *T3 = makeReg(IceType_i32);
4680      auto *T4 = makeReg(IceType_i32);
4681      auto *T5 = makeReg(IceType_i32);
4682      auto *T6 = makeReg(IceType_i32);
4683      auto *T7 = makeReg(IceType_i32);
4684      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4685      auto *SrcMask = makeReg(IceType_i32);
4686      auto *DstMask = makeReg(IceType_i32);
4687      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
4688      _sync();
4689      _addiu(T1, getZero(), -4);
4690      _and(T7, Base, T1);
4691      auto *Addr = formMemoryOperand(T7, Ty);
4692      _andi(T2, Base, 3);
4693      _sll(T3, T2, 3);
4694      _ori(T4, getZero(), Mask);
4695      _sllv(T5, T4, T3);
4696      _sllv(T6, Val, T3);
4697      _nor(SrcMask, getZero(), T5);
4698      _and(DstMask, T6, T5);
4699      Context.insert(Retry);
4700      Sandboxer(this).ll(RegAt, Addr);
4701      _and(RegAt, RegAt, SrcMask);
4702      _or(RegAt, RegAt, DstMask);
4703      Sandboxer(this).sc(RegAt, Addr);
4704      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4705      Context.insert<InstFakeUse>(SrcMask);
4706      Context.insert<InstFakeUse>(DstMask);
4707      _sync();
4708    }
4709    return;
4710  }
4711  case Intrinsics::AtomicCmpxchg: {
4712    assert(isScalarIntegerType(DestTy));
4713    // We require the memory address to be naturally aligned. Given that is the
4714    // case, then normal loads are atomic.
4715    if (!Intrinsics::isMemoryOrderValid(
4716            ID, getConstantMemoryOrder(Instr->getArg(3)),
4717            getConstantMemoryOrder(Instr->getArg(4)))) {
4718      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4719      return;
4720    }
4721
4722    InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4723    InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4724    constexpr CfgNode *NoTarget = nullptr;
4725    auto *New = Instr->getArg(2);
4726    auto *Expected = Instr->getArg(1);
4727    auto *ActualAddress = Instr->getArg(0);
4728
4729    if (DestTy == IceType_i64) {
4730      llvm::report_fatal_error(
4731          "AtomicCmpxchg.i64 should have been prelowered.");
4732      return;
4733    } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4734      auto *NewR = legalizeToReg(New);
4735      auto *ExpectedR = legalizeToReg(Expected);
4736      auto *ActualAddressR = legalizeToReg(ActualAddress);
4737      const uint32_t ShiftAmount =
4738          (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4739      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4740      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4741      auto *T1 = I32Reg();
4742      auto *T2 = I32Reg();
4743      auto *T3 = I32Reg();
4744      auto *T4 = I32Reg();
4745      auto *T5 = I32Reg();
4746      auto *T6 = I32Reg();
4747      auto *T7 = I32Reg();
4748      auto *T8 = I32Reg();
4749      auto *T9 = I32Reg();
4750      _addiu(RegAt, getZero(), -4);
4751      _and(T1, ActualAddressR, RegAt);
4752      auto *Addr = formMemoryOperand(T1, DestTy);
4753      _andi(RegAt, ActualAddressR, 3);
4754      _sll(T2, RegAt, 3);
4755      _ori(RegAt, getZero(), Mask);
4756      _sllv(T3, RegAt, T2);
4757      _nor(T4, getZero(), T3);
4758      _andi(RegAt, ExpectedR, Mask);
4759      _sllv(T5, RegAt, T2);
4760      _andi(RegAt, NewR, Mask);
4761      _sllv(T6, RegAt, T2);
4762      _sync();
4763      Context.insert(Retry);
4764      Sandboxer(this).ll(T7, Addr);
4765      _and(T8, T7, T3);
4766      _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4767      _and(RegAt, T7, T4);
4768      _or(T9, RegAt, T6);
4769      Sandboxer(this).sc(T9, Addr);
4770      _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4771      Context.insert<InstFakeUse>(getZero());
4772      Context.insert(Exit);
4773      _srlv(RegAt, T8, T2);
4774      _sll(RegAt, RegAt, ShiftAmount);
4775      _sra(RegAt, RegAt, ShiftAmount);
4776      _mov(Dest, RegAt);
4777      _sync();
4778      Context.insert<InstFakeUse>(T3);
4779      Context.insert<InstFakeUse>(T4);
4780      Context.insert<InstFakeUse>(T5);
4781      Context.insert<InstFakeUse>(T6);
4782      Context.insert<InstFakeUse>(T8);
4783      Context.insert<InstFakeUse>(ExpectedR);
4784      Context.insert<InstFakeUse>(NewR);
4785    } else {
4786      auto *T1 = I32Reg();
4787      auto *T2 = I32Reg();
4788      auto *NewR = legalizeToReg(New);
4789      auto *ExpectedR = legalizeToReg(Expected);
4790      auto *ActualAddressR = legalizeToReg(ActualAddress);
4791      _sync();
4792      Context.insert(Retry);
4793      Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4794      _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4795      _mov(T2, NewR);
4796      Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4797      _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4798      Context.insert<InstFakeUse>(getZero());
4799      Context.insert(Exit);
4800      _mov(Dest, T1);
4801      _sync();
4802      Context.insert<InstFakeUse>(ExpectedR);
4803      Context.insert<InstFakeUse>(NewR);
4804    }
4805    return;
4806  }
4807  case Intrinsics::AtomicRMW: {
4808    assert(isScalarIntegerType(DestTy));
4809    // We require the memory address to be naturally aligned. Given that is the
4810    // case, then normal loads are atomic.
4811    if (!Intrinsics::isMemoryOrderValid(
4812            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4813      Func->setError("Unexpected memory ordering for AtomicRMW");
4814      return;
4815    }
4816
4817    constexpr CfgNode *NoTarget = nullptr;
4818    InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4819    auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4820        llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4821    auto *New = Instr->getArg(2);
4822    auto *ActualAddress = Instr->getArg(1);
4823
4824    if (DestTy == IceType_i64) {
4825      llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4826      return;
4827    } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4828      const uint32_t ShiftAmount =
4829          INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4830      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4831      auto *NewR = legalizeToReg(New);
4832      auto *ActualAddressR = legalizeToReg(ActualAddress);
4833      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4834      auto *T1 = I32Reg();
4835      auto *T2 = I32Reg();
4836      auto *T3 = I32Reg();
4837      auto *T4 = I32Reg();
4838      auto *T5 = I32Reg();
4839      auto *T6 = I32Reg();
4840      auto *T7 = I32Reg();
4841      _sync();
4842      _addiu(RegAt, getZero(), -4);
4843      _and(T1, ActualAddressR, RegAt);
4844      _andi(RegAt, ActualAddressR, 3);
4845      _sll(T2, RegAt, 3);
4846      _ori(RegAt, getZero(), Mask);
4847      _sllv(T3, RegAt, T2);
4848      _nor(T4, getZero(), T3);
4849      _sllv(T5, NewR, T2);
4850      Context.insert(Retry);
4851      Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4852      if (Operation != Intrinsics::AtomicExchange) {
4853        createArithInst(Operation, RegAt, T6, T5);
4854        _and(RegAt, RegAt, T3);
4855      }
4856      _and(T7, T6, T4);
4857      if (Operation == Intrinsics::AtomicExchange) {
4858        _or(RegAt, T7, T5);
4859      } else {
4860        _or(RegAt, T7, RegAt);
4861      }
4862      Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4863      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4864      Context.insert<InstFakeUse>(getZero());
4865      _and(RegAt, T6, T3);
4866      _srlv(RegAt, RegAt, T2);
4867      _sll(RegAt, RegAt, ShiftAmount);
4868      _sra(RegAt, RegAt, ShiftAmount);
4869      _mov(Dest, RegAt);
4870      _sync();
4871      Context.insert<InstFakeUse>(NewR);
4872      Context.insert<InstFakeUse>(Dest);
4873    } else {
4874      auto *T1 = I32Reg();
4875      auto *T2 = I32Reg();
4876      auto *NewR = legalizeToReg(New);
4877      auto *ActualAddressR = legalizeToReg(ActualAddress);
4878      _sync();
4879      Context.insert(Retry);
4880      Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4881      if (Operation == Intrinsics::AtomicExchange) {
4882        _mov(T2, NewR);
4883      } else {
4884        createArithInst(Operation, T2, T1, NewR);
4885      }
4886      Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4887      _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4888      Context.insert<InstFakeUse>(getZero());
4889      _mov(Dest, T1);
4890      _sync();
4891      Context.insert<InstFakeUse>(NewR);
4892      Context.insert<InstFakeUse>(Dest);
4893    }
4894    return;
4895  }
4896  case Intrinsics::AtomicFence:
4897  case Intrinsics::AtomicFenceAll:
4898    assert(Dest == nullptr);
4899    _sync();
4900    return;
4901  case Intrinsics::AtomicIsLockFree: {
4902    Operand *ByteSize = Instr->getArg(0);
4903    auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4904    auto *T = I32Reg();
4905    if (CI == nullptr) {
4906      // The PNaCl ABI requires the byte size to be a compile-time constant.
4907      Func->setError("AtomicIsLockFree byte size should be compile-time const");
4908      return;
4909    }
4910    static constexpr int32_t NotLockFree = 0;
4911    static constexpr int32_t LockFree = 1;
4912    int32_t Result = NotLockFree;
4913    switch (CI->getValue()) {
4914    case 1:
4915    case 2:
4916    case 4:
4917      Result = LockFree;
4918      break;
4919    }
4920    _addiu(T, getZero(), Result);
4921    _mov(Dest, T);
4922    return;
4923  }
4924  case Intrinsics::Bswap: {
4925    auto *Src = Instr->getArg(0);
4926    const Type SrcTy = Src->getType();
4927    assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4928           SrcTy == IceType_i64);
4929    switch (SrcTy) {
4930    case IceType_i16: {
4931      auto *T1 = I32Reg();
4932      auto *T2 = I32Reg();
4933      auto *T3 = I32Reg();
4934      auto *T4 = I32Reg();
4935      auto *SrcR = legalizeToReg(Src);
4936      _sll(T1, SrcR, 8);
4937      _lui(T2, Ctx->getConstantInt32(255));
4938      _and(T1, T1, T2);
4939      _sll(T3, SrcR, 24);
4940      _or(T1, T3, T1);
4941      _srl(T4, T1, 16);
4942      _mov(Dest, T4);
4943      return;
4944    }
4945    case IceType_i32: {
4946      auto *T1 = I32Reg();
4947      auto *T2 = I32Reg();
4948      auto *T3 = I32Reg();
4949      auto *T4 = I32Reg();
4950      auto *T5 = I32Reg();
4951      auto *SrcR = legalizeToReg(Src);
4952      _srl(T1, SrcR, 24);
4953      _srl(T2, SrcR, 8);
4954      _andi(T2, T2, 0xFF00);
4955      _or(T1, T2, T1);
4956      _sll(T4, SrcR, 8);
4957      _lui(T3, Ctx->getConstantInt32(255));
4958      _and(T4, T4, T3);
4959      _sll(T5, SrcR, 24);
4960      _or(T4, T5, T4);
4961      _or(T4, T4, T1);
4962      _mov(Dest, T4);
4963      return;
4964    }
4965    case IceType_i64: {
4966      auto *T1 = I32Reg();
4967      auto *T2 = I32Reg();
4968      auto *T3 = I32Reg();
4969      auto *T4 = I32Reg();
4970      auto *T5 = I32Reg();
4971      auto *T6 = I32Reg();
4972      auto *T7 = I32Reg();
4973      auto *T8 = I32Reg();
4974      auto *T9 = I32Reg();
4975      auto *T10 = I32Reg();
4976      auto *T11 = I32Reg();
4977      auto *T12 = I32Reg();
4978      auto *T13 = I32Reg();
4979      auto *T14 = I32Reg();
4980      auto *T15 = I32Reg();
4981      auto *T16 = I32Reg();
4982      auto *T17 = I32Reg();
4983      auto *T18 = I32Reg();
4984      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4985      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4986      Src = legalizeUndef(Src);
4987      auto *SrcLoR = legalizeToReg(loOperand(Src));
4988      auto *SrcHiR = legalizeToReg(hiOperand(Src));
4989      _sll(T1, SrcHiR, 8);
4990      _srl(T2, SrcHiR, 24);
4991      _srl(T3, SrcHiR, 8);
4992      _andi(T3, T3, 0xFF00);
4993      _lui(T4, Ctx->getConstantInt32(255));
4994      _or(T5, T3, T2);
4995      _and(T6, T1, T4);
4996      _sll(T7, SrcHiR, 24);
4997      _or(T8, T7, T6);
4998      _srl(T9, SrcLoR, 24);
4999      _srl(T10, SrcLoR, 8);
5000      _andi(T11, T10, 0xFF00);
5001      _or(T12, T8, T5);
5002      _or(T13, T11, T9);
5003      _sll(T14, SrcLoR, 8);
5004      _and(T15, T14, T4);
5005      _sll(T16, SrcLoR, 24);
5006      _or(T17, T16, T15);
5007      _or(T18, T17, T13);
5008      _mov(DestLo, T12);
5009      _mov(DestHi, T18);
5010      return;
5011    }
5012    default:
5013      llvm::report_fatal_error("Control flow should never have reached here.");
5014    }
5015    return;
5016  }
5017  case Intrinsics::Ctpop: {
5018    llvm::report_fatal_error("Ctpop should have been prelowered.");
5019    return;
5020  }
5021  case Intrinsics::Ctlz: {
5022    auto *Src = Instr->getArg(0);
5023    const Type SrcTy = Src->getType();
5024    assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5025    switch (SrcTy) {
5026    case IceType_i32: {
5027      auto *T = I32Reg();
5028      auto *SrcR = legalizeToReg(Src);
5029      _clz(T, SrcR);
5030      _mov(Dest, T);
5031      break;
5032    }
5033    case IceType_i64: {
5034      auto *T1 = I32Reg();
5035      auto *T2 = I32Reg();
5036      auto *T3 = I32Reg();
5037      auto *T4 = I32Reg();
5038      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5039      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5040      Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5041      Variable *SrcLoR = legalizeToReg(loOperand(Src));
5042      _clz(T1, SrcHiR);
5043      _clz(T2, SrcLoR);
5044      _addiu(T3, T2, 32);
5045      _movn(T3, T1, SrcHiR);
5046      _addiu(T4, getZero(), 0);
5047      _mov(DestHi, T4);
5048      _mov(DestLo, T3);
5049      break;
5050    }
5051    default:
5052      llvm::report_fatal_error("Control flow should never have reached here.");
5053    }
5054    break;
5055  }
5056  case Intrinsics::Cttz: {
5057    auto *Src = Instr->getArg(0);
5058    const Type SrcTy = Src->getType();
5059    assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5060    switch (SrcTy) {
5061    case IceType_i32: {
5062      auto *T1 = I32Reg();
5063      auto *T2 = I32Reg();
5064      auto *T3 = I32Reg();
5065      auto *T4 = I32Reg();
5066      auto *T5 = I32Reg();
5067      auto *T6 = I32Reg();
5068      auto *SrcR = legalizeToReg(Src);
5069      _addiu(T1, SrcR, -1);
5070      _not(T2, SrcR);
5071      _and(T3, T2, T1);
5072      _clz(T4, T3);
5073      _addiu(T5, getZero(), 32);
5074      _subu(T6, T5, T4);
5075      _mov(Dest, T6);
5076      break;
5077    }
5078    case IceType_i64: {
5079      auto *THi1 = I32Reg();
5080      auto *THi2 = I32Reg();
5081      auto *THi3 = I32Reg();
5082      auto *THi4 = I32Reg();
5083      auto *THi5 = I32Reg();
5084      auto *THi6 = I32Reg();
5085      auto *TLo1 = I32Reg();
5086      auto *TLo2 = I32Reg();
5087      auto *TLo3 = I32Reg();
5088      auto *TLo4 = I32Reg();
5089      auto *TLo5 = I32Reg();
5090      auto *TLo6 = I32Reg();
5091      auto *TResHi = I32Reg();
5092      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5093      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5094      Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5095      Variable *SrcLoR = legalizeToReg(loOperand(Src));
5096      _addiu(THi1, SrcHiR, -1);
5097      _not(THi2, SrcHiR);
5098      _and(THi3, THi2, THi1);
5099      _clz(THi4, THi3);
5100      _addiu(THi5, getZero(), 64);
5101      _subu(THi6, THi5, THi4);
5102      _addiu(TLo1, SrcLoR, -1);
5103      _not(TLo2, SrcLoR);
5104      _and(TLo3, TLo2, TLo1);
5105      _clz(TLo4, TLo3);
5106      _addiu(TLo5, getZero(), 32);
5107      _subu(TLo6, TLo5, TLo4);
5108      _movn(THi6, TLo6, SrcLoR);
5109      _addiu(TResHi, getZero(), 0);
5110      _mov(DestHi, TResHi);
5111      _mov(DestLo, THi6);
5112      break;
5113    }
5114    default:
5115      llvm::report_fatal_error("Control flow should never have reached here.");
5116    }
5117    return;
5118  }
5119  case Intrinsics::Fabs: {
5120    if (isScalarFloatingType(DestTy)) {
5121      Variable *T = makeReg(DestTy);
5122      if (DestTy == IceType_f32) {
5123        _abs_s(T, legalizeToReg(Instr->getArg(0)));
5124      } else {
5125        _abs_d(T, legalizeToReg(Instr->getArg(0)));
5126      }
5127      _mov(Dest, T);
5128    }
5129    return;
5130  }
5131  case Intrinsics::Longjmp: {
5132    llvm::report_fatal_error("longjmp should have been prelowered.");
5133    return;
5134  }
5135  case Intrinsics::Memcpy: {
5136    llvm::report_fatal_error("memcpy should have been prelowered.");
5137    return;
5138  }
5139  case Intrinsics::Memmove: {
5140    llvm::report_fatal_error("memmove should have been prelowered.");
5141    return;
5142  }
5143  case Intrinsics::Memset: {
5144    llvm::report_fatal_error("memset should have been prelowered.");
5145    return;
5146  }
5147  case Intrinsics::NaClReadTP: {
5148    if (SandboxingType != ST_NaCl)
5149      llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5150    else {
5151      auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5152      Context.insert<InstFakeDef>(T8);
5153      Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5154          Func, getPointerType(), T8,
5155          llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5156      _mov(Dest, TP);
5157    }
5158    return;
5159  }
5160  case Intrinsics::Setjmp: {
5161    llvm::report_fatal_error("setjmp should have been prelowered.");
5162    return;
5163  }
5164  case Intrinsics::Sqrt: {
5165    if (isScalarFloatingType(DestTy)) {
5166      Variable *T = makeReg(DestTy);
5167      if (DestTy == IceType_f32) {
5168        _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5169      } else {
5170        _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5171      }
5172      _mov(Dest, T);
5173    } else {
5174      assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5175      UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5176    }
5177    return;
5178  }
5179  case Intrinsics::Stacksave: {
5180    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5181    _mov(Dest, SP);
5182    return;
5183  }
5184  case Intrinsics::Stackrestore: {
5185    Variable *Val = legalizeToReg(Instr->getArg(0));
5186    Sandboxer(this).reset_sp(Val);
5187    return;
5188  }
5189  case Intrinsics::Trap: {
5190    const uint32_t TrapCodeZero = 0;
5191    _teq(getZero(), getZero(), TrapCodeZero);
5192    return;
5193  }
5194  case Intrinsics::LoadSubVector: {
5195    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5196    return;
5197  }
5198  case Intrinsics::StoreSubVector: {
5199    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5200    return;
5201  }
5202  default: // UnknownIntrinsic
5203    Func->setError("Unexpected intrinsic");
5204    return;
5205  }
5206  return;
5207}
5208
5209void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5210  // A Load instruction can be treated the same as an Assign instruction, after
5211  // the source operand is transformed into an OperandMIPS32Mem operand.
5212  Type Ty = Instr->getDest()->getType();
5213  Operand *Src0 = formMemoryOperand(Instr->getSourceAddress(), Ty);
5214  Variable *DestLoad = Instr->getDest();
5215  auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5216  lowerAssign(Assign);
5217}
5218
5219namespace {
5220void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5221                    const Inst *Reason) {
5222  if (!BuildDefs::dump())
5223    return;
5224  if (!Func->isVerbose(IceV_AddrOpt))
5225    return;
5226  OstreamLocker _(Func->getContext());
5227  Ostream &Str = Func->getContext()->getStrDump();
5228  Str << "Instruction: ";
5229  Reason->dumpDecorated(Func);
5230  Str << "  results in Base=";
5231  if (Base)
5232    Base->dump(Func);
5233  else
5234    Str << "<null>";
5235  Str << ", Offset=" << Offset << "\n";
5236}
5237
5238bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5239                 int32_t *Offset, const Inst **Reason) {
5240  // Var originates from Var=SrcVar ==> set Var:=SrcVar
5241  if (*Var == nullptr)
5242    return false;
5243  const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5244  if (!VarAssign)
5245    return false;
5246  assert(!VMetadata->isMultiDef(*Var));
5247  if (!llvm::isa<InstAssign>(VarAssign))
5248    return false;
5249
5250  Operand *SrcOp = VarAssign->getSrc(0);
5251  bool Optimized = false;
5252  if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5253    if (!VMetadata->isMultiDef(SrcVar) ||
5254        // TODO: ensure SrcVar stays single-BB
5255        false) {
5256      Optimized = true;
5257      *Var = SrcVar;
5258    } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5259      int32_t MoreOffset = Const->getValue();
5260      int32_t NewOffset = MoreOffset + *Offset;
5261      if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5262        return false;
5263      *Var = nullptr;
5264      *Offset += NewOffset;
5265      Optimized = true;
5266    }
5267  }
5268
5269  if (Optimized) {
5270    *Reason = VarAssign;
5271  }
5272
5273  return Optimized;
5274}
5275
5276bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5277  if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5278    switch (Arith->getOp()) {
5279    default:
5280      return false;
5281    case InstArithmetic::Add:
5282    case InstArithmetic::Sub:
5283      *Kind = Arith->getOp();
5284      return true;
5285    }
5286  }
5287  return false;
5288}
5289
bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  if (*Base == nullptr)
    return false;
  // Only fold when the base has a unique reaching definition.
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr) {
    return false;
  }
  assert(!VMetadata->isMultiDef(*Base));

  // The defining instruction must be an integer add or subtract.
  auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
  if (ArithInst == nullptr)
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(ArithInst, &Kind))
    return false;
  bool IsAdd = Kind == InstArithmetic::Add;
  Operand *Src0 = ArithInst->getSrc(0);
  Operand *Src1 = ArithInst->getSrc(1);
  auto *Var0 = llvm::dyn_cast<Variable>(Src0);
  auto *Var1 = llvm::dyn_cast<Variable>(Src1);
  auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
  auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
  Variable *NewBase = nullptr;
  int32_t NewOffset = *Offset;

  // An operand that is neither a Variable nor an i32 immediate is expected
  // to be a relocatable constant, which cannot be folded into the offset.
  if (Var0 == nullptr && Const0 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src0));
    return false;
  }

  if (Var1 == nullptr && Const1 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src1));
    return false;
  }

  if (Var0 && Var1)
    // TODO(jpp): merge base/index splitting into here.
    return false;
  // Const - Var cannot be expressed as base + immediate offset.
  if (!IsAdd && Var1)
    return false;
  if (Var0)
    NewBase = Var0;
  else if (Var1)
    NewBase = Var1;
  // Compute the updated constant offset.
  if (Const0) {
    int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }
  if (Const1) {
    int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }

  // Update the computed address parameters once we are sure optimization
  // is valid.
  *Base = NewBase;
  *Offset = NewOffset;
  *Reason = BaseInst;
  return true;
}
5360} // end of anonymous namespace
5361
// Repeatedly folds assignments and add/sub-of-constant definitions into the
// base of a load/store address, producing a base+offset memory operand, or
// nullptr when no memory operand can be formed (e.g. vector types).
OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
                                                   const Inst *LdSt,
                                                   Operand *Base) {
  assert(Base != nullptr);
  int32_t OffsetImm = 0;

  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nAddress mode formation:\t";
    LdSt->dumpDecorated(Func);
  }

  if (isVectorType(Ty)) {
    return nullptr;
  }

  auto *BaseVar = llvm::dyn_cast<Variable>(Base);
  if (BaseVar == nullptr)
    return nullptr;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *Reason = nullptr;

  // Iterate to a fixed point: each pass may expose further folds.
  do {
    if (Reason != nullptr) {
      dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
      Reason = nullptr;
    }

    if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }

    if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }
  } while (Reason);

  if (BaseVar == nullptr) {
    // We need base register rather than just OffsetImm. Move the OffsetImm to
    // BaseVar and form 0(BaseVar) addressing.
    const Type PointerType = getPointerType();
    BaseVar = makeReg(PointerType);
    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
    OffsetImm = 0;
  } else if (OffsetImm != 0) {
    // If the OffsetImm is more than signed 16-bit value then add it in the
    // BaseVar and form 0(BaseVar) addressing.
    const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
    const InstArithmetic::OpKind Op =
        OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
    constexpr bool ZeroExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
      const Type PointerType = getPointerType();
      Variable *T = makeReg(PointerType);
      Context.insert<InstArithmetic>(Op, T, BaseVar,
                                     Ctx->getConstantInt32(PositiveOffset));
      BaseVar = T;
      OffsetImm = 0;
    }
  }

  assert(BaseVar != nullptr);
  // At this point the offset must fit in the 16-bit immediate field.
  assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
                       : (OffsetImm & 0x0000ffff) == OffsetImm);

  return OperandMIPS32Mem::create(
      Func, Ty, BaseVar,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}
5434
5435void TargetMIPS32::doAddressOptLoad() {
5436  Inst *Instr = iteratorToInst(Context.getCur());
5437  assert(llvm::isa<InstLoad>(Instr));
5438  Variable *Dest = Instr->getDest();
5439  Operand *Addr = Instr->getSrc(0);
5440  if (OperandMIPS32Mem *Mem =
5441          formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5442    Instr->setDeleted();
5443    Context.insert<InstLoad>(Dest, Mem);
5444  }
5445}
5446
5447void TargetMIPS32::randomlyInsertNop(float Probability,
5448                                     RandomNumberGenerator &RNG) {
5449  RandomNumberGeneratorWrapper RNGW(RNG);
5450  if (RNGW.getTrueWithProbability(Probability)) {
5451    _nop();
5452  }
5453}
5454
// Phi instructions are prelowered (see prelowerPhis); reaching here means a
// phi survived into the regular instruction list, which is an error.
void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
  Func->setError("Phi found in regular instruction list");
}
5458
// Lowers a return by moving the return value into the ABI-mandated
// register(s), then emitting the jr-$ra sequence.  FakeUse instructions keep
// the extra return registers live until the ret.
void TargetMIPS32::lowerRet(const InstRet *Instr) {
  Variable *Reg = nullptr;
  if (Instr->hasRetValue()) {
    Operand *Src0 = Instr->getRetValue();
    switch (Src0->getType()) {
    case IceType_f32: {
      // f32 is returned in $f0.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_f64: {
      // f64 is returned in the $f0/$f1 pair.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32: {
      // Small integers are returned in $v0.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i64: {
      // i64 is split across $v0 (low) and $v1 (high).
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
      Reg = R0;
      Context.insert<InstFakeUse>(R1);
      break;
    }
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      // Integer vectors are returned in four i32 containers:
      // $v0, $v1, $a0, $a1.
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Variable *V0 =
          legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
      Variable *V1 =
          legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
      Variable *A0 =
          legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
      Variable *A1 =
          legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
      Reg = V0;
      Context.insert<InstFakeUse>(V1);
      Context.insert<InstFakeUse>(A0);
      Context.insert<InstFakeUse>(A1);
      break;
    }
    case IceType_v4f32: {
      // f32 vectors are returned via a buffer whose address arrives in the
      // implicit argument register; store the containers through it and
      // return the buffer address in $v0.
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Reg = getImplicitRet();
      auto *RegT = legalizeToReg(Reg);
      // Return the vector through buffer in implicit argument a0
      for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_f32, RegT,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
        _sw(Var, Mem);
      }
      Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      _mov(V0, Reg); // move v0,a0
      Context.insert<InstFakeUse>(Reg);
      Context.insert<InstFakeUse>(V0);
      break;
    }
    default:
      llvm::report_fatal_error("Ret: Invalid type.");
      break;
    }
  }
  _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
}
5539
// Lowers select via MIPS conditional moves: start from the false value and
// overwrite it with the true value when the condition register is non-zero
// (movn / movn.s / movn.d).  i64 selects operate on the lo/hi halves.
void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    llvm::report_fatal_error("Select: Destination type is vector");
    return;
  }

  Variable *DestR = nullptr;
  Variable *DestHiR = nullptr;
  Variable *SrcTR = nullptr;
  Variable *SrcTHiR = nullptr;
  Variable *SrcFR = nullptr;
  Variable *SrcFHiR = nullptr;

  if (DestTy == IceType_i64) {
    // Split the 64-bit operands into lo/hi register pairs.
    DestR = llvm::cast<Variable>(loOperand(Dest));
    DestHiR = llvm::cast<Variable>(hiOperand(Dest));
    SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
    SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
  } else {
    SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
    SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
  }

  Variable *ConditionR = legalizeToReg(Instr->getCondition());

  assert(Instr->getCondition()->getType() == IceType_i1);

  switch (DestTy) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
    // SrcFR = (ConditionR != 0) ? SrcTR : SrcFR
    _movn(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_i64:
    // Conditionally move both halves on the same condition.
    _movn(SrcFR, SrcTR, ConditionR);
    _movn(SrcFHiR, SrcTHiR, ConditionR);
    _mov(DestR, SrcFR);
    _mov(DestHiR, SrcFHiR);
    break;
  case IceType_f32:
    _movn_s(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_f64:
    _movn_d(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  default:
    llvm::report_fatal_error("Select: Invalid type.");
  }
}
5598
// Vector shuffle lowering is not implemented for MIPS32.
void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
  UnimplementedLoweringError(this, Instr);
}
5602
// Lowers a store: i64 values are stored as two 32-bit word stores of the
// hi/lo halves, vectors as one word store per 32-bit container, and
// everything else as a single sw.
void TargetMIPS32::lowerStore(const InstStore *Instr) {
  Operand *Value = Instr->getData();
  Operand *Addr = Instr->getAddr();
  OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToReg(hiOperand(Value));
    Variable *ValueLo = legalizeToReg(loOperand(Value));
    _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
    _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Value->getType())) {
    // Store each 32-bit container at its word offset from the base address.
    auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
    for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
      auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
      auto *MCont = llvm::cast<OperandMIPS32Mem>(
          getOperandAtIndex(NewAddr, IceType_i32, i));
      _sw(DCont, MCont);
    }
  } else {
    Variable *ValueR = legalizeToReg(Value);
    _sw(ValueR, NewAddr);
  }
}
5628
5629void TargetMIPS32::doAddressOptStore() {
5630  Inst *Instr = iteratorToInst(Context.getCur());
5631  assert(llvm::isa<InstStore>(Instr));
5632  Operand *Src = Instr->getSrc(0);
5633  Operand *Addr = Instr->getSrc(1);
5634  if (OperandMIPS32Mem *Mem =
5635          formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5636    Instr->setDeleted();
5637    Context.insert<InstStore>(Src, Mem);
5638  }
5639}
5640
// Lowers a switch as a chain of compare-and-branch instructions, one per
// case, falling through to the default label.  For i64 comparisons the high
// words are compared first, skipping past the low-word compare on mismatch.
void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
  Operand *Src = Instr->getComparison();
  SizeT NumCases = Instr->getNumCases();
  if (Src->getType() == IceType_i64) {
    Src = legalizeUndef(Src);
    Variable *Src0Lo = legalizeToReg(loOperand(Src));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
      CfgNode *TargetTrue = Instr->getLabel(I);
      constexpr CfgNode *NoTarget = nullptr;
      ValueHi = legalizeToReg(ValueHi);
      // If the high words differ, skip the low-word comparison entirely.
      InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
      _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
          CondMIPS32::Cond::NE);
      ValueLo = legalizeToReg(ValueLo);
      _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
      Context.insert(IntraLabel);
    }
    _br(Instr->getLabelDefault());
    return;
  }
  Variable *SrcVar = legalizeToReg(Src);
  assert(SrcVar->mustHaveReg());
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
    CfgNode *TargetTrue = Instr->getLabel(I);
    constexpr CfgNode *NoTargetFalse = nullptr;
    Value = legalizeToReg(Value);
    _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
  }
  _br(Instr->getLabelDefault());
}
5675
// Breakpoint lowering is not implemented for MIPS32.
void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}
5679
5680void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5681  const uint32_t TrapCodeZero = 0;
5682  _teq(getZero(), getZero(), TrapCodeZero);
5683}
5684
5685void TargetMIPS32::lowerOther(const Inst *Instr) {
5686  if (llvm::isa<InstMIPS32Sync>(Instr)) {
5687    _sync();
5688  } else {
5689    TargetLowering::lowerOther(Instr);
5690  }
5691}
5692
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
// integrity of liveness analysis. Undef values are also turned into zeroes,
// since loOperand() and hiOperand() don't expect Undef input.
// Delegates to the shared 32-bit phi-prelowering helper.
void TargetMIPS32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}
5699
5700void TargetMIPS32::postLower() {
5701  if (Func->getOptLevel() == Opt_m1)
5702    return;
5703  markRedefinitions();
5704  Context.availabilityUpdate();
5705}
5706
// Register-allocation randomization is not implemented for MIPS32; the
// parameters are consumed only to silence unused-parameter warnings.
void TargetMIPS32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(getFlags());
}
5715
5716/* TODO(jvoung): avoid duplicate symbols with multiple targets.
5717void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5718  llvm_unreachable("Not expecting to emitWithoutDollar undef");
5719}
5720
5721void ConstantUndef::emit(GlobalContext *) const {
5722  llvm_unreachable("undef value encountered by emitter.");
5723}
5724*/
5725
// Data lowering for MIPS32 only needs the shared global context.
TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
5728
// Generate .MIPS.abiflags section. This section contains a versioned data
// structure with essential information required for loader to determine the
// requirements of the application.
void TargetDataMIPS32::emitTargetRODataSections() {
  // Default-constructed flags structure is written out verbatim; its byte
  // layout is the section contents.
  struct MipsABIFlagsSection Flags;
  ELFObjectWriter *Writer = Ctx->getObjectWriter();
  const std::string Name = ".MIPS.abiflags";
  const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
  const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
  const llvm::ELF::Elf64_Xword ShAddralign = 8;
  const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
  Writer->writeTargetRODataSection(
      Name, ShType, ShFlags, ShAddralign, ShEntsize,
      llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
}
5744
// Emits global variable definitions, either directly into the ELF object
// writer or as assembly, depending on the configured output file type.
void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
                                    const std::string &SectionSuffix) {
  const bool IsPIC = getFlags().getUseNonsfi();
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    // Direct ELF emission: relocations for globals use R_MIPS_32.
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    // Textual emission: honor the translate-only filter per variable.
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}
5764
5765namespace {
5766template <typename T> struct ConstantPoolEmitterTraits;
5767
5768static_assert(sizeof(uint64_t) == 8,
5769              "uint64_t is supposed to be 8 bytes wide.");
5770
5771// TODO(jaydeep.patil): implement the following when implementing constant
5772// randomization:
5773//  * template <> struct ConstantPoolEmitterTraits<uint8_t>
5774//  * template <> struct ConstantPoolEmitterTraits<uint16_t>
5775//  * template <> struct ConstantPoolEmitterTraits<uint32_t>
// Traits describing how f32 constants are emitted into the constant pool.
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  // Returns the raw 32-bit pattern of Value, zero-extended to 64 bits.
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5792
// Traits describing how f64 constants are emitted into the constant pool.
template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  // Returns the raw 64-bit pattern of Value.
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5806
// Emits one pooled constant as "<label>:\n\t<tag>\t0x<bits>" followed by a
// human-readable comment with the type name and value.
template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  T Value = Const->getValue();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}
5820
// Emits the whole constant pool for type T as a mergeable rodata section,
// skipping constants that were not flagged for pooling.
template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  // Constants are at least 4-byte aligned regardless of their natural
  // alignment.
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
  // ".align" takes a power of two: 2 for 4-byte, 3 for 8-byte alignment.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
  if (getFlags().getReorderPooledConstants()) {
    // TODO(jaydeep.patil): add constant pooling.
    UnimplementedError(getFlags());
  }
  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }
    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
5844} // end of anonymous namespace
5845
// Emits the f32 and f64 constant pools, either into the ELF object writer or
// as assembly text, depending on the configured output file type.
void TargetDataMIPS32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}
5864
// Jump-table emission is a no-op for MIPS32 (switches are lowered to
// compare-and-branch chains; see lowerSwitch).
void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}
5869
5870// Helper for legalize() to emit the right code to lower an operand to a
5871// register of the appropriate type.
5872Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5873  Type Ty = Src->getType();
5874  Variable *Reg = makeReg(Ty, RegNum);
5875  if (isVectorType(Ty)) {
5876    llvm::report_fatal_error("Invalid copy from vector type.");
5877  } else {
5878    if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5879      _lw(Reg, Mem);
5880    } else {
5881      _mov(Reg, Src);
5882    }
5883  }
5884  return Reg;
5885}
5886
// Returns a version of From that satisfies the Allowed operand-kind mask,
// materializing a copy into a register (RegNum, if specified) whenever the
// operand's current form is not permitted.
Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandMIPS32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  // When no specific register is requested, try the substitution recorded
  // in the availability map; a compatible substitute avoids a copy.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands:
  // OperandMIPS32Mem, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // Base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    // A nonzero offset must fit the (sign-extended) immediate field of the
    // load/store form for this type.
    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    // Use the memory operand directly when permitted; otherwise load it
    // into a register.
    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Replace undef with a legal constant of the same type first.
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      // Relocatable symbol: materialize with a %hi/%lo pair (lui + addiu).
      // NOTE(review): both Reg and TReg are requested with the same RegNum;
      // presumably harmless because TReg is consumed immediately — confirm.
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16bit value. Otherwise load it
      // using a lui-ori instructions.
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        // addiu $reg, $zero, imm16 — the FakeDef keeps $zero "defined" for
        // liveness purposes.
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          // Low half is zero, so a single lui suffices.
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
        else
          Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      // Rematerializable variables may be used directly when allowed;
      // otherwise copy into a fresh register.
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  // Any other operand kind is already legal (per the Legal_Reg assertion).
  return From;
}
6029
6030namespace BoolFolding {
6031// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
6032// and isValidConsumer()
6033bool shouldTrackProducer(const Inst &Instr) {
6034  return Instr.getKind() == Inst::Icmp;
6035}
6036
6037bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
6038} // end of namespace BoolFolding
6039
// Scans Node for bool-producing instructions (i1 dest, whitelisted by
// BoolFolding::shouldTrackProducer) whose single, in-block use is a valid
// consumer, and marks those producers dead so the consumer can fold them.
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      // A use by a non-consumer (or of an untracked computation type)
      // cancels folding for that producer.
      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      // A last use within this block means the value does not escape it.
      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  // Second pass: keep only producers with exactly one use that does not
  // live beyond this block; mark those dead (not deleted).
  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}
6095
// TargetHeaderMIPS32 just forwards the GlobalContext to the base class; all
// work happens in lower().
TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
6098
6099void TargetHeaderMIPS32::lower() {
6100  if (!BuildDefs::dump())
6101    return;
6102  OstreamLocker L(Ctx);
6103  Ostream &Str = Ctx->getStrEmit();
6104  Str << "\t.set\t"
6105      << "nomicromips\n";
6106  Str << "\t.set\t"
6107      << "nomips16\n";
6108  Str << "\t.set\t"
6109      << "noat\n";
6110  if (getFlags().getUseSandboxing())
6111    Str << "\t.bundle_align_mode 4\n";
6112}
6113
// Storage for the per-register-class register sets and the register alias
// table; presumably populated by TargetMIPS32::staticInit() — confirm.
SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6117
// A Sandboxer captures the target and the bundle-lock option used for any
// bundles it creates; construction itself emits no instructions.
TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}
6121
6122TargetMIPS32::Sandboxer::~Sandboxer() {}
6123
// Opens a bundle-locked region; the lock is released when Bundler is
// reassigned or this Sandboxer is destroyed.
void TargetMIPS32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}
6127
6128void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
6129  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6130  if (!Target->NeedSandboxing) {
6131    Target->_addiu(SP, SP, StackOffset);
6132    return;
6133  }
6134  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6135  Target->Context.insert<InstFakeDef>(T7);
6136  createAutoBundle();
6137  Target->_addiu(SP, SP, StackOffset);
6138  Target->_and(SP, SP, T7);
6139}
6140
// Sandboxed load word: masks the base address with T7 inside the load's
// bundle, and re-masks the destination when the load writes SP.
void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  // SP is trusted; T8 is also exempt here — presumably kept sandbox-safe by
  // other sequences. TODO confirm against the sandbox model.
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
      (RegMIPS32::Reg_T8 != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    // Bundle the and with the lw so the masked address cannot be clobbered
    // in between.
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lw(Dest, Mem);
  // A load that targets SP must leave SP masked as well.
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6157
// Sandboxed load-linked: masks the base address with T7 (unless the base is
// the trusted SP), and re-masks the destination if it is SP.
void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    // Bundle the and with the ll so the masked address cannot be clobbered
    // in between.
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ll(Dest, Mem);
  // A load-linked that targets SP must leave SP masked as well.
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6173
6174void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
6175  Variable *Base = Mem->getBase();
6176  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6177    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6178    Target->Context.insert<InstFakeDef>(T7);
6179    createAutoBundle();
6180    Target->_and(Base, Base, T7);
6181  }
6182  Target->_sc(Dest, Mem);
6183}
6184
6185void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
6186  Variable *Base = Mem->getBase();
6187  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6188    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6189    Target->Context.insert<InstFakeDef>(T7);
6190    createAutoBundle();
6191    Target->_and(Base, Base, T7);
6192  }
6193  Target->_sw(Dest, Mem);
6194}
6195
// Sandboxed lwc1 (float load): masks the base with T7 inside the load's
// bundle unless the base is the trusted SP.
void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lwc1(Dest, Mem, Reloc);
  // NOTE(review): mirrors lw(); Dest of lwc1 is an FPU register, so the
  // SP-destination case looks unreachable — confirm.
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6212
// Sandboxed ldc1 (double load): masks the base with T7 inside the load's
// bundle unless the base is the trusted SP.
void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ldc1(Dest, Mem, Reloc);
  // NOTE(review): mirrors lw(); Dest of ldc1 is an FPU register, so the
  // SP-destination case looks unreachable — confirm.
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6229
6230void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6231  if (!Target->NeedSandboxing) {
6232    Target->_ret(RetAddr, RetValue);
6233  }
6234  auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6235  Target->Context.insert<InstFakeDef>(T6);
6236  createAutoBundle();
6237  Target->_and(RetAddr, RetAddr, T6);
6238  Target->_ret(RetAddr, RetValue);
6239}
6240
// Resets the stack pointer from Src. When sandboxing, the new SP value is
// masked with T7 inside a bundle; the trailing FakeUse keeps the masked SP
// from being considered dead.
void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_mov(SP, Src);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_mov(SP, Src);
  Target->_and(SP, SP, T7);
  Target->getContext().insert<InstFakeUse>(SP);
}
6254
6255InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
6256                                             Operand *CallTarget) {
6257  if (Target->NeedSandboxing) {
6258    createAutoBundle();
6259    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
6260      auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6261      Target->Context.insert<InstFakeDef>(T6);
6262      Target->_and(CallTargetR, CallTargetR, T6);
6263    }
6264  }
6265  return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
6266}
6267
6268} // end of namespace MIPS32
6269} // end of namespace Ice
6270