ARMAsmParser.cpp revision 8ab1112bdc30b8675bb12431d8b5b270da42f1b5
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "ARMAddressingModes.h" 12#include "ARMSubtarget.h" 13#include "llvm/MC/MCParser/MCAsmLexer.h" 14#include "llvm/MC/MCParser/MCAsmParser.h" 15#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16#include "llvm/MC/MCContext.h" 17#include "llvm/MC/MCStreamer.h" 18#include "llvm/MC/MCExpr.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/Target/TargetRegistry.h" 21#include "llvm/Target/TargetAsmParser.h" 22#include "llvm/Support/SourceMgr.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/ADT/SmallVector.h" 25#include "llvm/ADT/StringSwitch.h" 26#include "llvm/ADT/Twine.h" 27using namespace llvm; 28 29/// Shift types used for register controlled shifts in ARM memory addressing. 30enum ShiftType { 31 Lsl, 32 Lsr, 33 Asr, 34 Ror, 35 Rrx 36}; 37 38namespace { 39 40class ARMOperand; 41 42class ARMAsmParser : public TargetAsmParser { 43 MCAsmParser &Parser; 44 TargetMachine &TM; 45 46 MCAsmParser &getParser() const { return Parser; } 47 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 48 49 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 50 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 51 52 int TryParseRegister(); 53 bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); 54 bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); 55 bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); 56 bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &); 57 58 bool ParseMemoryOffsetReg(bool &Negative, 59 bool &OffsetRegShifted, 60 enum ShiftType &ShiftType, 61 const MCExpr *&ShiftAmount, 62 const MCExpr *&Offset, 63 bool &OffsetIsReg, 64 int &OffsetRegNum, 65 SMLoc &E); 66 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); 67 bool ParseDirectiveWord(unsigned Size, SMLoc L); 68 bool ParseDirectiveThumb(SMLoc L); 69 bool ParseDirectiveThumbFunc(SMLoc L); 70 bool ParseDirectiveCode(SMLoc L); 71 bool ParseDirectiveSyntax(SMLoc L); 72 73 bool MatchAndEmitInstruction(SMLoc IDLoc, 74 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 75 MCStreamer &Out); 76 77 /// @name Auto-generated Match Functions 78 /// { 79 80#define GET_ASSEMBLER_HEADER 81#include "ARMGenAsmMatcher.inc" 82 83 /// } 84 85public: 86 ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) 87 : TargetAsmParser(T), Parser(_Parser), TM(_TM) { 88 // Initialize the set of available features. 89 setAvailableFeatures(ComputeAvailableFeatures( 90 &TM.getSubtarget<ARMSubtarget>())); 91 } 92 93 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 94 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 95 virtual bool ParseDirective(AsmToken DirectiveID); 96}; 97} // end anonymous namespace 98 99namespace { 100 101/// ARMOperand - Instances of this class represent a parsed ARM machine 102/// instruction. 103class ARMOperand : public MCParsedAsmOperand { 104 enum KindTy { 105 CondCode, 106 CCOut, 107 Immediate, 108 Memory, 109 Register, 110 RegisterList, 111 DPRRegisterList, 112 SPRRegisterList, 113 Token 114 } Kind; 115 116 SMLoc StartLoc, EndLoc; 117 SmallVector<unsigned, 8> Registers; 118 119 union { 120 struct { 121 ARMCC::CondCodes Val; 122 } CC; 123 124 struct { 125 const char *Data; 126 unsigned Length; 127 } Tok; 128 129 struct { 130 unsigned RegNum; 131 } Reg; 132 133 struct { 134 const MCExpr *Val; 135 } Imm; 136 137 /// Combined record for all forms of ARM address expressions. 138 struct { 139 unsigned BaseRegNum; 140 unsigned OffsetRegNum; // used when OffsetIsReg is true 141 const MCExpr *Offset; // used when OffsetIsReg is false 142 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 143 enum ShiftType ShiftType; // used when OffsetRegShifted is true 144 unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true 145 unsigned Preindexed : 1; 146 unsigned Postindexed : 1; 147 unsigned OffsetIsReg : 1; 148 unsigned Negative : 1; // only used when OffsetIsReg is true 149 unsigned Writeback : 1; 150 } Mem; 151 }; 152 153 ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} 154public: 155 ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { 156 Kind = o.Kind; 157 StartLoc = o.StartLoc; 158 EndLoc = o.EndLoc; 159 switch (Kind) { 160 case CondCode: 161 CC = o.CC; 162 break; 163 case Token: 164 Tok = o.Tok; 165 break; 166 case CCOut: 167 case Register: 168 Reg = o.Reg; 169 break; 170 case RegisterList: 171 case DPRRegisterList: 172 case SPRRegisterList: 173 Registers = o.Registers; 174 break; 175 case Immediate: 176 Imm = o.Imm; 177 break; 178 case Memory: 179 Mem = o.Mem; 180 break; 181 } 182 } 183 184 /// getStartLoc - Get the location of the first token of this operand. 185 SMLoc getStartLoc() const { return StartLoc; } 186 /// getEndLoc - Get the location of the last token of this operand. 187 SMLoc getEndLoc() const { return EndLoc; } 188 189 ARMCC::CondCodes getCondCode() const { 190 assert(Kind == CondCode && "Invalid access!"); 191 return CC.Val; 192 } 193 194 StringRef getToken() const { 195 assert(Kind == Token && "Invalid access!"); 196 return StringRef(Tok.Data, Tok.Length); 197 } 198 199 unsigned getReg() const { 200 assert((Kind == Register || Kind == CCOut) && "Invalid access!"); 201 return Reg.RegNum; 202 } 203 204 const SmallVectorImpl<unsigned> &getRegList() const { 205 assert((Kind == RegisterList || Kind == DPRRegisterList || 206 Kind == SPRRegisterList) && "Invalid access!"); 207 return Registers; 208 } 209 210 const MCExpr *getImm() const { 211 assert(Kind == Immediate && "Invalid access!"); 212 return Imm.Val; 213 } 214 215 bool isCondCode() const { return Kind == CondCode; } 216 bool isCCOut() const { return Kind == CCOut; } 217 bool isImm() const { return Kind == Immediate; } 218 bool isReg() const { return Kind == Register; } 219 bool isRegList() const { return Kind == RegisterList; } 220 bool isDPRRegList() const { return Kind == DPRRegisterList; } 221 bool isSPRRegList() const { return Kind == SPRRegisterList; } 222 bool isToken() const { return Kind == Token; } 223 bool isMemory() const { return Kind == Memory; } 224 bool isMemMode5() const { 225 if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted || 226 Mem.Writeback || Mem.Negative) 227 return false; 228 229 // If there is an offset expression, make sure it's valid. 230 if (!Mem.Offset) return true; 231 232 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 233 if (!CE) return false; 234 235 // The offset must be a multiple of 4 in the range 0-1020. 236 int64_t Value = CE->getValue(); 237 return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); 238 } 239 bool isMemModeRegThumb() const { 240 if (!isMemory() || (!Mem.OffsetIsReg && !Mem.Offset) || Mem.Writeback) 241 return false; 242 return !Mem.Offset || !isa<MCConstantExpr>(Mem.Offset); 243 } 244 bool isMemModeImmThumb() const { 245 if (!isMemory() || (!Mem.OffsetIsReg && !Mem.Offset) || Mem.Writeback) 246 return false; 247 248 if (!Mem.Offset) return false; 249 250 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 251 if (!CE) return false; 252 253 // The offset must be a multiple of 4 in the range 0-124. 254 uint64_t Value = CE->getValue(); 255 return ((Value & 0x3) == 0 && Value <= 124); 256 } 257 258 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 259 // Add as immediates when possible. Null MCExpr = 0. 260 if (Expr == 0) 261 Inst.addOperand(MCOperand::CreateImm(0)); 262 else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 263 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 264 else 265 Inst.addOperand(MCOperand::CreateExpr(Expr)); 266 } 267 268 void addCondCodeOperands(MCInst &Inst, unsigned N) const { 269 assert(N == 2 && "Invalid number of operands!"); 270 Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); 271 unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR; 272 Inst.addOperand(MCOperand::CreateReg(RegNum)); 273 } 274 275 void addCCOutOperands(MCInst &Inst, unsigned N) const { 276 assert(N == 1 && "Invalid number of operands!"); 277 Inst.addOperand(MCOperand::CreateReg(getReg())); 278 } 279 280 void addRegOperands(MCInst &Inst, unsigned N) const { 281 assert(N == 1 && "Invalid number of operands!"); 282 Inst.addOperand(MCOperand::CreateReg(getReg())); 283 } 284 285 void addRegListOperands(MCInst &Inst, unsigned N) const { 286 assert(N == 1 && "Invalid number of operands!"); 287 const SmallVectorImpl<unsigned> &RegList = getRegList(); 288 for (SmallVectorImpl<unsigned>::const_iterator 289 I = RegList.begin(), E = RegList.end(); I != E; ++I) 290 Inst.addOperand(MCOperand::CreateReg(*I)); 291 } 292 293 void addDPRRegListOperands(MCInst &Inst, unsigned N) const { 294 addRegListOperands(Inst, N); 295 } 296 297 void addSPRRegListOperands(MCInst &Inst, unsigned N) const { 298 addRegListOperands(Inst, N); 299 } 300 301 void addImmOperands(MCInst &Inst, unsigned N) const { 302 assert(N == 1 && "Invalid number of operands!"); 303 addExpr(Inst, getImm()); 304 } 305 306 void addMemMode5Operands(MCInst &Inst, unsigned N) const { 307 assert(N == 2 && isMemMode5() && "Invalid number of operands!"); 308 309 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 310 assert(!Mem.OffsetIsReg && "Invalid mode 5 operand"); 311 312 // FIXME: #-0 is encoded differently than #0. Does the parser preserve 313 // the difference? 314 if (Mem.Offset) { 315 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 316 assert(CE && "Non-constant mode 5 offset operand!"); 317 318 // The MCInst offset operand doesn't include the low two bits (like 319 // the instruction encoding). 320 int64_t Offset = CE->getValue() / 4; 321 if (Offset >= 0) 322 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, 323 Offset))); 324 else 325 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, 326 -Offset))); 327 } else { 328 Inst.addOperand(MCOperand::CreateImm(0)); 329 } 330 } 331 332 void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const { 333 assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!"); 334 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 335 Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum)); 336 } 337 338 void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const { 339 assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!"); 340 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 341 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 342 assert(CE && "Non-constant mode offset operand!"); 343 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 344 } 345 346 virtual void dump(raw_ostream &OS) const; 347 348 static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { 349 ARMOperand *Op = new ARMOperand(CondCode); 350 Op->CC.Val = CC; 351 Op->StartLoc = S; 352 Op->EndLoc = S; 353 return Op; 354 } 355 356 static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { 357 ARMOperand *Op = new ARMOperand(CCOut); 358 Op->Reg.RegNum = RegNum; 359 Op->StartLoc = S; 360 Op->EndLoc = S; 361 return Op; 362 } 363 364 static ARMOperand *CreateToken(StringRef Str, SMLoc S) { 365 ARMOperand *Op = new ARMOperand(Token); 366 Op->Tok.Data = Str.data(); 367 Op->Tok.Length = Str.size(); 368 Op->StartLoc = S; 369 Op->EndLoc = S; 370 return Op; 371 } 372 373 static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { 374 ARMOperand *Op = new ARMOperand(Register); 375 Op->Reg.RegNum = RegNum; 376 Op->StartLoc = S; 377 Op->EndLoc = E; 378 return Op; 379 } 380 381 static ARMOperand * 382 CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, 383 SMLoc StartLoc, SMLoc EndLoc) { 384 KindTy Kind = RegisterList; 385 386 if (ARM::DPRRegClass.contains(Regs.front().first)) 387 Kind = DPRRegisterList; 388 else if (ARM::SPRRegClass.contains(Regs.front().first)) 389 Kind = SPRRegisterList; 390 391 ARMOperand *Op = new ARMOperand(Kind); 392 for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator 393 I = Regs.begin(), E = Regs.end(); I != E; ++I) 394 Op->Registers.push_back(I->first); 395 array_pod_sort(Op->Registers.begin(), Op->Registers.end()); 396 Op->StartLoc = StartLoc; 397 Op->EndLoc = EndLoc; 398 return Op; 399 } 400 401 static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { 402 ARMOperand *Op = new ARMOperand(Immediate); 403 Op->Imm.Val = Val; 404 Op->StartLoc = S; 405 Op->EndLoc = E; 406 return Op; 407 } 408 409 static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 410 const MCExpr *Offset, unsigned OffsetRegNum, 411 bool OffsetRegShifted, enum ShiftType ShiftType, 412 const MCExpr *ShiftAmount, bool Preindexed, 413 bool Postindexed, bool Negative, bool Writeback, 414 SMLoc S, SMLoc E) { 415 ARMOperand *Op = new ARMOperand(Memory); 416 Op->Mem.BaseRegNum = BaseRegNum; 417 Op->Mem.OffsetIsReg = OffsetIsReg; 418 Op->Mem.Offset = Offset; 419 Op->Mem.OffsetRegNum = OffsetRegNum; 420 Op->Mem.OffsetRegShifted = OffsetRegShifted; 421 Op->Mem.ShiftType = ShiftType; 422 Op->Mem.ShiftAmount = ShiftAmount; 423 Op->Mem.Preindexed = Preindexed; 424 Op->Mem.Postindexed = Postindexed; 425 Op->Mem.Negative = Negative; 426 Op->Mem.Writeback = Writeback; 427 428 Op->StartLoc = S; 429 Op->EndLoc = E; 430 return Op; 431 } 432}; 433 434} // end anonymous namespace. 435 436void ARMOperand::dump(raw_ostream &OS) const { 437 switch (Kind) { 438 case CondCode: 439 OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; 440 break; 441 case CCOut: 442 OS << "<ccout " << getReg() << ">"; 443 break; 444 case Immediate: 445 getImm()->print(OS); 446 break; 447 case Memory: 448 OS << "<memory>"; 449 break; 450 case Register: 451 OS << "<register " << getReg() << ">"; 452 break; 453 case RegisterList: 454 case DPRRegisterList: 455 case SPRRegisterList: { 456 OS << "<register_list "; 457 458 const SmallVectorImpl<unsigned> &RegList = getRegList(); 459 for (SmallVectorImpl<unsigned>::const_iterator 460 I = RegList.begin(), E = RegList.end(); I != E; ) { 461 OS << *I; 462 if (++I < E) OS << ", "; 463 } 464 465 OS << ">"; 466 break; 467 } 468 case Token: 469 OS << "'" << getToken() << "'"; 470 break; 471 } 472} 473 474/// @name Auto-generated Match Functions 475/// { 476 477static unsigned MatchRegisterName(StringRef Name); 478 479/// } 480 481/// Try to parse a register name. The token must be an Identifier when called, 482/// and if it is a register name the token is eaten and the register number is 483/// returned. Otherwise return -1. 484/// 485int ARMAsmParser::TryParseRegister() { 486 const AsmToken &Tok = Parser.getTok(); 487 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 488 489 // FIXME: Validate register for the current architecture; we have to do 490 // validation later, so maybe there is no need for this here. 491 unsigned RegNum = MatchRegisterName(Tok.getString()); 492 if (RegNum == 0) 493 return -1; 494 Parser.Lex(); // Eat identifier token. 495 return RegNum; 496} 497 498 499/// Try to parse a register name. The token must be an Identifier when called. 500/// If it's a register, an AsmOperand is created. Another AsmOperand is created 501/// if there is a "writeback". 'true' if it's not a register. 502/// 503/// TODO this is likely to change to allow different register types and or to 504/// parse for a specific register type. 505bool ARMAsmParser:: 506TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 507 SMLoc S = Parser.getTok().getLoc(); 508 int RegNo = TryParseRegister(); 509 if (RegNo == -1) 510 return true; 511 512 Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc())); 513 514 const AsmToken &ExclaimTok = Parser.getTok(); 515 if (ExclaimTok.is(AsmToken::Exclaim)) { 516 Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(), 517 ExclaimTok.getLoc())); 518 Parser.Lex(); // Eat exclaim token 519 } 520 521 return false; 522} 523 524/// Parse a register list, return it if successful else return null. The first 525/// token must be a '{' when called. 526bool ARMAsmParser:: 527ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 528 assert(Parser.getTok().is(AsmToken::LCurly) && 529 "Token is not a Left Curly Brace"); 530 SMLoc S = Parser.getTok().getLoc(); 531 532 // Read the rest of the registers in the list. 533 unsigned PrevRegNum = 0; 534 SmallVector<std::pair<unsigned, SMLoc>, 32> Registers; 535 536 do { 537 bool IsRange = Parser.getTok().is(AsmToken::Minus); 538 Parser.Lex(); // Eat non-identifier token. 539 540 const AsmToken &RegTok = Parser.getTok(); 541 SMLoc RegLoc = RegTok.getLoc(); 542 if (RegTok.isNot(AsmToken::Identifier)) { 543 Error(RegLoc, "register expected"); 544 return true; 545 } 546 547 int RegNum = TryParseRegister(); 548 if (RegNum == -1) { 549 Error(RegLoc, "register expected"); 550 return true; 551 } 552 553 if (IsRange) { 554 int Reg = PrevRegNum; 555 do { 556 ++Reg; 557 Registers.push_back(std::make_pair(Reg, RegLoc)); 558 } while (Reg != RegNum); 559 } else { 560 Registers.push_back(std::make_pair(RegNum, RegLoc)); 561 } 562 563 PrevRegNum = RegNum; 564 } while (Parser.getTok().is(AsmToken::Comma) || 565 Parser.getTok().is(AsmToken::Minus)); 566 567 // Process the right curly brace of the list. 568 const AsmToken &RCurlyTok = Parser.getTok(); 569 if (RCurlyTok.isNot(AsmToken::RCurly)) { 570 Error(RCurlyTok.getLoc(), "'}' expected"); 571 return true; 572 } 573 574 SMLoc E = RCurlyTok.getLoc(); 575 Parser.Lex(); // Eat right curly brace token. 576 577 // Verify the register list. 578 SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator 579 RI = Registers.begin(), RE = Registers.end(); 580 581 DenseMap<unsigned, bool> RegMap; 582 RegMap[RI->first] = true; 583 584 unsigned HighRegNum = RI->first; 585 bool EmittedWarning = false; 586 587 for (++RI; RI != RE; ++RI) { 588 const std::pair<unsigned, SMLoc> &RegInfo = *RI; 589 unsigned Reg = RegInfo.first; 590 591 if (RegMap[Reg]) { 592 Error(RegInfo.second, "register duplicated in register list"); 593 return true; 594 } 595 596 if (!EmittedWarning && Reg < HighRegNum) 597 Warning(RegInfo.second, 598 "register not in ascending order in register list"); 599 600 RegMap[Reg] = true; 601 HighRegNum = std::max(Reg, HighRegNum); 602 } 603 604 Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); 605 return false; 606} 607 608/// Parse an ARM memory expression, return false if successful else return true 609/// or an error. The first token must be a '[' when called. 610/// 611/// TODO Only preindexing and postindexing addressing are started, unindexed 612/// with option, etc are still to do. 613bool ARMAsmParser:: 614ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 615 SMLoc S, E; 616 assert(Parser.getTok().is(AsmToken::LBrac) && 617 "Token is not a Left Bracket"); 618 S = Parser.getTok().getLoc(); 619 Parser.Lex(); // Eat left bracket token. 620 621 const AsmToken &BaseRegTok = Parser.getTok(); 622 if (BaseRegTok.isNot(AsmToken::Identifier)) { 623 Error(BaseRegTok.getLoc(), "register expected"); 624 return true; 625 } 626 int BaseRegNum = TryParseRegister(); 627 if (BaseRegNum == -1) { 628 Error(BaseRegTok.getLoc(), "register expected"); 629 return true; 630 } 631 632 bool Preindexed = false; 633 bool Postindexed = false; 634 bool OffsetIsReg = false; 635 bool Negative = false; 636 bool Writeback = false; 637 638 // First look for preindexed address forms, that is after the "[Rn" we now 639 // have to see if the next token is a comma. 640 const AsmToken &Tok = Parser.getTok(); 641 if (Tok.is(AsmToken::Comma)) { 642 Preindexed = true; 643 Parser.Lex(); // Eat comma token. 644 int OffsetRegNum; 645 bool OffsetRegShifted; 646 enum ShiftType ShiftType; 647 const MCExpr *ShiftAmount = 0; 648 const MCExpr *Offset = 0; 649 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, 650 Offset, OffsetIsReg, OffsetRegNum, E)) 651 return true; 652 const AsmToken &RBracTok = Parser.getTok(); 653 if (RBracTok.isNot(AsmToken::RBrac)) { 654 Error(RBracTok.getLoc(), "']' expected"); 655 return true; 656 } 657 E = RBracTok.getLoc(); 658 Parser.Lex(); // Eat right bracket token. 659 660 661 const AsmToken &ExclaimTok = Parser.getTok(); 662 ARMOperand *WBOp = 0; 663 if (ExclaimTok.is(AsmToken::Exclaim)) { 664 WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), 665 ExclaimTok.getLoc()); 666 Writeback = true; 667 Parser.Lex(); // Eat exclaim token 668 } 669 670 Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, 671 OffsetRegNum, OffsetRegShifted, 672 ShiftType, ShiftAmount, Preindexed, 673 Postindexed, Negative, Writeback, 674 S, E)); 675 if (WBOp) 676 Operands.push_back(WBOp); 677 678 return false; 679 } 680 // The "[Rn" we have so far was not followed by a comma. 681 else if (Tok.is(AsmToken::RBrac)) { 682 // If there's anything other than the right brace, this is a post indexing 683 // addressing form. 684 E = Tok.getLoc(); 685 Parser.Lex(); // Eat right bracket token. 686 687 int OffsetRegNum = 0; 688 bool OffsetRegShifted = false; 689 enum ShiftType ShiftType = Lsl; 690 const MCExpr *ShiftAmount = 0; 691 const MCExpr *Offset = 0; 692 693 const AsmToken &NextTok = Parser.getTok(); 694 695 if (NextTok.isNot(AsmToken::EndOfStatement)) { 696 Postindexed = true; 697 Writeback = true; 698 699 if (NextTok.isNot(AsmToken::Comma)) { 700 Error(NextTok.getLoc(), "',' expected"); 701 return true; 702 } 703 704 Parser.Lex(); // Eat comma token. 705 706 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, 707 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, 708 E)) 709 return true; 710 } 711 712 Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, 713 OffsetRegNum, OffsetRegShifted, 714 ShiftType, ShiftAmount, Preindexed, 715 Postindexed, Negative, Writeback, 716 S, E)); 717 return false; 718 } 719 720 return true; 721} 722 723/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," 724/// we will parse the following (were +/- means that a plus or minus is 725/// optional): 726/// +/-Rm 727/// +/-Rm, shift 728/// #offset 729/// we return false on success or an error otherwise. 730bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, 731 bool &OffsetRegShifted, 732 enum ShiftType &ShiftType, 733 const MCExpr *&ShiftAmount, 734 const MCExpr *&Offset, 735 bool &OffsetIsReg, 736 int &OffsetRegNum, 737 SMLoc &E) { 738 Negative = false; 739 OffsetRegShifted = false; 740 OffsetIsReg = false; 741 OffsetRegNum = -1; 742 const AsmToken &NextTok = Parser.getTok(); 743 E = NextTok.getLoc(); 744 if (NextTok.is(AsmToken::Plus)) 745 Parser.Lex(); // Eat plus token. 746 else if (NextTok.is(AsmToken::Minus)) { 747 Negative = true; 748 Parser.Lex(); // Eat minus token 749 } 750 // See if there is a register following the "[Rn," or "[Rn]," we have so far. 751 const AsmToken &OffsetRegTok = Parser.getTok(); 752 if (OffsetRegTok.is(AsmToken::Identifier)) { 753 SMLoc CurLoc = OffsetRegTok.getLoc(); 754 OffsetRegNum = TryParseRegister(); 755 if (OffsetRegNum != -1) { 756 OffsetIsReg = true; 757 E = CurLoc; 758 } 759 } 760 761 // If we parsed a register as the offset then there can be a shift after that. 762 if (OffsetRegNum != -1) { 763 // Look for a comma then a shift 764 const AsmToken &Tok = Parser.getTok(); 765 if (Tok.is(AsmToken::Comma)) { 766 Parser.Lex(); // Eat comma token. 767 768 const AsmToken &Tok = Parser.getTok(); 769 if (ParseShift(ShiftType, ShiftAmount, E)) 770 return Error(Tok.getLoc(), "shift expected"); 771 OffsetRegShifted = true; 772 } 773 } 774 else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" 775 // Look for #offset following the "[Rn," or "[Rn]," 776 const AsmToken &HashTok = Parser.getTok(); 777 if (HashTok.isNot(AsmToken::Hash)) 778 return Error(HashTok.getLoc(), "'#' expected"); 779 780 Parser.Lex(); // Eat hash token. 781 782 if (getParser().ParseExpression(Offset)) 783 return true; 784 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 785 } 786 return false; 787} 788 789/// ParseShift as one of these two: 790/// ( lsl | lsr | asr | ror ) , # shift_amount 791/// rrx 792/// and returns true if it parses a shift otherwise it returns false. 793bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, 794 SMLoc &E) { 795 const AsmToken &Tok = Parser.getTok(); 796 if (Tok.isNot(AsmToken::Identifier)) 797 return true; 798 StringRef ShiftName = Tok.getString(); 799 if (ShiftName == "lsl" || ShiftName == "LSL") 800 St = Lsl; 801 else if (ShiftName == "lsr" || ShiftName == "LSR") 802 St = Lsr; 803 else if (ShiftName == "asr" || ShiftName == "ASR") 804 St = Asr; 805 else if (ShiftName == "ror" || ShiftName == "ROR") 806 St = Ror; 807 else if (ShiftName == "rrx" || ShiftName == "RRX") 808 St = Rrx; 809 else 810 return true; 811 Parser.Lex(); // Eat shift type token. 812 813 // Rrx stands alone. 814 if (St == Rrx) 815 return false; 816 817 // Otherwise, there must be a '#' and a shift amount. 818 const AsmToken &HashTok = Parser.getTok(); 819 if (HashTok.isNot(AsmToken::Hash)) 820 return Error(HashTok.getLoc(), "'#' expected"); 821 Parser.Lex(); // Eat hash token. 822 823 if (getParser().ParseExpression(ShiftAmount)) 824 return true; 825 826 return false; 827} 828 829/// Parse a arm instruction operand. For now this parses the operand regardless 830/// of the mnemonic. 831bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands){ 832 SMLoc S, E; 833 switch (getLexer().getKind()) { 834 default: 835 Error(Parser.getTok().getLoc(), "unexpected token in operand"); 836 return true; 837 case AsmToken::Identifier: { 838 if (!TryParseRegisterWithWriteBack(Operands)) 839 return false; 840 841 // This was not a register so parse other operands that start with an 842 // identifier (like labels) as expressions and create them as immediates. 843 const MCExpr *IdVal; 844 S = Parser.getTok().getLoc(); 845 if (getParser().ParseExpression(IdVal)) 846 return true; 847 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 848 Operands.push_back(ARMOperand::CreateImm(IdVal, S, E)); 849 return false; 850 } 851 case AsmToken::LBrac: 852 return ParseMemory(Operands); 853 case AsmToken::LCurly: 854 return ParseRegisterList(Operands); 855 case AsmToken::Hash: 856 // #42 -> immediate. 857 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate 858 S = Parser.getTok().getLoc(); 859 Parser.Lex(); 860 const MCExpr *ImmVal; 861 if (getParser().ParseExpression(ImmVal)) 862 return true; 863 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 864 Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); 865 return false; 866 } 867} 868 869// FIXME: Would be nice to autogen this. 870static unsigned SplitMnemonicAndCC(StringRef &Mnemonic) { 871 // Ignore some mnemonics we know aren't predicated forms. 872 if (Mnemonic == "teq" || Mnemonic == "vceq" || 873 Mnemonic == "movs" || 874 Mnemonic == "svc" || 875 (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || 876 Mnemonic == "vmls" || Mnemonic == "vnmls") || 877 Mnemonic == "vacge" || Mnemonic == "vcge" || 878 Mnemonic == "vclt" || 879 Mnemonic == "vacgt" || Mnemonic == "vcgt" || 880 Mnemonic == "vcle" || 881 (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || 882 Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || 883 Mnemonic == "vqdmlal")) 884 return ARMCC::AL; 885 886 // Otherwise, determine the predicate. 887 // 888 // FIXME: We need a way to check whether a prefix supports predication, 889 // otherwise we will end up with an ambiguity for instructions that happen to 890 // end with a predicate name. 891 unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2)) 892 .Case("eq", ARMCC::EQ) 893 .Case("ne", ARMCC::NE) 894 .Case("hs", ARMCC::HS) 895 .Case("lo", ARMCC::LO) 896 .Case("mi", ARMCC::MI) 897 .Case("pl", ARMCC::PL) 898 .Case("vs", ARMCC::VS) 899 .Case("vc", ARMCC::VC) 900 .Case("hi", ARMCC::HI) 901 .Case("ls", ARMCC::LS) 902 .Case("ge", ARMCC::GE) 903 .Case("lt", ARMCC::LT) 904 .Case("gt", ARMCC::GT) 905 .Case("le", ARMCC::LE) 906 .Case("al", ARMCC::AL) 907 .Default(~0U); 908 if (CC != ~0U) { 909 Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2); 910 return CC; 911 } 912 913 return ARMCC::AL; 914} 915 916/// Parse an arm instruction mnemonic followed by its operands. 917bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, 918 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 919 // Create the leading tokens for the mnemonic, split by '.' characters. 920 size_t Start = 0, Next = Name.find('.'); 921 StringRef Head = Name.slice(Start, Next); 922 923 // Determine the predicate, if any. 924 unsigned CC = SplitMnemonicAndCC(Head); 925 926 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 927 928 // FIXME: Should only add this operand for predicated instructions 929 if (Head != "trap") { 930 Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), 931 NameLoc)); 932 } 933 934 // Add the remaining tokens in the mnemonic. 935 while (Next != StringRef::npos) { 936 Start = Next; 937 Next = Name.find('.', Start + 1); 938 Head = Name.slice(Start, Next); 939 940 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 941 } 942 943 // Read the remaining operands. 944 if (getLexer().isNot(AsmToken::EndOfStatement)) { 945 // Read the first operand. 946 if (ParseOperand(Operands)) { 947 Parser.EatToEndOfStatement(); 948 return true; 949 } 950 951 while (getLexer().is(AsmToken::Comma)) { 952 Parser.Lex(); // Eat the comma. 953 954 // Parse and remember the operand. 955 if (ParseOperand(Operands)) { 956 Parser.EatToEndOfStatement(); 957 return true; 958 } 959 } 960 } 961 962 if (getLexer().isNot(AsmToken::EndOfStatement)) { 963 Parser.EatToEndOfStatement(); 964 return TokError("unexpected token in argument list"); 965 } 966 967 Parser.Lex(); // Consume the EndOfStatement 968 return false; 969} 970 971bool ARMAsmParser:: 972MatchAndEmitInstruction(SMLoc IDLoc, 973 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 974 MCStreamer &Out) { 975 MCInst Inst; 976 unsigned ErrorInfo; 977 MatchResultTy MatchResult, MatchResult2; 978 MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo); 979 if (MatchResult != Match_Success) { 980 // If we get a Match_InvalidOperand it might be some arithmetic instruction 981 // that does not update the condition codes. So try adding a CCOut operand 982 // with a value of reg0. 983 if (MatchResult == Match_InvalidOperand) { 984 Operands.insert(Operands.begin() + 1, 985 ARMOperand::CreateCCOut(0, 986 ((ARMOperand*)Operands[0])->getStartLoc())); 987 MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo); 988 if (MatchResult2 == Match_Success) 989 MatchResult = Match_Success; 990 else { 991 ARMOperand *CCOut = ((ARMOperand*)Operands[1]); 992 Operands.erase(Operands.begin() + 1); 993 delete CCOut; 994 } 995 } 996 // If we get a Match_MnemonicFail it might be some arithmetic instruction 997 // that updates the condition codes if it ends in 's'. So see if the 998 // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut 999 // operand with a value of CPSR. 1000 else if(MatchResult == Match_MnemonicFail) { 1001 // Get the instruction mnemonic, which is the first token. 1002 StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken(); 1003 if (Mnemonic.substr(Mnemonic.size()-1) == "s") { 1004 // removed the 's' from the mnemonic for matching. 1005 StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1); 1006 SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc(); 1007 ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]); 1008 Operands.erase(Operands.begin()); 1009 delete OldMnemonic; 1010 Operands.insert(Operands.begin(), 1011 ARMOperand::CreateToken(MnemonicNoS, NameLoc)); 1012 Operands.insert(Operands.begin() + 1, 1013 ARMOperand::CreateCCOut(ARM::CPSR, NameLoc)); 1014 MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo); 1015 if (MatchResult2 == Match_Success) 1016 MatchResult = Match_Success; 1017 else { 1018 ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]); 1019 Operands.erase(Operands.begin()); 1020 delete OldMnemonic; 1021 Operands.insert(Operands.begin(), 1022 ARMOperand::CreateToken(Mnemonic, NameLoc)); 1023 ARMOperand *CCOut = ((ARMOperand*)Operands[1]); 1024 Operands.erase(Operands.begin() + 1); 1025 delete CCOut; 1026 } 1027 } 1028 } 1029 } 1030 switch (MatchResult) { 1031 case Match_Success: 1032 Out.EmitInstruction(Inst); 1033 return false; 1034 case Match_MissingFeature: 1035 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1036 return true; 1037 case Match_InvalidOperand: { 1038 SMLoc ErrorLoc = IDLoc; 1039 if (ErrorInfo != ~0U) { 1040 if (ErrorInfo >= Operands.size()) 1041 return Error(IDLoc, "too few operands for instruction"); 1042 1043 ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); 1044 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 1045 } 1046 1047 return Error(ErrorLoc, "invalid operand for instruction"); 1048 } 1049 case Match_MnemonicFail: 1050 return Error(IDLoc, "unrecognized instruction mnemonic"); 1051 } 1052 1053 llvm_unreachable("Implement any new match types added!"); 1054 return true; 1055} 1056 1057/// ParseDirective parses the arm specific directives 1058bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 1059 StringRef IDVal = DirectiveID.getIdentifier(); 1060 if (IDVal == ".word") 1061 return ParseDirectiveWord(4, DirectiveID.getLoc()); 1062 else if (IDVal == ".thumb") 1063 return ParseDirectiveThumb(DirectiveID.getLoc()); 1064 else if (IDVal == ".thumb_func") 1065 return ParseDirectiveThumbFunc(DirectiveID.getLoc()); 1066 else if (IDVal == ".code") 1067 return ParseDirectiveCode(DirectiveID.getLoc()); 1068 else if (IDVal == ".syntax") 1069 return ParseDirectiveSyntax(DirectiveID.getLoc()); 1070 return true; 1071} 1072 1073/// ParseDirectiveWord 1074/// ::= .word [ expression (, expression)* ] 1075bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 1076 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1077 for (;;) { 1078 const MCExpr *Value; 1079 if (getParser().ParseExpression(Value)) 1080 return true; 1081 1082 getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); 1083 1084 if (getLexer().is(AsmToken::EndOfStatement)) 1085 break; 1086 1087 // FIXME: Improve diagnostic. 1088 if (getLexer().isNot(AsmToken::Comma)) 1089 return Error(L, "unexpected token in directive"); 1090 Parser.Lex(); 1091 } 1092 } 1093 1094 Parser.Lex(); 1095 return false; 1096} 1097 1098/// ParseDirectiveThumb 1099/// ::= .thumb 1100bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { 1101 if (getLexer().isNot(AsmToken::EndOfStatement)) 1102 return Error(L, "unexpected token in directive"); 1103 Parser.Lex(); 1104 1105 // TODO: set thumb mode 1106 // TODO: tell the MC streamer the mode 1107 // getParser().getStreamer().Emit???(); 1108 return false; 1109} 1110 1111/// ParseDirectiveThumbFunc 1112/// ::= .thumbfunc symbol_name 1113bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { 1114 const AsmToken &Tok = Parser.getTok(); 1115 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) 1116 return Error(L, "unexpected token in .thumb_func directive"); 1117 StringRef Name = Tok.getString(); 1118 Parser.Lex(); // Consume the identifier token. 1119 if (getLexer().isNot(AsmToken::EndOfStatement)) 1120 return Error(L, "unexpected token in directive"); 1121 Parser.Lex(); 1122 1123 // Mark symbol as a thumb symbol. 1124 MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); 1125 getParser().getStreamer().EmitThumbFunc(Func); 1126 return false; 1127} 1128 1129/// ParseDirectiveSyntax 1130/// ::= .syntax unified | divided 1131bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { 1132 const AsmToken &Tok = Parser.getTok(); 1133 if (Tok.isNot(AsmToken::Identifier)) 1134 return Error(L, "unexpected token in .syntax directive"); 1135 StringRef Mode = Tok.getString(); 1136 if (Mode == "unified" || Mode == "UNIFIED") 1137 Parser.Lex(); 1138 else if (Mode == "divided" || Mode == "DIVIDED") 1139 Parser.Lex(); 1140 else 1141 return Error(L, "unrecognized syntax mode in .syntax directive"); 1142 1143 if (getLexer().isNot(AsmToken::EndOfStatement)) 1144 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1145 Parser.Lex(); 1146 1147 // TODO tell the MC streamer the mode 1148 // getParser().getStreamer().Emit???(); 1149 return false; 1150} 1151 1152/// ParseDirectiveCode 1153/// ::= .code 16 | 32 1154bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { 1155 const AsmToken &Tok = Parser.getTok(); 1156 if (Tok.isNot(AsmToken::Integer)) 1157 return Error(L, "unexpected token in .code directive"); 1158 int64_t Val = Parser.getTok().getIntVal(); 1159 if (Val == 16) 1160 Parser.Lex(); 1161 else if (Val == 32) 1162 Parser.Lex(); 1163 else 1164 return Error(L, "invalid operand to .code directive"); 1165 1166 if (getLexer().isNot(AsmToken::EndOfStatement)) 1167 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1168 Parser.Lex(); 1169 1170 if (Val == 16) 1171 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 1172 else 1173 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 1174 1175 return false; 1176} 1177 1178extern "C" void LLVMInitializeARMAsmLexer(); 1179 1180/// Force static initialization. 1181extern "C" void LLVMInitializeARMAsmParser() { 1182 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 1183 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 1184 LLVMInitializeARMAsmLexer(); 1185} 1186 1187#define GET_REGISTER_MATCHER 1188#define GET_MATCHER_IMPLEMENTATION 1189#include "ARMGenAsmMatcher.inc" 1190