ARMAsmParser.cpp revision 8d5acb7007decaf0c30bf4a3d4c55e5cc2cce0a7
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "ARMAddressingModes.h" 12#include "ARMSubtarget.h" 13#include "llvm/MC/MCParser/MCAsmLexer.h" 14#include "llvm/MC/MCParser/MCAsmParser.h" 15#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16#include "llvm/MC/MCContext.h" 17#include "llvm/MC/MCStreamer.h" 18#include "llvm/MC/MCExpr.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/Target/TargetRegistry.h" 21#include "llvm/Target/TargetAsmParser.h" 22#include "llvm/Support/SourceMgr.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/ADT/SmallVector.h" 25#include "llvm/ADT/StringSwitch.h" 26#include "llvm/ADT/Twine.h" 27using namespace llvm; 28 29// The shift types for register controlled shifts in arm memory addressing 30enum ShiftType { 31 Lsl, 32 Lsr, 33 Asr, 34 Ror, 35 Rrx 36}; 37 38namespace { 39 struct ARMOperand; 40 41class ARMAsmParser : public TargetAsmParser { 42 MCAsmParser &Parser; 43 TargetMachine &TM; 44 45 MCAsmParser &getParser() const { return Parser; } 46 47 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 48 49 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 50 51 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 52 53 int TryParseRegister(); 54 ARMOperand *TryParseRegisterWithWriteBack(); 55 ARMOperand *ParseRegisterList(); 56 ARMOperand *ParseMemory(); 57 58 bool ParseMemoryOffsetReg(bool &Negative, 59 bool &OffsetRegShifted, 60 enum ShiftType &ShiftType, 61 const MCExpr *&ShiftAmount, 62 const MCExpr *&Offset, 63 bool &OffsetIsReg, 64 int &OffsetRegNum, 65 SMLoc &E); 66 67 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); 68 69 ARMOperand *ParseOperand(); 70 71 bool ParseDirectiveWord(unsigned Size, SMLoc L); 72 73 bool ParseDirectiveThumb(SMLoc L); 74 75 bool ParseDirectiveThumbFunc(SMLoc L); 76 77 bool ParseDirectiveCode(SMLoc L); 78 79 bool ParseDirectiveSyntax(SMLoc L); 80 81 bool MatchAndEmitInstruction(SMLoc IDLoc, 82 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 83 MCStreamer &Out); 84 85 /// @name Auto-generated Match Functions 86 /// { 87 88#define GET_ASSEMBLER_HEADER 89#include "ARMGenAsmMatcher.inc" 90 91 /// } 92 93 94public: 95 ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) 96 : TargetAsmParser(T), Parser(_Parser), TM(_TM) { 97 // Initialize the set of available features. 98 setAvailableFeatures(ComputeAvailableFeatures( 99 &TM.getSubtarget<ARMSubtarget>())); 100 } 101 102 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 103 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 104 105 virtual bool ParseDirective(AsmToken DirectiveID); 106}; 107} // end anonymous namespace 108 109namespace { 110 111/// ARMOperand - Instances of this class represent a parsed ARM machine 112/// instruction. 113struct ARMOperand : public MCParsedAsmOperand { 114public: 115 enum KindTy { 116 CondCode, 117 Immediate, 118 Memory, 119 Register, 120 RegisterList, 121 Token 122 } Kind; 123 124 SMLoc StartLoc, EndLoc; 125 126 union { 127 struct { 128 ARMCC::CondCodes Val; 129 } CC; 130 131 struct { 132 const char *Data; 133 unsigned Length; 134 } Tok; 135 136 struct { 137 unsigned RegNum; 138 bool Writeback; 139 } Reg; 140 141 struct { 142 unsigned RegStart; 143 unsigned Number; 144 } RegList; 145 146 struct { 147 const MCExpr *Val; 148 } Imm; 149 150 // This is for all forms of ARM address expressions 151 struct { 152 unsigned BaseRegNum; 153 unsigned OffsetRegNum; // used when OffsetIsReg is true 154 const MCExpr *Offset; // used when OffsetIsReg is false 155 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 156 enum ShiftType ShiftType; // used when OffsetRegShifted is true 157 unsigned 158 OffsetRegShifted : 1, // only used when OffsetIsReg is true 159 Preindexed : 1, 160 Postindexed : 1, 161 OffsetIsReg : 1, 162 Negative : 1, // only used when OffsetIsReg is true 163 Writeback : 1; 164 } Mem; 165 166 }; 167 168 ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { 169 Kind = o.Kind; 170 StartLoc = o.StartLoc; 171 EndLoc = o.EndLoc; 172 switch (Kind) { 173 case CondCode: 174 CC = o.CC; 175 break; 176 case Token: 177 Tok = o.Tok; 178 break; 179 case Register: 180 Reg = o.Reg; 181 break; 182 case RegisterList: 183 RegList = o.RegList; 184 break; 185 case Immediate: 186 Imm = o.Imm; 187 break; 188 case Memory: 189 Mem = o.Mem; 190 break; 191 } 192 } 193 194 /// getStartLoc - Get the location of the first token of this operand. 195 SMLoc getStartLoc() const { return StartLoc; } 196 /// getEndLoc - Get the location of the last token of this operand. 197 SMLoc getEndLoc() const { return EndLoc; } 198 199 ARMCC::CondCodes getCondCode() const { 200 assert(Kind == CondCode && "Invalid access!"); 201 return CC.Val; 202 } 203 204 StringRef getToken() const { 205 assert(Kind == Token && "Invalid access!"); 206 return StringRef(Tok.Data, Tok.Length); 207 } 208 209 unsigned getReg() const { 210 assert(Kind == Register && "Invalid access!"); 211 return Reg.RegNum; 212 } 213 214 std::pair<unsigned, unsigned> getRegList() const { 215 assert(Kind == RegisterList && "Invalid access!"); 216 return std::make_pair(RegList.RegStart, RegList.Number); 217 } 218 219 const MCExpr *getImm() const { 220 assert(Kind == Immediate && "Invalid access!"); 221 return Imm.Val; 222 } 223 224 bool isCondCode() const { return Kind == CondCode; } 225 bool isImm() const { return Kind == Immediate; } 226 bool isReg() const { return Kind == Register; } 227 bool isRegList() const { return Kind == RegisterList; } 228 bool isToken() const { return Kind == Token; } 229 bool isMemory() const { return Kind == Memory; } 230 231 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 232 // Add as immediates when possible. Null MCExpr = 0. 233 if (Expr == 0) 234 Inst.addOperand(MCOperand::CreateImm(0)); 235 else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 236 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 237 else 238 Inst.addOperand(MCOperand::CreateExpr(Expr)); 239 } 240 241 void addCondCodeOperands(MCInst &Inst, unsigned N) const { 242 assert(N == 2 && "Invalid number of operands!"); 243 Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); 244 // FIXME: What belongs here? 245 Inst.addOperand(MCOperand::CreateReg(0)); 246 } 247 248 void addRegOperands(MCInst &Inst, unsigned N) const { 249 assert(N == 1 && "Invalid number of operands!"); 250 Inst.addOperand(MCOperand::CreateReg(getReg())); 251 } 252 253 void addImmOperands(MCInst &Inst, unsigned N) const { 254 assert(N == 1 && "Invalid number of operands!"); 255 addExpr(Inst, getImm()); 256 } 257 258 259 bool isMemMode5() const { 260 if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted || 261 Mem.Writeback || Mem.Negative) 262 return false; 263 // If there is an offset expression, make sure it's valid. 264 if (!Mem.Offset) 265 return true; 266 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 267 if (!CE) 268 return false; 269 // The offset must be a multiple of 4 in the range 0-1020. 270 int64_t Value = CE->getValue(); 271 return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); 272 } 273 274 void addMemMode5Operands(MCInst &Inst, unsigned N) const { 275 assert(N == 2 && isMemMode5() && "Invalid number of operands!"); 276 277 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 278 assert(!Mem.OffsetIsReg && "Invalid mode 5 operand"); 279 280 // FIXME: #-0 is encoded differently than #0. Does the parser preserve 281 // the difference? 282 if (Mem.Offset) { 283 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 284 assert(CE && "Non-constant mode 5 offset operand!"); 285 286 // The MCInst offset operand doesn't include the low two bits (like 287 // the instruction encoding). 288 int64_t Offset = CE->getValue() / 4; 289 if (Offset >= 0) 290 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, 291 Offset))); 292 else 293 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, 294 -Offset))); 295 } else { 296 Inst.addOperand(MCOperand::CreateImm(0)); 297 } 298 } 299 300 virtual void dump(raw_ostream &OS) const; 301 302 static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { 303 ARMOperand *Op = new ARMOperand(CondCode); 304 Op->CC.Val = CC; 305 Op->StartLoc = S; 306 Op->EndLoc = S; 307 return Op; 308 } 309 310 static ARMOperand *CreateToken(StringRef Str, SMLoc S) { 311 ARMOperand *Op = new ARMOperand(Token); 312 Op->Tok.Data = Str.data(); 313 Op->Tok.Length = Str.size(); 314 Op->StartLoc = S; 315 Op->EndLoc = S; 316 return Op; 317 } 318 319 static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S, 320 SMLoc E) { 321 ARMOperand *Op = new ARMOperand(Register); 322 Op->Reg.RegNum = RegNum; 323 Op->Reg.Writeback = Writeback; 324 Op->StartLoc = S; 325 Op->EndLoc = E; 326 return Op; 327 } 328 329 static ARMOperand *CreateRegList(unsigned RegStart, unsigned Number, 330 SMLoc S, SMLoc E) { 331 ARMOperand *Op = new ARMOperand(RegisterList); 332 Op->RegList.RegStart = RegStart; 333 Op->RegList.Number = Number; 334 Op->StartLoc = S; 335 Op->EndLoc = E; 336 return Op; 337 } 338 339 static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { 340 ARMOperand *Op = new ARMOperand(Immediate); 341 Op->Imm.Val = Val; 342 Op->StartLoc = S; 343 Op->EndLoc = E; 344 return Op; 345 } 346 347 static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 348 const MCExpr *Offset, unsigned OffsetRegNum, 349 bool OffsetRegShifted, enum ShiftType ShiftType, 350 const MCExpr *ShiftAmount, bool Preindexed, 351 bool Postindexed, bool Negative, bool Writeback, 352 SMLoc S, SMLoc E) { 353 ARMOperand *Op = new ARMOperand(Memory); 354 Op->Mem.BaseRegNum = BaseRegNum; 355 Op->Mem.OffsetIsReg = OffsetIsReg; 356 Op->Mem.Offset = Offset; 357 Op->Mem.OffsetRegNum = OffsetRegNum; 358 Op->Mem.OffsetRegShifted = OffsetRegShifted; 359 Op->Mem.ShiftType = ShiftType; 360 Op->Mem.ShiftAmount = ShiftAmount; 361 Op->Mem.Preindexed = Preindexed; 362 Op->Mem.Postindexed = Postindexed; 363 Op->Mem.Negative = Negative; 364 Op->Mem.Writeback = Writeback; 365 366 Op->StartLoc = S; 367 Op->EndLoc = E; 368 return Op; 369 } 370 371private: 372 ARMOperand(KindTy K) : Kind(K) {} 373}; 374 375} // end anonymous namespace. 376 377void ARMOperand::dump(raw_ostream &OS) const { 378 switch (Kind) { 379 case CondCode: 380 OS << ARMCondCodeToString(getCondCode()); 381 break; 382 case Immediate: 383 getImm()->print(OS); 384 break; 385 case Memory: 386 OS << "<memory>"; 387 break; 388 case Register: 389 OS << "<register " << getReg() << ">"; 390 break; 391 case RegisterList: { 392 OS << "<register_list "; 393 std::pair<unsigned, unsigned> List = getRegList(); 394 unsigned RegEnd = List.first + List.second; 395 396 for (unsigned Idx = List.first; Idx < RegEnd; ) { 397 OS << Idx; 398 if (++Idx < RegEnd) OS << ", "; 399 } 400 401 OS << ">"; 402 break; 403 } 404 case Token: 405 OS << "'" << getToken() << "'"; 406 break; 407 } 408} 409 410/// @name Auto-generated Match Functions 411/// { 412 413static unsigned MatchRegisterName(StringRef Name); 414 415/// } 416 417/// Try to parse a register name. The token must be an Identifier when called, 418/// and if it is a register name the token is eaten and the register number is 419/// returned. Otherwise return -1. 420/// 421int ARMAsmParser::TryParseRegister() { 422 const AsmToken &Tok = Parser.getTok(); 423 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 424 425 // FIXME: Validate register for the current architecture; we have to do 426 // validation later, so maybe there is no need for this here. 427 unsigned RegNum = MatchRegisterName(Tok.getString()); 428 if (RegNum == 0) 429 return -1; 430 Parser.Lex(); // Eat identifier token. 431 return RegNum; 432} 433 434 435/// Try to parse a register name. The token must be an Identifier when called, 436/// and if it is a register name the token is eaten and the register number is 437/// returned. Otherwise return -1. 438/// 439/// TODO this is likely to change to allow different register types and or to 440/// parse for a specific register type. 441ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() { 442 SMLoc S = Parser.getTok().getLoc(); 443 int RegNo = TryParseRegister(); 444 if (RegNo == -1) return 0; 445 446 SMLoc E = Parser.getTok().getLoc(); 447 448 bool Writeback = false; 449 const AsmToken &ExclaimTok = Parser.getTok(); 450 if (ExclaimTok.is(AsmToken::Exclaim)) { 451 E = ExclaimTok.getLoc(); 452 Writeback = true; 453 Parser.Lex(); // Eat exclaim token 454 } 455 456 return ARMOperand::CreateReg(RegNo, Writeback, S, E); 457} 458 459/// Parse a register list, return it if successful else return null. The first 460/// token must be a '{' when called. 461ARMOperand *ARMAsmParser::ParseRegisterList() { 462 SMLoc S, E; 463 assert(Parser.getTok().is(AsmToken::LCurly) && 464 "Token is not a Left Curly Brace"); 465 S = Parser.getTok().getLoc(); 466 Parser.Lex(); // Eat left curly brace token. 467 468 const AsmToken &RegTok = Parser.getTok(); 469 SMLoc RegLoc = RegTok.getLoc(); 470 if (RegTok.isNot(AsmToken::Identifier)) { 471 Error(RegLoc, "register expected"); 472 return 0; 473 } 474 int RegNum = TryParseRegister(); 475 if (RegNum == -1) { 476 Error(RegLoc, "register expected"); 477 return 0; 478 } 479 480 unsigned RegList = 1 << RegNum; 481 482 int HighRegNum = RegNum; 483 // TODO ranges like "{Rn-Rm}" 484 while (Parser.getTok().is(AsmToken::Comma)) { 485 Parser.Lex(); // Eat comma token. 486 487 const AsmToken &RegTok = Parser.getTok(); 488 SMLoc RegLoc = RegTok.getLoc(); 489 if (RegTok.isNot(AsmToken::Identifier)) { 490 Error(RegLoc, "register expected"); 491 return 0; 492 } 493 int RegNum = TryParseRegister(); 494 if (RegNum == -1) { 495 Error(RegLoc, "register expected"); 496 return 0; 497 } 498 499 if (RegList & (1 << RegNum)) 500 Warning(RegLoc, "register duplicated in register list"); 501 else if (RegNum <= HighRegNum) 502 Warning(RegLoc, "register not in ascending order in register list"); 503 RegList |= 1 << RegNum; 504 HighRegNum = RegNum; 505 } 506 const AsmToken &RCurlyTok = Parser.getTok(); 507 if (RCurlyTok.isNot(AsmToken::RCurly)) { 508 Error(RCurlyTok.getLoc(), "'}' expected"); 509 return 0; 510 } 511 E = RCurlyTok.getLoc(); 512 Parser.Lex(); // Eat left curly brace token. 513 514 // FIXME: Need to return an operand! 515 Error(E, "FIXME: register list parsing not implemented"); 516 return 0; 517} 518 519/// Parse an arm memory expression, return false if successful else return true 520/// or an error. The first token must be a '[' when called. 521/// TODO Only preindexing and postindexing addressing are started, unindexed 522/// with option, etc are still to do. 523ARMOperand *ARMAsmParser::ParseMemory() { 524 SMLoc S, E; 525 assert(Parser.getTok().is(AsmToken::LBrac) && 526 "Token is not a Left Bracket"); 527 S = Parser.getTok().getLoc(); 528 Parser.Lex(); // Eat left bracket token. 529 530 const AsmToken &BaseRegTok = Parser.getTok(); 531 if (BaseRegTok.isNot(AsmToken::Identifier)) { 532 Error(BaseRegTok.getLoc(), "register expected"); 533 return 0; 534 } 535 int BaseRegNum = TryParseRegister(); 536 if (BaseRegNum == -1) { 537 Error(BaseRegTok.getLoc(), "register expected"); 538 return 0; 539 } 540 541 bool Preindexed = false; 542 bool Postindexed = false; 543 bool OffsetIsReg = false; 544 bool Negative = false; 545 bool Writeback = false; 546 547 // First look for preindexed address forms, that is after the "[Rn" we now 548 // have to see if the next token is a comma. 549 const AsmToken &Tok = Parser.getTok(); 550 if (Tok.is(AsmToken::Comma)) { 551 Preindexed = true; 552 Parser.Lex(); // Eat comma token. 553 int OffsetRegNum; 554 bool OffsetRegShifted; 555 enum ShiftType ShiftType; 556 const MCExpr *ShiftAmount; 557 const MCExpr *Offset; 558 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, 559 Offset, OffsetIsReg, OffsetRegNum, E)) 560 return 0; 561 const AsmToken &RBracTok = Parser.getTok(); 562 if (RBracTok.isNot(AsmToken::RBrac)) { 563 Error(RBracTok.getLoc(), "']' expected"); 564 return 0; 565 } 566 E = RBracTok.getLoc(); 567 Parser.Lex(); // Eat right bracket token. 568 569 const AsmToken &ExclaimTok = Parser.getTok(); 570 if (ExclaimTok.is(AsmToken::Exclaim)) { 571 E = ExclaimTok.getLoc(); 572 Writeback = true; 573 Parser.Lex(); // Eat exclaim token 574 } 575 return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 576 OffsetRegShifted, ShiftType, ShiftAmount, 577 Preindexed, Postindexed, Negative, Writeback, 578 S, E); 579 } 580 // The "[Rn" we have so far was not followed by a comma. 581 else if (Tok.is(AsmToken::RBrac)) { 582 // If there's anything other than the right brace, this is a post indexing 583 // addressing form. 584 E = Tok.getLoc(); 585 Parser.Lex(); // Eat right bracket token. 586 587 int OffsetRegNum = 0; 588 bool OffsetRegShifted = false; 589 enum ShiftType ShiftType; 590 const MCExpr *ShiftAmount; 591 const MCExpr *Offset = 0; 592 593 const AsmToken &NextTok = Parser.getTok(); 594 if (NextTok.isNot(AsmToken::EndOfStatement)) { 595 Postindexed = true; 596 Writeback = true; 597 if (NextTok.isNot(AsmToken::Comma)) { 598 Error(NextTok.getLoc(), "',' expected"); 599 return 0; 600 } 601 Parser.Lex(); // Eat comma token. 602 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, 603 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, 604 E)) 605 return 0; 606 } 607 608 return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 609 OffsetRegShifted, ShiftType, ShiftAmount, 610 Preindexed, Postindexed, Negative, Writeback, 611 S, E); 612 } 613 614 return 0; 615} 616 617/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," 618/// we will parse the following (were +/- means that a plus or minus is 619/// optional): 620/// +/-Rm 621/// +/-Rm, shift 622/// #offset 623/// we return false on success or an error otherwise. 624bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, 625 bool &OffsetRegShifted, 626 enum ShiftType &ShiftType, 627 const MCExpr *&ShiftAmount, 628 const MCExpr *&Offset, 629 bool &OffsetIsReg, 630 int &OffsetRegNum, 631 SMLoc &E) { 632 Negative = false; 633 OffsetRegShifted = false; 634 OffsetIsReg = false; 635 OffsetRegNum = -1; 636 const AsmToken &NextTok = Parser.getTok(); 637 E = NextTok.getLoc(); 638 if (NextTok.is(AsmToken::Plus)) 639 Parser.Lex(); // Eat plus token. 640 else if (NextTok.is(AsmToken::Minus)) { 641 Negative = true; 642 Parser.Lex(); // Eat minus token 643 } 644 // See if there is a register following the "[Rn," or "[Rn]," we have so far. 645 const AsmToken &OffsetRegTok = Parser.getTok(); 646 if (OffsetRegTok.is(AsmToken::Identifier)) { 647 SMLoc CurLoc = OffsetRegTok.getLoc(); 648 OffsetRegNum = TryParseRegister(); 649 if (OffsetRegNum != -1) { 650 OffsetIsReg = true; 651 E = CurLoc; 652 } 653 } 654 655 // If we parsed a register as the offset then there can be a shift after that. 656 if (OffsetRegNum != -1) { 657 // Look for a comma then a shift 658 const AsmToken &Tok = Parser.getTok(); 659 if (Tok.is(AsmToken::Comma)) { 660 Parser.Lex(); // Eat comma token. 661 662 const AsmToken &Tok = Parser.getTok(); 663 if (ParseShift(ShiftType, ShiftAmount, E)) 664 return Error(Tok.getLoc(), "shift expected"); 665 OffsetRegShifted = true; 666 } 667 } 668 else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" 669 // Look for #offset following the "[Rn," or "[Rn]," 670 const AsmToken &HashTok = Parser.getTok(); 671 if (HashTok.isNot(AsmToken::Hash)) 672 return Error(HashTok.getLoc(), "'#' expected"); 673 674 Parser.Lex(); // Eat hash token. 675 676 if (getParser().ParseExpression(Offset)) 677 return true; 678 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 679 } 680 return false; 681} 682 683/// ParseShift as one of these two: 684/// ( lsl | lsr | asr | ror ) , # shift_amount 685/// rrx 686/// and returns true if it parses a shift otherwise it returns false. 687bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, 688 SMLoc &E) { 689 const AsmToken &Tok = Parser.getTok(); 690 if (Tok.isNot(AsmToken::Identifier)) 691 return true; 692 StringRef ShiftName = Tok.getString(); 693 if (ShiftName == "lsl" || ShiftName == "LSL") 694 St = Lsl; 695 else if (ShiftName == "lsr" || ShiftName == "LSR") 696 St = Lsr; 697 else if (ShiftName == "asr" || ShiftName == "ASR") 698 St = Asr; 699 else if (ShiftName == "ror" || ShiftName == "ROR") 700 St = Ror; 701 else if (ShiftName == "rrx" || ShiftName == "RRX") 702 St = Rrx; 703 else 704 return true; 705 Parser.Lex(); // Eat shift type token. 706 707 // Rrx stands alone. 708 if (St == Rrx) 709 return false; 710 711 // Otherwise, there must be a '#' and a shift amount. 712 const AsmToken &HashTok = Parser.getTok(); 713 if (HashTok.isNot(AsmToken::Hash)) 714 return Error(HashTok.getLoc(), "'#' expected"); 715 Parser.Lex(); // Eat hash token. 716 717 if (getParser().ParseExpression(ShiftAmount)) 718 return true; 719 720 return false; 721} 722 723/// Parse a arm instruction operand. For now this parses the operand regardless 724/// of the mnemonic. 725ARMOperand *ARMAsmParser::ParseOperand() { 726 SMLoc S, E; 727 728 switch (getLexer().getKind()) { 729 case AsmToken::Identifier: 730 if (ARMOperand *Op = TryParseRegisterWithWriteBack()) 731 return Op; 732 733 // This was not a register so parse other operands that start with an 734 // identifier (like labels) as expressions and create them as immediates. 735 const MCExpr *IdVal; 736 S = Parser.getTok().getLoc(); 737 if (getParser().ParseExpression(IdVal)) 738 return 0; 739 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 740 return ARMOperand::CreateImm(IdVal, S, E); 741 case AsmToken::LBrac: 742 return ParseMemory(); 743 case AsmToken::LCurly: 744 return ParseRegisterList(); 745 case AsmToken::Hash: 746 // #42 -> immediate. 747 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate 748 S = Parser.getTok().getLoc(); 749 Parser.Lex(); 750 const MCExpr *ImmVal; 751 if (getParser().ParseExpression(ImmVal)) 752 return 0; 753 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 754 return ARMOperand::CreateImm(ImmVal, S, E); 755 default: 756 Error(Parser.getTok().getLoc(), "unexpected token in operand"); 757 return 0; 758 } 759} 760 761/// Parse an arm instruction mnemonic followed by its operands. 762bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, 763 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 764 // Create the leading tokens for the mnemonic, split by '.' characters. 765 size_t Start = 0, Next = Name.find('.'); 766 StringRef Head = Name.slice(Start, Next); 767 768 // Determine the predicate, if any. 769 // 770 // FIXME: We need a way to check whether a prefix supports predication, 771 // otherwise we will end up with an ambiguity for instructions that happen to 772 // end with a predicate name. 773 // FIXME: Likewise, some arithmetic instructions have an 's' prefix which 774 // indicates to update the condition codes. Those instructions have an 775 // additional immediate operand which encodes the prefix as reg0 or CPSR. 776 // Just checking for a suffix of 's' definitely creates ambiguities; e.g, 777 // the SMMLS instruction. 778 unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) 779 .Case("eq", ARMCC::EQ) 780 .Case("ne", ARMCC::NE) 781 .Case("hs", ARMCC::HS) 782 .Case("lo", ARMCC::LO) 783 .Case("mi", ARMCC::MI) 784 .Case("pl", ARMCC::PL) 785 .Case("vs", ARMCC::VS) 786 .Case("vc", ARMCC::VC) 787 .Case("hi", ARMCC::HI) 788 .Case("ls", ARMCC::LS) 789 .Case("ge", ARMCC::GE) 790 .Case("lt", ARMCC::LT) 791 .Case("gt", ARMCC::GT) 792 .Case("le", ARMCC::LE) 793 .Case("al", ARMCC::AL) 794 .Default(~0U); 795 796 if (CC == ~0U || 797 (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) { 798 CC = ARMCC::AL; 799 } else { 800 Head = Head.slice(0, Head.size() - 2); 801 } 802 803 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 804 // FIXME: Should only add this operand for predicated instructions 805 Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc)); 806 807 // Add the remaining tokens in the mnemonic. 808 while (Next != StringRef::npos) { 809 Start = Next; 810 Next = Name.find('.', Start + 1); 811 Head = Name.slice(Start, Next); 812 813 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 814 } 815 816 // Read the remaining operands. 817 if (getLexer().isNot(AsmToken::EndOfStatement)) { 818 // Read the first operand. 819 if (ARMOperand *Op = ParseOperand()) 820 Operands.push_back(Op); 821 else { 822 Parser.EatToEndOfStatement(); 823 return true; 824 } 825 826 while (getLexer().is(AsmToken::Comma)) { 827 Parser.Lex(); // Eat the comma. 828 829 // Parse and remember the operand. 830 if (ARMOperand *Op = ParseOperand()) 831 Operands.push_back(Op); 832 else { 833 Parser.EatToEndOfStatement(); 834 return true; 835 } 836 } 837 } 838 839 if (getLexer().isNot(AsmToken::EndOfStatement)) { 840 Parser.EatToEndOfStatement(); 841 return TokError("unexpected token in argument list"); 842 } 843 Parser.Lex(); // Consume the EndOfStatement 844 return false; 845} 846 847bool ARMAsmParser:: 848MatchAndEmitInstruction(SMLoc IDLoc, 849 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 850 MCStreamer &Out) { 851 MCInst Inst; 852 unsigned ErrorInfo; 853 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { 854 case Match_Success: 855 Out.EmitInstruction(Inst); 856 return false; 857 858 case Match_MissingFeature: 859 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 860 return true; 861 case Match_InvalidOperand: { 862 SMLoc ErrorLoc = IDLoc; 863 if (ErrorInfo != ~0U) { 864 if (ErrorInfo >= Operands.size()) 865 return Error(IDLoc, "too few operands for instruction"); 866 867 ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); 868 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 869 } 870 871 return Error(ErrorLoc, "invalid operand for instruction"); 872 } 873 case Match_MnemonicFail: 874 return Error(IDLoc, "unrecognized instruction mnemonic"); 875 } 876 877 llvm_unreachable("Implement any new match types added!"); 878} 879 880 881 882/// ParseDirective parses the arm specific directives 883bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 884 StringRef IDVal = DirectiveID.getIdentifier(); 885 if (IDVal == ".word") 886 return ParseDirectiveWord(4, DirectiveID.getLoc()); 887 else if (IDVal == ".thumb") 888 return ParseDirectiveThumb(DirectiveID.getLoc()); 889 else if (IDVal == ".thumb_func") 890 return ParseDirectiveThumbFunc(DirectiveID.getLoc()); 891 else if (IDVal == ".code") 892 return ParseDirectiveCode(DirectiveID.getLoc()); 893 else if (IDVal == ".syntax") 894 return ParseDirectiveSyntax(DirectiveID.getLoc()); 895 return true; 896} 897 898/// ParseDirectiveWord 899/// ::= .word [ expression (, expression)* ] 900bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 901 if (getLexer().isNot(AsmToken::EndOfStatement)) { 902 for (;;) { 903 const MCExpr *Value; 904 if (getParser().ParseExpression(Value)) 905 return true; 906 907 getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); 908 909 if (getLexer().is(AsmToken::EndOfStatement)) 910 break; 911 912 // FIXME: Improve diagnostic. 913 if (getLexer().isNot(AsmToken::Comma)) 914 return Error(L, "unexpected token in directive"); 915 Parser.Lex(); 916 } 917 } 918 919 Parser.Lex(); 920 return false; 921} 922 923/// ParseDirectiveThumb 924/// ::= .thumb 925bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { 926 if (getLexer().isNot(AsmToken::EndOfStatement)) 927 return Error(L, "unexpected token in directive"); 928 Parser.Lex(); 929 930 // TODO: set thumb mode 931 // TODO: tell the MC streamer the mode 932 // getParser().getStreamer().Emit???(); 933 return false; 934} 935 936/// ParseDirectiveThumbFunc 937/// ::= .thumbfunc symbol_name 938bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { 939 const AsmToken &Tok = Parser.getTok(); 940 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) 941 return Error(L, "unexpected token in .thumb_func directive"); 942 StringRef Name = Tok.getString(); 943 Parser.Lex(); // Consume the identifier token. 944 if (getLexer().isNot(AsmToken::EndOfStatement)) 945 return Error(L, "unexpected token in directive"); 946 Parser.Lex(); 947 948 // Mark symbol as a thumb symbol. 949 MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); 950 getParser().getStreamer().EmitThumbFunc(Func); 951 return false; 952} 953 954/// ParseDirectiveSyntax 955/// ::= .syntax unified | divided 956bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { 957 const AsmToken &Tok = Parser.getTok(); 958 if (Tok.isNot(AsmToken::Identifier)) 959 return Error(L, "unexpected token in .syntax directive"); 960 StringRef Mode = Tok.getString(); 961 if (Mode == "unified" || Mode == "UNIFIED") 962 Parser.Lex(); 963 else if (Mode == "divided" || Mode == "DIVIDED") 964 Parser.Lex(); 965 else 966 return Error(L, "unrecognized syntax mode in .syntax directive"); 967 968 if (getLexer().isNot(AsmToken::EndOfStatement)) 969 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 970 Parser.Lex(); 971 972 // TODO tell the MC streamer the mode 973 // getParser().getStreamer().Emit???(); 974 return false; 975} 976 977/// ParseDirectiveCode 978/// ::= .code 16 | 32 979bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { 980 const AsmToken &Tok = Parser.getTok(); 981 if (Tok.isNot(AsmToken::Integer)) 982 return Error(L, "unexpected token in .code directive"); 983 int64_t Val = Parser.getTok().getIntVal(); 984 if (Val == 16) 985 Parser.Lex(); 986 else if (Val == 32) 987 Parser.Lex(); 988 else 989 return Error(L, "invalid operand to .code directive"); 990 991 if (getLexer().isNot(AsmToken::EndOfStatement)) 992 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 993 Parser.Lex(); 994 995 if (Val == 16) 996 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 997 else 998 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 999 1000 return false; 1001} 1002 1003extern "C" void LLVMInitializeARMAsmLexer(); 1004 1005/// Force static initialization. 1006extern "C" void LLVMInitializeARMAsmParser() { 1007 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 1008 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 1009 LLVMInitializeARMAsmLexer(); 1010} 1011 1012#define GET_REGISTER_MATCHER 1013#define GET_MATCHER_IMPLEMENTATION 1014#include "ARMGenAsmMatcher.inc" 1015