ARMAsmParser.cpp revision 7729e06c128be01fc564870d5ea3d22d236dddb5
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "ARMAddressingModes.h" 12#include "ARMSubtarget.h" 13#include "llvm/MC/MCParser/MCAsmLexer.h" 14#include "llvm/MC/MCParser/MCAsmParser.h" 15#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16#include "llvm/MC/MCContext.h" 17#include "llvm/MC/MCStreamer.h" 18#include "llvm/MC/MCExpr.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/Target/TargetRegistry.h" 21#include "llvm/Target/TargetAsmParser.h" 22#include "llvm/Support/SourceMgr.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/ADT/SmallVector.h" 25#include "llvm/ADT/StringSwitch.h" 26#include "llvm/ADT/Twine.h" 27using namespace llvm; 28 29// The shift types for register controlled shifts in arm memory addressing 30enum ShiftType { 31 Lsl, 32 Lsr, 33 Asr, 34 Ror, 35 Rrx 36}; 37 38namespace { 39 40class ARMOperand; 41 42class ARMAsmParser : public TargetAsmParser { 43 MCAsmParser &Parser; 44 TargetMachine &TM; 45 46 MCAsmParser &getParser() const { return Parser; } 47 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 48 49 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 50 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 51 52 int TryParseRegister(); 53 ARMOperand *TryParseRegisterWithWriteBack(); 54 ARMOperand *ParseRegisterList(); 55 ARMOperand *ParseMemory(); 56 ARMOperand *ParseOperand(); 57 58 bool ParseMemoryOffsetReg(bool &Negative, 59 bool &OffsetRegShifted, 60 enum ShiftType &ShiftType, 61 const MCExpr *&ShiftAmount, 62 const MCExpr *&Offset, 63 bool &OffsetIsReg, 64 int &OffsetRegNum, 65 SMLoc &E); 66 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); 67 bool ParseDirectiveWord(unsigned Size, SMLoc L); 68 bool ParseDirectiveThumb(SMLoc L); 69 bool ParseDirectiveThumbFunc(SMLoc L); 70 bool ParseDirectiveCode(SMLoc L); 71 bool ParseDirectiveSyntax(SMLoc L); 72 73 bool MatchAndEmitInstruction(SMLoc IDLoc, 74 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 75 MCStreamer &Out); 76 77 /// @name Auto-generated Match Functions 78 /// { 79 80#define GET_ASSEMBLER_HEADER 81#include "ARMGenAsmMatcher.inc" 82 83 /// } 84 85public: 86 ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) 87 : TargetAsmParser(T), Parser(_Parser), TM(_TM) { 88 // Initialize the set of available features. 89 setAvailableFeatures(ComputeAvailableFeatures( 90 &TM.getSubtarget<ARMSubtarget>())); 91 } 92 93 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 94 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 95 virtual bool ParseDirective(AsmToken DirectiveID); 96}; 97} // end anonymous namespace 98 99namespace { 100 101/// ARMOperand - Instances of this class represent a parsed ARM machine 102/// instruction. 103class ARMOperand : public MCParsedAsmOperand { 104 enum KindTy { 105 CondCode, 106 Immediate, 107 Memory, 108 Register, 109 RegisterList, 110 Token 111 } Kind; 112 113 SMLoc StartLoc, EndLoc; 114 115 union { 116 struct { 117 ARMCC::CondCodes Val; 118 } CC; 119 120 struct { 121 const char *Data; 122 unsigned Length; 123 } Tok; 124 125 struct { 126 unsigned RegNum; 127 bool Writeback; 128 } Reg; 129 130 struct { 131 std::vector<unsigned> *Registers; 132 } RegList; 133 134 struct { 135 const MCExpr *Val; 136 } Imm; 137 138 // This is for all forms of ARM address expressions 139 struct { 140 unsigned BaseRegNum; 141 unsigned OffsetRegNum; // used when OffsetIsReg is true 142 const MCExpr *Offset; // used when OffsetIsReg is false 143 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 144 enum ShiftType ShiftType; // used when OffsetRegShifted is true 145 unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true 146 unsigned Preindexed : 1; 147 unsigned Postindexed : 1; 148 unsigned OffsetIsReg : 1; 149 unsigned Negative : 1; // only used when OffsetIsReg is true 150 unsigned Writeback : 1; 151 } Mem; 152 }; 153 154 ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} 155public: 156 ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { 157 Kind = o.Kind; 158 StartLoc = o.StartLoc; 159 EndLoc = o.EndLoc; 160 switch (Kind) { 161 case CondCode: 162 CC = o.CC; 163 break; 164 case Token: 165 Tok = o.Tok; 166 break; 167 case Register: 168 Reg = o.Reg; 169 break; 170 case RegisterList: 171 RegList = o.RegList; 172 break; 173 case Immediate: 174 Imm = o.Imm; 175 break; 176 case Memory: 177 Mem = o.Mem; 178 break; 179 } 180 } 181 182 /// getStartLoc - Get the location of the first token of this operand. 183 SMLoc getStartLoc() const { return StartLoc; } 184 /// getEndLoc - Get the location of the last token of this operand. 185 SMLoc getEndLoc() const { return EndLoc; } 186 187 ARMCC::CondCodes getCondCode() const { 188 assert(Kind == CondCode && "Invalid access!"); 189 return CC.Val; 190 } 191 192 StringRef getToken() const { 193 assert(Kind == Token && "Invalid access!"); 194 return StringRef(Tok.Data, Tok.Length); 195 } 196 197 unsigned getReg() const { 198 assert(Kind == Register && "Invalid access!"); 199 return Reg.RegNum; 200 } 201 202 const std::vector<unsigned> &getRegList() const { 203 assert(Kind == RegisterList && "Invalid access!"); 204 return *RegList.Registers; 205 } 206 207 const MCExpr *getImm() const { 208 assert(Kind == Immediate && "Invalid access!"); 209 return Imm.Val; 210 } 211 212 bool isCondCode() const { return Kind == CondCode; } 213 bool isImm() const { return Kind == Immediate; } 214 bool isReg() const { return Kind == Register; } 215 bool isRegList() const { return Kind == RegisterList; } 216 bool isToken() const { return Kind == Token; } 217 bool isMemory() const { return Kind == Memory; } 218 bool isMemMode5() const { 219 if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted || 220 Mem.Writeback || Mem.Negative) 221 return false; 222 // If there is an offset expression, make sure it's valid. 223 if (!Mem.Offset) 224 return true; 225 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 226 if (!CE) 227 return false; 228 // The offset must be a multiple of 4 in the range 0-1020. 229 int64_t Value = CE->getValue(); 230 return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); 231 } 232 233 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 234 // Add as immediates when possible. Null MCExpr = 0. 235 if (Expr == 0) 236 Inst.addOperand(MCOperand::CreateImm(0)); 237 else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 238 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 239 else 240 Inst.addOperand(MCOperand::CreateExpr(Expr)); 241 } 242 243 void addCondCodeOperands(MCInst &Inst, unsigned N) const { 244 assert(N == 2 && "Invalid number of operands!"); 245 Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); 246 // FIXME: What belongs here? 247 Inst.addOperand(MCOperand::CreateReg(0)); 248 } 249 250 void addRegOperands(MCInst &Inst, unsigned N) const { 251 assert(N == 1 && "Invalid number of operands!"); 252 Inst.addOperand(MCOperand::CreateReg(getReg())); 253 } 254 255 void addRegListOperands(MCInst &Inst, unsigned N) const { 256 assert(N == 1 && "Invalid number of operands!"); 257 const std::vector<unsigned> &RegList = getRegList(); 258 for (std::vector<unsigned>::const_iterator 259 I = RegList.begin(), E = RegList.end(); I != E; ++I) 260 Inst.addOperand(MCOperand::CreateReg(*I)); 261 } 262 263 void addImmOperands(MCInst &Inst, unsigned N) const { 264 assert(N == 1 && "Invalid number of operands!"); 265 addExpr(Inst, getImm()); 266 } 267 268 void addMemMode5Operands(MCInst &Inst, unsigned N) const { 269 assert(N == 2 && isMemMode5() && "Invalid number of operands!"); 270 271 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 272 assert(!Mem.OffsetIsReg && "Invalid mode 5 operand"); 273 274 // FIXME: #-0 is encoded differently than #0. Does the parser preserve 275 // the difference? 276 if (Mem.Offset) { 277 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 278 assert(CE && "Non-constant mode 5 offset operand!"); 279 280 // The MCInst offset operand doesn't include the low two bits (like 281 // the instruction encoding). 282 int64_t Offset = CE->getValue() / 4; 283 if (Offset >= 0) 284 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, 285 Offset))); 286 else 287 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, 288 -Offset))); 289 } else { 290 Inst.addOperand(MCOperand::CreateImm(0)); 291 } 292 } 293 294 virtual void dump(raw_ostream &OS) const; 295 296 static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { 297 ARMOperand *Op = new ARMOperand(CondCode); 298 Op->CC.Val = CC; 299 Op->StartLoc = S; 300 Op->EndLoc = S; 301 return Op; 302 } 303 304 static ARMOperand *CreateToken(StringRef Str, SMLoc S) { 305 ARMOperand *Op = new ARMOperand(Token); 306 Op->Tok.Data = Str.data(); 307 Op->Tok.Length = Str.size(); 308 Op->StartLoc = S; 309 Op->EndLoc = S; 310 return Op; 311 } 312 313 static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S, 314 SMLoc E) { 315 ARMOperand *Op = new ARMOperand(Register); 316 Op->Reg.RegNum = RegNum; 317 Op->Reg.Writeback = Writeback; 318 Op->StartLoc = S; 319 Op->EndLoc = E; 320 return Op; 321 } 322 323 static ARMOperand * 324 CreateRegList(std::vector<std::pair<unsigned, SMLoc> > &Regs, 325 SMLoc S, SMLoc E) { 326 ARMOperand *Op = new ARMOperand(RegisterList); 327 Op->RegList.Registers = new std::vector<unsigned>(); 328 for (std::vector<std::pair<unsigned, SMLoc> >::iterator 329 I = Regs.begin(), E = Regs.end(); I != E; ++I) 330 Op->RegList.Registers->push_back(I->first); 331 std::sort(Op->RegList.Registers->begin(), Op->RegList.Registers->end()); 332 Op->StartLoc = S; 333 Op->EndLoc = E; 334 return Op; 335 } 336 337 static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { 338 ARMOperand *Op = new ARMOperand(Immediate); 339 Op->Imm.Val = Val; 340 Op->StartLoc = S; 341 Op->EndLoc = E; 342 return Op; 343 } 344 345 static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 346 const MCExpr *Offset, unsigned OffsetRegNum, 347 bool OffsetRegShifted, enum ShiftType ShiftType, 348 const MCExpr *ShiftAmount, bool Preindexed, 349 bool Postindexed, bool Negative, bool Writeback, 350 SMLoc S, SMLoc E) { 351 ARMOperand *Op = new ARMOperand(Memory); 352 Op->Mem.BaseRegNum = BaseRegNum; 353 Op->Mem.OffsetIsReg = OffsetIsReg; 354 Op->Mem.Offset = Offset; 355 Op->Mem.OffsetRegNum = OffsetRegNum; 356 Op->Mem.OffsetRegShifted = OffsetRegShifted; 357 Op->Mem.ShiftType = ShiftType; 358 Op->Mem.ShiftAmount = ShiftAmount; 359 Op->Mem.Preindexed = Preindexed; 360 Op->Mem.Postindexed = Postindexed; 361 Op->Mem.Negative = Negative; 362 Op->Mem.Writeback = Writeback; 363 364 Op->StartLoc = S; 365 Op->EndLoc = E; 366 return Op; 367 } 368}; 369 370} // end anonymous namespace. 371 372void ARMOperand::dump(raw_ostream &OS) const { 373 switch (Kind) { 374 case CondCode: 375 OS << ARMCondCodeToString(getCondCode()); 376 break; 377 case Immediate: 378 getImm()->print(OS); 379 break; 380 case Memory: 381 OS << "<memory>"; 382 break; 383 case Register: 384 OS << "<register " << getReg() << ">"; 385 break; 386 case RegisterList: { 387 OS << "<register_list "; 388 389 const std::vector<unsigned> &RegList = getRegList(); 390 for (std::vector<unsigned>::const_iterator 391 I = RegList.begin(), E = RegList.end(); I != E; ) { 392 OS << *I; 393 if (++I < E) OS << ", "; 394 } 395 396 OS << ">"; 397 break; 398 } 399 case Token: 400 OS << "'" << getToken() << "'"; 401 break; 402 } 403} 404 405/// @name Auto-generated Match Functions 406/// { 407 408static unsigned MatchRegisterName(StringRef Name); 409 410/// } 411 412/// Try to parse a register name. The token must be an Identifier when called, 413/// and if it is a register name the token is eaten and the register number is 414/// returned. Otherwise return -1. 415/// 416int ARMAsmParser::TryParseRegister() { 417 const AsmToken &Tok = Parser.getTok(); 418 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 419 420 // FIXME: Validate register for the current architecture; we have to do 421 // validation later, so maybe there is no need for this here. 422 unsigned RegNum = MatchRegisterName(Tok.getString()); 423 if (RegNum == 0) 424 return -1; 425 Parser.Lex(); // Eat identifier token. 426 return RegNum; 427} 428 429 430/// Try to parse a register name. The token must be an Identifier when called, 431/// and if it is a register name the token is eaten and the register number is 432/// returned. Otherwise return -1. 433/// 434/// TODO this is likely to change to allow different register types and or to 435/// parse for a specific register type. 436ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() { 437 SMLoc S = Parser.getTok().getLoc(); 438 int RegNo = TryParseRegister(); 439 if (RegNo == -1) 440 return 0; 441 442 SMLoc E = Parser.getTok().getLoc(); 443 444 bool Writeback = false; 445 const AsmToken &ExclaimTok = Parser.getTok(); 446 if (ExclaimTok.is(AsmToken::Exclaim)) { 447 E = ExclaimTok.getLoc(); 448 Writeback = true; 449 Parser.Lex(); // Eat exclaim token 450 } 451 452 return ARMOperand::CreateReg(RegNo, Writeback, S, E); 453} 454 455/// Parse a register list, return it if successful else return null. The first 456/// token must be a '{' when called. 457ARMOperand *ARMAsmParser::ParseRegisterList() { 458 assert(Parser.getTok().is(AsmToken::LCurly) && 459 "Token is not a Left Curly Brace"); 460 SMLoc S = Parser.getTok().getLoc(); 461 462 // Read the rest of the registers in the list. 463 unsigned PrevRegNum = 0; 464 std::vector<std::pair<unsigned, SMLoc> > Registers; 465 Registers.reserve(32); 466 467 do { 468 bool IsRange = Parser.getTok().is(AsmToken::Minus); 469 Parser.Lex(); // Eat non-identifier token. 470 471 const AsmToken &RegTok = Parser.getTok(); 472 SMLoc RegLoc = RegTok.getLoc(); 473 if (RegTok.isNot(AsmToken::Identifier)) { 474 Error(RegLoc, "register expected"); 475 return 0; 476 } 477 478 int RegNum = TryParseRegister(); 479 if (RegNum == -1) { 480 Error(RegLoc, "register expected"); 481 return 0; 482 } 483 484 if (IsRange) { 485 int Reg = PrevRegNum; 486 do { 487 ++Reg; 488 Registers.push_back(std::make_pair(Reg, RegLoc)); 489 } while (Reg != RegNum); 490 } else { 491 Registers.push_back(std::make_pair(RegNum, RegLoc)); 492 } 493 494 PrevRegNum = RegNum; 495 } while (Parser.getTok().is(AsmToken::Comma) || 496 Parser.getTok().is(AsmToken::Minus)); 497 498 // Process the right curly brace of the list. 499 const AsmToken &RCurlyTok = Parser.getTok(); 500 if (RCurlyTok.isNot(AsmToken::RCurly)) { 501 Error(RCurlyTok.getLoc(), "'}' expected"); 502 return 0; 503 } 504 505 SMLoc E = RCurlyTok.getLoc(); 506 Parser.Lex(); // Eat right curly brace token. 507 508 // Verify the register list. 509 std::vector<std::pair<unsigned, SMLoc> >::const_iterator 510 RI = Registers.begin(), RE = Registers.end(); 511 512 unsigned HighRegNum = RI->first; 513 DenseMap<unsigned, bool> RegMap; 514 RegMap[RI->first] = true; 515 516 for (++RI; RI != RE; ++RI) { 517 const std::pair<unsigned, SMLoc> &RegInfo = *RI; 518 519 if (RegMap[RegInfo.first]) { 520 Error(RegInfo.second, "register duplicated in register list"); 521 return 0; 522 } 523 524 if (RegInfo.first < HighRegNum) 525 Warning(RegInfo.second, 526 "register not in ascending order in register list"); 527 528 RegMap[RegInfo.first] = true; 529 HighRegNum = std::max(RegInfo.first, HighRegNum); 530 } 531 532 return ARMOperand::CreateRegList(Registers, S, E); 533} 534 535/// Parse an ARM memory expression, return false if successful else return true 536/// or an error. The first token must be a '[' when called. 537/// TODO Only preindexing and postindexing addressing are started, unindexed 538/// with option, etc are still to do. 539ARMOperand *ARMAsmParser::ParseMemory() { 540 SMLoc S, E; 541 assert(Parser.getTok().is(AsmToken::LBrac) && 542 "Token is not a Left Bracket"); 543 S = Parser.getTok().getLoc(); 544 Parser.Lex(); // Eat left bracket token. 545 546 const AsmToken &BaseRegTok = Parser.getTok(); 547 if (BaseRegTok.isNot(AsmToken::Identifier)) { 548 Error(BaseRegTok.getLoc(), "register expected"); 549 return 0; 550 } 551 int BaseRegNum = TryParseRegister(); 552 if (BaseRegNum == -1) { 553 Error(BaseRegTok.getLoc(), "register expected"); 554 return 0; 555 } 556 557 bool Preindexed = false; 558 bool Postindexed = false; 559 bool OffsetIsReg = false; 560 bool Negative = false; 561 bool Writeback = false; 562 563 // First look for preindexed address forms, that is after the "[Rn" we now 564 // have to see if the next token is a comma. 565 const AsmToken &Tok = Parser.getTok(); 566 if (Tok.is(AsmToken::Comma)) { 567 Preindexed = true; 568 Parser.Lex(); // Eat comma token. 569 int OffsetRegNum; 570 bool OffsetRegShifted; 571 enum ShiftType ShiftType; 572 const MCExpr *ShiftAmount; 573 const MCExpr *Offset; 574 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, 575 Offset, OffsetIsReg, OffsetRegNum, E)) 576 return 0; 577 const AsmToken &RBracTok = Parser.getTok(); 578 if (RBracTok.isNot(AsmToken::RBrac)) { 579 Error(RBracTok.getLoc(), "']' expected"); 580 return 0; 581 } 582 E = RBracTok.getLoc(); 583 Parser.Lex(); // Eat right bracket token. 584 585 const AsmToken &ExclaimTok = Parser.getTok(); 586 if (ExclaimTok.is(AsmToken::Exclaim)) { 587 E = ExclaimTok.getLoc(); 588 Writeback = true; 589 Parser.Lex(); // Eat exclaim token 590 } 591 return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 592 OffsetRegShifted, ShiftType, ShiftAmount, 593 Preindexed, Postindexed, Negative, Writeback, 594 S, E); 595 } 596 // The "[Rn" we have so far was not followed by a comma. 597 else if (Tok.is(AsmToken::RBrac)) { 598 // If there's anything other than the right brace, this is a post indexing 599 // addressing form. 600 E = Tok.getLoc(); 601 Parser.Lex(); // Eat right bracket token. 602 603 int OffsetRegNum = 0; 604 bool OffsetRegShifted = false; 605 enum ShiftType ShiftType; 606 const MCExpr *ShiftAmount; 607 const MCExpr *Offset = 0; 608 609 const AsmToken &NextTok = Parser.getTok(); 610 if (NextTok.isNot(AsmToken::EndOfStatement)) { 611 Postindexed = true; 612 Writeback = true; 613 if (NextTok.isNot(AsmToken::Comma)) { 614 Error(NextTok.getLoc(), "',' expected"); 615 return 0; 616 } 617 Parser.Lex(); // Eat comma token. 618 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, 619 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, 620 E)) 621 return 0; 622 } 623 624 return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 625 OffsetRegShifted, ShiftType, ShiftAmount, 626 Preindexed, Postindexed, Negative, Writeback, 627 S, E); 628 } 629 630 return 0; 631} 632 633/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," 634/// we will parse the following (were +/- means that a plus or minus is 635/// optional): 636/// +/-Rm 637/// +/-Rm, shift 638/// #offset 639/// we return false on success or an error otherwise. 640bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, 641 bool &OffsetRegShifted, 642 enum ShiftType &ShiftType, 643 const MCExpr *&ShiftAmount, 644 const MCExpr *&Offset, 645 bool &OffsetIsReg, 646 int &OffsetRegNum, 647 SMLoc &E) { 648 Negative = false; 649 OffsetRegShifted = false; 650 OffsetIsReg = false; 651 OffsetRegNum = -1; 652 const AsmToken &NextTok = Parser.getTok(); 653 E = NextTok.getLoc(); 654 if (NextTok.is(AsmToken::Plus)) 655 Parser.Lex(); // Eat plus token. 656 else if (NextTok.is(AsmToken::Minus)) { 657 Negative = true; 658 Parser.Lex(); // Eat minus token 659 } 660 // See if there is a register following the "[Rn," or "[Rn]," we have so far. 661 const AsmToken &OffsetRegTok = Parser.getTok(); 662 if (OffsetRegTok.is(AsmToken::Identifier)) { 663 SMLoc CurLoc = OffsetRegTok.getLoc(); 664 OffsetRegNum = TryParseRegister(); 665 if (OffsetRegNum != -1) { 666 OffsetIsReg = true; 667 E = CurLoc; 668 } 669 } 670 671 // If we parsed a register as the offset then there can be a shift after that. 672 if (OffsetRegNum != -1) { 673 // Look for a comma then a shift 674 const AsmToken &Tok = Parser.getTok(); 675 if (Tok.is(AsmToken::Comma)) { 676 Parser.Lex(); // Eat comma token. 677 678 const AsmToken &Tok = Parser.getTok(); 679 if (ParseShift(ShiftType, ShiftAmount, E)) 680 return Error(Tok.getLoc(), "shift expected"); 681 OffsetRegShifted = true; 682 } 683 } 684 else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" 685 // Look for #offset following the "[Rn," or "[Rn]," 686 const AsmToken &HashTok = Parser.getTok(); 687 if (HashTok.isNot(AsmToken::Hash)) 688 return Error(HashTok.getLoc(), "'#' expected"); 689 690 Parser.Lex(); // Eat hash token. 691 692 if (getParser().ParseExpression(Offset)) 693 return true; 694 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 695 } 696 return false; 697} 698 699/// ParseShift as one of these two: 700/// ( lsl | lsr | asr | ror ) , # shift_amount 701/// rrx 702/// and returns true if it parses a shift otherwise it returns false. 703bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, 704 SMLoc &E) { 705 const AsmToken &Tok = Parser.getTok(); 706 if (Tok.isNot(AsmToken::Identifier)) 707 return true; 708 StringRef ShiftName = Tok.getString(); 709 if (ShiftName == "lsl" || ShiftName == "LSL") 710 St = Lsl; 711 else if (ShiftName == "lsr" || ShiftName == "LSR") 712 St = Lsr; 713 else if (ShiftName == "asr" || ShiftName == "ASR") 714 St = Asr; 715 else if (ShiftName == "ror" || ShiftName == "ROR") 716 St = Ror; 717 else if (ShiftName == "rrx" || ShiftName == "RRX") 718 St = Rrx; 719 else 720 return true; 721 Parser.Lex(); // Eat shift type token. 722 723 // Rrx stands alone. 724 if (St == Rrx) 725 return false; 726 727 // Otherwise, there must be a '#' and a shift amount. 728 const AsmToken &HashTok = Parser.getTok(); 729 if (HashTok.isNot(AsmToken::Hash)) 730 return Error(HashTok.getLoc(), "'#' expected"); 731 Parser.Lex(); // Eat hash token. 732 733 if (getParser().ParseExpression(ShiftAmount)) 734 return true; 735 736 return false; 737} 738 739/// Parse a arm instruction operand. For now this parses the operand regardless 740/// of the mnemonic. 741ARMOperand *ARMAsmParser::ParseOperand() { 742 SMLoc S, E; 743 switch (getLexer().getKind()) { 744 default: 745 Error(Parser.getTok().getLoc(), "unexpected token in operand"); 746 return 0; 747 case AsmToken::Identifier: 748 if (ARMOperand *Op = TryParseRegisterWithWriteBack()) 749 return Op; 750 751 // This was not a register so parse other operands that start with an 752 // identifier (like labels) as expressions and create them as immediates. 753 const MCExpr *IdVal; 754 S = Parser.getTok().getLoc(); 755 if (getParser().ParseExpression(IdVal)) 756 return 0; 757 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 758 return ARMOperand::CreateImm(IdVal, S, E); 759 case AsmToken::LBrac: 760 return ParseMemory(); 761 case AsmToken::LCurly: 762 return ParseRegisterList(); 763 case AsmToken::Hash: 764 // #42 -> immediate. 765 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate 766 S = Parser.getTok().getLoc(); 767 Parser.Lex(); 768 const MCExpr *ImmVal; 769 if (getParser().ParseExpression(ImmVal)) 770 return 0; 771 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 772 return ARMOperand::CreateImm(ImmVal, S, E); 773 } 774} 775 776/// Parse an arm instruction mnemonic followed by its operands. 777bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, 778 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 779 // Create the leading tokens for the mnemonic, split by '.' characters. 780 size_t Start = 0, Next = Name.find('.'); 781 StringRef Head = Name.slice(Start, Next); 782 783 // Determine the predicate, if any. 784 // 785 // FIXME: We need a way to check whether a prefix supports predication, 786 // otherwise we will end up with an ambiguity for instructions that happen to 787 // end with a predicate name. 788 // FIXME: Likewise, some arithmetic instructions have an 's' prefix which 789 // indicates to update the condition codes. Those instructions have an 790 // additional immediate operand which encodes the prefix as reg0 or CPSR. 791 // Just checking for a suffix of 's' definitely creates ambiguities; e.g, 792 // the SMMLS instruction. 793 unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) 794 .Case("eq", ARMCC::EQ) 795 .Case("ne", ARMCC::NE) 796 .Case("hs", ARMCC::HS) 797 .Case("lo", ARMCC::LO) 798 .Case("mi", ARMCC::MI) 799 .Case("pl", ARMCC::PL) 800 .Case("vs", ARMCC::VS) 801 .Case("vc", ARMCC::VC) 802 .Case("hi", ARMCC::HI) 803 .Case("ls", ARMCC::LS) 804 .Case("ge", ARMCC::GE) 805 .Case("lt", ARMCC::LT) 806 .Case("gt", ARMCC::GT) 807 .Case("le", ARMCC::LE) 808 .Case("al", ARMCC::AL) 809 .Default(~0U); 810 811 if (CC == ~0U || 812 (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) { 813 CC = ARMCC::AL; 814 } else { 815 Head = Head.slice(0, Head.size() - 2); 816 } 817 818 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 819 // FIXME: Should only add this operand for predicated instructions 820 Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc)); 821 822 // Add the remaining tokens in the mnemonic. 823 while (Next != StringRef::npos) { 824 Start = Next; 825 Next = Name.find('.', Start + 1); 826 Head = Name.slice(Start, Next); 827 828 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 829 } 830 831 // Read the remaining operands. 832 if (getLexer().isNot(AsmToken::EndOfStatement)) { 833 // Read the first operand. 834 if (ARMOperand *Op = ParseOperand()) 835 Operands.push_back(Op); 836 else { 837 Parser.EatToEndOfStatement(); 838 return true; 839 } 840 841 while (getLexer().is(AsmToken::Comma)) { 842 Parser.Lex(); // Eat the comma. 843 844 // Parse and remember the operand. 845 if (ARMOperand *Op = ParseOperand()) 846 Operands.push_back(Op); 847 else { 848 Parser.EatToEndOfStatement(); 849 return true; 850 } 851 } 852 } 853 854 if (getLexer().isNot(AsmToken::EndOfStatement)) { 855 Parser.EatToEndOfStatement(); 856 return TokError("unexpected token in argument list"); 857 } 858 859 Parser.Lex(); // Consume the EndOfStatement 860 return false; 861} 862 863bool ARMAsmParser:: 864MatchAndEmitInstruction(SMLoc IDLoc, 865 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 866 MCStreamer &Out) { 867 MCInst Inst; 868 unsigned ErrorInfo; 869 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { 870 case Match_Success: 871 Out.EmitInstruction(Inst); 872 return false; 873 case Match_MissingFeature: 874 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 875 return true; 876 case Match_InvalidOperand: { 877 SMLoc ErrorLoc = IDLoc; 878 if (ErrorInfo != ~0U) { 879 if (ErrorInfo >= Operands.size()) 880 return Error(IDLoc, "too few operands for instruction"); 881 882 ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); 883 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 884 } 885 886 return Error(ErrorLoc, "invalid operand for instruction"); 887 } 888 case Match_MnemonicFail: 889 return Error(IDLoc, "unrecognized instruction mnemonic"); 890 } 891 892 llvm_unreachable("Implement any new match types added!"); 893 return true; 894} 895 896/// ParseDirective parses the arm specific directives 897bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 898 StringRef IDVal = DirectiveID.getIdentifier(); 899 if (IDVal == ".word") 900 return ParseDirectiveWord(4, DirectiveID.getLoc()); 901 else if (IDVal == ".thumb") 902 return ParseDirectiveThumb(DirectiveID.getLoc()); 903 else if (IDVal == ".thumb_func") 904 return ParseDirectiveThumbFunc(DirectiveID.getLoc()); 905 else if (IDVal == ".code") 906 return ParseDirectiveCode(DirectiveID.getLoc()); 907 else if (IDVal == ".syntax") 908 return ParseDirectiveSyntax(DirectiveID.getLoc()); 909 return true; 910} 911 912/// ParseDirectiveWord 913/// ::= .word [ expression (, expression)* ] 914bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 915 if (getLexer().isNot(AsmToken::EndOfStatement)) { 916 for (;;) { 917 const MCExpr *Value; 918 if (getParser().ParseExpression(Value)) 919 return true; 920 921 getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); 922 923 if (getLexer().is(AsmToken::EndOfStatement)) 924 break; 925 926 // FIXME: Improve diagnostic. 927 if (getLexer().isNot(AsmToken::Comma)) 928 return Error(L, "unexpected token in directive"); 929 Parser.Lex(); 930 } 931 } 932 933 Parser.Lex(); 934 return false; 935} 936 937/// ParseDirectiveThumb 938/// ::= .thumb 939bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { 940 if (getLexer().isNot(AsmToken::EndOfStatement)) 941 return Error(L, "unexpected token in directive"); 942 Parser.Lex(); 943 944 // TODO: set thumb mode 945 // TODO: tell the MC streamer the mode 946 // getParser().getStreamer().Emit???(); 947 return false; 948} 949 950/// ParseDirectiveThumbFunc 951/// ::= .thumbfunc symbol_name 952bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { 953 const AsmToken &Tok = Parser.getTok(); 954 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) 955 return Error(L, "unexpected token in .thumb_func directive"); 956 StringRef Name = Tok.getString(); 957 Parser.Lex(); // Consume the identifier token. 958 if (getLexer().isNot(AsmToken::EndOfStatement)) 959 return Error(L, "unexpected token in directive"); 960 Parser.Lex(); 961 962 // Mark symbol as a thumb symbol. 963 MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); 964 getParser().getStreamer().EmitThumbFunc(Func); 965 return false; 966} 967 968/// ParseDirectiveSyntax 969/// ::= .syntax unified | divided 970bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { 971 const AsmToken &Tok = Parser.getTok(); 972 if (Tok.isNot(AsmToken::Identifier)) 973 return Error(L, "unexpected token in .syntax directive"); 974 StringRef Mode = Tok.getString(); 975 if (Mode == "unified" || Mode == "UNIFIED") 976 Parser.Lex(); 977 else if (Mode == "divided" || Mode == "DIVIDED") 978 Parser.Lex(); 979 else 980 return Error(L, "unrecognized syntax mode in .syntax directive"); 981 982 if (getLexer().isNot(AsmToken::EndOfStatement)) 983 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 984 Parser.Lex(); 985 986 // TODO tell the MC streamer the mode 987 // getParser().getStreamer().Emit???(); 988 return false; 989} 990 991/// ParseDirectiveCode 992/// ::= .code 16 | 32 993bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { 994 const AsmToken &Tok = Parser.getTok(); 995 if (Tok.isNot(AsmToken::Integer)) 996 return Error(L, "unexpected token in .code directive"); 997 int64_t Val = Parser.getTok().getIntVal(); 998 if (Val == 16) 999 Parser.Lex(); 1000 else if (Val == 32) 1001 Parser.Lex(); 1002 else 1003 return Error(L, "invalid operand to .code directive"); 1004 1005 if (getLexer().isNot(AsmToken::EndOfStatement)) 1006 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1007 Parser.Lex(); 1008 1009 if (Val == 16) 1010 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 1011 else 1012 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 1013 1014 return false; 1015} 1016 1017extern "C" void LLVMInitializeARMAsmLexer(); 1018 1019/// Force static initialization. 1020extern "C" void LLVMInitializeARMAsmParser() { 1021 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 1022 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 1023 LLVMInitializeARMAsmLexer(); 1024} 1025 1026#define GET_REGISTER_MATCHER 1027#define GET_MATCHER_IMPLEMENTATION 1028#include "ARMGenAsmMatcher.inc" 1029