// ARMAsmParser.cpp revision e717610f53e0465cde198536561a3c00ce29d59f
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "ARMAddressingModes.h" 12#include "ARMSubtarget.h" 13#include "llvm/MC/MCParser/MCAsmLexer.h" 14#include "llvm/MC/MCParser/MCAsmParser.h" 15#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16#include "llvm/MC/MCContext.h" 17#include "llvm/MC/MCStreamer.h" 18#include "llvm/MC/MCExpr.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/Target/TargetRegistry.h" 21#include "llvm/Target/TargetAsmParser.h" 22#include "llvm/Support/SourceMgr.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/ADT/SmallVector.h" 25#include "llvm/ADT/StringSwitch.h" 26#include "llvm/ADT/Twine.h" 27using namespace llvm; 28 29// The shift types for register controlled shifts in arm memory addressing 30enum ShiftType { 31 Lsl, 32 Lsr, 33 Asr, 34 Ror, 35 Rrx 36}; 37 38namespace { 39 40class ARMOperand; 41 42class ARMAsmParser : public TargetAsmParser { 43 MCAsmParser &Parser; 44 TargetMachine &TM; 45 46 MCAsmParser &getParser() const { return Parser; } 47 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 48 49 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 50 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 51 52 int TryParseRegister(); 53 ARMOperand *TryParseRegisterWithWriteBack(); 54 ARMOperand *ParseRegisterList(); 55 ARMOperand *ParseMemory(); 56 ARMOperand *ParseOperand(); 57 58 bool ParseMemoryOffsetReg(bool &Negative, 59 bool &OffsetRegShifted, 60 enum ShiftType &ShiftType, 61 const MCExpr *&ShiftAmount, 62 const MCExpr *&Offset, 63 bool &OffsetIsReg, 64 int &OffsetRegNum, 65 SMLoc &E); 66 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); 67 bool 
ParseDirectiveWord(unsigned Size, SMLoc L); 68 bool ParseDirectiveThumb(SMLoc L); 69 bool ParseDirectiveThumbFunc(SMLoc L); 70 bool ParseDirectiveCode(SMLoc L); 71 bool ParseDirectiveSyntax(SMLoc L); 72 73 bool MatchAndEmitInstruction(SMLoc IDLoc, 74 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 75 MCStreamer &Out); 76 77 /// @name Auto-generated Match Functions 78 /// { 79 80#define GET_ASSEMBLER_HEADER 81#include "ARMGenAsmMatcher.inc" 82 83 /// } 84 85public: 86 ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) 87 : TargetAsmParser(T), Parser(_Parser), TM(_TM) { 88 // Initialize the set of available features. 89 setAvailableFeatures(ComputeAvailableFeatures( 90 &TM.getSubtarget<ARMSubtarget>())); 91 } 92 93 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 94 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 95 virtual bool ParseDirective(AsmToken DirectiveID); 96}; 97} // end anonymous namespace 98 99namespace { 100 101/// ARMOperand - Instances of this class represent a parsed ARM machine 102/// instruction. 
103class ARMOperand : public MCParsedAsmOperand { 104 enum KindTy { 105 CondCode, 106 Immediate, 107 Memory, 108 Register, 109 RegisterList, 110 Token 111 } Kind; 112 113 SMLoc StartLoc, EndLoc; 114 115 union { 116 struct { 117 ARMCC::CondCodes Val; 118 } CC; 119 120 struct { 121 const char *Data; 122 unsigned Length; 123 } Tok; 124 125 struct { 126 unsigned RegNum; 127 bool Writeback; 128 } Reg; 129 130 struct { 131 unsigned RegStart; 132 unsigned Number; 133 } RegList; 134 135 struct { 136 const MCExpr *Val; 137 } Imm; 138 139 // This is for all forms of ARM address expressions 140 struct { 141 unsigned BaseRegNum; 142 unsigned OffsetRegNum; // used when OffsetIsReg is true 143 const MCExpr *Offset; // used when OffsetIsReg is false 144 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 145 enum ShiftType ShiftType; // used when OffsetRegShifted is true 146 unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true 147 unsigned Preindexed : 1; 148 unsigned Postindexed : 1; 149 unsigned OffsetIsReg : 1; 150 unsigned Negative : 1; // only used when OffsetIsReg is true 151 unsigned Writeback : 1; 152 } Mem; 153 }; 154 155 ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} 156public: 157 ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { 158 Kind = o.Kind; 159 StartLoc = o.StartLoc; 160 EndLoc = o.EndLoc; 161 switch (Kind) { 162 case CondCode: 163 CC = o.CC; 164 break; 165 case Token: 166 Tok = o.Tok; 167 break; 168 case Register: 169 Reg = o.Reg; 170 break; 171 case RegisterList: 172 RegList = o.RegList; 173 break; 174 case Immediate: 175 Imm = o.Imm; 176 break; 177 case Memory: 178 Mem = o.Mem; 179 break; 180 } 181 } 182 183 /// getStartLoc - Get the location of the first token of this operand. 184 SMLoc getStartLoc() const { return StartLoc; } 185 /// getEndLoc - Get the location of the last token of this operand. 
186 SMLoc getEndLoc() const { return EndLoc; } 187 188 ARMCC::CondCodes getCondCode() const { 189 assert(Kind == CondCode && "Invalid access!"); 190 return CC.Val; 191 } 192 193 StringRef getToken() const { 194 assert(Kind == Token && "Invalid access!"); 195 return StringRef(Tok.Data, Tok.Length); 196 } 197 198 unsigned getReg() const { 199 assert((Kind == Register || Kind == RegisterList) && "Invalid access!"); 200 unsigned RegNum = 0; 201 if (Kind == Register) 202 RegNum = Reg.RegNum; 203 else 204 RegNum = RegList.RegStart; 205 return RegNum; 206 } 207 208 std::pair<unsigned, unsigned> getRegList() const { 209 assert(Kind == RegisterList && "Invalid access!"); 210 return std::make_pair(RegList.RegStart, RegList.Number); 211 } 212 213 const MCExpr *getImm() const { 214 assert(Kind == Immediate && "Invalid access!"); 215 return Imm.Val; 216 } 217 218 bool isCondCode() const { return Kind == CondCode; } 219 bool isImm() const { return Kind == Immediate; } 220 bool isReg() const { return Kind == Register; } 221 bool isRegList() const { return Kind == RegisterList; } 222 bool isToken() const { return Kind == Token; } 223 bool isMemory() const { return Kind == Memory; } 224 225 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 226 // Add as immediates when possible. Null MCExpr = 0. 227 if (Expr == 0) 228 Inst.addOperand(MCOperand::CreateImm(0)); 229 else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 230 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 231 else 232 Inst.addOperand(MCOperand::CreateExpr(Expr)); 233 } 234 235 void addCondCodeOperands(MCInst &Inst, unsigned N) const { 236 assert(N == 2 && "Invalid number of operands!"); 237 Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); 238 // FIXME: What belongs here? 
239 Inst.addOperand(MCOperand::CreateReg(0)); 240 } 241 242 void addRegOperands(MCInst &Inst, unsigned N) const { 243 assert(N == 1 && "Invalid number of operands!"); 244 Inst.addOperand(MCOperand::CreateReg(getReg())); 245 } 246 247 void addImmOperands(MCInst &Inst, unsigned N) const { 248 assert(N == 1 && "Invalid number of operands!"); 249 addExpr(Inst, getImm()); 250 } 251 252 bool isMemMode5() const { 253 if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted || 254 Mem.Writeback || Mem.Negative) 255 return false; 256 // If there is an offset expression, make sure it's valid. 257 if (!Mem.Offset) 258 return true; 259 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 260 if (!CE) 261 return false; 262 // The offset must be a multiple of 4 in the range 0-1020. 263 int64_t Value = CE->getValue(); 264 return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); 265 } 266 267 void addMemMode5Operands(MCInst &Inst, unsigned N) const { 268 assert(N == 2 && isMemMode5() && "Invalid number of operands!"); 269 270 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 271 assert(!Mem.OffsetIsReg && "Invalid mode 5 operand"); 272 273 // FIXME: #-0 is encoded differently than #0. Does the parser preserve 274 // the difference? 275 if (Mem.Offset) { 276 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 277 assert(CE && "Non-constant mode 5 offset operand!"); 278 279 // The MCInst offset operand doesn't include the low two bits (like 280 // the instruction encoding). 
281 int64_t Offset = CE->getValue() / 4; 282 if (Offset >= 0) 283 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, 284 Offset))); 285 else 286 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, 287 -Offset))); 288 } else { 289 Inst.addOperand(MCOperand::CreateImm(0)); 290 } 291 } 292 293 virtual void dump(raw_ostream &OS) const; 294 295 static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { 296 ARMOperand *Op = new ARMOperand(CondCode); 297 Op->CC.Val = CC; 298 Op->StartLoc = S; 299 Op->EndLoc = S; 300 return Op; 301 } 302 303 static ARMOperand *CreateToken(StringRef Str, SMLoc S) { 304 ARMOperand *Op = new ARMOperand(Token); 305 Op->Tok.Data = Str.data(); 306 Op->Tok.Length = Str.size(); 307 Op->StartLoc = S; 308 Op->EndLoc = S; 309 return Op; 310 } 311 312 static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S, 313 SMLoc E) { 314 ARMOperand *Op = new ARMOperand(Register); 315 Op->Reg.RegNum = RegNum; 316 Op->Reg.Writeback = Writeback; 317 Op->StartLoc = S; 318 Op->EndLoc = E; 319 return Op; 320 } 321 322 static ARMOperand *CreateRegList(unsigned RegStart, unsigned Number, 323 SMLoc S, SMLoc E) { 324 ARMOperand *Op = new ARMOperand(RegisterList); 325 Op->RegList.RegStart = RegStart; 326 Op->RegList.Number = Number; 327 Op->StartLoc = S; 328 Op->EndLoc = E; 329 return Op; 330 } 331 332 static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { 333 ARMOperand *Op = new ARMOperand(Immediate); 334 Op->Imm.Val = Val; 335 Op->StartLoc = S; 336 Op->EndLoc = E; 337 return Op; 338 } 339 340 static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 341 const MCExpr *Offset, unsigned OffsetRegNum, 342 bool OffsetRegShifted, enum ShiftType ShiftType, 343 const MCExpr *ShiftAmount, bool Preindexed, 344 bool Postindexed, bool Negative, bool Writeback, 345 SMLoc S, SMLoc E) { 346 ARMOperand *Op = new ARMOperand(Memory); 347 Op->Mem.BaseRegNum = BaseRegNum; 348 Op->Mem.OffsetIsReg = OffsetIsReg; 349 
Op->Mem.Offset = Offset; 350 Op->Mem.OffsetRegNum = OffsetRegNum; 351 Op->Mem.OffsetRegShifted = OffsetRegShifted; 352 Op->Mem.ShiftType = ShiftType; 353 Op->Mem.ShiftAmount = ShiftAmount; 354 Op->Mem.Preindexed = Preindexed; 355 Op->Mem.Postindexed = Postindexed; 356 Op->Mem.Negative = Negative; 357 Op->Mem.Writeback = Writeback; 358 359 Op->StartLoc = S; 360 Op->EndLoc = E; 361 return Op; 362 } 363}; 364 365} // end anonymous namespace. 366 367void ARMOperand::dump(raw_ostream &OS) const { 368 switch (Kind) { 369 case CondCode: 370 OS << ARMCondCodeToString(getCondCode()); 371 break; 372 case Immediate: 373 getImm()->print(OS); 374 break; 375 case Memory: 376 OS << "<memory>"; 377 break; 378 case Register: 379 OS << "<register " << getReg() << ">"; 380 break; 381 case RegisterList: { 382 OS << "<register_list "; 383 std::pair<unsigned, unsigned> List = getRegList(); 384 unsigned RegEnd = List.first + List.second; 385 386 for (unsigned Idx = List.first; Idx < RegEnd; ) { 387 OS << Idx; 388 if (++Idx < RegEnd) OS << ", "; 389 } 390 391 OS << ">"; 392 break; 393 } 394 case Token: 395 OS << "'" << getToken() << "'"; 396 break; 397 } 398} 399 400/// @name Auto-generated Match Functions 401/// { 402 403static unsigned MatchRegisterName(StringRef Name); 404 405/// } 406 407/// Try to parse a register name. The token must be an Identifier when called, 408/// and if it is a register name the token is eaten and the register number is 409/// returned. Otherwise return -1. 410/// 411int ARMAsmParser::TryParseRegister() { 412 const AsmToken &Tok = Parser.getTok(); 413 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 414 415 // FIXME: Validate register for the current architecture; we have to do 416 // validation later, so maybe there is no need for this here. 417 unsigned RegNum = MatchRegisterName(Tok.getString()); 418 if (RegNum == 0) 419 return -1; 420 Parser.Lex(); // Eat identifier token. 
421 return RegNum; 422} 423 424 425/// Try to parse a register name. The token must be an Identifier when called, 426/// and if it is a register name the token is eaten and the register number is 427/// returned. Otherwise return -1. 428/// 429/// TODO this is likely to change to allow different register types and or to 430/// parse for a specific register type. 431ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() { 432 SMLoc S = Parser.getTok().getLoc(); 433 int RegNo = TryParseRegister(); 434 if (RegNo == -1) 435 return 0; 436 437 SMLoc E = Parser.getTok().getLoc(); 438 439 bool Writeback = false; 440 const AsmToken &ExclaimTok = Parser.getTok(); 441 if (ExclaimTok.is(AsmToken::Exclaim)) { 442 E = ExclaimTok.getLoc(); 443 Writeback = true; 444 Parser.Lex(); // Eat exclaim token 445 } 446 447 return ARMOperand::CreateReg(RegNo, Writeback, S, E); 448} 449 450/// Parse a register list, return it if successful else return null. The first 451/// token must be a '{' when called. 452ARMOperand *ARMAsmParser::ParseRegisterList() { 453 assert(Parser.getTok().is(AsmToken::LCurly) && 454 "Token is not a Left Curly Brace"); 455 SMLoc S = Parser.getTok().getLoc(); 456 Parser.Lex(); // Eat left curly brace token. 457 458 const AsmToken &RegTok = Parser.getTok(); 459 SMLoc RegLoc = RegTok.getLoc(); 460 if (RegTok.isNot(AsmToken::Identifier)) { 461 Error(RegLoc, "register expected"); 462 return 0; 463 } 464 465 int RegNum = TryParseRegister(); 466 if (RegNum == -1) { 467 Error(RegLoc, "register expected"); 468 return 0; 469 } 470 471 unsigned PrevRegNum = RegNum; 472 std::vector<std::pair<unsigned, SMLoc> > Registers; 473 Registers.reserve(32); 474 Registers.push_back(std::make_pair(RegNum, RegLoc)); 475 476 while (Parser.getTok().is(AsmToken::Comma) || 477 Parser.getTok().is(AsmToken::Minus)) { 478 bool IsRange = Parser.getTok().is(AsmToken::Minus); 479 Parser.Lex(); // Eat comma or minus token. 
480 481 const AsmToken &RegTok = Parser.getTok(); 482 SMLoc RegLoc = RegTok.getLoc(); 483 if (RegTok.isNot(AsmToken::Identifier)) { 484 Error(RegLoc, "register expected"); 485 return 0; 486 } 487 488 int RegNum = TryParseRegister(); 489 if (RegNum == -1) { 490 Error(RegLoc, "register expected"); 491 return 0; 492 } 493 494 if (IsRange) { 495 int Reg = PrevRegNum; 496 do { 497 ++Reg; 498 Registers.push_back(std::make_pair(Reg, RegLoc)); 499 } while (Reg != RegNum); 500 } else { 501 Registers.push_back(std::make_pair(RegNum, RegLoc)); 502 } 503 504 PrevRegNum = RegNum; 505 } 506 507 // Process the right curly brace of the list. 508 const AsmToken &RCurlyTok = Parser.getTok(); 509 if (RCurlyTok.isNot(AsmToken::RCurly)) { 510 Error(RCurlyTok.getLoc(), "'}' expected"); 511 return 0; 512 } 513 514 SMLoc E = RCurlyTok.getLoc(); 515 Parser.Lex(); // Eat right curly brace token. 516 517 // Verify the register list. 518 std::vector<std::pair<unsigned, SMLoc> >::iterator 519 RI = Registers.begin(), RE = Registers.end(); 520 521 unsigned Number = Registers.size(); 522 unsigned HighRegNum = RI->first; 523 unsigned RegStart = RI->first; 524 525 DenseMap<unsigned, bool> RegMap; 526 RegMap[RI->first] = true; 527 528 for (++RI; RI != RE; ++RI) { 529 std::pair<unsigned, SMLoc> &RegInfo = *RI; 530 531 if (RegMap[RegInfo.first]) { 532 Error(RegInfo.second, "register duplicated in register list"); 533 return 0; 534 } 535 536 if (RegInfo.first < HighRegNum) 537 Warning(RegInfo.second, 538 "register not in ascending order in register list"); 539 540 RegMap[RegInfo.first] = true; 541 HighRegNum = std::max(RegInfo.first, HighRegNum); 542 RegStart = std::min(RegInfo.first, RegStart); 543 } 544 545 if (RegStart + Number - 1 != HighRegNum) { 546 Error(RegLoc, "non-contiguous register range"); 547 return 0; 548 } 549 550 return ARMOperand::CreateRegList(RegStart, Number, S, E); 551} 552 553/// Parse an ARM memory expression, return false if successful else return true 554/// or an error. 
The first token must be a '[' when called. 555/// TODO Only preindexing and postindexing addressing are started, unindexed 556/// with option, etc are still to do. 557ARMOperand *ARMAsmParser::ParseMemory() { 558 SMLoc S, E; 559 assert(Parser.getTok().is(AsmToken::LBrac) && 560 "Token is not a Left Bracket"); 561 S = Parser.getTok().getLoc(); 562 Parser.Lex(); // Eat left bracket token. 563 564 const AsmToken &BaseRegTok = Parser.getTok(); 565 if (BaseRegTok.isNot(AsmToken::Identifier)) { 566 Error(BaseRegTok.getLoc(), "register expected"); 567 return 0; 568 } 569 int BaseRegNum = TryParseRegister(); 570 if (BaseRegNum == -1) { 571 Error(BaseRegTok.getLoc(), "register expected"); 572 return 0; 573 } 574 575 bool Preindexed = false; 576 bool Postindexed = false; 577 bool OffsetIsReg = false; 578 bool Negative = false; 579 bool Writeback = false; 580 581 // First look for preindexed address forms, that is after the "[Rn" we now 582 // have to see if the next token is a comma. 583 const AsmToken &Tok = Parser.getTok(); 584 if (Tok.is(AsmToken::Comma)) { 585 Preindexed = true; 586 Parser.Lex(); // Eat comma token. 587 int OffsetRegNum; 588 bool OffsetRegShifted; 589 enum ShiftType ShiftType; 590 const MCExpr *ShiftAmount; 591 const MCExpr *Offset; 592 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, 593 Offset, OffsetIsReg, OffsetRegNum, E)) 594 return 0; 595 const AsmToken &RBracTok = Parser.getTok(); 596 if (RBracTok.isNot(AsmToken::RBrac)) { 597 Error(RBracTok.getLoc(), "']' expected"); 598 return 0; 599 } 600 E = RBracTok.getLoc(); 601 Parser.Lex(); // Eat right bracket token. 
602 603 const AsmToken &ExclaimTok = Parser.getTok(); 604 if (ExclaimTok.is(AsmToken::Exclaim)) { 605 E = ExclaimTok.getLoc(); 606 Writeback = true; 607 Parser.Lex(); // Eat exclaim token 608 } 609 return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 610 OffsetRegShifted, ShiftType, ShiftAmount, 611 Preindexed, Postindexed, Negative, Writeback, 612 S, E); 613 } 614 // The "[Rn" we have so far was not followed by a comma. 615 else if (Tok.is(AsmToken::RBrac)) { 616 // If there's anything other than the right brace, this is a post indexing 617 // addressing form. 618 E = Tok.getLoc(); 619 Parser.Lex(); // Eat right bracket token. 620 621 int OffsetRegNum = 0; 622 bool OffsetRegShifted = false; 623 enum ShiftType ShiftType; 624 const MCExpr *ShiftAmount; 625 const MCExpr *Offset = 0; 626 627 const AsmToken &NextTok = Parser.getTok(); 628 if (NextTok.isNot(AsmToken::EndOfStatement)) { 629 Postindexed = true; 630 Writeback = true; 631 if (NextTok.isNot(AsmToken::Comma)) { 632 Error(NextTok.getLoc(), "',' expected"); 633 return 0; 634 } 635 Parser.Lex(); // Eat comma token. 636 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, 637 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, 638 E)) 639 return 0; 640 } 641 642 return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 643 OffsetRegShifted, ShiftType, ShiftAmount, 644 Preindexed, Postindexed, Negative, Writeback, 645 S, E); 646 } 647 648 return 0; 649} 650 651/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," 652/// we will parse the following (were +/- means that a plus or minus is 653/// optional): 654/// +/-Rm 655/// +/-Rm, shift 656/// #offset 657/// we return false on success or an error otherwise. 
658bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, 659 bool &OffsetRegShifted, 660 enum ShiftType &ShiftType, 661 const MCExpr *&ShiftAmount, 662 const MCExpr *&Offset, 663 bool &OffsetIsReg, 664 int &OffsetRegNum, 665 SMLoc &E) { 666 Negative = false; 667 OffsetRegShifted = false; 668 OffsetIsReg = false; 669 OffsetRegNum = -1; 670 const AsmToken &NextTok = Parser.getTok(); 671 E = NextTok.getLoc(); 672 if (NextTok.is(AsmToken::Plus)) 673 Parser.Lex(); // Eat plus token. 674 else if (NextTok.is(AsmToken::Minus)) { 675 Negative = true; 676 Parser.Lex(); // Eat minus token 677 } 678 // See if there is a register following the "[Rn," or "[Rn]," we have so far. 679 const AsmToken &OffsetRegTok = Parser.getTok(); 680 if (OffsetRegTok.is(AsmToken::Identifier)) { 681 SMLoc CurLoc = OffsetRegTok.getLoc(); 682 OffsetRegNum = TryParseRegister(); 683 if (OffsetRegNum != -1) { 684 OffsetIsReg = true; 685 E = CurLoc; 686 } 687 } 688 689 // If we parsed a register as the offset then there can be a shift after that. 690 if (OffsetRegNum != -1) { 691 // Look for a comma then a shift 692 const AsmToken &Tok = Parser.getTok(); 693 if (Tok.is(AsmToken::Comma)) { 694 Parser.Lex(); // Eat comma token. 695 696 const AsmToken &Tok = Parser.getTok(); 697 if (ParseShift(ShiftType, ShiftAmount, E)) 698 return Error(Tok.getLoc(), "shift expected"); 699 OffsetRegShifted = true; 700 } 701 } 702 else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" 703 // Look for #offset following the "[Rn," or "[Rn]," 704 const AsmToken &HashTok = Parser.getTok(); 705 if (HashTok.isNot(AsmToken::Hash)) 706 return Error(HashTok.getLoc(), "'#' expected"); 707 708 Parser.Lex(); // Eat hash token. 
709 710 if (getParser().ParseExpression(Offset)) 711 return true; 712 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 713 } 714 return false; 715} 716 717/// ParseShift as one of these two: 718/// ( lsl | lsr | asr | ror ) , # shift_amount 719/// rrx 720/// and returns true if it parses a shift otherwise it returns false. 721bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, 722 SMLoc &E) { 723 const AsmToken &Tok = Parser.getTok(); 724 if (Tok.isNot(AsmToken::Identifier)) 725 return true; 726 StringRef ShiftName = Tok.getString(); 727 if (ShiftName == "lsl" || ShiftName == "LSL") 728 St = Lsl; 729 else if (ShiftName == "lsr" || ShiftName == "LSR") 730 St = Lsr; 731 else if (ShiftName == "asr" || ShiftName == "ASR") 732 St = Asr; 733 else if (ShiftName == "ror" || ShiftName == "ROR") 734 St = Ror; 735 else if (ShiftName == "rrx" || ShiftName == "RRX") 736 St = Rrx; 737 else 738 return true; 739 Parser.Lex(); // Eat shift type token. 740 741 // Rrx stands alone. 742 if (St == Rrx) 743 return false; 744 745 // Otherwise, there must be a '#' and a shift amount. 746 const AsmToken &HashTok = Parser.getTok(); 747 if (HashTok.isNot(AsmToken::Hash)) 748 return Error(HashTok.getLoc(), "'#' expected"); 749 Parser.Lex(); // Eat hash token. 750 751 if (getParser().ParseExpression(ShiftAmount)) 752 return true; 753 754 return false; 755} 756 757/// Parse a arm instruction operand. For now this parses the operand regardless 758/// of the mnemonic. 759ARMOperand *ARMAsmParser::ParseOperand() { 760 SMLoc S, E; 761 switch (getLexer().getKind()) { 762 default: 763 Error(Parser.getTok().getLoc(), "unexpected token in operand"); 764 return 0; 765 case AsmToken::Identifier: 766 if (ARMOperand *Op = TryParseRegisterWithWriteBack()) 767 return Op; 768 769 // This was not a register so parse other operands that start with an 770 // identifier (like labels) as expressions and create them as immediates. 
771 const MCExpr *IdVal; 772 S = Parser.getTok().getLoc(); 773 if (getParser().ParseExpression(IdVal)) 774 return 0; 775 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 776 return ARMOperand::CreateImm(IdVal, S, E); 777 case AsmToken::LBrac: 778 return ParseMemory(); 779 case AsmToken::LCurly: 780 return ParseRegisterList(); 781 case AsmToken::Hash: 782 // #42 -> immediate. 783 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate 784 S = Parser.getTok().getLoc(); 785 Parser.Lex(); 786 const MCExpr *ImmVal; 787 if (getParser().ParseExpression(ImmVal)) 788 return 0; 789 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 790 return ARMOperand::CreateImm(ImmVal, S, E); 791 } 792} 793 794/// Parse an arm instruction mnemonic followed by its operands. 795bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, 796 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 797 // Create the leading tokens for the mnemonic, split by '.' characters. 798 size_t Start = 0, Next = Name.find('.'); 799 StringRef Head = Name.slice(Start, Next); 800 801 // Determine the predicate, if any. 802 // 803 // FIXME: We need a way to check whether a prefix supports predication, 804 // otherwise we will end up with an ambiguity for instructions that happen to 805 // end with a predicate name. 806 // FIXME: Likewise, some arithmetic instructions have an 's' prefix which 807 // indicates to update the condition codes. Those instructions have an 808 // additional immediate operand which encodes the prefix as reg0 or CPSR. 809 // Just checking for a suffix of 's' definitely creates ambiguities; e.g, 810 // the SMMLS instruction. 
811 unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) 812 .Case("eq", ARMCC::EQ) 813 .Case("ne", ARMCC::NE) 814 .Case("hs", ARMCC::HS) 815 .Case("lo", ARMCC::LO) 816 .Case("mi", ARMCC::MI) 817 .Case("pl", ARMCC::PL) 818 .Case("vs", ARMCC::VS) 819 .Case("vc", ARMCC::VC) 820 .Case("hi", ARMCC::HI) 821 .Case("ls", ARMCC::LS) 822 .Case("ge", ARMCC::GE) 823 .Case("lt", ARMCC::LT) 824 .Case("gt", ARMCC::GT) 825 .Case("le", ARMCC::LE) 826 .Case("al", ARMCC::AL) 827 .Default(~0U); 828 829 if (CC == ~0U || 830 (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) { 831 CC = ARMCC::AL; 832 } else { 833 Head = Head.slice(0, Head.size() - 2); 834 } 835 836 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 837 // FIXME: Should only add this operand for predicated instructions 838 Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc)); 839 840 // Add the remaining tokens in the mnemonic. 841 while (Next != StringRef::npos) { 842 Start = Next; 843 Next = Name.find('.', Start + 1); 844 Head = Name.slice(Start, Next); 845 846 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 847 } 848 849 // Read the remaining operands. 850 if (getLexer().isNot(AsmToken::EndOfStatement)) { 851 // Read the first operand. 852 if (ARMOperand *Op = ParseOperand()) 853 Operands.push_back(Op); 854 else { 855 Parser.EatToEndOfStatement(); 856 return true; 857 } 858 859 while (getLexer().is(AsmToken::Comma)) { 860 Parser.Lex(); // Eat the comma. 861 862 // Parse and remember the operand. 
863 if (ARMOperand *Op = ParseOperand()) 864 Operands.push_back(Op); 865 else { 866 Parser.EatToEndOfStatement(); 867 return true; 868 } 869 } 870 } 871 872 if (getLexer().isNot(AsmToken::EndOfStatement)) { 873 Parser.EatToEndOfStatement(); 874 return TokError("unexpected token in argument list"); 875 } 876 877 Parser.Lex(); // Consume the EndOfStatement 878 return false; 879} 880 881bool ARMAsmParser:: 882MatchAndEmitInstruction(SMLoc IDLoc, 883 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 884 MCStreamer &Out) { 885 MCInst Inst; 886 unsigned ErrorInfo; 887 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { 888 case Match_Success: 889 Out.EmitInstruction(Inst); 890 return false; 891 case Match_MissingFeature: 892 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 893 return true; 894 case Match_InvalidOperand: { 895 SMLoc ErrorLoc = IDLoc; 896 if (ErrorInfo != ~0U) { 897 if (ErrorInfo >= Operands.size()) 898 return Error(IDLoc, "too few operands for instruction"); 899 900 ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); 901 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 902 } 903 904 return Error(ErrorLoc, "invalid operand for instruction"); 905 } 906 case Match_MnemonicFail: 907 return Error(IDLoc, "unrecognized instruction mnemonic"); 908 } 909 910 llvm_unreachable("Implement any new match types added!"); 911 return true; 912} 913 914/// ParseDirective parses the arm specific directives 915bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 916 StringRef IDVal = DirectiveID.getIdentifier(); 917 if (IDVal == ".word") 918 return ParseDirectiveWord(4, DirectiveID.getLoc()); 919 else if (IDVal == ".thumb") 920 return ParseDirectiveThumb(DirectiveID.getLoc()); 921 else if (IDVal == ".thumb_func") 922 return ParseDirectiveThumbFunc(DirectiveID.getLoc()); 923 else if (IDVal == ".code") 924 return ParseDirectiveCode(DirectiveID.getLoc()); 925 else if (IDVal == ".syntax") 926 return 
ParseDirectiveSyntax(DirectiveID.getLoc()); 927 return true; 928} 929 930/// ParseDirectiveWord 931/// ::= .word [ expression (, expression)* ] 932bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 933 if (getLexer().isNot(AsmToken::EndOfStatement)) { 934 for (;;) { 935 const MCExpr *Value; 936 if (getParser().ParseExpression(Value)) 937 return true; 938 939 getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); 940 941 if (getLexer().is(AsmToken::EndOfStatement)) 942 break; 943 944 // FIXME: Improve diagnostic. 945 if (getLexer().isNot(AsmToken::Comma)) 946 return Error(L, "unexpected token in directive"); 947 Parser.Lex(); 948 } 949 } 950 951 Parser.Lex(); 952 return false; 953} 954 955/// ParseDirectiveThumb 956/// ::= .thumb 957bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { 958 if (getLexer().isNot(AsmToken::EndOfStatement)) 959 return Error(L, "unexpected token in directive"); 960 Parser.Lex(); 961 962 // TODO: set thumb mode 963 // TODO: tell the MC streamer the mode 964 // getParser().getStreamer().Emit???(); 965 return false; 966} 967 968/// ParseDirectiveThumbFunc 969/// ::= .thumbfunc symbol_name 970bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { 971 const AsmToken &Tok = Parser.getTok(); 972 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) 973 return Error(L, "unexpected token in .thumb_func directive"); 974 StringRef Name = Tok.getString(); 975 Parser.Lex(); // Consume the identifier token. 976 if (getLexer().isNot(AsmToken::EndOfStatement)) 977 return Error(L, "unexpected token in directive"); 978 Parser.Lex(); 979 980 // Mark symbol as a thumb symbol. 
981 MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); 982 getParser().getStreamer().EmitThumbFunc(Func); 983 return false; 984} 985 986/// ParseDirectiveSyntax 987/// ::= .syntax unified | divided 988bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { 989 const AsmToken &Tok = Parser.getTok(); 990 if (Tok.isNot(AsmToken::Identifier)) 991 return Error(L, "unexpected token in .syntax directive"); 992 StringRef Mode = Tok.getString(); 993 if (Mode == "unified" || Mode == "UNIFIED") 994 Parser.Lex(); 995 else if (Mode == "divided" || Mode == "DIVIDED") 996 Parser.Lex(); 997 else 998 return Error(L, "unrecognized syntax mode in .syntax directive"); 999 1000 if (getLexer().isNot(AsmToken::EndOfStatement)) 1001 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1002 Parser.Lex(); 1003 1004 // TODO tell the MC streamer the mode 1005 // getParser().getStreamer().Emit???(); 1006 return false; 1007} 1008 1009/// ParseDirectiveCode 1010/// ::= .code 16 | 32 1011bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { 1012 const AsmToken &Tok = Parser.getTok(); 1013 if (Tok.isNot(AsmToken::Integer)) 1014 return Error(L, "unexpected token in .code directive"); 1015 int64_t Val = Parser.getTok().getIntVal(); 1016 if (Val == 16) 1017 Parser.Lex(); 1018 else if (Val == 32) 1019 Parser.Lex(); 1020 else 1021 return Error(L, "invalid operand to .code directive"); 1022 1023 if (getLexer().isNot(AsmToken::EndOfStatement)) 1024 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1025 Parser.Lex(); 1026 1027 if (Val == 16) 1028 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 1029 else 1030 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 1031 1032 return false; 1033} 1034 1035extern "C" void LLVMInitializeARMAsmLexer(); 1036 1037/// Force static initialization. 
1038extern "C" void LLVMInitializeARMAsmParser() { 1039 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 1040 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 1041 LLVMInitializeARMAsmLexer(); 1042} 1043 1044#define GET_REGISTER_MATCHER 1045#define GET_MATCHER_IMPLEMENTATION 1046#include "ARMGenAsmMatcher.inc" 1047