ARMAsmParser.cpp revision 04f74942f2994a7c1d8e62c207c4005ed4652b6a
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "ARMAddressingModes.h" 12#include "ARMSubtarget.h" 13#include "llvm/MC/MCParser/MCAsmLexer.h" 14#include "llvm/MC/MCParser/MCAsmParser.h" 15#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16#include "llvm/MC/MCContext.h" 17#include "llvm/MC/MCStreamer.h" 18#include "llvm/MC/MCExpr.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/Target/TargetRegistry.h" 21#include "llvm/Target/TargetAsmParser.h" 22#include "llvm/Support/SourceMgr.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/ADT/SmallVector.h" 25#include "llvm/ADT/StringSwitch.h" 26#include "llvm/ADT/Twine.h" 27using namespace llvm; 28 29// The shift types for register controlled shifts in arm memory addressing 30enum ShiftType { 31 Lsl, 32 Lsr, 33 Asr, 34 Ror, 35 Rrx 36}; 37 38namespace { 39 40class ARMOperand; 41 42class ARMAsmParser : public TargetAsmParser { 43 MCAsmParser &Parser; 44 TargetMachine &TM; 45 46 MCAsmParser &getParser() const { return Parser; } 47 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 48 49 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 50 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 51 52 int TryParseRegister(); 53 bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); 54 bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); 55 bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); 56 bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &); 57 58 bool ParseMemoryOffsetReg(bool &Negative, 59 bool &OffsetRegShifted, 60 enum ShiftType &ShiftType, 61 const MCExpr *&ShiftAmount, 62 const MCExpr *&Offset, 63 bool &OffsetIsReg, 64 int &OffsetRegNum, 65 SMLoc &E); 66 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); 67 bool ParseDirectiveWord(unsigned Size, SMLoc L); 68 bool ParseDirectiveThumb(SMLoc L); 69 bool ParseDirectiveThumbFunc(SMLoc L); 70 bool ParseDirectiveCode(SMLoc L); 71 bool ParseDirectiveSyntax(SMLoc L); 72 73 bool MatchAndEmitInstruction(SMLoc IDLoc, 74 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 75 MCStreamer &Out); 76 77 /// @name Auto-generated Match Functions 78 /// { 79 80#define GET_ASSEMBLER_HEADER 81#include "ARMGenAsmMatcher.inc" 82 83 /// } 84 85public: 86 ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) 87 : TargetAsmParser(T), Parser(_Parser), TM(_TM) { 88 // Initialize the set of available features. 89 setAvailableFeatures(ComputeAvailableFeatures( 90 &TM.getSubtarget<ARMSubtarget>())); 91 } 92 93 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 94 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 95 virtual bool ParseDirective(AsmToken DirectiveID); 96}; 97} // end anonymous namespace 98 99namespace { 100 101/// ARMOperand - Instances of this class represent a parsed ARM machine 102/// instruction. 103class ARMOperand : public MCParsedAsmOperand { 104 enum KindTy { 105 CondCode, 106 CCOut, 107 Immediate, 108 Memory, 109 Register, 110 RegisterList, 111 DPRRegisterList, 112 SPRRegisterList, 113 Token 114 } Kind; 115 116 SMLoc StartLoc, EndLoc; 117 SmallVector<unsigned, 8> Registers; 118 119 union { 120 struct { 121 ARMCC::CondCodes Val; 122 } CC; 123 124 struct { 125 const char *Data; 126 unsigned Length; 127 } Tok; 128 129 struct { 130 unsigned RegNum; 131 } Reg; 132 133 struct { 134 const MCExpr *Val; 135 } Imm; 136 137 // This is for all forms of ARM address expressions 138 struct { 139 unsigned BaseRegNum; 140 unsigned OffsetRegNum; // used when OffsetIsReg is true 141 const MCExpr *Offset; // used when OffsetIsReg is false 142 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 143 enum ShiftType ShiftType; // used when OffsetRegShifted is true 144 unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true 145 unsigned Preindexed : 1; 146 unsigned Postindexed : 1; 147 unsigned OffsetIsReg : 1; 148 unsigned Negative : 1; // only used when OffsetIsReg is true 149 unsigned Writeback : 1; 150 } Mem; 151 }; 152 153 ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} 154public: 155 ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { 156 Kind = o.Kind; 157 StartLoc = o.StartLoc; 158 EndLoc = o.EndLoc; 159 switch (Kind) { 160 case CondCode: 161 CC = o.CC; 162 break; 163 case Token: 164 Tok = o.Tok; 165 break; 166 case CCOut: 167 case Register: 168 Reg = o.Reg; 169 break; 170 case RegisterList: 171 case DPRRegisterList: 172 case SPRRegisterList: 173 Registers = o.Registers; 174 break; 175 case Immediate: 176 Imm = o.Imm; 177 break; 178 case Memory: 179 Mem = o.Mem; 180 break; 181 } 182 } 183 184 /// getStartLoc - Get the location of the first token of this operand. 185 SMLoc getStartLoc() const { return StartLoc; } 186 /// getEndLoc - Get the location of the last token of this operand. 187 SMLoc getEndLoc() const { return EndLoc; } 188 189 ARMCC::CondCodes getCondCode() const { 190 assert(Kind == CondCode && "Invalid access!"); 191 return CC.Val; 192 } 193 194 StringRef getToken() const { 195 assert(Kind == Token && "Invalid access!"); 196 return StringRef(Tok.Data, Tok.Length); 197 } 198 199 unsigned getReg() const { 200 assert(Kind == Register || Kind == CCOut && "Invalid access!"); 201 return Reg.RegNum; 202 } 203 204 const SmallVectorImpl<unsigned> &getRegList() const { 205 assert((Kind == RegisterList || Kind == DPRRegisterList || 206 Kind == SPRRegisterList) && "Invalid access!"); 207 return Registers; 208 } 209 210 const MCExpr *getImm() const { 211 assert(Kind == Immediate && "Invalid access!"); 212 return Imm.Val; 213 } 214 215 bool isCondCode() const { return Kind == CondCode; } 216 bool isCCOut() const { return Kind == CCOut; } 217 bool isImm() const { return Kind == Immediate; } 218 bool isReg() const { return Kind == Register; } 219 bool isRegList() const { return Kind == RegisterList; } 220 bool isDPRRegList() const { return Kind == DPRRegisterList; } 221 bool isSPRRegList() const { return Kind == SPRRegisterList; } 222 bool isToken() const { return Kind == Token; } 223 bool isMemory() const { return Kind == Memory; } 224 bool isMemMode5() const { 225 if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted || 226 Mem.Writeback || Mem.Negative) 227 return false; 228 229 // If there is an offset expression, make sure it's valid. 230 if (!Mem.Offset) return true; 231 232 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 233 if (!CE) return false; 234 235 // The offset must be a multiple of 4 in the range 0-1020. 236 int64_t Value = CE->getValue(); 237 return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); 238 } 239 bool isMemModeThumb() const { 240 if (!isMemory() || (!Mem.OffsetIsReg && !Mem.Offset) || Mem.Writeback) 241 return false; 242 243 if (!Mem.Offset) return true; 244 245 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 246 if (!CE) return false; 247 248 // The offset must be a multiple of 4 in the range 0-124. 249 uint64_t Value = CE->getValue(); 250 return ((Value & 0x3) == 0 && Value <= 124); 251 } 252 253 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 254 // Add as immediates when possible. Null MCExpr = 0. 255 if (Expr == 0) 256 Inst.addOperand(MCOperand::CreateImm(0)); 257 else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 258 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 259 else 260 Inst.addOperand(MCOperand::CreateExpr(Expr)); 261 } 262 263 void addCondCodeOperands(MCInst &Inst, unsigned N) const { 264 assert(N == 2 && "Invalid number of operands!"); 265 Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); 266 unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR; 267 Inst.addOperand(MCOperand::CreateReg(RegNum)); 268 } 269 270 void addCCOutOperands(MCInst &Inst, unsigned N) const { 271 assert(N == 1 && "Invalid number of operands!"); 272 Inst.addOperand(MCOperand::CreateReg(getReg())); 273 } 274 275 void addRegOperands(MCInst &Inst, unsigned N) const { 276 assert(N == 1 && "Invalid number of operands!"); 277 Inst.addOperand(MCOperand::CreateReg(getReg())); 278 } 279 280 void addRegListOperands(MCInst &Inst, unsigned N) const { 281 assert(N == 1 && "Invalid number of operands!"); 282 const SmallVectorImpl<unsigned> &RegList = getRegList(); 283 for (SmallVectorImpl<unsigned>::const_iterator 284 I = RegList.begin(), E = RegList.end(); I != E; ++I) 285 Inst.addOperand(MCOperand::CreateReg(*I)); 286 } 287 288 void addDPRRegListOperands(MCInst &Inst, unsigned N) const { 289 addRegListOperands(Inst, N); 290 } 291 292 void addSPRRegListOperands(MCInst &Inst, unsigned N) const { 293 addRegListOperands(Inst, N); 294 } 295 296 void addImmOperands(MCInst &Inst, unsigned N) const { 297 assert(N == 1 && "Invalid number of operands!"); 298 addExpr(Inst, getImm()); 299 } 300 301 void addMemMode5Operands(MCInst &Inst, unsigned N) const { 302 assert(N == 2 && isMemMode5() && "Invalid number of operands!"); 303 304 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 305 assert(!Mem.OffsetIsReg && "Invalid mode 5 operand"); 306 307 // FIXME: #-0 is encoded differently than #0. Does the parser preserve 308 // the difference? 309 if (Mem.Offset) { 310 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 311 assert(CE && "Non-constant mode 5 offset operand!"); 312 313 // The MCInst offset operand doesn't include the low two bits (like 314 // the instruction encoding). 315 int64_t Offset = CE->getValue() / 4; 316 if (Offset >= 0) 317 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, 318 Offset))); 319 else 320 Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, 321 -Offset))); 322 } else { 323 Inst.addOperand(MCOperand::CreateImm(0)); 324 } 325 } 326 327 void addMemModeThumbOperands(MCInst &Inst, unsigned N) const { 328 assert(N == 3 && isMemModeThumb() && "Invalid number of operands!"); 329 Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); 330 331 if (Mem.Offset) { 332 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset); 333 assert(CE && "Non-constant mode offset operand!"); 334 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 335 Inst.addOperand(MCOperand::CreateReg(0)); 336 } else { 337 Inst.addOperand(MCOperand::CreateImm(0)); 338 Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum)); 339 } 340 } 341 342 virtual void dump(raw_ostream &OS) const; 343 344 static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { 345 ARMOperand *Op = new ARMOperand(CondCode); 346 Op->CC.Val = CC; 347 Op->StartLoc = S; 348 Op->EndLoc = S; 349 return Op; 350 } 351 352 static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { 353 ARMOperand *Op = new ARMOperand(CCOut); 354 Op->Reg.RegNum = RegNum; 355 Op->StartLoc = S; 356 Op->EndLoc = S; 357 return Op; 358 } 359 360 static ARMOperand *CreateToken(StringRef Str, SMLoc S) { 361 ARMOperand *Op = new ARMOperand(Token); 362 Op->Tok.Data = Str.data(); 363 Op->Tok.Length = Str.size(); 364 Op->StartLoc = S; 365 Op->EndLoc = S; 366 return Op; 367 } 368 369 static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { 370 ARMOperand *Op = new ARMOperand(Register); 371 Op->Reg.RegNum = RegNum; 372 Op->StartLoc = S; 373 Op->EndLoc = E; 374 return Op; 375 } 376 377 static ARMOperand * 378 CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, 379 SMLoc StartLoc, SMLoc EndLoc) { 380 KindTy Kind = RegisterList; 381 382 if (ARM::DPRRegClass.contains(Regs.front().first)) 383 Kind = DPRRegisterList; 384 else if (ARM::SPRRegClass.contains(Regs.front().first)) 385 Kind = SPRRegisterList; 386 387 ARMOperand *Op = new ARMOperand(Kind); 388 for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator 389 I = Regs.begin(), E = Regs.end(); I != E; ++I) 390 Op->Registers.push_back(I->first); 391 array_pod_sort(Op->Registers.begin(), Op->Registers.end()); 392 Op->StartLoc = StartLoc; 393 Op->EndLoc = EndLoc; 394 return Op; 395 } 396 397 static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { 398 ARMOperand *Op = new ARMOperand(Immediate); 399 Op->Imm.Val = Val; 400 Op->StartLoc = S; 401 Op->EndLoc = E; 402 return Op; 403 } 404 405 static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 406 const MCExpr *Offset, unsigned OffsetRegNum, 407 bool OffsetRegShifted, enum ShiftType ShiftType, 408 const MCExpr *ShiftAmount, bool Preindexed, 409 bool Postindexed, bool Negative, bool Writeback, 410 SMLoc S, SMLoc E) { 411 ARMOperand *Op = new ARMOperand(Memory); 412 Op->Mem.BaseRegNum = BaseRegNum; 413 Op->Mem.OffsetIsReg = OffsetIsReg; 414 Op->Mem.Offset = Offset; 415 Op->Mem.OffsetRegNum = OffsetRegNum; 416 Op->Mem.OffsetRegShifted = OffsetRegShifted; 417 Op->Mem.ShiftType = ShiftType; 418 Op->Mem.ShiftAmount = ShiftAmount; 419 Op->Mem.Preindexed = Preindexed; 420 Op->Mem.Postindexed = Postindexed; 421 Op->Mem.Negative = Negative; 422 Op->Mem.Writeback = Writeback; 423 424 Op->StartLoc = S; 425 Op->EndLoc = E; 426 return Op; 427 } 428}; 429 430} // end anonymous namespace. 431 432void ARMOperand::dump(raw_ostream &OS) const { 433 switch (Kind) { 434 case CondCode: 435 OS << ARMCondCodeToString(getCondCode()); 436 break; 437 case CCOut: 438 OS << "<ccout " << getReg() << ">"; 439 break; 440 case Immediate: 441 getImm()->print(OS); 442 break; 443 case Memory: 444 OS << "<memory>"; 445 break; 446 case Register: 447 OS << "<register " << getReg() << ">"; 448 break; 449 case RegisterList: 450 case DPRRegisterList: 451 case SPRRegisterList: { 452 OS << "<register_list "; 453 454 const SmallVectorImpl<unsigned> &RegList = getRegList(); 455 for (SmallVectorImpl<unsigned>::const_iterator 456 I = RegList.begin(), E = RegList.end(); I != E; ) { 457 OS << *I; 458 if (++I < E) OS << ", "; 459 } 460 461 OS << ">"; 462 break; 463 } 464 case Token: 465 OS << "'" << getToken() << "'"; 466 break; 467 } 468} 469 470/// @name Auto-generated Match Functions 471/// { 472 473static unsigned MatchRegisterName(StringRef Name); 474 475/// } 476 477/// Try to parse a register name. The token must be an Identifier when called, 478/// and if it is a register name the token is eaten and the register number is 479/// returned. Otherwise return -1. 480/// 481int ARMAsmParser::TryParseRegister() { 482 const AsmToken &Tok = Parser.getTok(); 483 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 484 485 // FIXME: Validate register for the current architecture; we have to do 486 // validation later, so maybe there is no need for this here. 487 unsigned RegNum = MatchRegisterName(Tok.getString()); 488 if (RegNum == 0) 489 return -1; 490 Parser.Lex(); // Eat identifier token. 491 return RegNum; 492} 493 494 495/// Try to parse a register name. The token must be an Identifier when called. 496/// If it's a register, an AsmOperand is created. Another AsmOperand is created 497/// if there is a "writeback". 'true' if it's not a register. 498/// 499/// TODO this is likely to change to allow different register types and or to 500/// parse for a specific register type. 501bool ARMAsmParser:: 502TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 503 SMLoc S = Parser.getTok().getLoc(); 504 int RegNo = TryParseRegister(); 505 if (RegNo == -1) 506 return true; 507 508 Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc())); 509 510 const AsmToken &ExclaimTok = Parser.getTok(); 511 if (ExclaimTok.is(AsmToken::Exclaim)) { 512 Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(), 513 ExclaimTok.getLoc())); 514 Parser.Lex(); // Eat exclaim token 515 } 516 517 return false; 518} 519 520/// Parse a register list, return it if successful else return null. The first 521/// token must be a '{' when called. 522bool ARMAsmParser:: 523ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 524 assert(Parser.getTok().is(AsmToken::LCurly) && 525 "Token is not a Left Curly Brace"); 526 SMLoc S = Parser.getTok().getLoc(); 527 528 // Read the rest of the registers in the list. 529 unsigned PrevRegNum = 0; 530 SmallVector<std::pair<unsigned, SMLoc>, 32> Registers; 531 532 do { 533 bool IsRange = Parser.getTok().is(AsmToken::Minus); 534 Parser.Lex(); // Eat non-identifier token. 535 536 const AsmToken &RegTok = Parser.getTok(); 537 SMLoc RegLoc = RegTok.getLoc(); 538 if (RegTok.isNot(AsmToken::Identifier)) { 539 Error(RegLoc, "register expected"); 540 return true; 541 } 542 543 int RegNum = TryParseRegister(); 544 if (RegNum == -1) { 545 Error(RegLoc, "register expected"); 546 return true; 547 } 548 549 if (IsRange) { 550 int Reg = PrevRegNum; 551 do { 552 ++Reg; 553 Registers.push_back(std::make_pair(Reg, RegLoc)); 554 } while (Reg != RegNum); 555 } else { 556 Registers.push_back(std::make_pair(RegNum, RegLoc)); 557 } 558 559 PrevRegNum = RegNum; 560 } while (Parser.getTok().is(AsmToken::Comma) || 561 Parser.getTok().is(AsmToken::Minus)); 562 563 // Process the right curly brace of the list. 564 const AsmToken &RCurlyTok = Parser.getTok(); 565 if (RCurlyTok.isNot(AsmToken::RCurly)) { 566 Error(RCurlyTok.getLoc(), "'}' expected"); 567 return true; 568 } 569 570 SMLoc E = RCurlyTok.getLoc(); 571 Parser.Lex(); // Eat right curly brace token. 572 573 // Verify the register list. 574 SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator 575 RI = Registers.begin(), RE = Registers.end(); 576 577 DenseMap<unsigned, bool> RegMap; 578 RegMap[RI->first] = true; 579 580 unsigned HighRegNum = RI->first; 581 bool EmittedWarning = false; 582 583 for (++RI; RI != RE; ++RI) { 584 const std::pair<unsigned, SMLoc> &RegInfo = *RI; 585 unsigned Reg = RegInfo.first; 586 587 if (RegMap[Reg]) { 588 Error(RegInfo.second, "register duplicated in register list"); 589 return true; 590 } 591 592 if (!EmittedWarning && Reg < HighRegNum) 593 Warning(RegInfo.second, 594 "register not in ascending order in register list"); 595 596 RegMap[Reg] = true; 597 HighRegNum = std::max(Reg, HighRegNum); 598 } 599 600 Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); 601 return false; 602} 603 604/// Parse an ARM memory expression, return false if successful else return true 605/// or an error. The first token must be a '[' when called. 606/// 607/// TODO Only preindexing and postindexing addressing are started, unindexed 608/// with option, etc are still to do. 609bool ARMAsmParser:: 610ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 611 SMLoc S, E; 612 assert(Parser.getTok().is(AsmToken::LBrac) && 613 "Token is not a Left Bracket"); 614 S = Parser.getTok().getLoc(); 615 Parser.Lex(); // Eat left bracket token. 616 617 const AsmToken &BaseRegTok = Parser.getTok(); 618 if (BaseRegTok.isNot(AsmToken::Identifier)) { 619 Error(BaseRegTok.getLoc(), "register expected"); 620 return true; 621 } 622 int BaseRegNum = TryParseRegister(); 623 if (BaseRegNum == -1) { 624 Error(BaseRegTok.getLoc(), "register expected"); 625 return true; 626 } 627 628 bool Preindexed = false; 629 bool Postindexed = false; 630 bool OffsetIsReg = false; 631 bool Negative = false; 632 bool Writeback = false; 633 634 // First look for preindexed address forms, that is after the "[Rn" we now 635 // have to see if the next token is a comma. 636 const AsmToken &Tok = Parser.getTok(); 637 if (Tok.is(AsmToken::Comma)) { 638 Preindexed = true; 639 Parser.Lex(); // Eat comma token. 640 int OffsetRegNum; 641 bool OffsetRegShifted; 642 enum ShiftType ShiftType; 643 const MCExpr *ShiftAmount = 0; 644 const MCExpr *Offset = 0; 645 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, 646 Offset, OffsetIsReg, OffsetRegNum, E)) 647 return true; 648 const AsmToken &RBracTok = Parser.getTok(); 649 if (RBracTok.isNot(AsmToken::RBrac)) { 650 Error(RBracTok.getLoc(), "']' expected"); 651 return true; 652 } 653 E = RBracTok.getLoc(); 654 Parser.Lex(); // Eat right bracket token. 655 656 657 const AsmToken &ExclaimTok = Parser.getTok(); 658 ARMOperand *WBOp = 0; 659 if (ExclaimTok.is(AsmToken::Exclaim)) { 660 WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), 661 ExclaimTok.getLoc()); 662 Writeback = true; 663 Parser.Lex(); // Eat exclaim token 664 } 665 666 Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, 667 OffsetRegNum, OffsetRegShifted, 668 ShiftType, ShiftAmount, Preindexed, 669 Postindexed, Negative, Writeback, 670 S, E)); 671 if (WBOp) 672 Operands.push_back(WBOp); 673 674 return false; 675 } 676 // The "[Rn" we have so far was not followed by a comma. 677 else if (Tok.is(AsmToken::RBrac)) { 678 // If there's anything other than the right brace, this is a post indexing 679 // addressing form. 680 E = Tok.getLoc(); 681 Parser.Lex(); // Eat right bracket token. 682 683 int OffsetRegNum = 0; 684 bool OffsetRegShifted = false; 685 enum ShiftType ShiftType = Lsl; 686 const MCExpr *ShiftAmount = 0; 687 const MCExpr *Offset = 0; 688 689 const AsmToken &NextTok = Parser.getTok(); 690 691 if (NextTok.isNot(AsmToken::EndOfStatement)) { 692 Postindexed = true; 693 Writeback = true; 694 695 if (NextTok.isNot(AsmToken::Comma)) { 696 Error(NextTok.getLoc(), "',' expected"); 697 return true; 698 } 699 700 Parser.Lex(); // Eat comma token. 701 702 if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, 703 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, 704 E)) 705 return true; 706 } 707 708 Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, 709 OffsetRegNum, OffsetRegShifted, 710 ShiftType, ShiftAmount, Preindexed, 711 Postindexed, Negative, Writeback, 712 S, E)); 713 return false; 714 } 715 716 return true; 717} 718 719/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," 720/// we will parse the following (were +/- means that a plus or minus is 721/// optional): 722/// +/-Rm 723/// +/-Rm, shift 724/// #offset 725/// we return false on success or an error otherwise. 726bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, 727 bool &OffsetRegShifted, 728 enum ShiftType &ShiftType, 729 const MCExpr *&ShiftAmount, 730 const MCExpr *&Offset, 731 bool &OffsetIsReg, 732 int &OffsetRegNum, 733 SMLoc &E) { 734 Negative = false; 735 OffsetRegShifted = false; 736 OffsetIsReg = false; 737 OffsetRegNum = -1; 738 const AsmToken &NextTok = Parser.getTok(); 739 E = NextTok.getLoc(); 740 if (NextTok.is(AsmToken::Plus)) 741 Parser.Lex(); // Eat plus token. 742 else if (NextTok.is(AsmToken::Minus)) { 743 Negative = true; 744 Parser.Lex(); // Eat minus token 745 } 746 // See if there is a register following the "[Rn," or "[Rn]," we have so far. 747 const AsmToken &OffsetRegTok = Parser.getTok(); 748 if (OffsetRegTok.is(AsmToken::Identifier)) { 749 SMLoc CurLoc = OffsetRegTok.getLoc(); 750 OffsetRegNum = TryParseRegister(); 751 if (OffsetRegNum != -1) { 752 OffsetIsReg = true; 753 E = CurLoc; 754 } 755 } 756 757 // If we parsed a register as the offset then there can be a shift after that. 758 if (OffsetRegNum != -1) { 759 // Look for a comma then a shift 760 const AsmToken &Tok = Parser.getTok(); 761 if (Tok.is(AsmToken::Comma)) { 762 Parser.Lex(); // Eat comma token. 763 764 const AsmToken &Tok = Parser.getTok(); 765 if (ParseShift(ShiftType, ShiftAmount, E)) 766 return Error(Tok.getLoc(), "shift expected"); 767 OffsetRegShifted = true; 768 } 769 } 770 else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" 771 // Look for #offset following the "[Rn," or "[Rn]," 772 const AsmToken &HashTok = Parser.getTok(); 773 if (HashTok.isNot(AsmToken::Hash)) 774 return Error(HashTok.getLoc(), "'#' expected"); 775 776 Parser.Lex(); // Eat hash token. 777 778 if (getParser().ParseExpression(Offset)) 779 return true; 780 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 781 } 782 return false; 783} 784 785/// ParseShift as one of these two: 786/// ( lsl | lsr | asr | ror ) , # shift_amount 787/// rrx 788/// and returns true if it parses a shift otherwise it returns false. 789bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, 790 SMLoc &E) { 791 const AsmToken &Tok = Parser.getTok(); 792 if (Tok.isNot(AsmToken::Identifier)) 793 return true; 794 StringRef ShiftName = Tok.getString(); 795 if (ShiftName == "lsl" || ShiftName == "LSL") 796 St = Lsl; 797 else if (ShiftName == "lsr" || ShiftName == "LSR") 798 St = Lsr; 799 else if (ShiftName == "asr" || ShiftName == "ASR") 800 St = Asr; 801 else if (ShiftName == "ror" || ShiftName == "ROR") 802 St = Ror; 803 else if (ShiftName == "rrx" || ShiftName == "RRX") 804 St = Rrx; 805 else 806 return true; 807 Parser.Lex(); // Eat shift type token. 808 809 // Rrx stands alone. 810 if (St == Rrx) 811 return false; 812 813 // Otherwise, there must be a '#' and a shift amount. 814 const AsmToken &HashTok = Parser.getTok(); 815 if (HashTok.isNot(AsmToken::Hash)) 816 return Error(HashTok.getLoc(), "'#' expected"); 817 Parser.Lex(); // Eat hash token. 818 819 if (getParser().ParseExpression(ShiftAmount)) 820 return true; 821 822 return false; 823} 824 825/// Parse a arm instruction operand. For now this parses the operand regardless 826/// of the mnemonic. 827bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands){ 828 SMLoc S, E; 829 switch (getLexer().getKind()) { 830 default: 831 Error(Parser.getTok().getLoc(), "unexpected token in operand"); 832 return true; 833 case AsmToken::Identifier: { 834 if (!TryParseRegisterWithWriteBack(Operands)) 835 return false; 836 837 // This was not a register so parse other operands that start with an 838 // identifier (like labels) as expressions and create them as immediates. 839 const MCExpr *IdVal; 840 S = Parser.getTok().getLoc(); 841 if (getParser().ParseExpression(IdVal)) 842 return true; 843 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 844 Operands.push_back(ARMOperand::CreateImm(IdVal, S, E)); 845 return false; 846 } 847 case AsmToken::LBrac: 848 return ParseMemory(Operands); 849 case AsmToken::LCurly: 850 return ParseRegisterList(Operands); 851 case AsmToken::Hash: 852 // #42 -> immediate. 853 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate 854 S = Parser.getTok().getLoc(); 855 Parser.Lex(); 856 const MCExpr *ImmVal; 857 if (getParser().ParseExpression(ImmVal)) 858 return true; 859 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); 860 Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); 861 return false; 862 } 863} 864 865/// Parse an arm instruction mnemonic followed by its operands. 866bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, 867 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 868 // Create the leading tokens for the mnemonic, split by '.' characters. 869 size_t Start = 0, Next = Name.find('.'); 870 StringRef Head = Name.slice(Start, Next); 871 872 // Determine the predicate, if any. 873 // 874 // FIXME: We need a way to check whether a prefix supports predication, 875 // otherwise we will end up with an ambiguity for instructions that happen to 876 // end with a predicate name. 877 // FIXME: Likewise, some arithmetic instructions have an 's' prefix which 878 // indicates to update the condition codes. Those instructions have an 879 // additional immediate operand which encodes the prefix as reg0 or CPSR. 880 // Just checking for a suffix of 's' definitely creates ambiguities; e.g, 881 // the SMMLS instruction. 882 unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) 883 .Case("eq", ARMCC::EQ) 884 .Case("ne", ARMCC::NE) 885 .Case("hs", ARMCC::HS) 886 .Case("lo", ARMCC::LO) 887 .Case("mi", ARMCC::MI) 888 .Case("pl", ARMCC::PL) 889 .Case("vs", ARMCC::VS) 890 .Case("vc", ARMCC::VC) 891 .Case("hi", ARMCC::HI) 892 .Case("ls", ARMCC::LS) 893 .Case("ge", ARMCC::GE) 894 .Case("lt", ARMCC::LT) 895 .Case("gt", ARMCC::GT) 896 .Case("le", ARMCC::LE) 897 .Case("al", ARMCC::AL) 898 .Default(~0U); 899 900 if (CC == ~0U || 901 (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) { 902 CC = ARMCC::AL; 903 } else { 904 Head = Head.slice(0, Head.size() - 2); 905 } 906 907 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 908 909 if (Head != "trap") 910 // FIXME: Should only add this operand for predicated instructions 911 Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), 912 NameLoc)); 913 914 // Add the remaining tokens in the mnemonic. 915 while (Next != StringRef::npos) { 916 Start = Next; 917 Next = Name.find('.', Start + 1); 918 Head = Name.slice(Start, Next); 919 920 Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); 921 } 922 923 // Read the remaining operands. 924 if (getLexer().isNot(AsmToken::EndOfStatement)) { 925 // Read the first operand. 926 if (ParseOperand(Operands)) { 927 Parser.EatToEndOfStatement(); 928 return true; 929 } 930 931 while (getLexer().is(AsmToken::Comma)) { 932 Parser.Lex(); // Eat the comma. 933 934 // Parse and remember the operand. 935 if (ParseOperand(Operands)) { 936 Parser.EatToEndOfStatement(); 937 return true; 938 } 939 } 940 } 941 942 if (getLexer().isNot(AsmToken::EndOfStatement)) { 943 Parser.EatToEndOfStatement(); 944 return TokError("unexpected token in argument list"); 945 } 946 947 Parser.Lex(); // Consume the EndOfStatement 948 return false; 949} 950 951bool ARMAsmParser:: 952MatchAndEmitInstruction(SMLoc IDLoc, 953 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 954 MCStreamer &Out) { 955 MCInst Inst; 956 unsigned ErrorInfo; 957 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { 958 case Match_Success: 959 Out.EmitInstruction(Inst); 960 return false; 961 case Match_MissingFeature: 962 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 963 return true; 964 case Match_InvalidOperand: { 965 SMLoc ErrorLoc = IDLoc; 966 if (ErrorInfo != ~0U) { 967 if (ErrorInfo >= Operands.size()) 968 return Error(IDLoc, "too few operands for instruction"); 969 970 ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); 971 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 972 } 973 974 return Error(ErrorLoc, "invalid operand for instruction"); 975 } 976 case Match_MnemonicFail: 977 return Error(IDLoc, "unrecognized instruction mnemonic"); 978 } 979 980 llvm_unreachable("Implement any new match types added!"); 981 return true; 982} 983 984/// ParseDirective parses the arm specific directives 985bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 986 StringRef IDVal = DirectiveID.getIdentifier(); 987 if (IDVal == ".word") 988 return ParseDirectiveWord(4, DirectiveID.getLoc()); 989 else if (IDVal == ".thumb") 990 return ParseDirectiveThumb(DirectiveID.getLoc()); 991 else if (IDVal == ".thumb_func") 992 return ParseDirectiveThumbFunc(DirectiveID.getLoc()); 993 else if (IDVal == ".code") 994 return ParseDirectiveCode(DirectiveID.getLoc()); 995 else if (IDVal == ".syntax") 996 return ParseDirectiveSyntax(DirectiveID.getLoc()); 997 return true; 998} 999 1000/// ParseDirectiveWord 1001/// ::= .word [ expression (, expression)* ] 1002bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 1003 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1004 for (;;) { 1005 const MCExpr *Value; 1006 if (getParser().ParseExpression(Value)) 1007 return true; 1008 1009 getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); 1010 1011 if (getLexer().is(AsmToken::EndOfStatement)) 1012 break; 1013 1014 // FIXME: Improve diagnostic. 1015 if (getLexer().isNot(AsmToken::Comma)) 1016 return Error(L, "unexpected token in directive"); 1017 Parser.Lex(); 1018 } 1019 } 1020 1021 Parser.Lex(); 1022 return false; 1023} 1024 1025/// ParseDirectiveThumb 1026/// ::= .thumb 1027bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { 1028 if (getLexer().isNot(AsmToken::EndOfStatement)) 1029 return Error(L, "unexpected token in directive"); 1030 Parser.Lex(); 1031 1032 // TODO: set thumb mode 1033 // TODO: tell the MC streamer the mode 1034 // getParser().getStreamer().Emit???(); 1035 return false; 1036} 1037 1038/// ParseDirectiveThumbFunc 1039/// ::= .thumbfunc symbol_name 1040bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { 1041 const AsmToken &Tok = Parser.getTok(); 1042 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) 1043 return Error(L, "unexpected token in .thumb_func directive"); 1044 StringRef Name = Tok.getString(); 1045 Parser.Lex(); // Consume the identifier token. 1046 if (getLexer().isNot(AsmToken::EndOfStatement)) 1047 return Error(L, "unexpected token in directive"); 1048 Parser.Lex(); 1049 1050 // Mark symbol as a thumb symbol. 1051 MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); 1052 getParser().getStreamer().EmitThumbFunc(Func); 1053 return false; 1054} 1055 1056/// ParseDirectiveSyntax 1057/// ::= .syntax unified | divided 1058bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { 1059 const AsmToken &Tok = Parser.getTok(); 1060 if (Tok.isNot(AsmToken::Identifier)) 1061 return Error(L, "unexpected token in .syntax directive"); 1062 StringRef Mode = Tok.getString(); 1063 if (Mode == "unified" || Mode == "UNIFIED") 1064 Parser.Lex(); 1065 else if (Mode == "divided" || Mode == "DIVIDED") 1066 Parser.Lex(); 1067 else 1068 return Error(L, "unrecognized syntax mode in .syntax directive"); 1069 1070 if (getLexer().isNot(AsmToken::EndOfStatement)) 1071 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1072 Parser.Lex(); 1073 1074 // TODO tell the MC streamer the mode 1075 // getParser().getStreamer().Emit???(); 1076 return false; 1077} 1078 1079/// ParseDirectiveCode 1080/// ::= .code 16 | 32 1081bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { 1082 const AsmToken &Tok = Parser.getTok(); 1083 if (Tok.isNot(AsmToken::Integer)) 1084 return Error(L, "unexpected token in .code directive"); 1085 int64_t Val = Parser.getTok().getIntVal(); 1086 if (Val == 16) 1087 Parser.Lex(); 1088 else if (Val == 32) 1089 Parser.Lex(); 1090 else 1091 return Error(L, "invalid operand to .code directive"); 1092 1093 if (getLexer().isNot(AsmToken::EndOfStatement)) 1094 return Error(Parser.getTok().getLoc(), "unexpected token in directive"); 1095 Parser.Lex(); 1096 1097 if (Val == 16) 1098 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 1099 else 1100 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 1101 1102 return false; 1103} 1104 1105extern "C" void LLVMInitializeARMAsmLexer(); 1106 1107/// Force static initialization. 1108extern "C" void LLVMInitializeARMAsmParser() { 1109 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 1110 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 1111 LLVMInitializeARMAsmLexer(); 1112} 1113 1114#define GET_REGISTER_MATCHER 1115#define GET_MATCHER_IMPLEMENTATION 1116#include "ARMGenAsmMatcher.inc" 1117