// ARMAsmParser.cpp revision cfe072401658bbe9336b200b79526b65c5213b74
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "llvm/ADT/SmallVector.h" 12#include "llvm/ADT/Twine.h" 13#include "llvm/MC/MCAsmLexer.h" 14#include "llvm/MC/MCAsmParser.h" 15#include "llvm/MC/MCStreamer.h" 16#include "llvm/MC/MCExpr.h" 17#include "llvm/MC/MCInst.h" 18#include "llvm/Support/SourceMgr.h" 19#include "llvm/Target/TargetRegistry.h" 20#include "llvm/Target/TargetAsmParser.h" 21using namespace llvm; 22 23namespace { 24struct ARMOperand; 25 26// The shift types for register controlled shifts in arm memory addressing 27enum ShiftType { 28 Lsl, 29 Lsr, 30 Asr, 31 Ror, 32 Rrx 33}; 34 35class ARMAsmParser : public TargetAsmParser { 36 MCAsmParser &Parser; 37 38private: 39 MCAsmParser &getParser() const { return Parser; } 40 41 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 42 43 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 44 45 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 46 47 bool ParseRegister(ARMOperand &Op); 48 49 bool ParseRegisterList(ARMOperand &Op); 50 51 bool ParseMemory(ARMOperand &Op); 52 53 bool ParseShift(enum ShiftType *St, const MCExpr *&ShiftAmount); 54 55 bool ParseOperand(ARMOperand &Op); 56 57 bool ParseDirectiveWord(unsigned Size, SMLoc L); 58 59 // TODO - For now hacked versions of the next two are in here in this file to 60 // allow some parser testing until the table gen versions are implemented. 61 62 /// @name Auto-generated Match Functions 63 /// { 64 bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, 65 MCInst &Inst); 66 67 /// MatchRegisterName - Match the given string to a register name and return 68 /// its register number, or -1 if there is no match. 
To allow return values 69 /// to be used directly in register lists, arm registers have values between 70 /// 0 and 15. 71 int MatchRegisterName(const StringRef &Name); 72 73 /// } 74 75 76public: 77 ARMAsmParser(const Target &T, MCAsmParser &_Parser) 78 : TargetAsmParser(T), Parser(_Parser) {} 79 80 virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); 81 82 virtual bool ParseDirective(AsmToken DirectiveID); 83}; 84 85} // end anonymous namespace 86 87namespace { 88 89/// ARMOperand - Instances of this class represent a parsed ARM machine 90/// instruction. 91struct ARMOperand { 92 enum { 93 Token, 94 Register, 95 Immediate, 96 Memory 97 } Kind; 98 99 100 union { 101 struct { 102 const char *Data; 103 unsigned Length; 104 } Tok; 105 106 struct { 107 unsigned RegNum; 108 bool Writeback; 109 } Reg; 110 111 struct { 112 const MCExpr *Val; 113 } Imm; 114 115 // This is for all forms of ARM address expressions 116 struct { 117 unsigned BaseRegNum; 118 bool OffsetIsReg; 119 const MCExpr *Offset; // used when OffsetIsReg is false 120 unsigned OffsetRegNum; // used when OffsetIsReg is true 121 bool OffsetRegShifted; // only used when OffsetIsReg is true 122 enum ShiftType ShiftType; // used when OffsetRegShifted is true 123 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 124 bool Preindexed; 125 bool Postindexed; 126 bool Negative; // only used when OffsetIsReg is true 127 bool Writeback; 128 } Mem; 129 130 }; 131 132 StringRef getToken() const { 133 assert(Kind == Token && "Invalid access!"); 134 return StringRef(Tok.Data, Tok.Length); 135 } 136 137 unsigned getReg() const { 138 assert(Kind == Register && "Invalid access!"); 139 return Reg.RegNum; 140 } 141 142 const MCExpr *getImm() const { 143 assert(Kind == Immediate && "Invalid access!"); 144 return Imm.Val; 145 } 146 147 bool isToken() const {return Kind == Token; } 148 149 bool isReg() const { return Kind == Register; } 150 151 void addRegOperands(MCInst &Inst, unsigned N) const { 152 
assert(N == 1 && "Invalid number of operands!"); 153 Inst.addOperand(MCOperand::CreateReg(getReg())); 154 } 155 156 static ARMOperand CreateToken(StringRef Str) { 157 ARMOperand Res; 158 Res.Kind = Token; 159 Res.Tok.Data = Str.data(); 160 Res.Tok.Length = Str.size(); 161 return Res; 162 } 163 164 static ARMOperand CreateReg(unsigned RegNum, bool Writeback) { 165 ARMOperand Res; 166 Res.Kind = Register; 167 Res.Reg.RegNum = RegNum; 168 Res.Reg.Writeback = Writeback; 169 return Res; 170 } 171 172 static ARMOperand CreateImm(const MCExpr *Val) { 173 ARMOperand Res; 174 Res.Kind = Immediate; 175 Res.Imm.Val = Val; 176 return Res; 177 } 178 179 static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 180 const MCExpr *Offset, unsigned OffsetRegNum, 181 bool OffsetRegShifted, enum ShiftType ShiftType, 182 const MCExpr *ShiftAmount, bool Preindexed, 183 bool Postindexed, bool Negative, bool Writeback) { 184 ARMOperand Res; 185 Res.Kind = Memory; 186 Res.Mem.BaseRegNum = BaseRegNum; 187 Res.Mem.OffsetIsReg = OffsetIsReg; 188 Res.Mem.Offset = Offset; 189 Res.Mem.OffsetRegNum = OffsetRegNum; 190 Res.Mem.OffsetRegShifted = OffsetRegShifted; 191 Res.Mem.ShiftType = ShiftType; 192 Res.Mem.ShiftAmount = ShiftAmount; 193 Res.Mem.Preindexed = Preindexed; 194 Res.Mem.Postindexed = Postindexed; 195 Res.Mem.Negative = Negative; 196 Res.Mem.Writeback = Writeback; 197 return Res; 198 } 199}; 200 201} // end anonymous namespace. 202 203// Try to parse a register name. The token must be an Identifier when called, 204// and if it is a register name a Reg operand is created, the token is eaten 205// and false is returned. Else true is returned and no token is eaten. 206// TODO this is likely to change to allow different register types and or to 207// parse for a specific register type. 
208bool ARMAsmParser::ParseRegister(ARMOperand &Op) { 209 const AsmToken &Tok = getLexer().getTok(); 210 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 211 212 // FIXME: Validate register for the current architecture; we have to do 213 // validation later, so maybe there is no need for this here. 214 int RegNum; 215 216 RegNum = MatchRegisterName(Tok.getString()); 217 if (RegNum == -1) 218 return true; 219 getLexer().Lex(); // Eat identifier token. 220 221 bool Writeback = false; 222 const AsmToken &ExclaimTok = getLexer().getTok(); 223 if (ExclaimTok.is(AsmToken::Exclaim)) { 224 Writeback = true; 225 getLexer().Lex(); // Eat exclaim token 226 } 227 228 Op = ARMOperand::CreateReg(RegNum, Writeback); 229 230 return false; 231} 232 233// Try to parse a register list. The first token must be a '{' when called 234// for now. 235bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { 236 assert(getLexer().getTok().is(AsmToken::LCurly) && 237 "Token is not an Left Curly Brace"); 238 getLexer().Lex(); // Eat left curly brace token. 239 240 const AsmToken &RegTok = getLexer().getTok(); 241 SMLoc RegLoc = RegTok.getLoc(); 242 if (RegTok.isNot(AsmToken::Identifier)) 243 return Error(RegLoc, "register expected"); 244 int RegNum = MatchRegisterName(RegTok.getString()); 245 if (RegNum == -1) 246 return Error(RegLoc, "register expected"); 247 getLexer().Lex(); // Eat identifier token. 248 unsigned RegList = 1 << RegNum; 249 250 int HighRegNum = RegNum; 251 // TODO ranges like "{Rn-Rm}" 252 while (getLexer().getTok().is(AsmToken::Comma)) { 253 getLexer().Lex(); // Eat comma token. 
254 255 const AsmToken &RegTok = getLexer().getTok(); 256 SMLoc RegLoc = RegTok.getLoc(); 257 if (RegTok.isNot(AsmToken::Identifier)) 258 return Error(RegLoc, "register expected"); 259 int RegNum = MatchRegisterName(RegTok.getString()); 260 if (RegNum == -1) 261 return Error(RegLoc, "register expected"); 262 263 if (RegList & (1 << RegNum)) 264 Warning(RegLoc, "register duplicated in register list"); 265 else if (RegNum <= HighRegNum) 266 Warning(RegLoc, "register not in ascending order in register list"); 267 RegList |= 1 << RegNum; 268 HighRegNum = RegNum; 269 270 getLexer().Lex(); // Eat identifier token. 271 } 272 const AsmToken &RCurlyTok = getLexer().getTok(); 273 if (RCurlyTok.isNot(AsmToken::RCurly)) 274 return Error(RCurlyTok.getLoc(), "'}' expected"); 275 getLexer().Lex(); // Eat left curly brace token. 276 277 return false; 278} 279 280// Try to parse an arm memory expression. It must start with a '[' token. 281// TODO Only preindexing and postindexing addressing are started, unindexed 282// with option, etc are still to do. 283bool ARMAsmParser::ParseMemory(ARMOperand &Op) { 284 assert(getLexer().getTok().is(AsmToken::LBrac) && 285 "Token is not an Left Bracket"); 286 getLexer().Lex(); // Eat left bracket token. 287 288 const AsmToken &BaseRegTok = getLexer().getTok(); 289 if (BaseRegTok.isNot(AsmToken::Identifier)) 290 return Error(BaseRegTok.getLoc(), "register expected"); 291 int BaseRegNum = MatchRegisterName(BaseRegTok.getString()); 292 if (BaseRegNum == -1) 293 return Error(BaseRegTok.getLoc(), "register expected"); 294 getLexer().Lex(); // Eat identifier token. 295 296 bool Preindexed = false; 297 bool Postindexed = false; 298 bool OffsetIsReg = false; 299 bool Negative = false; 300 bool Writeback = false; 301 302 // First look for preindexed address forms: 303 // [Rn, +/-Rm] 304 // [Rn, #offset] 305 // [Rn, +/-Rm, shift] 306 // that is after the "[Rn" we now have see if the next token is a comma. 
307 const AsmToken &Tok = getLexer().getTok(); 308 if (Tok.is(AsmToken::Comma)) { 309 Preindexed = true; 310 getLexer().Lex(); // Eat comma token. 311 312 const AsmToken &NextTok = getLexer().getTok(); 313 if (NextTok.is(AsmToken::Plus)) 314 getLexer().Lex(); // Eat plus token. 315 else if (NextTok.is(AsmToken::Minus)) { 316 Negative = true; 317 getLexer().Lex(); // Eat minus token 318 } 319 320 // See if there is a register following the "[Rn," we have so far. 321 const AsmToken &OffsetRegTok = getLexer().getTok(); 322 int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); 323 bool OffsetRegShifted = false; 324 enum ShiftType ShiftType; 325 const MCExpr *ShiftAmount; 326 const MCExpr *Offset; 327 if (OffsetRegNum != -1) { 328 OffsetIsReg = true; 329 getLexer().Lex(); // Eat identifier token for the offset register. 330 // Look for a comma then a shift 331 const AsmToken &Tok = getLexer().getTok(); 332 if (Tok.is(AsmToken::Comma)) { 333 getLexer().Lex(); // Eat comma token. 334 335 const AsmToken &Tok = getLexer().getTok(); 336 if (ParseShift(&ShiftType, ShiftAmount)) 337 return Error(Tok.getLoc(), "shift expected"); 338 OffsetRegShifted = true; 339 } 340 } 341 else { // "[Rn," we have so far was not followed by "Rm" 342 // Look for #offset following the "[Rn," 343 const AsmToken &HashTok = getLexer().getTok(); 344 if (HashTok.isNot(AsmToken::Hash)) 345 return Error(HashTok.getLoc(), "'#' expected"); 346 getLexer().Lex(); // Eat hash token. 347 348 if (getParser().ParseExpression(Offset)) 349 return true; 350 } 351 const AsmToken &RBracTok = getLexer().getTok(); 352 if (RBracTok.isNot(AsmToken::RBrac)) 353 return Error(RBracTok.getLoc(), "']' expected"); 354 getLexer().Lex(); // Eat right bracket token. 
355 356 const AsmToken &ExclaimTok = getLexer().getTok(); 357 if (ExclaimTok.is(AsmToken::Exclaim)) { 358 Writeback = true; 359 getLexer().Lex(); // Eat exclaim token 360 } 361 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 362 OffsetRegShifted, ShiftType, ShiftAmount, 363 Preindexed, Postindexed, Negative, Writeback); 364 return false; 365 } 366 // The "[Rn" we have so far was not followed by a comma. 367 else if (Tok.is(AsmToken::RBrac)) { 368 // This is a post indexing addressing forms: 369 // [Rn], #offset 370 // [Rn], +/-Rm 371 // [Rn], +/-Rm, shift 372 // that is a ']' follows after the "[Rn". 373 Postindexed = true; 374 Writeback = true; 375 getLexer().Lex(); // Eat right bracket token. 376 377 const AsmToken &CommaTok = getLexer().getTok(); 378 if (CommaTok.isNot(AsmToken::Comma)) 379 return Error(CommaTok.getLoc(), "',' expected"); 380 getLexer().Lex(); // Eat comma token. 381 382 const AsmToken &NextTok = getLexer().getTok(); 383 if (NextTok.is(AsmToken::Plus)) 384 getLexer().Lex(); // Eat plus token. 385 else if (NextTok.is(AsmToken::Minus)) { 386 Negative = true; 387 getLexer().Lex(); // Eat minus token 388 } 389 390 // See if there is a register following the "[Rn]," we have so far. 391 const AsmToken &OffsetRegTok = getLexer().getTok(); 392 int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); 393 bool OffsetRegShifted = false; 394 enum ShiftType ShiftType; 395 const MCExpr *ShiftAmount; 396 const MCExpr *Offset; 397 if (OffsetRegNum != -1) { 398 OffsetIsReg = true; 399 getLexer().Lex(); // Eat identifier token for the offset register. 400 // Look for a comma then a shift 401 const AsmToken &Tok = getLexer().getTok(); 402 if (Tok.is(AsmToken::Comma)) { 403 getLexer().Lex(); // Eat comma token. 
404 405 const AsmToken &Tok = getLexer().getTok(); 406 if (ParseShift(&ShiftType, ShiftAmount)) 407 return Error(Tok.getLoc(), "shift expected"); 408 OffsetRegShifted = true; 409 } 410 } 411 else { // "[Rn]," we have so far was not followed by "Rm" 412 // Look for #offset following the "[Rn]," 413 const AsmToken &HashTok = getLexer().getTok(); 414 if (HashTok.isNot(AsmToken::Hash)) 415 return Error(HashTok.getLoc(), "'#' expected"); 416 getLexer().Lex(); // Eat hash token. 417 418 if (getParser().ParseExpression(Offset)) 419 return true; 420 } 421 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 422 OffsetRegShifted, ShiftType, ShiftAmount, 423 Preindexed, Postindexed, Negative, Writeback); 424 return false; 425 } 426 427 return true; 428} 429 430/// ParseShift as one of these two: 431/// ( lsl | lsr | asr | ror ) , # shift_amount 432/// rrx 433/// and returns true if it parses a shift otherwise it returns false. 434bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) { 435 const AsmToken &Tok = getLexer().getTok(); 436 if (Tok.isNot(AsmToken::Identifier)) 437 return true; 438 const StringRef &ShiftName = Tok.getString(); 439 if (ShiftName == "lsl" || ShiftName == "LSL") 440 *St = Lsl; 441 else if (ShiftName == "lsr" || ShiftName == "LSR") 442 *St = Lsr; 443 else if (ShiftName == "asr" || ShiftName == "ASR") 444 *St = Asr; 445 else if (ShiftName == "ror" || ShiftName == "ROR") 446 *St = Ror; 447 else if (ShiftName == "rrx" || ShiftName == "RRX") 448 *St = Rrx; 449 else 450 return true; 451 getLexer().Lex(); // Eat shift type token. 452 453 // For all but a Rotate right there must be a '#' and a shift amount 454 if (*St != Rrx) { 455 // Look for # following the shift type 456 const AsmToken &HashTok = getLexer().getTok(); 457 if (HashTok.isNot(AsmToken::Hash)) 458 return Error(HashTok.getLoc(), "'#' expected"); 459 getLexer().Lex(); // Eat hash token. 
460 461 if (getParser().ParseExpression(ShiftAmount)) 462 return true; 463 } 464 465 return false; 466} 467 468// A hack to allow some testing 469int ARMAsmParser::MatchRegisterName(const StringRef &Name) { 470 if (Name == "r0" || Name == "R0") 471 return 0; 472 else if (Name == "r1" || Name == "R1") 473 return 1; 474 else if (Name == "r2" || Name == "R2") 475 return 2; 476 else if (Name == "r3" || Name == "R3") 477 return 3; 478 else if (Name == "r3" || Name == "R3") 479 return 3; 480 else if (Name == "r4" || Name == "R4") 481 return 4; 482 else if (Name == "r5" || Name == "R5") 483 return 5; 484 else if (Name == "r6" || Name == "R6") 485 return 6; 486 else if (Name == "r7" || Name == "R7") 487 return 7; 488 else if (Name == "r8" || Name == "R8") 489 return 8; 490 else if (Name == "r9" || Name == "R9") 491 return 9; 492 else if (Name == "r10" || Name == "R10") 493 return 10; 494 else if (Name == "r11" || Name == "R11" || Name == "fp") 495 return 11; 496 else if (Name == "r12" || Name == "R12" || Name == "ip") 497 return 12; 498 else if (Name == "r13" || Name == "R13" || Name == "sp") 499 return 13; 500 else if (Name == "r14" || Name == "R14" || Name == "lr") 501 return 14; 502 else if (Name == "r15" || Name == "R15" || Name == "pc") 503 return 15; 504 return -1; 505} 506 507// A hack to allow some testing 508bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, 509 MCInst &Inst) { 510 struct ARMOperand Op0 = Operands[0]; 511 assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); 512 const StringRef &Mnemonic = Op0.getToken(); 513 if (Mnemonic == "add" || 514 Mnemonic == "stmfd" || 515 Mnemonic == "str" || 516 Mnemonic == "ldmfd" || 517 Mnemonic == "ldr" || 518 Mnemonic == "mov" || 519 Mnemonic == "sub") 520 return false; 521 522 return true; 523} 524 525// TODO - this is a work in progress 526bool ARMAsmParser::ParseOperand(ARMOperand &Op) { 527 switch (getLexer().getKind()) { 528 case AsmToken::Identifier: 529 if 
(!ParseRegister(Op)) 530 return false; 531 // TODO parse other operands that start with an identifier like labels 532 return Error(getLexer().getTok().getLoc(), "labels not yet supported"); 533 case AsmToken::LBrac: 534 if (!ParseMemory(Op)) 535 return false; 536 case AsmToken::LCurly: 537 if (!ParseRegisterList(Op)) 538 return false; 539 case AsmToken::Hash: 540 // $42 -> immediate. 541 getLexer().Lex(); 542 const MCExpr *Val; 543 if (getParser().ParseExpression(Val)) 544 return true; 545 Op = ARMOperand::CreateImm(Val); 546 return false; 547 default: 548 return Error(getLexer().getTok().getLoc(), "unexpected token in operand"); 549 } 550} 551 552bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { 553 SmallVector<ARMOperand, 7> Operands; 554 555 Operands.push_back(ARMOperand::CreateToken(Name)); 556 557 SMLoc Loc = getLexer().getTok().getLoc(); 558 if (getLexer().isNot(AsmToken::EndOfStatement)) { 559 560 // Read the first operand. 561 Operands.push_back(ARMOperand()); 562 if (ParseOperand(Operands.back())) 563 return true; 564 565 while (getLexer().is(AsmToken::Comma)) { 566 getLexer().Lex(); // Eat the comma. 567 568 // Parse and remember the operand. 
569 Operands.push_back(ARMOperand()); 570 if (ParseOperand(Operands.back())) 571 return true; 572 } 573 } 574 if (!MatchInstruction(Operands, Inst)) 575 return false; 576 577 Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); 578 return true; 579} 580 581bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 582 StringRef IDVal = DirectiveID.getIdentifier(); 583 if (IDVal == ".word") 584 return ParseDirectiveWord(4, DirectiveID.getLoc()); 585 return true; 586} 587 588/// ParseDirectiveWord 589/// ::= .word [ expression (, expression)* ] 590bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 591 if (getLexer().isNot(AsmToken::EndOfStatement)) { 592 for (;;) { 593 const MCExpr *Value; 594 if (getParser().ParseExpression(Value)) 595 return true; 596 597 getParser().getStreamer().EmitValue(Value, Size); 598 599 if (getLexer().is(AsmToken::EndOfStatement)) 600 break; 601 602 // FIXME: Improve diagnostic. 603 if (getLexer().isNot(AsmToken::Comma)) 604 return Error(L, "unexpected token in directive"); 605 getLexer().Lex(); 606 } 607 } 608 609 getLexer().Lex(); 610 return false; 611} 612 613// Force static initialization. 614extern "C" void LLVMInitializeARMAsmParser() { 615 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 616 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 617} 618