ARMAsmParser.cpp revision d7894f105a3c397a3d7f5c5136eee39f5865e64b
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "ARM.h" 11#include "llvm/ADT/SmallVector.h" 12#include "llvm/ADT/Twine.h" 13#include "llvm/MC/MCAsmLexer.h" 14#include "llvm/MC/MCAsmParser.h" 15#include "llvm/MC/MCStreamer.h" 16#include "llvm/MC/MCExpr.h" 17#include "llvm/MC/MCInst.h" 18#include "llvm/Support/SourceMgr.h" 19#include "llvm/Target/TargetRegistry.h" 20#include "llvm/Target/TargetAsmParser.h" 21using namespace llvm; 22 23namespace { 24struct ARMOperand; 25 26// The shift types for register controlled shifts in arm memory addressing 27enum ShiftType { 28 Lsl, 29 Lsr, 30 Asr, 31 Ror, 32 Rrx 33}; 34 35class ARMAsmParser : public TargetAsmParser { 36 MCAsmParser &Parser; 37 38private: 39 MCAsmParser &getParser() const { return Parser; } 40 41 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 42 43 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 44 45 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 46 47 bool ParseRegister(ARMOperand &Op); 48 49 bool ParseRegisterList(ARMOperand &Op); 50 51 bool ParseMemory(ARMOperand &Op); 52 53 bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount); 54 55 bool ParseOperand(ARMOperand &Op); 56 57 bool ParseDirectiveWord(unsigned Size, SMLoc L); 58 59 // TODO - For now hacked versions of the next two are in here in this file to 60 // allow some parser testing until the table gen versions are implemented. 61 62 /// @name Auto-generated Match Functions 63 /// { 64 bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, 65 MCInst &Inst); 66 67 /// MatchRegisterName - Match the given string to a register name and return 68 /// its register number, or -1 if there is no match. To allow return values 69 /// to be used directly in register lists, arm registers have values between 70 /// 0 and 15. 71 int MatchRegisterName(const StringRef &Name); 72 73 /// } 74 75 76public: 77 ARMAsmParser(const Target &T, MCAsmParser &_Parser) 78 : TargetAsmParser(T), Parser(_Parser) {} 79 80 virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); 81 82 virtual bool ParseDirective(AsmToken DirectiveID); 83}; 84 85} // end anonymous namespace 86 87namespace { 88 89/// ARMOperand - Instances of this class represent a parsed ARM machine 90/// instruction. 91struct ARMOperand { 92 enum { 93 Token, 94 Register, 95 Memory 96 } Kind; 97 98 99 union { 100 struct { 101 const char *Data; 102 unsigned Length; 103 } Tok; 104 105 struct { 106 unsigned RegNum; 107 bool Writeback; 108 } Reg; 109 110 // This is for all forms of ARM address expressions 111 struct { 112 unsigned BaseRegNum; 113 bool OffsetIsReg; 114 const MCExpr *Offset; // used when OffsetIsReg is false 115 unsigned OffsetRegNum; // used when OffsetIsReg is true 116 bool OffsetRegShifted; // only used when OffsetIsReg is true 117 enum ShiftType ShiftType; // used when OffsetRegShifted is true 118 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true 119 bool Preindexed; 120 bool Postindexed; 121 bool Negative; // only used when OffsetIsReg is true 122 bool Writeback; 123 } Mem; 124 125 }; 126 127 StringRef getToken() const { 128 assert(Kind == Token && "Invalid access!"); 129 return StringRef(Tok.Data, Tok.Length); 130 } 131 132 unsigned getReg() const { 133 assert(Kind == Register && "Invalid access!"); 134 return Reg.RegNum; 135 } 136 137 bool isToken() const {return Kind == Token; } 138 139 bool isReg() const { return Kind == Register; } 140 141 void addRegOperands(MCInst &Inst, unsigned N) const { 142 assert(N == 1 && "Invalid number of operands!"); 143 Inst.addOperand(MCOperand::CreateReg(getReg())); 144 } 145 146 static ARMOperand CreateToken(StringRef Str) { 147 ARMOperand Res; 148 Res.Kind = Token; 149 Res.Tok.Data = Str.data(); 150 Res.Tok.Length = Str.size(); 151 return Res; 152 } 153 154 static ARMOperand CreateReg(unsigned RegNum, bool Writeback) { 155 ARMOperand Res; 156 Res.Kind = Register; 157 Res.Reg.RegNum = RegNum; 158 Res.Reg.Writeback = Writeback; 159 return Res; 160 } 161 162 static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg, 163 const MCExpr *Offset, unsigned OffsetRegNum, 164 bool OffsetRegShifted, enum ShiftType ShiftType, 165 const MCExpr *ShiftAmount, bool Preindexed, 166 bool Postindexed, bool Negative, bool Writeback) { 167 ARMOperand Res; 168 Res.Kind = Memory; 169 Res.Mem.BaseRegNum = BaseRegNum; 170 Res.Mem.OffsetIsReg = OffsetIsReg; 171 Res.Mem.Offset = Offset; 172 Res.Mem.OffsetRegNum = OffsetRegNum; 173 Res.Mem.OffsetRegShifted = OffsetRegShifted; 174 Res.Mem.ShiftType = ShiftType; 175 Res.Mem.ShiftAmount = ShiftAmount; 176 Res.Mem.Preindexed = Preindexed; 177 Res.Mem.Postindexed = Postindexed; 178 Res.Mem.Negative = Negative; 179 Res.Mem.Writeback = Writeback; 180 return Res; 181 } 182}; 183 184} // end anonymous namespace. 185 186// Try to parse a register name. The token must be an Identifier when called, 187// and if it is a register name a Reg operand is created, the token is eaten 188// and false is returned. Else true is returned and no token is eaten. 189// TODO this is likely to change to allow different register types and or to 190// parse for a specific register type. 191bool ARMAsmParser::ParseRegister(ARMOperand &Op) { 192 const AsmToken &Tok = getLexer().getTok(); 193 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); 194 195 // FIXME: Validate register for the current architecture; we have to do 196 // validation later, so maybe there is no need for this here. 197 int RegNum; 198 199 RegNum = MatchRegisterName(Tok.getString()); 200 if (RegNum == -1) 201 return true; 202 getLexer().Lex(); // Eat identifier token. 203 204 bool Writeback = false; 205 const AsmToken &ExclaimTok = getLexer().getTok(); 206 if (ExclaimTok.is(AsmToken::Exclaim)) { 207 Writeback = true; 208 getLexer().Lex(); // Eat exclaim token 209 } 210 211 Op = ARMOperand::CreateReg(RegNum, Writeback); 212 213 return false; 214} 215 216// Try to parse a register list. The first token must be a '{' when called 217// for now. 218bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { 219 const AsmToken &LCurlyTok = getLexer().getTok(); 220 assert(LCurlyTok.is(AsmToken::LCurly) && "Token is not an Left Curly Brace"); 221 getLexer().Lex(); // Eat left curly brace token. 222 223 const AsmToken &RegTok = getLexer().getTok(); 224 SMLoc RegLoc = RegTok.getLoc(); 225 if (RegTok.isNot(AsmToken::Identifier)) 226 return Error(RegLoc, "register expected"); 227 int RegNum = MatchRegisterName(RegTok.getString()); 228 if (RegNum == -1) 229 return Error(RegLoc, "register expected"); 230 getLexer().Lex(); // Eat identifier token. 231 unsigned RegList = 1 << RegNum; 232 233 int HighRegNum = RegNum; 234 // TODO ranges like "{Rn-Rm}" 235 while (getLexer().getTok().is(AsmToken::Comma)) { 236 getLexer().Lex(); // Eat comma token. 237 238 const AsmToken &RegTok = getLexer().getTok(); 239 SMLoc RegLoc = RegTok.getLoc(); 240 if (RegTok.isNot(AsmToken::Identifier)) 241 return Error(RegLoc, "register expected"); 242 int RegNum = MatchRegisterName(RegTok.getString()); 243 if (RegNum == -1) 244 return Error(RegLoc, "register expected"); 245 246 if (RegList & (1 << RegNum)) 247 Warning(RegLoc, "register duplicated in register list"); 248 else if (RegNum <= HighRegNum) 249 Warning(RegLoc, "register not in ascending order in register list"); 250 RegList |= 1 << RegNum; 251 HighRegNum = RegNum; 252 253 getLexer().Lex(); // Eat identifier token. 254 } 255 const AsmToken &RCurlyTok = getLexer().getTok(); 256 if (RCurlyTok.isNot(AsmToken::RCurly)) 257 return Error(RCurlyTok.getLoc(), "'}' expected"); 258 getLexer().Lex(); // Eat left curly brace token. 259 260 return false; 261} 262 263// Try to parse an arm memory expression. It must start with a '[' token. 264// TODO Only preindexing and postindexing addressing are started, unindexed 265// with option, etc are still to do. 266bool ARMAsmParser::ParseMemory(ARMOperand &Op) { 267 const AsmToken &LBracTok = getLexer().getTok(); 268 assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket"); 269 getLexer().Lex(); // Eat left bracket token. 270 271 const AsmToken &BaseRegTok = getLexer().getTok(); 272 if (BaseRegTok.isNot(AsmToken::Identifier)) 273 return Error(BaseRegTok.getLoc(), "register expected"); 274 int BaseRegNum = MatchRegisterName(BaseRegTok.getString()); 275 if (BaseRegNum == -1) 276 return Error(BaseRegTok.getLoc(), "register expected"); 277 getLexer().Lex(); // Eat identifier token. 278 279 bool Preindexed = false; 280 bool Postindexed = false; 281 bool OffsetIsReg = false; 282 bool Negative = false; 283 bool Writeback = false; 284 285 // First look for preindexed address forms: 286 // [Rn, +/-Rm] 287 // [Rn, #offset] 288 // [Rn, +/-Rm, shift] 289 // that is after the "[Rn" we now have see if the next token is a comma. 290 const AsmToken &Tok = getLexer().getTok(); 291 if (Tok.is(AsmToken::Comma)) { 292 Preindexed = true; 293 getLexer().Lex(); // Eat comma token. 294 295 const AsmToken &NextTok = getLexer().getTok(); 296 if (NextTok.is(AsmToken::Plus)) 297 getLexer().Lex(); // Eat plus token. 298 else if (NextTok.is(AsmToken::Minus)) { 299 Negative = true; 300 getLexer().Lex(); // Eat minus token 301 } 302 303 // See if there is a register following the "[Rn," we have so far. 304 const AsmToken &OffsetRegTok = getLexer().getTok(); 305 int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); 306 bool OffsetRegShifted = false; 307 enum ShiftType ShiftType; 308 const MCExpr *ShiftAmount; 309 const MCExpr *Offset; 310 if (OffsetRegNum != -1) { 311 OffsetIsReg = true; 312 getLexer().Lex(); // Eat identifier token for the offset register. 313 // Look for a comma then a shift 314 const AsmToken &Tok = getLexer().getTok(); 315 if (Tok.is(AsmToken::Comma)) { 316 getLexer().Lex(); // Eat comma token. 317 318 const AsmToken &Tok = getLexer().getTok(); 319 if (ParseShift(&ShiftType, ShiftAmount)) 320 return Error(Tok.getLoc(), "shift expected"); 321 OffsetRegShifted = true; 322 } 323 } 324 else { // "[Rn," we have so far was not followed by "Rm" 325 // Look for #offset following the "[Rn," 326 const AsmToken &HashTok = getLexer().getTok(); 327 if (HashTok.isNot(AsmToken::Hash)) 328 return Error(HashTok.getLoc(), "'#' expected"); 329 getLexer().Lex(); // Eat hash token. 330 331 if (getParser().ParseExpression(Offset)) 332 return true; 333 } 334 const AsmToken &RBracTok = getLexer().getTok(); 335 if (RBracTok.isNot(AsmToken::RBrac)) 336 return Error(RBracTok.getLoc(), "']' expected"); 337 getLexer().Lex(); // Eat right bracket token. 338 339 const AsmToken &ExclaimTok = getLexer().getTok(); 340 if (ExclaimTok.is(AsmToken::Exclaim)) { 341 Writeback = true; 342 getLexer().Lex(); // Eat exclaim token 343 } 344 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 345 OffsetRegShifted, ShiftType, ShiftAmount, 346 Preindexed, Postindexed, Negative, Writeback); 347 return false; 348 } 349 // The "[Rn" we have so far was not followed by a comma. 350 else if (Tok.is(AsmToken::RBrac)) { 351 // This is a post indexing addressing forms: 352 // [Rn], #offset 353 // [Rn], +/-Rm 354 // [Rn], +/-Rm, shift 355 // that is a ']' follows after the "[Rn". 356 Postindexed = true; 357 Writeback = true; 358 getLexer().Lex(); // Eat right bracket token. 359 360 const AsmToken &CommaTok = getLexer().getTok(); 361 if (CommaTok.isNot(AsmToken::Comma)) 362 return Error(CommaTok.getLoc(), "',' expected"); 363 getLexer().Lex(); // Eat comma token. 364 365 const AsmToken &NextTok = getLexer().getTok(); 366 if (NextTok.is(AsmToken::Plus)) 367 getLexer().Lex(); // Eat plus token. 368 else if (NextTok.is(AsmToken::Minus)) { 369 Negative = true; 370 getLexer().Lex(); // Eat minus token 371 } 372 373 // See if there is a register following the "[Rn]," we have so far. 374 const AsmToken &OffsetRegTok = getLexer().getTok(); 375 int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); 376 bool OffsetRegShifted = false; 377 enum ShiftType ShiftType; 378 const MCExpr *ShiftAmount; 379 const MCExpr *Offset; 380 if (OffsetRegNum != -1) { 381 OffsetIsReg = true; 382 getLexer().Lex(); // Eat identifier token for the offset register. 383 // Look for a comma then a shift 384 const AsmToken &Tok = getLexer().getTok(); 385 if (Tok.is(AsmToken::Comma)) { 386 getLexer().Lex(); // Eat comma token. 387 388 const AsmToken &Tok = getLexer().getTok(); 389 if (ParseShift(&ShiftType, ShiftAmount)) 390 return Error(Tok.getLoc(), "shift expected"); 391 OffsetRegShifted = true; 392 } 393 } 394 else { // "[Rn]," we have so far was not followed by "Rm" 395 // Look for #offset following the "[Rn]," 396 const AsmToken &HashTok = getLexer().getTok(); 397 if (HashTok.isNot(AsmToken::Hash)) 398 return Error(HashTok.getLoc(), "'#' expected"); 399 getLexer().Lex(); // Eat hash token. 400 401 if (getParser().ParseExpression(Offset)) 402 return true; 403 } 404 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, 405 OffsetRegShifted, ShiftType, ShiftAmount, 406 Preindexed, Postindexed, Negative, Writeback); 407 return false; 408 } 409 410 return true; 411} 412 413/// ParseShift as one of these two: 414/// ( lsl | lsr | asr | ror ) , # shift_amount 415/// rrx 416/// and returns true if it parses a shift otherwise it returns false. 417bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) { 418 const AsmToken &Tok = getLexer().getTok(); 419 if (Tok.isNot(AsmToken::Identifier)) 420 return true; 421 const StringRef &ShiftName = Tok.getString(); 422 if (ShiftName == "lsl" || ShiftName == "LSL") 423 *St = Lsl; 424 else if (ShiftName == "lsr" || ShiftName == "LSR") 425 *St = Lsr; 426 else if (ShiftName == "asr" || ShiftName == "ASR") 427 *St = Asr; 428 else if (ShiftName == "ror" || ShiftName == "ROR") 429 *St = Ror; 430 else if (ShiftName == "rrx" || ShiftName == "RRX") 431 *St = Rrx; 432 else 433 return true; 434 getLexer().Lex(); // Eat shift type token. 435 436 // For all but a Rotate right there must be a '#' and a shift amount 437 if (*St != Rrx) { 438 // Look for # following the shift type 439 const AsmToken &HashTok = getLexer().getTok(); 440 if (HashTok.isNot(AsmToken::Hash)) 441 return Error(HashTok.getLoc(), "'#' expected"); 442 getLexer().Lex(); // Eat hash token. 443 444 if (getParser().ParseExpression(ShiftAmount)) 445 return true; 446 } 447 448 return false; 449} 450 451// A hack to allow some testing 452int ARMAsmParser::MatchRegisterName(const StringRef &Name) { 453 if (Name == "r0" || Name == "R0") 454 return 0; 455 else if (Name == "r1" || Name == "R1") 456 return 1; 457 else if (Name == "r2" || Name == "R2") 458 return 2; 459 else if (Name == "r3" || Name == "R3") 460 return 3; 461 else if (Name == "r3" || Name == "R3") 462 return 3; 463 else if (Name == "r4" || Name == "R4") 464 return 4; 465 else if (Name == "r5" || Name == "R5") 466 return 5; 467 else if (Name == "r6" || Name == "R6") 468 return 6; 469 else if (Name == "r7" || Name == "R7") 470 return 7; 471 else if (Name == "r8" || Name == "R8") 472 return 8; 473 else if (Name == "r9" || Name == "R9") 474 return 9; 475 else if (Name == "r10" || Name == "R10") 476 return 10; 477 else if (Name == "r11" || Name == "R11" || Name == "fp") 478 return 11; 479 else if (Name == "r12" || Name == "R12" || Name == "ip") 480 return 12; 481 else if (Name == "r13" || Name == "R13" || Name == "sp") 482 return 13; 483 else if (Name == "r14" || Name == "R14" || Name == "lr") 484 return 14; 485 else if (Name == "r15" || Name == "R15" || Name == "pc") 486 return 15; 487 return -1; 488} 489 490// A hack to allow some testing 491bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, 492 MCInst &Inst) { 493 struct ARMOperand Op0 = Operands[0]; 494 assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); 495 const StringRef &Mnemonic = Op0.getToken(); 496 if (Mnemonic == "add" || 497 Mnemonic == "stmfd" || 498 Mnemonic == "str" || 499 Mnemonic == "ldmfd" || 500 Mnemonic == "ldr" || 501 Mnemonic == "mov") 502 return false; 503 504 return true; 505} 506 507// TODO - this is a work in progress 508bool ARMAsmParser::ParseOperand(ARMOperand &Op) { 509 switch (getLexer().getKind()) { 510 case AsmToken::Identifier: 511 if (!ParseRegister(Op)) 512 return false; 513 // TODO parse other operands that start with an identifier like labels 514 return Error(getLexer().getTok().getLoc(), "labels not yet supported"); 515 case AsmToken::LBrac: 516 if (!ParseMemory(Op)) 517 return false; 518 case AsmToken::LCurly: 519 if (!ParseRegisterList(Op)) 520 return(false); 521 case AsmToken::Hash: 522 return Error(getLexer().getTok().getLoc(), "immediates not yet supported"); 523 default: 524 return Error(getLexer().getTok().getLoc(), "unexpected token in operand"); 525 } 526} 527 528bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { 529 SmallVector<ARMOperand, 7> Operands; 530 531 Operands.push_back(ARMOperand::CreateToken(Name)); 532 533 SMLoc Loc = getLexer().getTok().getLoc(); 534 if (getLexer().isNot(AsmToken::EndOfStatement)) { 535 536 // Read the first operand. 537 Operands.push_back(ARMOperand()); 538 if (ParseOperand(Operands.back())) 539 return true; 540 541 while (getLexer().is(AsmToken::Comma)) { 542 getLexer().Lex(); // Eat the comma. 543 544 // Parse and remember the operand. 545 Operands.push_back(ARMOperand()); 546 if (ParseOperand(Operands.back())) 547 return true; 548 } 549 } 550 if (!MatchInstruction(Operands, Inst)) 551 return false; 552 553 Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); 554 return true; 555} 556 557bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { 558 StringRef IDVal = DirectiveID.getIdentifier(); 559 if (IDVal == ".word") 560 return ParseDirectiveWord(4, DirectiveID.getLoc()); 561 return true; 562} 563 564/// ParseDirectiveWord 565/// ::= .word [ expression (, expression)* ] 566bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 567 if (getLexer().isNot(AsmToken::EndOfStatement)) { 568 for (;;) { 569 const MCExpr *Value; 570 if (getParser().ParseExpression(Value)) 571 return true; 572 573 getParser().getStreamer().EmitValue(Value, Size); 574 575 if (getLexer().is(AsmToken::EndOfStatement)) 576 break; 577 578 // FIXME: Improve diagnostic. 579 if (getLexer().isNot(AsmToken::Comma)) 580 return Error(L, "unexpected token in directive"); 581 getLexer().Lex(); 582 } 583 } 584 585 getLexer().Lex(); 586 return false; 587} 588 589// Force static initialization. 590extern "C" void LLVMInitializeARMAsmParser() { 591 RegisterAsmParser<ARMAsmParser> X(TheARMTarget); 592 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); 593} 594