ScriptScanner.ll revision f33f6de54db174aa679a4b6d1e040d37e95541c0
1/*===- ScriptScanner.ll ---------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===*/
9
10%{
11/* C/C++ Declarations */
12
13#include <mcld/Script/ScriptScanner.h>
14#include <mcld/Script/ScriptFile.h>
15#include <mcld/Support/MsgHandling.h>
16#include <llvm/ADT/StringRef.h>
17#include <string>
18
19typedef mcld::ScriptParser::token token;
20typedef mcld::ScriptParser::token_type token_type;
21
22#define yyterminate() return token::END
23#define YY_NO_UNISTD_H
24%}
25
26/* Flex Declarations and Options */
27%option c++
28%option batch
29%option noyywrap
30%option nounput
31%option stack
32
33%{
34#define YY_USER_ACTION  yylloc->columns(yyleng);
35%}
36
37/* abbrev. of RE @ref binutils ld/ldlex.l */
38FILENAMECHAR1   [_a-zA-Z\/\.\\\$\_\~]
39SYMBOLCHARN     [_a-zA-Z\/\.\\\$\_\~0-9]
40NOCFILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\~]
41WILDCHAR        [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\,\~\?\*\^\!]
42WS [ \t\r]
43
44/* Start conditions */
45%s LDSCRIPT
46%s EXPRESSION
47
48%% /* Regular Expressions */
49
50 /* code to place at the beginning of yylex() */
%{
  /* each call to yylex() starts a fresh token location */
  yylloc->step();

  /* First call only: m_Kind is still Unknown, so adopt the kind of the
     script file being scanned and announce it to the parser. */
  if (m_Kind == ScriptFile::Unknown) {
    m_Kind = pScriptFile.getKind();

    /* linker scripts and bare expressions share the same start token */
    if (m_Kind == ScriptFile::LDScript || m_Kind == ScriptFile::Expression)
      return token::LINKER_SCRIPT;

    /* VersionScript, DynamicList and any other kind are not handled here */
    assert(0 && "Unsupported script type!");
  }
%}
70
71 /* Entry Point */
72<LDSCRIPT>"ENTRY"                      { return token::ENTRY; }
73 /* File Commands */
74<LDSCRIPT>"INCLUDE"                    { return token::INCLUDE; }
75<LDSCRIPT>"INPUT"                      { return token::INPUT; }
76<LDSCRIPT>"GROUP"                      { return token::GROUP; }
77<LDSCRIPT>"AS_NEEDED"                  { return token::AS_NEEDED; }
78<LDSCRIPT>"OUTPUT"                     { return token::OUTPUT; }
79<LDSCRIPT>"SEARCH_DIR"                 { return token::SEARCH_DIR; }
80<LDSCRIPT>"STARTUP"                    { return token::STARTUP; }
81 /* Format Commands */
82<LDSCRIPT>"OUTPUT_FORMAT"              { return token::OUTPUT_FORMAT; }
83<LDSCRIPT>"TARGET"                     { return token::TARGET; }
84 /* Misc Commands */
85<LDSCRIPT>"ASSERT"                     { return token::ASSERT; }
86<LDSCRIPT>"EXTERN"                     { return token::EXTERN; }
87<LDSCRIPT>"FORCE_COMMON_ALLOCATION"    { return token::FORCE_COMMON_ALLOCATION; }
88<LDSCRIPT>"INHIBIT_COMMON_ALLOCATION"  { return token::INHIBIT_COMMON_ALLOCATION; }
89<LDSCRIPT>"INSERT"                     { return token::INSERT; }
90<LDSCRIPT>"NOCROSSREFS"                { return token::NOCROSSREFS; }
91<LDSCRIPT>"OUTPUT_ARCH"                { return token::OUTPUT_ARCH; }
92<LDSCRIPT>"LD_FEATURE"                 { return token::LD_FEATURE; }
 /* Assignments */
94<LDSCRIPT,EXPRESSION>"HIDDEN"          { return token::HIDDEN; }
95<LDSCRIPT,EXPRESSION>"PROVIDE"         { return token::PROVIDE; }
96<LDSCRIPT,EXPRESSION>"PROVIDE_HIDDEN"  { return token::PROVIDE_HIDDEN; }
97 /* SECTIONS Command */
98<LDSCRIPT>"SECTIONS"                   { return token::SECTIONS; }
99 /* MEMORY Command */
100<LDSCRIPT>"MEMORY"                     { return token::MEMORY; }
101 /* PHDRS Command */
102<LDSCRIPT>"PHDRS"                      { return token::PHDRS; }
103 /* Builtin Functions */
104<EXPRESSION>"ABSOLUTE"                 { return token::ABSOLUTE; }
105<EXPRESSION>"ADDR"                     { return token::ADDR; }
106<LDSCRIPT,EXPRESSION>"ALIGN"           { return token::ALIGN; }
107<EXPRESSION>"ALIGNOF"                  { return token::ALIGNOF; }
108<EXPRESSION>"BLOCK"                    { return token::BLOCK; }
109<EXPRESSION>"DATA_SEGMENT_ALIGN"       { return token::DATA_SEGMENT_ALIGN; }
110<EXPRESSION>"DATA_SEGMENT_END"         { return token::DATA_SEGMENT_END; }
111<EXPRESSION>"DATA_SEGMENT_RELRO_END"   { return token::DATA_SEGMENT_RELRO_END; }
112<EXPRESSION>"DEFINED"                  { return token::DEFINED; }
113<EXPRESSION>"LENGTH"                   { return token::LENGTH; }
114<EXPRESSION>"LOADADDR"                 { return token::LOADADDR; }
115<EXPRESSION>"MAX"                      { return token::MAX; }
116<EXPRESSION>"MIN"                      { return token::MIN; }
117<EXPRESSION>"NEXT"                     { return token::NEXT; }
118<EXPRESSION>"ORIGIN"                   { return token::ORIGIN; }
119<EXPRESSION>"SEGMENT_START"            { return token::SEGMENT_START; }
120<EXPRESSION>"SIZEOF"                   { return token::SIZEOF; }
121<EXPRESSION>"SIZEOF_HEADERS"           { return token::SIZEOF_HEADERS; }
122<EXPRESSION>"CONSTANT"                 { return token::CONSTANT; }
123 /* Symbolic Constants */
124<EXPRESSION>"MAXPAGESIZE"              { return token::MAXPAGESIZE; }
125<EXPRESSION>"COMMONPAGESIZE"           { return token::COMMONPAGESIZE; }
126 /* Input Section Description */
127<LDSCRIPT>"EXCLUDE_FILE"               { return token::EXCLUDE_FILE; }
128<LDSCRIPT>"KEEP"                       { return token::KEEP; }
129<LDSCRIPT>"SORT"                       { return token::SORT_BY_NAME; }
130<LDSCRIPT>"SORT_BY_NAME"               { return token::SORT_BY_NAME; }
131<LDSCRIPT>"SORT_BY_ALIGNMENT"          { return token::SORT_BY_ALIGNMENT; }
132<LDSCRIPT>"SORT_NONE"                  { return token::SORT_NONE; }
133<LDSCRIPT>"SORT_BY_INIT_PRIORITY"      { return token::SORT_BY_INIT_PRIORITY; }
134 /* Output Section Data */
135<LDSCRIPT>"BYTE"                       { return token::BYTE; }
136<LDSCRIPT>"SHORT"                      { return token::SHORT; }
137<LDSCRIPT>"LONG"                       { return token::LONG; }
138<LDSCRIPT>"QUAD"                       { return token::QUAD; }
139<LDSCRIPT>"SQUAD"                      { return token::SQUAD; }
140<LDSCRIPT>"FILL"                       { return token::FILL; }
141 /* Output Section Discarding */
142<LDSCRIPT>"DISCARD"                    { return token::DISCARD; }
143 /* Output Section Keywords */
144<LDSCRIPT>"CREATE_OBJECT_SYMBOLS"      { return token::CREATE_OBJECT_SYMBOLS; }
145<LDSCRIPT>"CONSTRUCTORS"               { return token::CONSTRUCTORS; }
146 /* Output Section Attributes */
147 /* Output Section Type */
148<LDSCRIPT,EXPRESSION>"NOLOAD"          { return token::NOLOAD; }
149<LDSCRIPT,EXPRESSION>"DSECT"           { return token::DSECT; }
150<LDSCRIPT,EXPRESSION>"COPY"            { return token::COPY; }
151<LDSCRIPT,EXPRESSION>"INFO"            { return token::INFO; }
152<LDSCRIPT,EXPRESSION>"OVERLAY"         { return token::OVERLAY; }
153 /* Output Section LMA */
154<LDSCRIPT>"AT"                         { return token::AT; }
155 /* Forced Input Alignment */
156<LDSCRIPT>"SUBALIGN"                   { return token::SUBALIGN; }
157 /* Output Section Constraint */
158<LDSCRIPT>"ONLY_IF_RO"                 { return token::ONLY_IF_RO; }
159<LDSCRIPT>"ONLY_IF_RW"                 { return token::ONLY_IF_RW; }
160 /* Operators */
161<LDSCRIPT,EXPRESSION>"<<"              { return token::LSHIFT; }
162<LDSCRIPT,EXPRESSION>">>"              { return token::RSHIFT; }
163<LDSCRIPT,EXPRESSION>"=="              { return token::EQ; }
164<LDSCRIPT,EXPRESSION>"!="              { return token::NE; }
165<LDSCRIPT,EXPRESSION>"<="              { return token::LE; }
166<LDSCRIPT,EXPRESSION>">="              { return token::GE; }
167<LDSCRIPT,EXPRESSION>"&&"              { return token::LOGICAL_AND; }
168<LDSCRIPT,EXPRESSION>"||"              { return token::LOGICAL_OR; }
169<LDSCRIPT,EXPRESSION>"+="              { return token::ADD_ASSIGN; }
170<LDSCRIPT,EXPRESSION>"-="              { return token::SUB_ASSIGN; }
171<LDSCRIPT,EXPRESSION>"*="              { return token::MUL_ASSIGN; }
172<LDSCRIPT,EXPRESSION>"/="              { return token::DIV_ASSIGN; }
173<LDSCRIPT,EXPRESSION>"&="              { return token::AND_ASSIGN; }
174<LDSCRIPT,EXPRESSION>"|="              { return token::OR_ASSIGN; }
175<LDSCRIPT,EXPRESSION>"<<="             { return token::LS_ASSIGN; }
176<LDSCRIPT,EXPRESSION>">>="             { return token::RS_ASSIGN; }
177<LDSCRIPT,EXPRESSION>","               { return static_cast<token_type>(*yytext); }
178<LDSCRIPT,EXPRESSION>"="               { return static_cast<token_type>(*yytext); }
179<LDSCRIPT,EXPRESSION>"?"               { return static_cast<token_type>(*yytext); }
180<LDSCRIPT,EXPRESSION>":"               { return static_cast<token_type>(*yytext); }
181<LDSCRIPT,EXPRESSION>"|"               { return static_cast<token_type>(*yytext); }
182<LDSCRIPT,EXPRESSION>"^"               { return static_cast<token_type>(*yytext); }
183<LDSCRIPT,EXPRESSION>"&"               { return static_cast<token_type>(*yytext); }
184<LDSCRIPT,EXPRESSION>"<"               { return static_cast<token_type>(*yytext); }
185<LDSCRIPT,EXPRESSION>">"               { return static_cast<token_type>(*yytext); }
186<LDSCRIPT,EXPRESSION>"+"               { return static_cast<token_type>(*yytext); }
187<LDSCRIPT,EXPRESSION>"-"               { return static_cast<token_type>(*yytext); }
188<LDSCRIPT,EXPRESSION>"*"               { return static_cast<token_type>(*yytext); }
189<LDSCRIPT,EXPRESSION>"/"               { return static_cast<token_type>(*yytext); }
190<LDSCRIPT,EXPRESSION>"%"               { return static_cast<token_type>(*yytext); }
191<LDSCRIPT,EXPRESSION>"!"               { return static_cast<token_type>(*yytext); }
192<LDSCRIPT,EXPRESSION>"~"               { return static_cast<token_type>(*yytext); }
193<LDSCRIPT,EXPRESSION>";"               { return static_cast<token_type>(*yytext); }
194<LDSCRIPT,EXPRESSION>"("               { return static_cast<token_type>(*yytext); }
195<LDSCRIPT,EXPRESSION>")"               { return static_cast<token_type>(*yytext); }
196<LDSCRIPT,EXPRESSION>"{"               { return static_cast<token_type>(*yytext); }
197<LDSCRIPT,EXPRESSION>"}"               { return static_cast<token_type>(*yytext); }
198
199 /* Numbers */
<LDSCRIPT,EXPRESSION>((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? {
  /* Integer literal: a "$"- or "0x"-prefixed hexadecimal number, or a plain
     digit sequence, optionally followed by a K (x1024) or M (x1024*1024)
     scale suffix. */
  llvm::StringRef digits(yytext, yyleng);

  /* split off the scale suffix, if there is one */
  int multiplier = 1;
  switch (digits.back()) {
  case 'k':
  case 'K':
    multiplier = 1024;
    digits = digits.substr(0, digits.size() - 1);
    break;
  case 'm':
  case 'M':
    multiplier = 1024 * 1024;
    digits = digits.substr(0, digits.size() - 1);
    break;
  default:
    break;
  }

  /* Radix 0 lets getAsInteger() auto-detect prefixes such as "0x", but it
     does not know the "$" hexadecimal prefix this pattern accepts, so strip
     that prefix here and force base 16. */
  unsigned radix = 0;
  if (digits.front() == '$') {
    digits = digits.substr(1);
    radix = 16;
  }

  /* getAsInteger() returns true on failure (e.g. overflow) and then leaves
     its output untouched; fall back to 0 instead of handing the parser a
     value that was never written. */
  if (digits.getAsInteger(radix, yylval->integer))
    yylval->integer = 0;

  yylval->integer *= multiplier;
  return token::INTEGER;
}
219
220 /* Expression string */
<EXPRESSION>{FILENAMECHAR1}{SYMBOLCHARN}* {
  /* register the matched text with the script file and pass the address of
     the string it hands back on to the parser */
  const std::string& symbol = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &symbol;
  return token::STRING;
}
226
227 /* String */
<LDSCRIPT>{FILENAMECHAR1}{NOCFILENAMECHAR}* {
  /* unquoted name: the whole match becomes the STRING token's value */
  const std::string& name = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &name;
  return token::STRING;
}
233
<LDSCRIPT,EXPRESSION>\"(\\.|[^\\"])*\" {
  /*" C string literal; the match, including both quote characters, is
     passed to createParserStr() unchanged */
  const std::string& literal = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &literal;
  return token::STRING;
}
240
241 /* -l namespec */
<LDSCRIPT>"-l"{FILENAMECHAR1}{NOCFILENAMECHAR}* {
  /* skip the two characters of the leading -l so that only the namespec
     itself is stored */
  const char* namespec = yytext + 2;
  const std::string& lib = pScriptFile.createParserStr(namespec, yyleng - 2);
  yylval->string = &lib;
  return token::LNAMESPEC;
}
247
248 /* WILDCHAR String */
<LDSCRIPT>{WILDCHAR}* {
  if (yytext[0] == '/' && yytext[1] == '*') {
    /* WILDCHAR contains both '/' and '*', so this pattern also swallows a
       C comment opener together with any wildcard characters glued to it.
       Keep only the two opener characters and give the rest back. */
    const int pushed_back = yyleng - 2;
    yyless (2);

    /* YY_USER_ACTION already advanced the location over the whole match.
       Take back the columns of the returned characters, otherwise
       enterComments() counts them a second time. */
    yylloc->end.column -= pushed_back;

    enterComments(*yylloc);

    /* same as the dedicated comment rule: the skipped comment must not
       become part of the next token's location */
    yylloc->step();
  } else {
    const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    yylval->string = &str;
    return token::STRING;
  }
}
259
260 /* gobble up C comments */
<LDSCRIPT,EXPRESSION>"/*" {
  /* consume everything up to the closing delimiter ... */
  enterComments(*yylloc);
  /* ... and start the next token's location after the comment */
  yylloc->step();
}
265
266 /* gobble up white-spaces */
<LDSCRIPT,EXPRESSION>{WS}+ {
  /* blanks produce no token; just move the location start past them */
  yylloc->step();
}
270
271 /* gobble up end-of-lines */
<LDSCRIPT,EXPRESSION>\n {
  /* advance the location by one line, then begin the next token there */
  yylloc->lines(1);
  yylloc->step();
}
276
277%% /* Additional Code */
278
279namespace mcld {
280
281ScriptScanner::ScriptScanner(std::istream* yyin, std::ostream* yyout)
282  : yyFlexLexer(yyin, yyout), m_Kind(ScriptFile::Unknown)
283{
284}
285
286ScriptScanner::~ScriptScanner()
287{
288}
289
290void ScriptScanner::enterComments(ScriptParser::location_type& pLocation)
291{
292  const int start_line = pLocation.begin.line;
293  const int start_col  = pLocation.begin.column;
294
295  int ch = 0;
296
297  while (true) {
298    ch = yyinput();
299    pLocation.columns(1);
300
301    while (ch != '*' && ch != EOF) {
302      if (ch == '\n') {
303        pLocation.lines(1);
304      }
305
306      ch = yyinput();
307      pLocation.columns(1);
308    }
309
310    if (ch == '*') {
311      ch = yyinput();
312      pLocation.columns(1);
313
314      while (ch == '*') {
315        ch = yyinput();
316        pLocation.columns(1);
317      }
318
319      if (ch == '/')
320        break;
321    }
322
323    if (ch == '\n')
324      pLocation.lines(1);
325
326    if (ch == EOF) {
327      error(diag::err_unterminated_comment) << pLocation.begin.filename
328                                            << start_line
329                                            << start_col;
330      break;
331    }
332  }
333}
334
335void ScriptScanner::setLexState(ScriptFile::Kind pKind)
336{
337  /* push the state into the top of stach */
338  m_StateStack.push(pKind);
339
340  switch (pKind) {
341  case ScriptFile::LDScript:
342    BEGIN(LDSCRIPT);
343    break;
344  case ScriptFile::Expression:
345    BEGIN(EXPRESSION);
346    break;
347  case ScriptFile::VersionScript:
348  case ScriptFile::DynamicList:
349  default:
350    assert(0 && "Unsupported script type!");
351    break;
352  }
353}
354
355void ScriptScanner::popLexState()
356{
357  /* pop the last state */
358  m_StateStack.pop();
359
360  /* resume the appropriate state */
361  if (!m_StateStack.empty()) {
362    switch (m_StateStack.top()) {
363    case ScriptFile::LDScript:
364      BEGIN(LDSCRIPT);
365      break;
366    case ScriptFile::Expression:
367      BEGIN(EXPRESSION);
368      break;
369    case ScriptFile::VersionScript:
370    case ScriptFile::DynamicList:
371    default:
372      assert(0 && "Unsupported script type!");
373      break;
374    }
375  }
376}
377
378} /* namespace of mcld */
379
380#ifdef yylex
381#undef yylex
382#endif
383
384int yyFlexLexer::yylex()
385{
386  return 0;
387}
388
389