1/*===- ScriptScanner.ll ---------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===*/
9
10%{
11/* C/C++ Declarations */
12
13#include "mcld/Script/ScriptScanner.h"
14#include "mcld/Script/ScriptFile.h"
15#include "mcld/Support/MsgHandling.h"
16#include <llvm/ADT/StringRef.h>
17#include <string>
18
/* Shorthands for the token namespace and token type declared by
 * mcld::ScriptParser; the rules below return values of these types. */
typedef mcld::ScriptParser::token token;
typedef mcld::ScriptParser::token_type token_type;

/* When the scanner terminates (end of input), return token::END. */
#define yyterminate() return token::END
/* Keep the flex skeleton from including <unistd.h>. */
#define YY_NO_UNISTD_H
24%}
25
%{
/* Suppress clang's -Wdeprecated-register from here on; the matching
 * "diagnostic pop" sits after namespace mcld at the end of this file.
 * Presumably needed because the generated scanner code uses the `register`
 * keyword -- TODO confirm against the flex version in use. */
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-register"
#endif
%}
32
/* Flex Declarations and Options
 *   c++      - generate a C++ scanner (class yyFlexLexer) rather than a C one
 *   batch    - generate a batch (non-interactive) scanner
 *   noyywrap - do not call yywrap() at end of input
 *   nounput  - do not generate the unput() routine
 *   stack    - enable flex's start-condition stack (the visible code keeps
 *              its own m_StateStack instead)
 */
%option c++
%option batch
%option noyywrap
%option nounput
%option stack

%{
/* Executed before every rule action: extend the end of the current location
 * by the length of the text that was just matched. */
#define YY_USER_ACTION  yylloc->columns(yyleng);
%}
43
/* abbrev. of RE @ref binutils ld/ldlex.l
 *   FILENAMECHAR1   - characters allowed as the first character of a name
 *   SYMBOLCHARN     - characters that continue a name in EXPRESSION state
 *   NOCFILENAMECHAR - characters that continue a name in LDSCRIPT state
 *                     (comma excluded)
 *   WILDCHAR        - NOCFILENAMECHAR plus comma and the characters ? * ^ !
 *   WS              - blank, tab and carriage return; newlines are matched
 *                     by a separate rule so that lines can be counted
 */
FILENAMECHAR1   [_a-zA-Z\/\.\\\$\_\~]
SYMBOLCHARN     [_a-zA-Z\/\.\\\$\_\~0-9]
NOCFILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\~]
WILDCHAR        [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\,\~\?\*\^\!]
WS [ \t\r]

/* Start conditions (declared with %s, i.e. inclusive).  Every rule in this
 * file names the start condition(s) it belongs to explicitly. */
%s LDSCRIPT
%s EXPRESSION
54
55%% /* Regular Expressions */
56
57 /* code to place at the beginning of yylex() */
58%{
59  /* reset location */
60  yylloc->step();
61
62  /* determine the initial parser state */
63  if (m_Kind == ScriptFile::Unknown) {
64    m_Kind = pScriptFile.getKind();
65    switch (pScriptFile.getKind()) {
66    case ScriptFile::LDScript:
67    case ScriptFile::Expression:
68      return token::LINKER_SCRIPT;
69    case ScriptFile::VersionScript:
70    case ScriptFile::DynamicList:
71    default:
72      assert(0 && "Unsupported script type!");
73      break;
74    }
75  }
76%}
77
 /* Keyword rules: each keyword is recognised only in the start condition(s)
    given in its prefix and simply returns the corresponding parser token.
    They are listed ahead of the generic string rules, so an exact keyword
    wins against a string match of the same length. */
 /* Entry Point */
<LDSCRIPT>"ENTRY"                      { return token::ENTRY; }
 /* File Commands */
<LDSCRIPT>"INCLUDE"                    { return token::INCLUDE; }
<LDSCRIPT>"INPUT"                      { return token::INPUT; }
<LDSCRIPT>"GROUP"                      { return token::GROUP; }
<LDSCRIPT>"AS_NEEDED"                  { return token::AS_NEEDED; }
<LDSCRIPT>"OUTPUT"                     { return token::OUTPUT; }
<LDSCRIPT>"SEARCH_DIR"                 { return token::SEARCH_DIR; }
<LDSCRIPT>"STARTUP"                    { return token::STARTUP; }
 /* Format Commands */
<LDSCRIPT>"OUTPUT_FORMAT"              { return token::OUTPUT_FORMAT; }
<LDSCRIPT>"TARGET"                     { return token::TARGET; }
 /* Misc Commands */
<LDSCRIPT>"ASSERT"                     { return token::ASSERT; }
<LDSCRIPT>"EXTERN"                     { return token::EXTERN; }
<LDSCRIPT>"FORCE_COMMON_ALLOCATION"    { return token::FORCE_COMMON_ALLOCATION; }
<LDSCRIPT>"INHIBIT_COMMON_ALLOCATION"  { return token::INHIBIT_COMMON_ALLOCATION; }
<LDSCRIPT>"INSERT"                     { return token::INSERT; }
<LDSCRIPT>"NOCROSSREFS"                { return token::NOCROSSREFS; }
<LDSCRIPT>"OUTPUT_ARCH"                { return token::OUTPUT_ARCH; }
<LDSCRIPT>"LD_FEATURE"                 { return token::LD_FEATURE; }
 /* Assignments (valid in both start conditions) */
<LDSCRIPT,EXPRESSION>"HIDDEN"          { return token::HIDDEN; }
<LDSCRIPT,EXPRESSION>"PROVIDE"         { return token::PROVIDE; }
<LDSCRIPT,EXPRESSION>"PROVIDE_HIDDEN"  { return token::PROVIDE_HIDDEN; }
 /* SECTIONS Command */
<LDSCRIPT>"SECTIONS"                   { return token::SECTIONS; }
 /* MEMORY Command */
<LDSCRIPT>"MEMORY"                     { return token::MEMORY; }
 /* PHDRS Command */
<LDSCRIPT>"PHDRS"                      { return token::PHDRS; }
 /* Builtin Functions: recognised in EXPRESSION state only, except ALIGN,
    which is also a keyword in LDSCRIPT state */
<EXPRESSION>"ABSOLUTE"                 { return token::ABSOLUTE; }
<EXPRESSION>"ADDR"                     { return token::ADDR; }
<LDSCRIPT,EXPRESSION>"ALIGN"           { return token::ALIGN; }
<EXPRESSION>"ALIGNOF"                  { return token::ALIGNOF; }
<EXPRESSION>"BLOCK"                    { return token::BLOCK; }
<EXPRESSION>"DATA_SEGMENT_ALIGN"       { return token::DATA_SEGMENT_ALIGN; }
<EXPRESSION>"DATA_SEGMENT_END"         { return token::DATA_SEGMENT_END; }
<EXPRESSION>"DATA_SEGMENT_RELRO_END"   { return token::DATA_SEGMENT_RELRO_END; }
<EXPRESSION>"DEFINED"                  { return token::DEFINED; }
<EXPRESSION>"LENGTH"                   { return token::LENGTH; }
<EXPRESSION>"LOADADDR"                 { return token::LOADADDR; }
<EXPRESSION>"MAX"                      { return token::MAX; }
<EXPRESSION>"MIN"                      { return token::MIN; }
<EXPRESSION>"NEXT"                     { return token::NEXT; }
<EXPRESSION>"ORIGIN"                   { return token::ORIGIN; }
<EXPRESSION>"SEGMENT_START"            { return token::SEGMENT_START; }
<EXPRESSION>"SIZEOF"                   { return token::SIZEOF; }
<EXPRESSION>"SIZEOF_HEADERS"           { return token::SIZEOF_HEADERS; }
<EXPRESSION>"CONSTANT"                 { return token::CONSTANT; }
 /* Symbolic Constants */
<EXPRESSION>"MAXPAGESIZE"              { return token::MAXPAGESIZE; }
<EXPRESSION>"COMMONPAGESIZE"           { return token::COMMONPAGESIZE; }
 /* Input Section Description */
<LDSCRIPT>"EXCLUDE_FILE"               { return token::EXCLUDE_FILE; }
<LDSCRIPT>"KEEP"                       { return token::KEEP; }
 /* SORT yields the same token as SORT_BY_NAME */
<LDSCRIPT>"SORT"                       { return token::SORT_BY_NAME; }
<LDSCRIPT>"SORT_BY_NAME"               { return token::SORT_BY_NAME; }
<LDSCRIPT>"SORT_BY_ALIGNMENT"          { return token::SORT_BY_ALIGNMENT; }
<LDSCRIPT>"SORT_NONE"                  { return token::SORT_NONE; }
<LDSCRIPT>"SORT_BY_INIT_PRIORITY"      { return token::SORT_BY_INIT_PRIORITY; }
 /* Output Section Data */
<LDSCRIPT>"BYTE"                       { return token::BYTE; }
<LDSCRIPT>"SHORT"                      { return token::SHORT; }
<LDSCRIPT>"LONG"                       { return token::LONG; }
<LDSCRIPT>"QUAD"                       { return token::QUAD; }
<LDSCRIPT>"SQUAD"                      { return token::SQUAD; }
<LDSCRIPT>"FILL"                       { return token::FILL; }
 /* Output Section Discarding */
<LDSCRIPT>"DISCARD"                    { return token::DISCARD; }
 /* Output Section Keywords */
<LDSCRIPT>"CREATE_OBJECT_SYMBOLS"      { return token::CREATE_OBJECT_SYMBOLS; }
<LDSCRIPT>"CONSTRUCTORS"               { return token::CONSTRUCTORS; }
 /* Output Section Attributes */
 /* Output Section Type */
<LDSCRIPT,EXPRESSION>"NOLOAD"          { return token::NOLOAD; }
<LDSCRIPT,EXPRESSION>"DSECT"           { return token::DSECT; }
<LDSCRIPT,EXPRESSION>"COPY"            { return token::COPY; }
<LDSCRIPT,EXPRESSION>"INFO"            { return token::INFO; }
<LDSCRIPT,EXPRESSION>"OVERLAY"         { return token::OVERLAY; }
 /* Output Section LMA */
<LDSCRIPT>"AT"                         { return token::AT; }
 /* Forced Input Alignment */
<LDSCRIPT>"SUBALIGN"                   { return token::SUBALIGN; }
 /* Output Section Constraint */
<LDSCRIPT>"ONLY_IF_RO"                 { return token::ONLY_IF_RO; }
<LDSCRIPT>"ONLY_IF_RW"                 { return token::ONLY_IF_RW; }
 /* Operators: multi-character operators return a named token, while every
    single-character operator or punctuator is returned as its own character
    code cast to token_type.  flex picks the longest match, so the three
    character shift-assignments win over the two and one character forms. */
<LDSCRIPT,EXPRESSION>"<<"              { return token::LSHIFT; }
<LDSCRIPT,EXPRESSION>">>"              { return token::RSHIFT; }
<LDSCRIPT,EXPRESSION>"=="              { return token::EQ; }
<LDSCRIPT,EXPRESSION>"!="              { return token::NE; }
<LDSCRIPT,EXPRESSION>"<="              { return token::LE; }
<LDSCRIPT,EXPRESSION>">="              { return token::GE; }
<LDSCRIPT,EXPRESSION>"&&"              { return token::LOGICAL_AND; }
<LDSCRIPT,EXPRESSION>"||"              { return token::LOGICAL_OR; }
<LDSCRIPT,EXPRESSION>"+="              { return token::ADD_ASSIGN; }
<LDSCRIPT,EXPRESSION>"-="              { return token::SUB_ASSIGN; }
<LDSCRIPT,EXPRESSION>"*="              { return token::MUL_ASSIGN; }
<LDSCRIPT,EXPRESSION>"/="              { return token::DIV_ASSIGN; }
<LDSCRIPT,EXPRESSION>"&="              { return token::AND_ASSIGN; }
<LDSCRIPT,EXPRESSION>"|="              { return token::OR_ASSIGN; }
<LDSCRIPT,EXPRESSION>"<<="             { return token::LS_ASSIGN; }
<LDSCRIPT,EXPRESSION>">>="             { return token::RS_ASSIGN; }
<LDSCRIPT,EXPRESSION>","               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"="               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"?"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>":"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"|"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"^"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"&"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"<"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>">"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"+"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"-"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"*"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"/"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"%"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"!"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"~"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>";"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"("               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>")"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"{"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"}"               { return static_cast<token_type>(*yytext); }
205
206 /* Numbers */
207<LDSCRIPT,EXPRESSION>((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? {
208  llvm::StringRef str(yytext, yyleng);
209  switch (str.back()) {
210  case 'k':
211  case 'K':
212    str.substr(0, yyleng - 1).getAsInteger(0, yylval->integer);
213    yylval->integer *= 1024;
214    break;
215  case 'm':
216  case 'M':
217    str.substr(0, yyleng - 1).getAsInteger(0, yylval->integer);
218    yylval->integer *= 1024 * 1024;
219    break;
220  default:
221    str.getAsInteger(0, yylval->integer);
222    break;
223  }
224  return token::INTEGER;
225}
226
227 /* Expression string */
228<EXPRESSION>{FILENAMECHAR1}{SYMBOLCHARN}* {
229  const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
230  yylval->string = &str;
231  return token::STRING;
232}
233
234 /* String */
235<LDSCRIPT>{FILENAMECHAR1}{NOCFILENAMECHAR}* {
236  const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
237  yylval->string = &str;
238  return token::STRING;
239}
240
<LDSCRIPT,EXPRESSION>\"(\\.|[^\\"])*\" {
  /*" c string literal: the whole match, including both quotes and any
      backslash escapes, is passed on unchanged */
  const std::string& literal = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &literal;
  return token::STRING;
}
247
248 /* -l namespec */
249<LDSCRIPT>"-l"{FILENAMECHAR1}{NOCFILENAMECHAR}* {
250  const std::string& str = pScriptFile.createParserStr(yytext + 2, yyleng - 2);
251  yylval->string = &str;
252  return token::LNAMESPEC;
253}
254
255 /* WILDCHAR String */
256<LDSCRIPT>{WILDCHAR}* {
257  if (yytext[0] == '/' && yytext[1] == '*') {
258    yyless (2);
259    enterComments(*yylloc);
260  } else {
261    const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
262    yylval->string = &str;
263    return token::STRING;
264  }
265}
266
 /* gobble up C comments: enterComments() consumes the input up to the end of
    the comment, then the location is restarted so that the comment does not
    become part of the next token's range */
<LDSCRIPT,EXPRESSION>"/*" {
  enterComments(*yylloc);
  yylloc->step();
}

 /* gobble up white-spaces: no token is returned, only the location moves */
<LDSCRIPT,EXPRESSION>{WS}+ {
  yylloc->step();
}

 /* gobble up end-of-lines: advance the location by one line, then restart it */
<LDSCRIPT,EXPRESSION>\n {
  yylloc->lines(1);
  yylloc->step();
}
283
284%% /* Additional Code */
285
286namespace mcld {
287
288ScriptScanner::ScriptScanner(std::istream* yyin, std::ostream* yyout)
289  : yyFlexLexer(yyin, yyout), m_Kind(ScriptFile::Unknown)
290{
291}
292
293ScriptScanner::~ScriptScanner()
294{
295}
296
297void ScriptScanner::enterComments(ScriptParser::location_type& pLocation)
298{
299  const int start_line = pLocation.begin.line;
300  const int start_col  = pLocation.begin.column;
301
302  int ch = 0;
303
304  while (true) {
305    ch = yyinput();
306    pLocation.columns(1);
307
308    while (ch != '*' && ch != EOF) {
309      if (ch == '\n') {
310        pLocation.lines(1);
311      }
312
313      ch = yyinput();
314      pLocation.columns(1);
315    }
316
317    if (ch == '*') {
318      ch = yyinput();
319      pLocation.columns(1);
320
321      while (ch == '*') {
322        ch = yyinput();
323        pLocation.columns(1);
324      }
325
326      if (ch == '/')
327        break;
328    }
329
330    if (ch == '\n')
331      pLocation.lines(1);
332
333    if (ch == EOF) {
334      error(diag::err_unterminated_comment) << pLocation.begin.filename
335                                            << start_line
336                                            << start_col;
337      break;
338    }
339  }
340}
341
342void ScriptScanner::setLexState(ScriptFile::Kind pKind)
343{
344  /* push the state into the top of stach */
345  m_StateStack.push(pKind);
346
347  switch (pKind) {
348  case ScriptFile::LDScript:
349    BEGIN(LDSCRIPT);
350    break;
351  case ScriptFile::Expression:
352    BEGIN(EXPRESSION);
353    break;
354  case ScriptFile::VersionScript:
355  case ScriptFile::DynamicList:
356  default:
357    assert(0 && "Unsupported script type!");
358    break;
359  }
360}
361
362void ScriptScanner::popLexState()
363{
364  /* pop the last state */
365  m_StateStack.pop();
366
367  /* resume the appropriate state */
368  if (!m_StateStack.empty()) {
369    switch (m_StateStack.top()) {
370    case ScriptFile::LDScript:
371      BEGIN(LDSCRIPT);
372      break;
373    case ScriptFile::Expression:
374      BEGIN(EXPRESSION);
375      break;
376    case ScriptFile::VersionScript:
377    case ScriptFile::DynamicList:
378    default:
379      assert(0 && "Unsupported script type!");
380      break;
381    }
382  }
383}
384
385} /* namespace mcld */
386
/* Restore the clang diagnostic state saved by the "diagnostic push" near the
 * top of this file. */
#ifdef __clang__
#pragma clang diagnostic pop
#endif

/* Drop any macro named yylex so that the definition below really defines
 * yyFlexLexer::yylex(). */
#ifdef yylex
#undef yylex
#endif

/* Stub definition of the base-class yylex(); it matches nothing and always
 * returns 0.  Presumably the real scanner function is emitted under another
 * name via YY_DECL in ScriptScanner.h -- TODO confirm. */
int yyFlexLexer::yylex()
{
  return 0;
}
399
400