1/* Bison Grammar Scanner                             -*- C -*-
2
3   Copyright (C) 2002-2012 Free Software Foundation, Inc.
4
5   This file is part of Bison, the GNU Compiler Compiler.
6
7   This program is free software: you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation, either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20%option debug nodefault noinput nounput noyywrap never-interactive
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
24/* Work around a bug in flex 2.5.31.  See Debian bug 333231
25   <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>.  */
26#undef gram_wrap
27#define gram_wrap() 1
28
29#define FLEX_PREFIX(Id) gram_ ## Id
30#include "flex-scanner.h"
31
32#include "complain.h"
33#include "files.h"
34#include "gram.h"
35#include "quotearg.h"
36#include "reader.h"
37#include "uniqstr.h"
38
39#include <c-ctype.h>
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
46
47#define YY_USER_INIT					\
48   code_start = scanner_cursor = loc->start;		\
49
50/* Location of scanner cursor.  */
51static boundary scanner_cursor;
52
53#define YY_USER_ACTION  location_compute (loc, &scanner_cursor, yytext, yyleng);
54
55static size_t no_cr_read (FILE *, char *, size_t);
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58#define ROLLBACK_CURRENT_TOKEN                                  \
59  do {                                                          \
60    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);	\
61    yyless (0);                                                 \
62  } while (0)
63
64/* A string representing the most recently saved token.  */
65static char *last_string;
66
67/* Bracketed identifier. */
68static uniqstr bracketed_id_str = 0;
69static location bracketed_id_loc;
70static boundary bracketed_id_start;
71static int bracketed_id_context_state = 0;
72
73void
74gram_scanner_last_string_free (void)
75{
76  STRING_FREE;
77}
78
79static void handle_syncline (char *, location);
80static unsigned long int scan_integer (char const *p, int base, location loc);
81static int convert_ucn_to_byte (char const *hex_text);
82static void unexpected_eof (boundary, char const *);
83static void unexpected_newline (boundary, char const *);
84
85%}
86 /* A C-like comment in directives/rules. */
87%x SC_YACC_COMMENT
88 /* Strings and characters in directives/rules. */
89%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
90 /* An identifier was just read in directives/rules.  Special state
91    to capture the sequence 'identifier :'. */
92%x SC_AFTER_IDENTIFIER
93
94 /* Three types of user code:
95    - prologue (code between '%{' '%}' in the first section, before %%);
96    - actions, printers, union, etc. (between braces in the middle section);
97    - epilogue (everything after the second %%). */
98%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
99 /* C and C++ comments in code. */
100%x SC_COMMENT SC_LINE_COMMENT
101 /* Strings and characters in code. */
102%x SC_STRING SC_CHARACTER
103 /* Bracketed identifiers support. */
104%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
105
106letter	   [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
107notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
108id	  {letter}({letter}|[-0-9])*
109int	  [0-9]+
110
111/* POSIX says that a tag must be both an id and a C union member, but
112   historically almost any character is allowed in a tag.  We disallow
113   NUL and newline, as this simplifies our implementation.  */
114tag	 [^\0\n>]+
115
116/* Zero or more instances of backslash-newline.  Following GCC, allow
117   white space between the backslash and the newline.  */
118splice	 (\\[ \f\t\v]*\n)*
119
120%%
121%{
122  /* Nesting level of the current code in braces.  */
123  int braces_level PACIFY_CC (= 0);
124
125  /* Parent context state, when applicable.  */
126  int context_state PACIFY_CC (= 0);
127
128  /* Location of most recent identifier, when applicable.  */
129  location id_loc PACIFY_CC (= empty_location);
130
131  /* Where containing code started, when applicable.  Its initial
132     value is relevant only when yylex is invoked in the SC_EPILOGUE
133     start condition.  */
134  boundary code_start = scanner_cursor;
135
136  /* Where containing comment or string or character literal started,
137     when applicable.  */
138  boundary token_start PACIFY_CC (= scanner_cursor);
139%}
140
141
142  /*-----------------------.
143  | Scanning white space.  |
144  `-----------------------*/
145
146<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
147{
148  /* Comments and white space.  */
149  ","          warn_at (*loc, _("stray ',' treated as white space"));
150  [ \f\n\t\v]  |
151  "//".*       continue;
152  "/*" {
153    token_start = loc->start;
154    context_state = YY_START;
155    BEGIN SC_YACC_COMMENT;
156  }
157
158  /* #line directives are not documented, and may be withdrawn or
159     modified in future versions of Bison.  */
160  ^"#line "{int}(" \"".*"\"")?"\n" {
161    handle_syncline (yytext + sizeof "#line " - 1, *loc);
162  }
163}
164
165
166  /*----------------------------.
167  | Scanning Bison directives.  |
168  `----------------------------*/
169
170  /* For directives that are also command line options, the regex must be
171	"%..."
172     after "[-_]"s are removed, and the directive must match the --long
173     option name, with a single string argument.  Otherwise, add exceptions
174     to ../build-aux/cross-options.pl.  */
175
176<INITIAL>
177{
178  "%binary"                         return PERCENT_NONASSOC;
179  "%code"                           return PERCENT_CODE;
180  "%debug"                          return PERCENT_DEBUG;
181  "%default"[-_]"prec"              return PERCENT_DEFAULT_PREC;
182  "%define"                         return PERCENT_DEFINE;
183  "%defines"                        return PERCENT_DEFINES;
184  "%destructor"                     return PERCENT_DESTRUCTOR;
185  "%dprec"                          return PERCENT_DPREC;
186  "%error"[-_]"verbose"             return PERCENT_ERROR_VERBOSE;
187  "%expect"                         return PERCENT_EXPECT;
188  "%expect"[-_]"rr"                 return PERCENT_EXPECT_RR;
189  "%file-prefix"                    return PERCENT_FILE_PREFIX;
190  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
191  "%initial-action"                 return PERCENT_INITIAL_ACTION;
192  "%glr-parser"                     return PERCENT_GLR_PARSER;
193  "%language"                       return PERCENT_LANGUAGE;
194  "%left"                           return PERCENT_LEFT;
195  "%lex-param"                      return PERCENT_LEX_PARAM;
196  "%locations"                      return PERCENT_LOCATIONS;
197  "%merge"                          return PERCENT_MERGE;
198  "%name"[-_]"prefix"               return PERCENT_NAME_PREFIX;
199  "%no"[-_]"default"[-_]"prec"      return PERCENT_NO_DEFAULT_PREC;
200  "%no"[-_]"lines"                  return PERCENT_NO_LINES;
201  "%nonassoc"                       return PERCENT_NONASSOC;
202  "%nondeterministic-parser"        return PERCENT_NONDETERMINISTIC_PARSER;
203  "%nterm"                          return PERCENT_NTERM;
204  "%output"                         return PERCENT_OUTPUT;
205  "%parse-param"                    return PERCENT_PARSE_PARAM;
206  "%prec"                           return PERCENT_PREC;
207  "%printer"                        return PERCENT_PRINTER;
208  "%pure"[-_]"parser"               return PERCENT_PURE_PARSER;
209  "%require"                        return PERCENT_REQUIRE;
210  "%right"                          return PERCENT_RIGHT;
211  "%skeleton"                       return PERCENT_SKELETON;
212  "%start"                          return PERCENT_START;
213  "%term"                           return PERCENT_TOKEN;
214  "%token"                          return PERCENT_TOKEN;
215  "%token"[-_]"table"               return PERCENT_TOKEN_TABLE;
216  "%type"                           return PERCENT_TYPE;
217  "%union"                          return PERCENT_UNION;
218  "%verbose"                        return PERCENT_VERBOSE;
219  "%yacc"                           return PERCENT_YACC;
220
221  "%"{id}|"%"{notletter}([[:graph:]])+ {
222    complain_at (*loc, _("invalid directive: %s"), quote (yytext));
223  }
224
225  "="                     return EQUAL;
226  "|"                     return PIPE;
227  ";"                     return SEMICOLON;
228  "<*>"                   return TYPE_TAG_ANY;
229  "<>"                    return TYPE_TAG_NONE;
230
231  {id} {
232    val->uniqstr = uniqstr_new (yytext);
233    id_loc = *loc;
234    bracketed_id_str = NULL;
235    BEGIN SC_AFTER_IDENTIFIER;
236  }
237
238  {int} {
239    val->integer = scan_integer (yytext, 10, *loc);
240    return INT;
241  }
242  0[xX][0-9abcdefABCDEF]+ {
243    val->integer = scan_integer (yytext, 16, *loc);
244    return INT;
245  }
246
247  /* Identifiers may not start with a digit.  Yet, don't silently
248     accept "1FOO" as "1 FOO".  */
249  {int}{id} {
250    complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
251  }
252
253  /* Characters.  */
254  "'"	      token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
255
256  /* Strings. */
257  "\""	      token_start = loc->start; BEGIN SC_ESCAPED_STRING;
258
259  /* Prologue. */
260  "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;
261
262  /* Code in between braces.  */
263  "{" {
264    STRING_GROW;
265    braces_level = 0;
266    code_start = loc->start;
267    BEGIN SC_BRACED_CODE;
268  }
269
270  /* A type. */
271  "<"{tag}">" {
272    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
273    STRING_FINISH;
274    val->uniqstr = uniqstr_new (last_string);
275    STRING_FREE;
276    return TYPE;
277  }
278
279  "%%" {
280    static int percent_percent_count;
281    if (++percent_percent_count == 2)
282      BEGIN SC_EPILOGUE;
283    return PERCENT_PERCENT;
284  }
285
286  "[" {
287    bracketed_id_str = NULL;
288    bracketed_id_start = loc->start;
289    bracketed_id_context_state = YY_START;
290    BEGIN SC_BRACKETED_ID;
291  }
292
293  [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
294    complain_at (*loc, "%s: %s",
295                 ngettext ("invalid character", "invalid characters", yyleng),
296                 quote_mem (yytext, yyleng));
297  }
298
299  <<EOF>> {
300    loc->start = loc->end = scanner_cursor;
301    yyterminate ();
302  }
303}
304
305
306  /*-----------------------------------------------------------------.
307  | Scanning after an identifier, checking whether a colon is next.  |
308  `-----------------------------------------------------------------*/
309
310<SC_AFTER_IDENTIFIER>
311{
312  "[" {
313    if (bracketed_id_str)
314      {
315	ROLLBACK_CURRENT_TOKEN;
316	BEGIN SC_RETURN_BRACKETED_ID;
317	*loc = id_loc;
318	return ID;
319      }
320    else
321      {
322	bracketed_id_start = loc->start;
323	bracketed_id_context_state = YY_START;
324	BEGIN SC_BRACKETED_ID;
325      }
326  }
327  ":" {
328    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
329    *loc = id_loc;
330    return ID_COLON;
331  }
332  . {
333    ROLLBACK_CURRENT_TOKEN;
334    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
335    *loc = id_loc;
336    return ID;
337  }
338  <<EOF>> {
339    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
340    *loc = id_loc;
341    return ID;
342  }
343}
344
345  /*--------------------------------.
346  | Scanning bracketed identifiers. |
347  `--------------------------------*/
348
349<SC_BRACKETED_ID>
350{
351  {id} {
352    if (bracketed_id_str)
353      {
354	complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
355		     quote (yytext));
356      }
357    else
358      {
359	bracketed_id_str = uniqstr_new (yytext);
360	bracketed_id_loc = *loc;
361      }
362  }
363  "]" {
364    BEGIN bracketed_id_context_state;
365    if (bracketed_id_str)
366      {
367	if (INITIAL == bracketed_id_context_state)
368	  {
369	    val->uniqstr = bracketed_id_str;
370	    bracketed_id_str = 0;
371	    *loc = bracketed_id_loc;
372	    return BRACKETED_ID;
373	  }
374      }
375    else
376      complain_at (*loc, _("an identifier expected"));
377  }
378
379  [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
380    complain_at (*loc, "%s: %s",
381                 ngettext ("invalid character in bracketed name",
382                           "invalid characters in bracketed name", yyleng),
383                 quote_mem (yytext, yyleng));
384  }
385
386  <<EOF>> {
387    BEGIN bracketed_id_context_state;
388    unexpected_eof (bracketed_id_start, "]");
389  }
390}
391
392<SC_RETURN_BRACKETED_ID>
393{
394  . {
395    ROLLBACK_CURRENT_TOKEN;
396    val->uniqstr = bracketed_id_str;
397    bracketed_id_str = 0;
398    *loc = bracketed_id_loc;
399    BEGIN INITIAL;
400    return BRACKETED_ID;
401  }
402}
403
404
405  /*---------------------------------------------------------------.
406  | Scanning a Yacc comment.  The initial '/ *' is already eaten.  |
407  `---------------------------------------------------------------*/
408
409<SC_YACC_COMMENT>
410{
411  "*/"     BEGIN context_state;
412  .|\n	   continue;
413  <<EOF>>  unexpected_eof (token_start, "*/"); BEGIN context_state;
414}
415
416
417  /*------------------------------------------------------------.
418  | Scanning a C comment.  The initial '/ *' is already eaten.  |
419  `------------------------------------------------------------*/
420
421<SC_COMMENT>
422{
423  "*"{splice}"/"  STRING_GROW; BEGIN context_state;
424  <<EOF>>	  unexpected_eof (token_start, "*/"); BEGIN context_state;
425}
426
427
428  /*--------------------------------------------------------------.
429  | Scanning a line comment.  The initial '//' is already eaten.  |
430  `--------------------------------------------------------------*/
431
432<SC_LINE_COMMENT>
433{
434  "\n"		 STRING_GROW; BEGIN context_state;
435  {splice}	 STRING_GROW;
436  <<EOF>>	 BEGIN context_state;
437}
438
439
440  /*------------------------------------------------.
441  | Scanning a Bison string, including its escapes. |
442  | The initial quote is already eaten.             |
443  `------------------------------------------------*/
444
445<SC_ESCAPED_STRING>
446{
447  "\""|"\n" {
448    if (yytext[0] == '\n')
449      unexpected_newline (token_start, "\"");
450    STRING_FINISH;
451    loc->start = token_start;
452    val->chars = last_string;
453    BEGIN INITIAL;
454    return STRING;
455  }
456  <<EOF>> {
457    unexpected_eof (token_start, "\"");
458    STRING_FINISH;
459    loc->start = token_start;
460    val->chars = last_string;
461    BEGIN INITIAL;
462    return STRING;
463  }
464}
465
466  /*----------------------------------------------------------.
467  | Scanning a Bison character literal, decoding its escapes. |
468  | The initial quote is already eaten.			      |
469  `----------------------------------------------------------*/
470
471<SC_ESCAPED_CHARACTER>
472{
473  "'"|"\n" {
474    STRING_FINISH;
475    loc->start = token_start;
476    val->character = last_string[0];
477    {
478      /* FIXME: Eventually, make these errors.  */
479      if (last_string[0] == '\0')
480        {
481          warn_at (*loc, _("empty character literal"));
482          /* '\0' seems dangerous even if we are about to complain.  */
483          val->character = '\'';
484        }
485      else if (last_string[1] != '\0')
486        warn_at (*loc, _("extra characters in character literal"));
487    }
488    if (yytext[0] == '\n')
489      unexpected_newline (token_start, "'");
490    STRING_FREE;
491    BEGIN INITIAL;
492    return CHAR;
493  }
494  <<EOF>> {
495    STRING_FINISH;
496    loc->start = token_start;
497    val->character = last_string[0];
498    {
499      /* FIXME: Eventually, make these errors.  */
500      if (last_string[0] == '\0')
501        {
502          warn_at (*loc, _("empty character literal"));
503          /* '\0' seems dangerous even if we are about to complain.  */
504          val->character = '\'';
505        }
506      else if (last_string[1] != '\0')
507        warn_at (*loc, _("extra characters in character literal"));
508    }
509    unexpected_eof (token_start, "'");
510    STRING_FREE;
511    BEGIN INITIAL;
512    return CHAR;
513  }
514}
515
516<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
517{
518  \0	    complain_at (*loc, _("invalid null character"));
519}
520
521
522  /*----------------------------.
523  | Decode escaped characters.  |
524  `----------------------------*/
525
526<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
527{
528  \\[0-7]{1,3} {
529    unsigned long int c = strtoul (yytext + 1, NULL, 8);
530    if (!c || UCHAR_MAX < c)
531      complain_at (*loc, _("invalid number after \\-escape: %s"),
532                   yytext+1);
533    else
534      obstack_1grow (&obstack_for_string, c);
535  }
536
537  \\x[0-9abcdefABCDEF]+ {
538    verify (UCHAR_MAX < ULONG_MAX);
539    unsigned long int c = strtoul (yytext + 2, NULL, 16);
540    if (!c || UCHAR_MAX < c)
541      complain_at (*loc, _("invalid number after \\-escape: %s"),
542                   yytext+1);
543    else
544      obstack_1grow (&obstack_for_string, c);
545  }
546
547  \\a	obstack_1grow (&obstack_for_string, '\a');
548  \\b	obstack_1grow (&obstack_for_string, '\b');
549  \\f	obstack_1grow (&obstack_for_string, '\f');
550  \\n	obstack_1grow (&obstack_for_string, '\n');
551  \\r	obstack_1grow (&obstack_for_string, '\r');
552  \\t	obstack_1grow (&obstack_for_string, '\t');
553  \\v	obstack_1grow (&obstack_for_string, '\v');
554
555  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
556  \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);
557
558  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
559    int c = convert_ucn_to_byte (yytext);
560    if (c <= 0)
561      complain_at (*loc, _("invalid number after \\-escape: %s"),
562                   yytext+1);
563    else
564      obstack_1grow (&obstack_for_string, c);
565  }
566  \\(.|\n)	{
567    char const *p = yytext + 1;
568    /* Quote only if escaping won't make the character visible.  */
569    if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
570      p = quote (p);
571    else
572      p = quotearg_style_mem (escape_quoting_style, p, 1);
573    complain_at (*loc, _("invalid character after \\-escape: %s"), p);
574  }
575}
576
577  /*--------------------------------------------.
578  | Scanning user-code characters and strings.  |
579  `--------------------------------------------*/
580
581<SC_CHARACTER,SC_STRING>
582{
583  {splice}|\\{splice}[^\n\[\]]	STRING_GROW;
584}
585
586<SC_CHARACTER>
587{
588  "'"		STRING_GROW; BEGIN context_state;
589  \n		unexpected_newline (token_start, "'"); BEGIN context_state;
590  <<EOF>>	unexpected_eof (token_start, "'"); BEGIN context_state;
591}
592
593<SC_STRING>
594{
595  "\""		STRING_GROW; BEGIN context_state;
596  \n		unexpected_newline (token_start, "\""); BEGIN context_state;
597  <<EOF>>	unexpected_eof (token_start, "\""); BEGIN context_state;
598}
599
600
601  /*---------------------------------------------------.
602  | Strings, comments etc. can be found in user code.  |
603  `---------------------------------------------------*/
604
605<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
606{
607  "'" {
608    STRING_GROW;
609    context_state = YY_START;
610    token_start = loc->start;
611    BEGIN SC_CHARACTER;
612  }
613  "\"" {
614    STRING_GROW;
615    context_state = YY_START;
616    token_start = loc->start;
617    BEGIN SC_STRING;
618  }
619  "/"{splice}"*" {
620    STRING_GROW;
621    context_state = YY_START;
622    token_start = loc->start;
623    BEGIN SC_COMMENT;
624  }
625  "/"{splice}"/" {
626    STRING_GROW;
627    context_state = YY_START;
628    BEGIN SC_LINE_COMMENT;
629  }
630}
631
632
633
634  /*-----------------------------------------------------------.
635  | Scanning some code in braces (actions). The initial "{" is |
636  | already eaten.                                             |
637  `-----------------------------------------------------------*/
638
639<SC_BRACED_CODE>
640{
641  "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
642  "%"{splice}">"      STRING_GROW; braces_level--;
643  "}" {
644    obstack_1grow (&obstack_for_string, '}');
645
646    --braces_level;
647    if (braces_level < 0)
648      {
649	STRING_FINISH;
650	loc->start = code_start;
651	val->code = last_string;
652	BEGIN INITIAL;
653	return BRACED_CODE;
654      }
655  }
656
657  /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
658     (as '<' '<%').  */
659  "<"{splice}"<"  STRING_GROW;
660
661  <<EOF>> {
662    unexpected_eof (code_start, "}");
663    STRING_FINISH;
664    loc->start = code_start;
665    val->code = last_string;
666    BEGIN INITIAL;
667    return BRACED_CODE;
668  }
669}
670
671
672  /*--------------------------------------------------------------.
673  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
674  `--------------------------------------------------------------*/
675
676<SC_PROLOGUE>
677{
678  "%}" {
679    STRING_FINISH;
680    loc->start = code_start;
681    val->chars = last_string;
682    BEGIN INITIAL;
683    return PROLOGUE;
684  }
685
686  <<EOF>> {
687    unexpected_eof (code_start, "%}");
688    STRING_FINISH;
689    loc->start = code_start;
690    val->chars = last_string;
691    BEGIN INITIAL;
692    return PROLOGUE;
693  }
694}
695
696
697  /*---------------------------------------------------------------.
698  | Scanning the epilogue (everything after the second "%%", which |
699  | has already been eaten).                                       |
700  `---------------------------------------------------------------*/
701
702<SC_EPILOGUE>
703{
704  <<EOF>> {
705    STRING_FINISH;
706    loc->start = code_start;
707    val->chars = last_string;
708    BEGIN INITIAL;
709    return EPILOGUE;
710  }
711}
712
713
714  /*-----------------------------------------------------.
715  | By default, grow the string obstack with the input.  |
716  `-----------------------------------------------------*/
717
718<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.	|
719<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	STRING_GROW;
720
721%%
722
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Line endings are normalized on the
   way: every "\r\n" pair and every isolated '\r' becomes a single
   '\n', so the count returned can be smaller than the count read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *first_cr = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing was read, or there is no '\r': BUF is already final.  */
  if (!first_cr)
    return nread;

  /* Compact the buffer in place, starting at the first '\r'.  SRC
     walks the raw input while DST trails it, writing the filtered
     output.  */
  char const *const end = buf + nread;
  char const *src = first_cr;
  char *dst = first_cr;

  while (src < end)
    {
      char c = *src++;
      if (c != '\r')
        {
          *dst++ = c;
          continue;
        }

      /* A '\r' counts as a newline; skip a '\n' that directly
         follows it.  */
      *dst++ = '\n';
      if (src < end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so a matching '\n' may
             still be waiting in the stream.  Fetch one more byte:
             drop it if it is '\n', otherwise push it back so the
             next read sees it.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
        }
    }

  return dst - buf;
}
768
769
770
771/*------------------------------------------------------.
772| Scan NUMBER for a base-BASE integer at location LOC.  |
773`------------------------------------------------------*/
774
775static unsigned long int
776scan_integer (char const *number, int base, location loc)
777{
778  verify (INT_MAX < ULONG_MAX);
779  unsigned long int num = strtoul (number, NULL, base);
780
781  if (INT_MAX < num)
782    {
783      complain_at (loc, _("integer out of range: %s"), quote (number));
784      num = INT_MAX;
785    }
786
787  return num;
788}
789
790
/* Convert the universal character name UCN to a single-byte
   character and return that character.  UCN is the text of a \uXXXX
   or \UXXXXXXXX escape starting at its backslash.  Return -1 if UCN
   does not correspond to a single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* Skip the backslash and the 'u' or 'U'; the rest is hex digits.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int-typed result directly.  Storing the -1 sentinel
       back into the unsigned CODE and converting that to int on
       return would be an implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* ASCII host: CODE is at most UCHAR_MAX here, so it fits in int.  */
  return code;
#endif
}
846
847
848/*---------------------------------------------------------------------.
849| Handle '#line INT( "FILE")?\n'.  ARGS has already skipped '#line '.  |
850`---------------------------------------------------------------------*/
851
852static void
853handle_syncline (char *args, location loc)
854{
855  char *file;
856  unsigned long int lineno = strtoul (args, &file, 10);
857  if (INT_MAX <= lineno)
858    {
859      warn_at (loc, _("line number overflow"));
860      lineno = INT_MAX;
861    }
862
863  file = mbschr (file, '"');
864  if (file)
865    {
866      *mbschr (file + 1, '"') = '\0';
867      current_file = uniqstr_new (file + 1);
868    }
869  boundary_set (&scanner_cursor, current_file, lineno, 1);
870}
871
872
873/*----------------------------------------------------------------.
874| For a token or comment starting at START, report message MSGID, |
875| which should say that an end marker was found before		  |
876| the expected TOKEN_END.					  |
877`----------------------------------------------------------------*/
878
879static void
880unexpected_end (boundary start, char const *msgid, char const *token_end)
881{
882  location loc;
883  loc.start = start;
884  loc.end = scanner_cursor;
885  token_end = quote (token_end);
886  /* Instead of '\'', display "'".  */
887  if (!strcmp (token_end, "'\\''"))
888    token_end = "\"'\"";
889  complain_at (loc, _(msgid), token_end);
890}
891
892
893/*------------------------------------------------------------------------.
894| Report an unexpected EOF in a token or comment starting at START.       |
895| An end of file was encountered and the expected TOKEN_END was missing.  |
896`------------------------------------------------------------------------*/
897
898static void
899unexpected_eof (boundary start, char const *token_end)
900{
901  unexpected_end (start, N_("missing %s at end of file"), token_end);
902}
903
904
905/*----------------------------------------.
906| Likewise, but for unexpected newlines.  |
907`----------------------------------------*/
908
909static void
910unexpected_newline (boundary start, char const *token_end)
911{
912  unexpected_end (start, N_("missing %s at end of line"), token_end);
913}
914
915
916/*-------------------------.
917| Initialize the scanner.  |
918`-------------------------*/
919
920void
921gram_scanner_initialize (void)
922{
923  obstack_init (&obstack_for_string);
924}
925
926
927/*-----------------------------------------------.
928| Free all the memory allocated to the scanner.  |
929`-----------------------------------------------*/
930
931void
932gram_scanner_free (void)
933{
934  obstack_free (&obstack_for_string, 0);
935  /* Reclaim Flex's buffers.  */
936  yylex_destroy ();
937}
938