#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tools/re2c/scanner.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/globals.h"
#include "tools/re2c/parser.h"
7
/* Larger of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif
11
/* Number of bytes requested from the input stream on each refill. */
#define	BSIZE	8192

/* Interface macros used by the generated scanner code.  They assume a
 * local variable named cursor and a Scanner pointer named s in scope. */
#define	YYCTYPE		unsigned char
#define	YYCURSOR	cursor
#define	YYLIMIT		s->lim
#define	YYMARKER	s->ptr
#define	YYFILL(n)	{cursor = fill(s, cursor);}

/* Store the local cursor back into the scanner, then return i. */
#define	RETURN(i)	{s->cur = cursor; return i;}
21
22static unsigned char *fill(Scanner*, unsigned char*);
23
24void
25Scanner_init(Scanner *s, FILE *i)
26{
27    s->in = i;
28    s->bot = s->tok = s->ptr = s->cur = s->pos = s->lim = s->top =
29	     s->eof = NULL;
30    s->tchar = s->tline = 0;
31    s->cline = 1;
32}
33
34static unsigned char *
35fill(Scanner *s, unsigned char *cursor)
36{
37    if(!s->eof){
38	unsigned int cnt = s->tok - s->bot;
39	if(cnt){
40	    memcpy(s->bot, s->tok, s->lim - s->tok);
41	    s->tok = s->bot;
42	    s->ptr -= cnt;
43	    cursor -= cnt;
44	    s->pos -= cnt;
45	    s->lim -= cnt;
46	}
47	if((s->top - s->lim) < BSIZE){
48	    unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE) + 1);
49	    memcpy(buf, s->tok, s->lim - s->tok);
50	    s->tok = buf;
51	    s->ptr = &buf[s->ptr - s->bot];
52	    cursor = &buf[cursor - s->bot];
53	    s->pos = &buf[s->pos - s->bot];
54	    s->lim = &buf[s->lim - s->bot];
55	    s->top = &s->lim[BSIZE];
56	    if (s->bot)
57		free(s->bot);
58	    s->bot = buf;
59	}
60	if((cnt = fread(s->lim, 1, BSIZE, s->in)) != BSIZE){
61	    s->eof = &s->lim[cnt]; *s->eof++ = '\0';
62	}
63	s->lim += cnt;
64    }
65    return cursor;
66}
67
/*
 * Named patterns shared by the scanner rules in this file:
 *   zero     the NUL byte
 *   any      any byte value
 *   dot      any byte except newline
 *   esc      any dot byte except backslash
 *   istring  bracketed class that starts with a caret
 *   cstring  bracketed class
 *   dstring  double-quoted string
 *   sstring  single-quoted string
 *   letter   ASCII letters
 *   digit    decimal digits
 * Inside the class and string patterns a backslash followed by any dot
 * byte is accepted as an escape.
 */
/*!re2c
zero		= "\000";
any		= [\000-\377];
dot		= any \ [\n];
esc		= dot \ [\\];
istring		= "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ;
cstring		= "["  ((esc \ [\]]) | "\\" dot)* "]" ;
dstring		= "\"" ((esc \ ["] ) | "\\" dot)* "\"";
sstring		= "'"  ((esc \ ['] ) | "\\" dot)* "'" ;
letter		= [a-zA-Z];
digit		= [0-9];
*/
80
81int
82Scanner_echo(Scanner *s, FILE *out)
83{
84    unsigned char *cursor = s->cur;
85    int ignore_eoc = 0;
86
87    /* Catch EOF */
88    if (s->eof && cursor == s->eof)
89	return 0;
90
91    s->tok = cursor;
92echo:
93/*!re2c
94	"/*!re2c"		{ fwrite(s->tok, 1, &cursor[-7] - s->tok, out);
95				  s->tok = cursor;
96				  RETURN(1); }
97	"/*!max:re2c" {
98		fprintf(out, "#define YYMAXFILL %u\n", maxFill);
99		s->tok = s->pos = cursor;
100		ignore_eoc = 1;
101		goto echo;
102	}
103	"*" "/"		{
104		if (ignore_eoc) {
105		    ignore_eoc = 0;
106		} else {
107		    fwrite(s->tok, 1, cursor - s->tok, out);
108		}
109		s->tok = s->pos = cursor;
110		goto echo;
111	}
112	"\n"			{ fwrite(s->tok, 1, cursor - s->tok, out);
113				  s->tok = s->pos = cursor; s->cline++; oline++;
114				  goto echo; }
115	zero			{ fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */
116				  if(cursor == s->eof) { RETURN(0); } }
117	any			{ goto echo; }
118*/
119}
120
121
122int
123Scanner_scan(Scanner *s)
124{
125    unsigned char *cursor = s->cur;
126    unsigned int depth;
127
128scan:
129    s->tchar = cursor - s->pos;
130    s->tline = s->cline;
131    s->tok = cursor;
132/*!re2c
133	"{"			{ depth = 1;
134				  goto code;
135				}
136	"/*"			{ depth = 1;
137				  goto comment; }
138
139	"*/"			{ s->tok = cursor;
140				  RETURN(0); }
141
142	dstring			{ s->cur = cursor;
143				  yylval.regexp = strToRE(Scanner_token(s));
144				  return STRING; }
145
146	sstring			{ s->cur = cursor;
147				  yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s));
148				  return STRING; }
149
150	"\""			{ Scanner_fatal(s, "unterminated string constant (missing \")"); }
151	"'"			{ Scanner_fatal(s, "unterminated string constant (missing ')"); }
152
153	istring			{ s->cur = cursor;
154				  yylval.regexp = invToRE(Scanner_token(s));
155				  return RANGE; }
156
157	cstring			{ s->cur = cursor;
158				  yylval.regexp = ranToRE(Scanner_token(s));
159				  return RANGE; }
160
161	"["			{ Scanner_fatal(s, "unterminated range (missing ])"); }
162
163	[()|=;/\\]		{ RETURN(*s->tok); }
164
165	[*+?]			{ yylval.op = *s->tok;
166				  RETURN(CLOSE); }
167
168	"{" [0-9]+ "}"		{ yylval.extop.minsize = atoi((char *)s->tok+1);
169				  yylval.extop.maxsize = atoi((char *)s->tok+1);
170				  RETURN(CLOSESIZE); }
171
172	"{" [0-9]+ "," [0-9]+ "}"	{ yylval.extop.minsize = atoi((char *)s->tok+1);
173				  yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1));
174				  RETURN(CLOSESIZE); }
175
176	"{" [0-9]+ ",}"		{ yylval.extop.minsize = atoi((char *)s->tok+1);
177				  yylval.extop.maxsize = -1;
178				  RETURN(CLOSESIZE); }
179
180	letter (letter|digit)*	{ SubStr substr;
181				  s->cur = cursor;
182				  substr = Scanner_token(s);
183				  yylval.symbol = Symbol_find(&substr);
184				  return ID; }
185
186	[ \t]+			{ goto scan; }
187
188	"\n"			{ if(cursor == s->eof) RETURN(0);
189				  s->pos = cursor; s->cline++;
190				  goto scan;
191	    			}
192
193	"."			{ s->cur = cursor;
194				  yylval.regexp = mkDot();
195				  return RANGE;
196				}
197
198	any			{ fprintf(stderr, "unexpected character: '%c'\n", *s->tok);
199				  goto scan;
200				}
201*/
202
203code:
204/*!re2c
205	"}"			{ if(--depth == 0){
206					s->cur = cursor;
207					yylval.token = Token_new(Scanner_token(s), s->tline);
208					return CODE;
209				  }
210				  goto code; }
211	"{"			{ ++depth;
212				  goto code; }
213	"\n"			{ if(cursor == s->eof) Scanner_fatal(s, "missing '}'");
214				  s->pos = cursor; s->cline++;
215				  goto code;
216				}
217	dstring | sstring | any	{ goto code; }
218*/
219
220comment:
221/*!re2c
222	"*/"			{ if(--depth == 0)
223					goto scan;
224				    else
225					goto comment; }
226	"/*"			{ ++depth;
227				  goto comment; }
228	"\n"			{ if(cursor == s->eof) RETURN(0);
229				  s->tok = s->pos = cursor; s->cline++;
230				  goto comment;
231				}
232        any			{ goto comment; }
233*/
234}
235
236void
237Scanner_fatal(Scanner *s, const char *msg)
238{
239    fprintf(stderr, "line %d, column %d: %s\n", s->tline, s->tchar + 1, msg);
240    exit(1);
241}
242