/* scan.l - scanner for flex input */

%{
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson.
 *
 * The United States Government has rights in this work pursuant
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms with or without
 * modification are permitted provided that: (1) source distributions retain
 * this entire copyright notice and comment, and (2) distributions including
 * binaries display the following acknowledgement:  ``This product includes
 * software developed by the University of California, Berkeley and its
 * contributors'' in the documentation or other materials provided with the
 * distribution and in all advertising materials mentioning features or use
 * of this software.  Neither the name of the University nor the names of
 * its contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/* $Header: /home/daffy/u0/vern/flex/RCS/scan.l,v 2.56 95/04/24 12:17:19 vern Exp $ */

#include "flexdef.h"
#include "parse.h"

/* Hand the text of the current token to add_action(). */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) only when should_define is non-zero.
 * Wrapped in do/while so that it behaves as one statement.
 */
#define ACTION_IFDEF(def, should_define) \
	do \
		{ \
		if ( should_define ) \
			action_define( def, 1 ); \
		} \
	while ( 0 )

/* Statement form of mark_prolog(); note that it carries its own ';'. */
#define MARK_END_OF_PROLOG mark_prolog();

/* Name and signature of the scanning routine generated from this file. */
#define YY_DECL \
	int flexscan()

/* Return a CHAR token whose value is the first byte of the current token.
 * The cast keeps bytes above 0x7f from becoming negative values.
 */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the current token into nmstr and return a NAME token.
 * NOTE(review): the copy is unbounded; nmstr is declared outside this
 * file, so confirm that it can hold the longest possible token.
 */
#define RETURNNAME \
	do \
		{ \
		strcpy( nmstr, yytext ); \
		return NAME; \
		} \
	while ( 0 )

/* Push str[start..] back onto the input, last character first, so that
 * it is rescanned in its original order.  Uses the caller's variable i.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = strlen( str ) - 1; i >= (start); --i ) \
		unput((str)[i])

/* Set the global reject flag when all_upper( str ) holds. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set the global yymore_used flag when all_lower( str ) holds. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
%}

/* Scanner options: patterns match without regard to case, no default
 * rule is generated, output goes to scan.c, the start-condition stack
 * is enabled (without yy_top_state), and stdin/stdout are not used as
 * initial streams.
 */
%option caseless nodefault outfile="scan.c" stack noyy_top_state
%option nostdinit

/* Exclusive start conditions used by the rules section. */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR

/* Blanks: at least one, zero or more, and any character that is neither
 * a blank nor a newline.
 */
WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\n]

/* End of line, tolerating a carriage return before the newline. */
NL		\r?\n

/* A name starts with a letter or underscore and continues with letters,
 * digits, underscores or hyphens.  NOT_NAME is a run of characters that
 * cannot start a name (and is not an asterisk or newline).
 */
NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

/* Start condition names follow the same syntax as other names. */
SCNAME		{NAME}

/* Backslash escape: any single character, one to three octal digits,
 * or x followed by one or two hexadecimal digits.
 */
ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

/* Members of a character class; the first one may be a right bracket. */
FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"[[:alpha:]]+":]")

/* Single-letter percent directives that section 1 accepts and ignores. */
LEXOPT		[aceknopr]

%%
	/* Declarations placed at the top of the scanning routine.
	 *
	 * bracelevel        - brace nesting depth of the code being copied
	 * didadef           - whether text followed the name being defined
	 * indented_code     - code block was introduced by indentation
	 * doing_rule_action - the action being copied belongs to a rule,
	 *                     so YY_BREAK is appended when it ends
	 * option_sense      - polarity of the %option keyword being read
	 */
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	/* Automatic variables, reinitialised on every call: a flag for
	 * code blocks in the rules section, a scratch index (also used
	 * by PUT_BACK_STRING), and the buffer for a definition's text.
	 */
	int doing_codeblock = false;
	int i;
	Char nmdef[MAXLINE], myesc();


<INITIAL>{
	/* Section 1 of the input: code blocks, comments, directives,
	 * start condition declarations and name definitions.
	 */
	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	{
			++linenum;
			line_directive_out( (FILE *) 0, 1 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

	{WS}		/* discard */

	^"%%".*		{
			/* End of section 1; the rules section follows. */
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0, 1 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return OPTION_OP;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			/* A name at the start of a line begins a definition;
			 * its text is collected in PICKUPDEF.
			 */
			strcpy( nmstr, yytext );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}


<COMMENT>{
	/* Copy a C comment through unchanged, counting its lines, and
	 * return to the state that was active when it began.
	 */
	"*/"		ACTION_ECHO; yy_pop_state();
	"*"		ACTION_ECHO;
	[^*\n]+		ACTION_ECHO;
	[^*\n]*{NL}	++linenum; ACTION_ECHO;
}

<LINEDIR>{
	/* Remainder of a #line directive: a number sets linenum and a
	 * quoted string replaces infilename.
	 */
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			/* Copy from just past the opening quote, then
			 * overwrite the closing quote with a terminator.
			 */
			flex_free( (void *) infilename );
			infilename = copy_string( yytext + 1 );
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}

<CODEBLOCK>{
	/* Code in section 1: copied through until the closing marker,
	 * or until end of line when the block was merely indented.
	 */
	^"%}".*{NL}	++linenum; BEGIN(INITIAL);

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;

	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code )
				BEGIN(INITIAL);
			}
}


<PICKUPDEF>{
	/* Text following a name in section 1: the name's definition. */
	{WS}		/* separates name and definition */

	{NOT_WS}.*	{
			if ( yyleng >= MAXLINE )
				{
				/* nmdef holds MAXLINE bytes, so a longer
				 * definition cannot be copied into it.
				 */
				synerr( _( "name definition too long" ) );
				}

			else
				{
				strcpy( (char *) nmdef, yytext );

				/* Skip trailing whitespace. */
				for ( i = strlen( (char *) nmdef ) - 1;
				      i >= 0 &&
				      (nmdef[i] == ' ' || nmdef[i] == '\t');
				      --i )
					;

				nmdef[i + 1] = '\0';

				ndinstal( nmstr, nmdef );
				}

			/* Text was present either way, so the end-of-line
			 * rule must not also report a missing definition.
			 */
			didadef = true;
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}


<OPTION>{
	/* Keywords of a %option line.  Whitespace resets option_sense to
	 * true and each "no" prefix inverts it, so the assignments below
	 * apply the polarity of the keyword just read.
	 */
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
			}
	array		yytext_is_array = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		caseins = ! option_sense;
	caseless|case-insensitive	caseins = option_sense;
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	main		{
			action_define( "YY_MAIN", option_sense );
			do_yywrap = ! option_sense;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			action_define( "YY_NEVER_INTERACTIVE", option_sense );
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
	reject		reject_really_used = option_sense;
	stack		action_define( "YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense;
	yymore		yymore_really_used = option_sense;
	yywrap		do_yywrap = option_sense;

	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);

	outfile		return OPT_OUTFILE;
	prefix		return OPT_PREFIX;
	yyclass		return OPT_YYCLASS;

	\"[^"\n]*\"	{
			/* Quoted option value: drop both quotes. */
			strcpy( nmstr, yytext + 1 );
			nmstr[strlen( nmstr ) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}

	/* After a bad option, discard the rest of the line. */
<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);


<SECT2PROLOG>{
	/* Code preceding the first rule of section 2.  It is copied
	 * through until an unindented line appears outside a bracketed
	 * code block; that line is the first rule.
	 */
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

	^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{ /* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				yy_set_bol( 1 );
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

	.*		ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}

<SECT2>{
	/* The rules section: pattern text is turned into tokens for the
	 * parser, and whitespace after a pattern starts its action.
	 */
	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	BEGIN(SC); return '<';
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	BEGIN(NUM); return '{';
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';

	^{WS}"/*"	{
			yyless( yyleng - 2 );	/* put back '/', '*' */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			}

	^{WS}		/* allow indented rules */

	{WS}		{
			/* This rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}.
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	{OPTWS}{NL}	{
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			unput( '\n' );	/* so <ACTION> sees it */

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			yyterminate(); /* to stop the parser */
			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			strcpy( nmstr, yytext );

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
				{
				/* The match stops before the closing
				 * bracket, so consume it here.
				 */
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}

	"{"{NAME}"}"	{
			register Char *nmdefptr;
			Char *ndlookup();

			strcpy( nmstr, yytext + 1 );
			nmstr[yyleng - 2] = '\0';  /* chop trailing brace */

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					/* Pushed back in reverse so that it
					 * is rescanned as ( definition ).
					 */
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}


<SC>{
	/* Inside the angle brackets of a start condition list. */
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}

	/* A caret here is the beginning-of-line operator. */
<CARETISBOL>"^"		BEGIN(SECT2); return '^';


<QUOTE>{
	/* Inside a quoted string: each character is literal.  A newline
	 * before the closing quote is an error and ends the string.
	 */
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}


<FIRSTCCL>{
	/* First position of a character class, where a caret negates
	 * the class.  The state is kept after a caret that is followed
	 * by a hyphen or right bracket.
	 */
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}

<CCL>{
	/* Remainder of a character class, up to the closing bracket. */
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}

<FIRSTCCL,CCL>{
	/* Named character class expressions. */
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
	{CCL_EXPR}	{
			/* Unknown class name: report it, then return a
			 * valid token so that parsing can continue.
			 */
			format_synerr(
				_( "bad character class expression: %s" ),
					yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}

<NUM>{
	/* Inside the braces of a repeat count.  Errors return the
	 * closing brace so that the parser sees a balanced construct.
	 */
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		BEGIN(SECT2); return '}';

	.		{
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}


<PERCENT_BRACE_ACTION>{
	/* Code bracketed by percent-brace markers in section 2.  The
	 * nested scopes below add rules that other states share.
	 */
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );

	<CODEBLOCK,ACTION>{
		"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
		"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
	}

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = doing_codeblock = false;
				BEGIN(SECT2);
				}
			}
}


596	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
597<ACTION>{
598	"{"		ACTION_ECHO; ++bracelevel;
599	"}"		ACTION_ECHO; --bracelevel;
600	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
601	{NAME}		ACTION_ECHO;
602	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
603	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
604	{NL}		{
605			++linenum;
606			ACTION_ECHO;
607			if ( bracelevel == 0 )
608				{
609				if ( doing_rule_action )
610					add_action( "\tYY_BREAK\n" );
611
612				doing_rule_action = false;
613				BEGIN(SECT2);
614				}
615			}
616	.		ACTION_ECHO;
617}
618
<ACTION_STRING>{
	/* Inside a string literal within an action: copied through,
	 * with escaped characters kept paired with their backslash.
	 */
	[^"\\\n]+	ACTION_ECHO;
	\\.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;
	\"		ACTION_ECHO; BEGIN(ACTION);
	.		ACTION_ECHO;
}

<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
			/* Input ended before the action was complete. */
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}


<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			/* Convert the escape to its character value. */
			yylval = myesc( (Char *) yytext );

			/* An escape in first position still counts as the
			 * first member of the class.
			 */
			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}


<SECT3>{
	/* Section 3 is copied to the output one line at a time. */
	.*(\n?)		ECHO;
	<<EOF>>		sectnum = 0; yyterminate();
}

	/* Fallback for every start condition: report the character. */
<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );

%%


653int yywrap()
654	{
655	if ( --num_input_files > 0 )
656		{
657		set_input_file( *++input_files );
658		return 0;
659		}
660
661	else
662		return 1;
663	}
664
665
666/* set_input_file - open the given file (if NULL, stdin) for scanning */
667
668void set_input_file( file )
669char *file;
670	{
671	if ( file && strcmp( file, "-" ) )
672		{
673		infilename = copy_string( file );
674		yyin = fopen( infilename, "r" );
675
676		if ( yyin == NULL )
677			lerrsf( _( "can't open %s" ), file );
678		}
679
680	else
681		{
682		yyin = stdin;
683		infilename = copy_string( "<stdin>" );
684		}
685
686	linenum = 1;
687	}
688
689
/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - allocate size bytes; returns NULL when malloc() fails */

void *flex_alloc( size_t size )
	{
	return malloc( size );
	}

/* flex_realloc - resize the block at ptr to size bytes
 *
 * Returns whatever realloc() returns; on a NULL return the original
 * block is still owned by the caller.
 */

void *flex_realloc( void *ptr, size_t size )
	{
	return realloc( ptr, size );
	}

/* flex_free - release a block obtained from flex_alloc/flex_realloc
 *
 * A NULL pointer is accepted and ignored, since free( NULL ) does
 * nothing.
 */

void flex_free( void *ptr )
	{
	free( ptr );
	}
