1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \file 2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * While the C runtime does not need to model the state of 3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * multiple lexers and parsers in the same way as the Java runtime does 4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * it is no overhead to reflect that model. In fact the 5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * C runtime has always been able to share recognizer state. 6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This 'class' therefore defines all the elements of a recognizer 8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (either lexer, parser or tree parser) that are need to 9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * track the current recognition state. Multiple recognizers 10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * may then share this state, for instance when one grammar 11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * imports another. 12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_H 15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#define _ANTLR3_RECOGNIZER_SHARED_STATE_H 16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// [The "BSD licence"] 18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// http://www.temporal-wave.com 20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// http://www.linkedin.com/in/jimidle 21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// All rights reserved. 23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Redistribution and use in source and binary forms, with or without 25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// modification, are permitted provided that the following conditions 26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// are met: 27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 1. Redistributions of source code must retain the above copyright 28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// notice, this list of conditions and the following disclaimer. 29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 2. Redistributions in binary form must reproduce the above copyright 30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// notice, this list of conditions and the following disclaimer in the 31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// documentation and/or other materials provided with the distribution. 32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 3. The name of the author may not be used to endorse or promote products 33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// derived from this software without specific prior written permission. 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include <antlr3defs.h> 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifdef __cplusplus 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverextern "C" { 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** All the data elements required to track the current state 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * of any recognizer (lexer, parser, tree parser). 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * May be share between multiple recognizers such that 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * grammar inheritance is easily supported. 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertypedef struct ANTLR3_RECOGNIZER_SHARED_STATE_struct 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** If set to ANTLR3_TRUE then the recognizer has an exception 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * condition (this is tested by the generated code for the rules of 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the grammar). 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_BOOLEAN error; 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Points to the first in a possible chain of exceptions that the 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * recognizer has discovered. 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_EXCEPTION exception; 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Track around a hint from the creator of the recognizer as to how big this 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * thing is going to get, as the actress said to the bishop. This allows us 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to tune hash tables accordingly. This might not be the best place for this 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * in the end but we will see. 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 sizeHint; 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Track the set of token types that can follow any rule invocation. 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Stack structure, to support: List<BitSet>. 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STACK following; 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** This is true when we see an error and before having successfully 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * matched a token. Prevents generation of more than one error message 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * per error. 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_BOOLEAN errorRecovery; 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The index into the input stream where the last error occurred. 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is used to prevent infinite loops where an error is found 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * but no token is consumed during recovery...another error is found, 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ad nauseam. This is a failsafe mechanism to guarantee that at least 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * one token/tree node is consumed for two errors. 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_MARKER lastErrorIndex; 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** In lieu of a return value, this indicates that a rule or token 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * has failed to match. Reset to false upon valid token match. 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_BOOLEAN failed; 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** When the recognizer terminates, the error handling functions 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * will have incremented this value if any error occurred (that was displayed). It can then be 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * used by the grammar programmer without having to use static globals. 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 errorCount; 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** If 0, no backtracking is going on. Safe to exec actions etc... 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If >0 then it's the level of backtracking. 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 backtracking; 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the memoization table for ruleIndex. For key ruleStartIndex, you 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * get back the stop token for associated rule or MEMO_RULE_FAILED. 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is only used if rule memoization is on. 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INT_TRIE ruleMemo; 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to an array of token names 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * that are generally useful in error reporting. The generated parsers install 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * this pointer. The table it points to is statically allocated as 8 bit ascii 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * at parser compile time - grammar token names are thus restricted in character 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * sets, which does not seem to terrible. 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_UINT8 * tokenNames; 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** User programmable pointer that can be used for instance as a place to 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * store some tracking structure specific to the grammar that would not normally 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * be available to the error handling functions. 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * userp; 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The goal of all lexer rules/methods is to create a token object. 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is an instance variable as multiple rules may collaborate to 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * create a single token. For example, NUM : INT | FLOAT ; 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * In this case, you want the INT or FLOAT rule to set token and not 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * have it reset to a NUM token in rule NUM. 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_COMMON_TOKEN token; 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The goal of all lexer rules being to create a token, then a lexer 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * needs to build a token factory to create them. 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_TOKEN_FACTORY tokFactory; 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** A lexer is a source of tokens, produced by all the generated (or 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * hand crafted if you like) matching rules. As such it needs to provide 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a token source interface implementation. 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_TOKEN_SOURCE tokSource; 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The channel number for the current token 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 channel; 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The token type for the current token 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 type; 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The input line (where it makes sense) on which the first character of the current 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * token resides. 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 tokenStartLine; 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The character position of the first character of the current token 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * within the line specified by tokenStartLine 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 tokenStartCharPositionInLine; 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** What character index in the stream did the current token start at? 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Needed, for example, to get the text for current token. Set at 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the start of nextToken. 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_MARKER tokenStartCharIndex; 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Text for the current token. This can be overridden by setting this 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * variable directly or by using the SETTEXT() macro (preferred) in your 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * lexer rules. 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STRING text; 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** User controlled variables that will be installed in a newly created 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * token. 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 user1, user2, user3; 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * custom; 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Input stream stack, which allows the C programmer to switch input streams 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * easily and allow the standard nextToken() implementation to deal with it 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * as this is a common requirement. 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STACK streams; 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// A stack of token/tree rewrite streams that are available for use 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// by a parser or tree parser that is using rewrites to generate 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// an AST. This saves each rule in the recongizer from having to 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// allocate and deallocate rewtire streams on entry and exit. As 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// the parser recurses throgh the rules it will reach a steady state 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// of the maximum number of allocated streams, which instead of 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// deallocating them at rule exit, it will place on this stack for 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// reuse. The streams are then all finally freed when this stack 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// is freed. 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_VECTOR rStreams; 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_RECOGNIZER_SHARED_STATE; 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifdef __cplusplus 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 219