/** \file
 * \brief Defines the interface for a common token.
 *
 * All token streams should provide their tokens using an instance
 * of this common token. A custom pointer is provided, where you may attach
 * a further structure to enhance the common token if you feel the need
 * to do so. The C runtime will assume that a token provides implementations
 * of the interface functions, but all of them may be replaced by your own
 * implementation if you require it.
 */
11#ifndef	_ANTLR3_COMMON_TOKEN_H
12#define	_ANTLR3_COMMON_TOKEN_H
13
14// [The "BSD licence"]
15// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
16// http://www.temporal-wave.com
17// http://www.linkedin.com/in/jimidle
18//
19// All rights reserved.
20//
21// Redistribution and use in source and binary forms, with or without
22// modification, are permitted provided that the following conditions
23// are met:
24// 1. Redistributions of source code must retain the above copyright
25//    notice, this list of conditions and the following disclaimer.
26// 2. Redistributions in binary form must reproduce the above copyright
27//    notice, this list of conditions and the following disclaimer in the
28//    documentation and/or other materials provided with the distribution.
29// 3. The name of the author may not be used to endorse or promote products
30//    derived from this software without specific prior written permission.
31//
32// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
33// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
36// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
41// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42
43#include    <antlr3defs.h>
44
/** How many tokens to allocate at once in the token factory
 */
#define ANTLR3_FACTORY_POOL_SIZE        1024

/* Base token types, which all lexer/parser tokens come after in sequence.
 */

/** Indicator of an invalid token
 */
#define ANTLR3_TOKEN_INVALID            0

/** Token type reserved for EOR.
 *  NOTE(review): presumably "end of rule" - confirm against the runtime sources.
 */
#define ANTLR3_EOR_TOKEN_TYPE           1

/** Imaginary token type to cause a traversal of child nodes in a tree parser
 */
#define ANTLR3_TOKEN_DOWN               2

/** Imaginary token type to signal the end of a stream of child nodes.
 */
#define ANTLR3_TOKEN_UP                 3

/** First token that can be used by users/generated code.
 *
 *  The replacement list is parenthesized so that the macro behaves as a
 *  single value inside larger expressions (e.g. ANTLR3_MIN_TOKEN_TYPE * 2).
 */
#define ANTLR3_MIN_TOKEN_TYPE           (ANTLR3_TOKEN_UP + 1)

/** End of file token: ANTLR3_CHARSTREAM_EOF masked to its low 32 bits.
 */
#define ANTLR3_TOKEN_EOF                (ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)

/** Default channel for a token
 */
#define ANTLR3_TOKEN_DEFAULT_CHANNEL    0

/** Reserved channel number for a HIDDEN token - a token that
 *  is hidden from the parser.
 */
#define HIDDEN                          99
83
84#ifdef __cplusplus
85extern "C" {
86#endif
87
/** Values for the textState member of a token, indicating what the token
 *  is carrying:
 *
 *  State | Meaning
 *  ------+--------------------------------------
 *      0 | Nothing (neither rewrite text, nor setText)
 *      1 | char * to user supplied rewrite text
 *      2 | pANTLR3_STRING because of setText or similar action
 */
#define ANTLR3_TEXT_NONE        0   /* state 0: no text attached             */
#define ANTLR3_TEXT_CHARP       1   /* state 1: char * rewrite text          */
#define ANTLR3_TEXT_STRING      2   /* state 2: pANTLR3_STRING (e.g. setText) */
99
100/** The definition of an ANTLR3 common token structure, which all implementations
101 * of a token stream should provide, installing any further structures in the
102 * custom pointer element of this structure.
103 *
104 * \remark
105 * Token streams are in essence provided by lexers or other programs that serve
106 * as lexers.
107 */
108typedef	struct ANTLR3_COMMON_TOKEN_struct
109{
110    /** The actual type of this token
111     */
112    ANTLR3_UINT32   type;
113
114    /** Indicates that a token was produced from the token factory and therefore
115     *  the the freeToken() method should not do anything itself because
116     *  token factory is responsible for deleting it.
117     */
118    ANTLR3_BOOLEAN  factoryMade;
119
120	/// A string factory that we can use if we ever need the text of a token
121	/// and need to manufacture a pANTLR3_STRING
122	///
123	pANTLR3_STRING_FACTORY	strFactory;
124
125    /** The line number in the input stream where this token was derived from
126     */
127    ANTLR3_UINT32   line;
128
129    /** The offset into the input stream that the line in which this
130     *  token resides starts.
131     */
132    void	    * lineStart;
133
134    /** The character position in the line that this token was derived from
135     */
136    ANTLR3_INT32    charPosition;
137
138    /** The virtual channel that this token exists in.
139     */
140    ANTLR3_UINT32   channel;
141
142    /** Pointer to the input stream that this token originated in.
143     */
144    pANTLR3_INPUT_STREAM    input;
145
146    /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
147     */
148    ANTLR3_MARKER   index;
149
150    /** The character offset in the input stream where the text for this token
151     *  starts.
152     */
153    ANTLR3_MARKER   start;
154
155    /** The character offset in the input stream where the text for this token
156     *  stops.
157     */
158    ANTLR3_MARKER   stop;
159
160	/// Indicates whether this token is carrying:
161	///
162	/// State | Meaning
163	/// ------+--------------------------------------
164	///     0 | Nothing (neither rewrite text, nor setText)
165	///     1 | char * to user supplied rewrite text
166	///     2 | pANTLR3_STRING because of setText or similar action
167	///
168	/// Affects the union structure tokText below
169	/// (uses 32 bit so alignment is always good)
170	///
171	ANTLR3_UINT32	textState;
172
173	union
174	{
175		/// Pointer that is used when the token just has a pointer to
176		/// a char *, such as when a rewrite of an imaginary token supplies
177		/// a string in the grammar. No sense in constructing a pANTLR3_STRING just
178		/// for that, as mostly the text will not be accessed - if it is, then
179		/// we will build a pANTLR3_STRING for it a that point.
180		///
181		pANTLR3_UCHAR	chars;
182
183		/// Some token types actually do carry around their associated text, hence
184		/// (*getText)() will return this pointer if it is not NULL
185		///
186		pANTLR3_STRING	text;
187	}
188		tokText;
189
190    /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
191     *   as the standard structure for a token, a number of user programmable
192     *	 elements are allowed in a token. This is one of them.
193     */
194    ANTLR3_UINT32   user1;
195
196    /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
197     *   as the standard structure for a token, a number of user programmable
198     *	 elements are allowed in a token. This is one of them.
199     */
200    ANTLR3_UINT32   user2;
201
202    /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
203     *   as the standard structure for a token, a number of user programmable
204     *	 elements are allowed in a token. This is one of them.
205     */
206    ANTLR3_UINT32   user3;
207
208    /** Pointer to a custom element that the ANTLR3 programmer may define and install
209     */
210    void    * custom;
211
212    /** Pointer to a function that knows how to free the custom structure when the
213     *  token is destroyed.
214     */
215    void    (*freeCustom)(void * custom);
216
217    /* ==============================
218     * API
219     */
220
221    /** Pointer to function that returns the text pointer of a token, use
222     *  toString() if you want a pANTLR3_STRING version of the token.
223     */
224    pANTLR3_STRING  (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token);
225
226    /** Pointer to a function that 'might' be able to set the text associated
227     *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
228     *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have
229     *  strings associated with them but just point into the current input stream. These
230     *  tokens will implement this function with a function that errors out (probably
231     *  drastically.
232     */
233    void	    (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);
234
235    /** Pointer to a function that 'might' be able to set the text associated
236     *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
237     *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have
238     *  strings associated with them but just point into the current input stream. These
239     *  tokens will implement this function with a function that errors out (probably
240     *  drastically.
241     */
242    void	    (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);
243
244    /** Pointer to a function that returns the token type of this token
245     */
246    ANTLR3_UINT32   (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token);
247
248    /** Pointer to a function that sets the type of this token
249     */
250    void	    (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);
251
252    /** Pointer to a function that gets the 'line' number where this token resides
253     */
254    ANTLR3_UINT32   (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token);
255
256    /** Pointer to a function that sets the 'line' number where this token reside
257     */
258    void	    (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);
259
260    /** Pointer to a function that gets the offset in the line where this token exists
261     */
262    ANTLR3_INT32    (*getCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token);
263
264    /** Pointer to a function that sets the offset in the line where this token exists
265     */
266    void	    (*setCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);
267
268    /** Pointer to a function that gets the channel that this token was placed in (parsers
269     *  can 'tune' to these channels.
270     */
271    ANTLR3_UINT32   (*getChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token);
272
273    /** Pointer to a function that sets the channel that this token should belong to
274     */
275    void	    (*setChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);
276
277    /** Pointer to a function that returns an index 0...n-1 of the token in the token
278     *  input stream.
279     */
280    ANTLR3_MARKER   (*getTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
281
282    /** Pointer to a function that can set the token index of this token in the token
283     *  input stream.
284     */
285    void			(*setTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER);
286
287    /** Pointer to a function that gets the start index in the input stream for this token.
288     */
289    ANTLR3_MARKER   (*getStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
290
291    /** Pointer to a function that sets the start index in the input stream for this token.
292     */
293    void			(*setStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
294
295    /** Pointer to a function that gets the stop index in the input stream for this token.
296     */
297    ANTLR3_MARKER   (*getStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
298
299    /** Pointer to a function that sets the stop index in the input stream for this token.
300     */
301    void			(*setStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
302
303    /** Pointer to a function that returns this token as a text representation that can be
304     *  printed with embedded control codes such as \n replaced with the printable sequence "\\n"
305     *  This also yields a string structure that can be used more easily than the pointer to
306     *  the input stream in certain situations.
307     */
308    pANTLR3_STRING  (*toString)		(struct ANTLR3_COMMON_TOKEN_struct * token);
309}
310    ANTLR3_COMMON_TOKEN;
311
312/** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
313 *  rather than creating and freeing lots of little bits of memory.
314 */
315typedef	struct ANTLR3_TOKEN_FACTORY_struct
316{
317    /** Pointers to the array of tokens that this factory has produced so far
318     */
319    pANTLR3_COMMON_TOKEN    *pools;
320
321    /** Current pool tokens we are allocating from
322     */
323    ANTLR3_INT32	    thisPool;
324
325    /** Maximum pool count we have available
326     */
327    ANTLR3_INT32            maxPool;
328
329    /** The next token to throw out from the pool, will cause a new pool allocation
330     *  if this exceeds the available tokenCount
331     */
332    ANTLR3_UINT32	    nextToken;
333
334    /** Trick to initialize tokens and their API quickly, we set up this token when the
335     *  factory is created, then just copy the memory it uses into the new token.
336     */
337    ANTLR3_COMMON_TOKEN	    unTruc;
338
339    /** Pointer to an input stream that is using this token factory (may be NULL)
340     *  which will be assigned to the tokens automatically.
341     */
342    pANTLR3_INPUT_STREAM    input;
343
344    /** Pointer to a function that returns a new token
345     */
346    pANTLR3_COMMON_TOKEN    (*newToken)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
347
348    /** Pointer to a function that resets the factory so you can reuse the pools it
349     *  has laready allocated
350     */
351    void                    (*reset)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
352
353    /** Pointer to a function that changes teh curent inptu stream so that
354     *  new tokens are created with reference to their originating text.
355     */
356    void		    (*setInputStream)	(struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);
357    /** Pointer to a function the destroys the factory
358     */
359    void		    (*close)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
360}
361    ANTLR3_TOKEN_FACTORY;
362
363#ifdef __cplusplus
364}
365#endif
366
367#endif
368