1// Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org)
2//
3// Copyright (C) 2003-2004 Nominum, Inc.
4//
5// Permission to use, copy, modify, and distribute this software for any
6// purpose with or without fee is hereby granted, provided that the above
7// copyright notice and this permission notice appear in all copies.
8//
9// THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
12// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15// OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16//
17
18package org.xbill.DNS;
19
20import java.io.*;
21import java.net.*;
22
23import org.xbill.DNS.utils.*;
24
25/**
 * Tokenizer is used to parse DNS records and zones from text format.
27 *
28 * @author Brian Wellington
29 * @author Bob Halley
30 */
31
32public class Tokenizer {
33
34private static String delim = " \t\n;()\"";
35private static String quotes = "\"";
36
37/** End of file */
38public static final int EOF		= 0;
39
40/** End of line */
41public static final int EOL		= 1;
42
43/** Whitespace; only returned when wantWhitespace is set */
44public static final int WHITESPACE	= 2;
45
46/** An identifier (unquoted string) */
47public static final int IDENTIFIER	= 3;
48
49/** A quoted string */
50public static final int QUOTED_STRING	= 4;
51
52/** A comment; only returned when wantComment is set */
53public static final int COMMENT		= 5;
54
55private PushbackInputStream is;
56private boolean ungottenToken;
57private int multiline;
58private boolean quoting;
59private String delimiters;
60private Token current;
61private StringBuffer sb;
62private boolean wantClose;
63
64private String filename;
65private int line;
66
67public static class Token {
68	/** The type of token. */
69	public int type;
70
71	/** The value of the token, or null for tokens without values. */
72	public String value;
73
74	private
75	Token() {
76		type = -1;
77		value = null;
78	}
79
80	private Token
81	set(int type, StringBuffer value) {
82		if (type < 0)
83			throw new IllegalArgumentException();
84		this.type = type;
85		this.value = value == null ? null : value.toString();
86		return this;
87	}
88
89	/**
90	 * Converts the token to a string containing a representation useful
91	 * for debugging.
92	 */
93	public String
94	toString() {
95		switch (type) {
96		case EOF:
97			return "<eof>";
98		case EOL:
99			return "<eol>";
100		case WHITESPACE:
101			return "<whitespace>";
102		case IDENTIFIER:
103			return "<identifier: " + value + ">";
104		case QUOTED_STRING:
105			return "<quoted_string: " + value + ">";
106		case COMMENT:
107			return "<comment: " + value + ">";
108		default:
109			return "<unknown>";
110		}
111	}
112
113	/** Indicates whether this token contains a string. */
114	public boolean
115	isString() {
116		return (type == IDENTIFIER || type == QUOTED_STRING);
117	}
118
119	/** Indicates whether this token contains an EOL or EOF. */
120	public boolean
121	isEOL() {
122		return (type == EOL || type == EOF);
123	}
124}
125
126static class TokenizerException extends TextParseException {
127	String message;
128
129	public
130	TokenizerException(String filename, int line, String message) {
131		super(filename + ":" + line + ": " + message);
132		this.message = message;
133	}
134
135	public String
136	getBaseMessage() {
137		return message;
138	}
139}
140
/**
 * Creates a Tokenizer that reads from an arbitrary input stream.  The stream
 * is wrapped in a BufferedInputStream unless it already is one.
 * @param is The InputStream to tokenize.
 */
public
Tokenizer(InputStream is) {
	InputStream buffered = (is instanceof BufferedInputStream)
		? is
		: new BufferedInputStream(is);
	// Room for two pushed-back bytes.
	this.is = new PushbackInputStream(buffered, 2);

	this.filename = "<none>";
	this.line = 1;

	this.delimiters = delim;
	this.quoting = false;
	this.multiline = 0;

	this.ungottenToken = false;
	this.current = new Token();
	this.sb = new StringBuffer();
}
159
/**
 * Creates a Tokenizer that reads the bytes of a string, as produced by
 * String.getBytes().
 * @param s The String to tokenize.
 */
public
Tokenizer(String s) {
	this(
		new ByteArrayInputStream(
			s.getBytes()
		)
	);
}
168
/**
 * Creates a Tokenizer that reads from a file.  The file's name (without
 * directories) is used in error messages, and close() will close the file.
 * @param f The File to tokenize.
 * @throws FileNotFoundException The file could not be opened.
 */
public
Tokenizer(File f) throws FileNotFoundException {
	this(new FileInputStream(f));
	this.filename = f.getName();
	this.wantClose = true;
}
179
180private int
181getChar() throws IOException {
182	int c = is.read();
183	if (c == '\r') {
184		int next = is.read();
185		if (next != '\n')
186			is.unread(next);
187		c = '\n';
188	}
189	if (c == '\n')
190		line++;
191	return c;
192}
193
194private void
195ungetChar(int c) throws IOException {
196	if (c == -1)
197		return;
198	is.unread(c);
199	if (c == '\n')
200		line--;
201}
202
203private int
204skipWhitespace() throws IOException {
205	int skipped = 0;
206	while (true) {
207		int c = getChar();
208		if (c != ' ' && c != '\t') {
209	                if (!(c == '\n' && multiline > 0)) {
210				ungetChar(c);
211				return skipped;
212			}
213		}
214		skipped++;
215	}
216}
217
218private void
219checkUnbalancedParens() throws TextParseException {
220	if (multiline > 0)
221		throw exception("unbalanced parentheses");
222}
223
224/**
225 * Gets the next token from a tokenizer.
226 * @param wantWhitespace If true, leading whitespace will be returned as a
227 * token.
228 * @param wantComment If true, comments are returned as tokens.
229 * @return The next token in the stream.
230 * @throws TextParseException The input was invalid.
231 * @throws IOException An I/O error occurred.
232 */
233public Token
234get(boolean wantWhitespace, boolean wantComment) throws IOException {
235	int type;
236	int c;
237
238	if (ungottenToken) {
239		ungottenToken = false;
240		if (current.type == WHITESPACE) {
241			if (wantWhitespace)
242				return current;
243		} else if (current.type == COMMENT) {
244			if (wantComment)
245				return current;
246		} else {
247			if (current.type == EOL)
248				line++;
249			return current;
250		}
251	}
252	int skipped = skipWhitespace();
253	if (skipped > 0 && wantWhitespace)
254		return current.set(WHITESPACE, null);
255	type = IDENTIFIER;
256	sb.setLength(0);
257	while (true) {
258		c = getChar();
259		if (c == -1 || delimiters.indexOf(c) != -1) {
260			if (c == -1) {
261				if (quoting)
262					throw exception("EOF in " +
263							"quoted string");
264				else if (sb.length() == 0)
265					return current.set(EOF, null);
266				else
267					return current.set(type, sb);
268			}
269			if (sb.length() == 0 && type != QUOTED_STRING) {
270				if (c == '(') {
271					multiline++;
272					skipWhitespace();
273					continue;
274				} else if (c == ')') {
275					if (multiline <= 0)
276						throw exception("invalid " +
277								"close " +
278								"parenthesis");
279					multiline--;
280					skipWhitespace();
281					continue;
282				} else if (c == '"') {
283					if (!quoting) {
284						quoting = true;
285						delimiters = quotes;
286						type = QUOTED_STRING;
287					} else {
288						quoting = false;
289						delimiters = delim;
290						skipWhitespace();
291					}
292					continue;
293				} else if (c == '\n') {
294					return current.set(EOL, null);
295				} else if (c == ';') {
296					while (true) {
297						c = getChar();
298						if (c == '\n' || c == -1)
299							break;
300						sb.append((char)c);
301					}
302					if (wantComment) {
303						ungetChar(c);
304						return current.set(COMMENT, sb);
305					} else if (c == -1 &&
306						   type != QUOTED_STRING)
307					{
308						checkUnbalancedParens();
309						return current.set(EOF, null);
310					} else if (multiline > 0) {
311						skipWhitespace();
312						sb.setLength(0);
313						continue;
314					} else
315						return current.set(EOL, null);
316				} else
317					throw new IllegalStateException();
318			} else
319				ungetChar(c);
320			break;
321		} else if (c == '\\') {
322			c = getChar();
323			if (c == -1)
324				throw exception("unterminated escape sequence");
325			sb.append('\\');
326		} else if (quoting && c == '\n') {
327			throw exception("newline in quoted string");
328		}
329		sb.append((char)c);
330	}
331	if (sb.length() == 0 && type != QUOTED_STRING) {
332		checkUnbalancedParens();
333		return current.set(EOF, null);
334	}
335	return current.set(type, sb);
336}
337
338/**
339 * Gets the next token from a tokenizer, ignoring whitespace and comments.
340 * @return The next token in the stream.
341 * @throws TextParseException The input was invalid.
342 * @throws IOException An I/O error occurred.
343 */
344public Token
345get() throws IOException {
346	return get(false, false);
347}
348
349/**
350 * Returns a token to the stream, so that it will be returned by the next call
351 * to get().
352 * @throws IllegalStateException There are already ungotten tokens.
353 */
354public void
355unget() {
356	if (ungottenToken)
357		throw new IllegalStateException
358				("Cannot unget multiple tokens");
359	if (current.type == EOL)
360		line--;
361	ungottenToken = true;
362}
363
364/**
365 * Gets the next token from a tokenizer and converts it to a string.
366 * @return The next token in the stream, as a string.
367 * @throws TextParseException The input was invalid or not a string.
368 * @throws IOException An I/O error occurred.
369 */
370public String
371getString() throws IOException {
372	Token next = get();
373	if (!next.isString()) {
374		throw exception("expected a string");
375	}
376	return next.value;
377}
378
379private String
380_getIdentifier(String expected) throws IOException {
381	Token next = get();
382	if (next.type != IDENTIFIER)
383		throw exception("expected " + expected);
384	return next.value;
385}
386
387/**
388 * Gets the next token from a tokenizer, ensures it is an unquoted string,
389 * and converts it to a string.
390 * @return The next token in the stream, as a string.
391 * @throws TextParseException The input was invalid or not an unquoted string.
392 * @throws IOException An I/O error occurred.
393 */
394public String
395getIdentifier() throws IOException {
396	return _getIdentifier("an identifier");
397}
398
399/**
400 * Gets the next token from a tokenizer and converts it to a long.
401 * @return The next token in the stream, as a long.
402 * @throws TextParseException The input was invalid or not a long.
403 * @throws IOException An I/O error occurred.
404 */
405public long
406getLong() throws IOException {
407	String next = _getIdentifier("an integer");
408	if (!Character.isDigit(next.charAt(0)))
409		throw exception("expected an integer");
410	try {
411		return Long.parseLong(next);
412	} catch (NumberFormatException e) {
413		throw exception("expected an integer");
414	}
415}
416
417/**
418 * Gets the next token from a tokenizer and converts it to an unsigned 32 bit
419 * integer.
420 * @return The next token in the stream, as an unsigned 32 bit integer.
421 * @throws TextParseException The input was invalid or not an unsigned 32
422 * bit integer.
423 * @throws IOException An I/O error occurred.
424 */
425public long
426getUInt32() throws IOException {
427	long l = getLong();
428	if (l < 0 || l > 0xFFFFFFFFL)
429		throw exception("expected an 32 bit unsigned integer");
430	return l;
431}
432
433/**
434 * Gets the next token from a tokenizer and converts it to an unsigned 16 bit
435 * integer.
436 * @return The next token in the stream, as an unsigned 16 bit integer.
437 * @throws TextParseException The input was invalid or not an unsigned 16
438 * bit integer.
439 * @throws IOException An I/O error occurred.
440 */
441public int
442getUInt16() throws IOException {
443	long l = getLong();
444	if (l < 0 || l > 0xFFFFL)
445		throw exception("expected an 16 bit unsigned integer");
446	return (int) l;
447}
448
449/**
450 * Gets the next token from a tokenizer and converts it to an unsigned 8 bit
451 * integer.
452 * @return The next token in the stream, as an unsigned 8 bit integer.
453 * @throws TextParseException The input was invalid or not an unsigned 8
454 * bit integer.
455 * @throws IOException An I/O error occurred.
456 */
457public int
458getUInt8() throws IOException {
459	long l = getLong();
460	if (l < 0 || l > 0xFFL)
461		throw exception("expected an 8 bit unsigned integer");
462	return (int) l;
463}
464
465/**
466 * Gets the next token from a tokenizer and parses it as a TTL.
467 * @return The next token in the stream, as an unsigned 32 bit integer.
468 * @throws TextParseException The input was not valid.
469 * @throws IOException An I/O error occurred.
470 * @see TTL
471 */
472public long
473getTTL() throws IOException {
474	String next = _getIdentifier("a TTL value");
475	try {
476		return TTL.parseTTL(next);
477	}
478	catch (NumberFormatException e) {
479		throw exception("expected a TTL value");
480	}
481}
482
483/**
484 * Gets the next token from a tokenizer and parses it as if it were a TTL.
485 * @return The next token in the stream, as an unsigned 32 bit integer.
486 * @throws TextParseException The input was not valid.
487 * @throws IOException An I/O error occurred.
488 * @see TTL
489 */
490public long
491getTTLLike() throws IOException {
492	String next = _getIdentifier("a TTL-like value");
493	try {
494		return TTL.parse(next, false);
495	}
496	catch (NumberFormatException e) {
497		throw exception("expected a TTL-like value");
498	}
499}
500
501/**
502 * Gets the next token from a tokenizer and converts it to a name.
503 * @param origin The origin to append to relative names.
504 * @return The next token in the stream, as a name.
505 * @throws TextParseException The input was invalid or not a valid name.
506 * @throws IOException An I/O error occurred.
507 * @throws RelativeNameException The parsed name was relative, even with the
508 * origin.
509 * @see Name
510 */
511public Name
512getName(Name origin) throws IOException {
513	String next = _getIdentifier("a name");
514	try {
515		Name name = Name.fromString(next, origin);
516		if (!name.isAbsolute())
517			throw new RelativeNameException(name);
518		return name;
519	}
520	catch (TextParseException e) {
521		throw exception(e.getMessage());
522	}
523}
524
525/**
526 * Gets the next token from a tokenizer and converts it to an IP Address.
527 * @param family The address family.
528 * @return The next token in the stream, as an InetAddress
529 * @throws TextParseException The input was invalid or not a valid address.
530 * @throws IOException An I/O error occurred.
531 * @see Address
532 */
533public InetAddress
534getAddress(int family) throws IOException {
535	String next = _getIdentifier("an address");
536	try {
537		return Address.getByAddress(next, family);
538	}
539	catch (UnknownHostException e) {
540		throw exception(e.getMessage());
541	}
542}
543
544/**
545 * Gets the next token from a tokenizer, which must be an EOL or EOF.
546 * @throws TextParseException The input was invalid or not an EOL or EOF token.
547 * @throws IOException An I/O error occurred.
548 */
549public void
550getEOL() throws IOException {
551	Token next = get();
552	if (next.type != EOL && next.type != EOF) {
553		throw exception("expected EOL or EOF");
554	}
555}
556
557/**
558 * Returns a concatenation of the remaining strings from a Tokenizer.
559 */
560private String
561remainingStrings() throws IOException {
562        StringBuffer buffer = null;
563        while (true) {
564                Tokenizer.Token t = get();
565                if (!t.isString())
566                        break;
567                if (buffer == null)
568                        buffer = new StringBuffer();
569                buffer.append(t.value);
570        }
571        unget();
572        if (buffer == null)
573                return null;
574        return buffer.toString();
575}
576
577/**
578 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
579 * them together, and converts the base64 encoded data to a byte array.
580 * @param required If true, an exception will be thrown if no strings remain;
581 * otherwise null be be returned.
582 * @return The byte array containing the decoded strings, or null if there
583 * were no strings to decode.
584 * @throws TextParseException The input was invalid.
585 * @throws IOException An I/O error occurred.
586 */
587public byte []
588getBase64(boolean required) throws IOException {
589	String s = remainingStrings();
590	if (s == null) {
591		if (required)
592			throw exception("expected base64 encoded string");
593		else
594			return null;
595	}
596	byte [] array = base64.fromString(s);
597	if (array == null)
598		throw exception("invalid base64 encoding");
599	return array;
600}
601
602/**
603 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
604 * them together, and converts the base64 encoded data to a byte array.
605 * @return The byte array containing the decoded strings, or null if there
606 * were no strings to decode.
607 * @throws TextParseException The input was invalid.
608 * @throws IOException An I/O error occurred.
609 */
610public byte []
611getBase64() throws IOException {
612	return getBase64(false);
613}
614
615/**
616 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
617 * them together, and converts the hex encoded data to a byte array.
618 * @param required If true, an exception will be thrown if no strings remain;
619 * otherwise null be be returned.
620 * @return The byte array containing the decoded strings, or null if there
621 * were no strings to decode.
622 * @throws TextParseException The input was invalid.
623 * @throws IOException An I/O error occurred.
624 */
625public byte []
626getHex(boolean required) throws IOException {
627	String s = remainingStrings();
628	if (s == null) {
629		if (required)
630			throw exception("expected hex encoded string");
631		else
632			return null;
633	}
634	byte [] array = base16.fromString(s);
635	if (array == null)
636		throw exception("invalid hex encoding");
637	return array;
638}
639
640/**
641 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
642 * them together, and converts the hex encoded data to a byte array.
643 * @return The byte array containing the decoded strings, or null if there
644 * were no strings to decode.
645 * @throws TextParseException The input was invalid.
646 * @throws IOException An I/O error occurred.
647 */
648public byte []
649getHex() throws IOException {
650	return getHex(false);
651}
652
653/**
654 * Gets the next token from a tokenizer and decodes it as hex.
655 * @return The byte array containing the decoded string.
656 * @throws TextParseException The input was invalid.
657 * @throws IOException An I/O error occurred.
658 */
659public byte []
660getHexString() throws IOException {
661	String next = _getIdentifier("a hex string");
662	byte [] array = base16.fromString(next);
663	if (array == null)
664		throw exception("invalid hex encoding");
665	return array;
666}
667
668/**
669 * Gets the next token from a tokenizer and decodes it as base32.
670 * @param b32 The base32 context to decode with.
671 * @return The byte array containing the decoded string.
672 * @throws TextParseException The input was invalid.
673 * @throws IOException An I/O error occurred.
674 */
675public byte []
676getBase32String(base32 b32) throws IOException {
677	String next = _getIdentifier("a base32 string");
678	byte [] array = b32.fromString(next);
679	if (array == null)
680		throw exception("invalid base32 encoding");
681	return array;
682}
683
684/**
685 * Creates an exception which includes the current state in the error message
686 * @param s The error message to include.
687 * @return The exception to be thrown
688 */
689public TextParseException
690exception(String s) {
691	return new TokenizerException(filename, line, s);
692}
693
694/**
695 * Closes any files opened by this tokenizer.
696 */
697public void
698close() {
699	if (wantClose) {
700		try {
701			is.close();
702		}
703		catch (IOException e) {
704		}
705	}
706}
707
708protected void
709finalize() {
710	close();
711}
712
713}
714