1// Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org) 2// 3// Copyright (C) 2003-2004 Nominum, Inc. 4// 5// Permission to use, copy, modify, and distribute this software for any 6// purpose with or without fee is hereby granted, provided that the above 7// copyright notice and this permission notice appear in all copies. 8// 9// THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES 10// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY 12// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 15// OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16// 17 18package org.xbill.DNS; 19 20import java.io.*; 21import java.net.*; 22 23import org.xbill.DNS.utils.*; 24 25/** 26 * Tokenizer is used to parse DNS records and zones from text format, 27 * 28 * @author Brian Wellington 29 * @author Bob Halley 30 */ 31 32public class Tokenizer { 33 34private static String delim = " \t\n;()\""; 35private static String quotes = "\""; 36 37/** End of file */ 38public static final int EOF = 0; 39 40/** End of line */ 41public static final int EOL = 1; 42 43/** Whitespace; only returned when wantWhitespace is set */ 44public static final int WHITESPACE = 2; 45 46/** An identifier (unquoted string) */ 47public static final int IDENTIFIER = 3; 48 49/** A quoted string */ 50public static final int QUOTED_STRING = 4; 51 52/** A comment; only returned when wantComment is set */ 53public static final int COMMENT = 5; 54 55private PushbackInputStream is; 56private boolean ungottenToken; 57private int multiline; 58private boolean quoting; 59private String delimiters; 60private Token current; 61private StringBuffer sb; 62private boolean wantClose; 63 64private String filename; 65private int line; 66 67public static class Token { 68 /** The type of token. */ 69 public int type; 70 71 /** The value of the token, or null for tokens without values. */ 72 public String value; 73 74 private 75 Token() { 76 type = -1; 77 value = null; 78 } 79 80 private Token 81 set(int type, StringBuffer value) { 82 if (type < 0) 83 throw new IllegalArgumentException(); 84 this.type = type; 85 this.value = value == null ? null : value.toString(); 86 return this; 87 } 88 89 /** 90 * Converts the token to a string containing a representation useful 91 * for debugging. 92 */ 93 public String 94 toString() { 95 switch (type) { 96 case EOF: 97 return "<eof>"; 98 case EOL: 99 return "<eol>"; 100 case WHITESPACE: 101 return "<whitespace>"; 102 case IDENTIFIER: 103 return "<identifier: " + value + ">"; 104 case QUOTED_STRING: 105 return "<quoted_string: " + value + ">"; 106 case COMMENT: 107 return "<comment: " + value + ">"; 108 default: 109 return "<unknown>"; 110 } 111 } 112 113 /** Indicates whether this token contains a string. */ 114 public boolean 115 isString() { 116 return (type == IDENTIFIER || type == QUOTED_STRING); 117 } 118 119 /** Indicates whether this token contains an EOL or EOF. */ 120 public boolean 121 isEOL() { 122 return (type == EOL || type == EOF); 123 } 124} 125 126static class TokenizerException extends TextParseException { 127 String message; 128 129 public 130 TokenizerException(String filename, int line, String message) { 131 super(filename + ":" + line + ": " + message); 132 this.message = message; 133 } 134 135 public String 136 getBaseMessage() { 137 return message; 138 } 139} 140 141/** 142 * Creates a Tokenizer from an arbitrary input stream. 143 * @param is The InputStream to tokenize. 144 */ 145public 146Tokenizer(InputStream is) { 147 if (!(is instanceof BufferedInputStream)) 148 is = new BufferedInputStream(is); 149 this.is = new PushbackInputStream(is, 2); 150 ungottenToken = false; 151 multiline = 0; 152 quoting = false; 153 delimiters = delim; 154 current = new Token(); 155 sb = new StringBuffer(); 156 filename = "<none>"; 157 line = 1; 158} 159 160/** 161 * Creates a Tokenizer from a string. 162 * @param s The String to tokenize. 163 */ 164public 165Tokenizer(String s) { 166 this(new ByteArrayInputStream(s.getBytes())); 167} 168 169/** 170 * Creates a Tokenizer from a file. 171 * @param f The File to tokenize. 172 */ 173public 174Tokenizer(File f) throws FileNotFoundException { 175 this(new FileInputStream(f)); 176 wantClose = true; 177 filename = f.getName(); 178} 179 180private int 181getChar() throws IOException { 182 int c = is.read(); 183 if (c == '\r') { 184 int next = is.read(); 185 if (next != '\n') 186 is.unread(next); 187 c = '\n'; 188 } 189 if (c == '\n') 190 line++; 191 return c; 192} 193 194private void 195ungetChar(int c) throws IOException { 196 if (c == -1) 197 return; 198 is.unread(c); 199 if (c == '\n') 200 line--; 201} 202 203private int 204skipWhitespace() throws IOException { 205 int skipped = 0; 206 while (true) { 207 int c = getChar(); 208 if (c != ' ' && c != '\t') { 209 if (!(c == '\n' && multiline > 0)) { 210 ungetChar(c); 211 return skipped; 212 } 213 } 214 skipped++; 215 } 216} 217 218private void 219checkUnbalancedParens() throws TextParseException { 220 if (multiline > 0) 221 throw exception("unbalanced parentheses"); 222} 223 224/** 225 * Gets the next token from a tokenizer. 226 * @param wantWhitespace If true, leading whitespace will be returned as a 227 * token. 228 * @param wantComment If true, comments are returned as tokens. 229 * @return The next token in the stream. 230 * @throws TextParseException The input was invalid. 231 * @throws IOException An I/O error occurred. 232 */ 233public Token 234get(boolean wantWhitespace, boolean wantComment) throws IOException { 235 int type; 236 int c; 237 238 if (ungottenToken) { 239 ungottenToken = false; 240 if (current.type == WHITESPACE) { 241 if (wantWhitespace) 242 return current; 243 } else if (current.type == COMMENT) { 244 if (wantComment) 245 return current; 246 } else { 247 if (current.type == EOL) 248 line++; 249 return current; 250 } 251 } 252 int skipped = skipWhitespace(); 253 if (skipped > 0 && wantWhitespace) 254 return current.set(WHITESPACE, null); 255 type = IDENTIFIER; 256 sb.setLength(0); 257 while (true) { 258 c = getChar(); 259 if (c == -1 || delimiters.indexOf(c) != -1) { 260 if (c == -1) { 261 if (quoting) 262 throw exception("EOF in " + 263 "quoted string"); 264 else if (sb.length() == 0) 265 return current.set(EOF, null); 266 else 267 return current.set(type, sb); 268 } 269 if (sb.length() == 0 && type != QUOTED_STRING) { 270 if (c == '(') { 271 multiline++; 272 skipWhitespace(); 273 continue; 274 } else if (c == ')') { 275 if (multiline <= 0) 276 throw exception("invalid " + 277 "close " + 278 "parenthesis"); 279 multiline--; 280 skipWhitespace(); 281 continue; 282 } else if (c == '"') { 283 if (!quoting) { 284 quoting = true; 285 delimiters = quotes; 286 type = QUOTED_STRING; 287 } else { 288 quoting = false; 289 delimiters = delim; 290 skipWhitespace(); 291 } 292 continue; 293 } else if (c == '\n') { 294 return current.set(EOL, null); 295 } else if (c == ';') { 296 while (true) { 297 c = getChar(); 298 if (c == '\n' || c == -1) 299 break; 300 sb.append((char)c); 301 } 302 if (wantComment) { 303 ungetChar(c); 304 return current.set(COMMENT, sb); 305 } else if (c == -1 && 306 type != QUOTED_STRING) 307 { 308 checkUnbalancedParens(); 309 return current.set(EOF, null); 310 } else if (multiline > 0) { 311 skipWhitespace(); 312 sb.setLength(0); 313 continue; 314 } else 315 return current.set(EOL, null); 316 } else 317 throw new IllegalStateException(); 318 } else 319 ungetChar(c); 320 break; 321 } else if (c == '\\') { 322 c = getChar(); 323 if (c == -1) 324 throw exception("unterminated escape sequence"); 325 sb.append('\\'); 326 } else if (quoting && c == '\n') { 327 throw exception("newline in quoted string"); 328 } 329 sb.append((char)c); 330 } 331 if (sb.length() == 0 && type != QUOTED_STRING) { 332 checkUnbalancedParens(); 333 return current.set(EOF, null); 334 } 335 return current.set(type, sb); 336} 337 338/** 339 * Gets the next token from a tokenizer, ignoring whitespace and comments. 340 * @return The next token in the stream. 341 * @throws TextParseException The input was invalid. 342 * @throws IOException An I/O error occurred. 343 */ 344public Token 345get() throws IOException { 346 return get(false, false); 347} 348 349/** 350 * Returns a token to the stream, so that it will be returned by the next call 351 * to get(). 352 * @throws IllegalStateException There are already ungotten tokens. 353 */ 354public void 355unget() { 356 if (ungottenToken) 357 throw new IllegalStateException 358 ("Cannot unget multiple tokens"); 359 if (current.type == EOL) 360 line--; 361 ungottenToken = true; 362} 363 364/** 365 * Gets the next token from a tokenizer and converts it to a string. 366 * @return The next token in the stream, as a string. 367 * @throws TextParseException The input was invalid or not a string. 368 * @throws IOException An I/O error occurred. 369 */ 370public String 371getString() throws IOException { 372 Token next = get(); 373 if (!next.isString()) { 374 throw exception("expected a string"); 375 } 376 return next.value; 377} 378 379private String 380_getIdentifier(String expected) throws IOException { 381 Token next = get(); 382 if (next.type != IDENTIFIER) 383 throw exception("expected " + expected); 384 return next.value; 385} 386 387/** 388 * Gets the next token from a tokenizer, ensures it is an unquoted string, 389 * and converts it to a string. 390 * @return The next token in the stream, as a string. 391 * @throws TextParseException The input was invalid or not an unquoted string. 392 * @throws IOException An I/O error occurred. 393 */ 394public String 395getIdentifier() throws IOException { 396 return _getIdentifier("an identifier"); 397} 398 399/** 400 * Gets the next token from a tokenizer and converts it to a long. 401 * @return The next token in the stream, as a long. 402 * @throws TextParseException The input was invalid or not a long. 403 * @throws IOException An I/O error occurred. 404 */ 405public long 406getLong() throws IOException { 407 String next = _getIdentifier("an integer"); 408 if (!Character.isDigit(next.charAt(0))) 409 throw exception("expected an integer"); 410 try { 411 return Long.parseLong(next); 412 } catch (NumberFormatException e) { 413 throw exception("expected an integer"); 414 } 415} 416 417/** 418 * Gets the next token from a tokenizer and converts it to an unsigned 32 bit 419 * integer. 420 * @return The next token in the stream, as an unsigned 32 bit integer. 421 * @throws TextParseException The input was invalid or not an unsigned 32 422 * bit integer. 423 * @throws IOException An I/O error occurred. 424 */ 425public long 426getUInt32() throws IOException { 427 long l = getLong(); 428 if (l < 0 || l > 0xFFFFFFFFL) 429 throw exception("expected an 32 bit unsigned integer"); 430 return l; 431} 432 433/** 434 * Gets the next token from a tokenizer and converts it to an unsigned 16 bit 435 * integer. 436 * @return The next token in the stream, as an unsigned 16 bit integer. 437 * @throws TextParseException The input was invalid or not an unsigned 16 438 * bit integer. 439 * @throws IOException An I/O error occurred. 440 */ 441public int 442getUInt16() throws IOException { 443 long l = getLong(); 444 if (l < 0 || l > 0xFFFFL) 445 throw exception("expected an 16 bit unsigned integer"); 446 return (int) l; 447} 448 449/** 450 * Gets the next token from a tokenizer and converts it to an unsigned 8 bit 451 * integer. 452 * @return The next token in the stream, as an unsigned 8 bit integer. 453 * @throws TextParseException The input was invalid or not an unsigned 8 454 * bit integer. 455 * @throws IOException An I/O error occurred. 456 */ 457public int 458getUInt8() throws IOException { 459 long l = getLong(); 460 if (l < 0 || l > 0xFFL) 461 throw exception("expected an 8 bit unsigned integer"); 462 return (int) l; 463} 464 465/** 466 * Gets the next token from a tokenizer and parses it as a TTL. 467 * @return The next token in the stream, as an unsigned 32 bit integer. 468 * @throws TextParseException The input was not valid. 469 * @throws IOException An I/O error occurred. 470 * @see TTL 471 */ 472public long 473getTTL() throws IOException { 474 String next = _getIdentifier("a TTL value"); 475 try { 476 return TTL.parseTTL(next); 477 } 478 catch (NumberFormatException e) { 479 throw exception("expected a TTL value"); 480 } 481} 482 483/** 484 * Gets the next token from a tokenizer and parses it as if it were a TTL. 485 * @return The next token in the stream, as an unsigned 32 bit integer. 486 * @throws TextParseException The input was not valid. 487 * @throws IOException An I/O error occurred. 488 * @see TTL 489 */ 490public long 491getTTLLike() throws IOException { 492 String next = _getIdentifier("a TTL-like value"); 493 try { 494 return TTL.parse(next, false); 495 } 496 catch (NumberFormatException e) { 497 throw exception("expected a TTL-like value"); 498 } 499} 500 501/** 502 * Gets the next token from a tokenizer and converts it to a name. 503 * @param origin The origin to append to relative names. 504 * @return The next token in the stream, as a name. 505 * @throws TextParseException The input was invalid or not a valid name. 506 * @throws IOException An I/O error occurred. 507 * @throws RelativeNameException The parsed name was relative, even with the 508 * origin. 509 * @see Name 510 */ 511public Name 512getName(Name origin) throws IOException { 513 String next = _getIdentifier("a name"); 514 try { 515 Name name = Name.fromString(next, origin); 516 if (!name.isAbsolute()) 517 throw new RelativeNameException(name); 518 return name; 519 } 520 catch (TextParseException e) { 521 throw exception(e.getMessage()); 522 } 523} 524 525/** 526 * Gets the next token from a tokenizer and converts it to an IP Address. 527 * @param family The address family. 528 * @return The next token in the stream, as an InetAddress 529 * @throws TextParseException The input was invalid or not a valid address. 530 * @throws IOException An I/O error occurred. 531 * @see Address 532 */ 533public InetAddress 534getAddress(int family) throws IOException { 535 String next = _getIdentifier("an address"); 536 try { 537 return Address.getByAddress(next, family); 538 } 539 catch (UnknownHostException e) { 540 throw exception(e.getMessage()); 541 } 542} 543 544/** 545 * Gets the next token from a tokenizer, which must be an EOL or EOF. 546 * @throws TextParseException The input was invalid or not an EOL or EOF token. 547 * @throws IOException An I/O error occurred. 548 */ 549public void 550getEOL() throws IOException { 551 Token next = get(); 552 if (next.type != EOL && next.type != EOF) { 553 throw exception("expected EOL or EOF"); 554 } 555} 556 557/** 558 * Returns a concatenation of the remaining strings from a Tokenizer. 559 */ 560private String 561remainingStrings() throws IOException { 562 StringBuffer buffer = null; 563 while (true) { 564 Tokenizer.Token t = get(); 565 if (!t.isString()) 566 break; 567 if (buffer == null) 568 buffer = new StringBuffer(); 569 buffer.append(t.value); 570 } 571 unget(); 572 if (buffer == null) 573 return null; 574 return buffer.toString(); 575} 576 577/** 578 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 579 * them together, and converts the base64 encoded data to a byte array. 580 * @param required If true, an exception will be thrown if no strings remain; 581 * otherwise null be be returned. 582 * @return The byte array containing the decoded strings, or null if there 583 * were no strings to decode. 584 * @throws TextParseException The input was invalid. 585 * @throws IOException An I/O error occurred. 586 */ 587public byte [] 588getBase64(boolean required) throws IOException { 589 String s = remainingStrings(); 590 if (s == null) { 591 if (required) 592 throw exception("expected base64 encoded string"); 593 else 594 return null; 595 } 596 byte [] array = base64.fromString(s); 597 if (array == null) 598 throw exception("invalid base64 encoding"); 599 return array; 600} 601 602/** 603 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 604 * them together, and converts the base64 encoded data to a byte array. 605 * @return The byte array containing the decoded strings, or null if there 606 * were no strings to decode. 607 * @throws TextParseException The input was invalid. 608 * @throws IOException An I/O error occurred. 609 */ 610public byte [] 611getBase64() throws IOException { 612 return getBase64(false); 613} 614 615/** 616 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 617 * them together, and converts the hex encoded data to a byte array. 618 * @param required If true, an exception will be thrown if no strings remain; 619 * otherwise null be be returned. 620 * @return The byte array containing the decoded strings, or null if there 621 * were no strings to decode. 622 * @throws TextParseException The input was invalid. 623 * @throws IOException An I/O error occurred. 624 */ 625public byte [] 626getHex(boolean required) throws IOException { 627 String s = remainingStrings(); 628 if (s == null) { 629 if (required) 630 throw exception("expected hex encoded string"); 631 else 632 return null; 633 } 634 byte [] array = base16.fromString(s); 635 if (array == null) 636 throw exception("invalid hex encoding"); 637 return array; 638} 639 640/** 641 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 642 * them together, and converts the hex encoded data to a byte array. 643 * @return The byte array containing the decoded strings, or null if there 644 * were no strings to decode. 645 * @throws TextParseException The input was invalid. 646 * @throws IOException An I/O error occurred. 647 */ 648public byte [] 649getHex() throws IOException { 650 return getHex(false); 651} 652 653/** 654 * Gets the next token from a tokenizer and decodes it as hex. 655 * @return The byte array containing the decoded string. 656 * @throws TextParseException The input was invalid. 657 * @throws IOException An I/O error occurred. 658 */ 659public byte [] 660getHexString() throws IOException { 661 String next = _getIdentifier("a hex string"); 662 byte [] array = base16.fromString(next); 663 if (array == null) 664 throw exception("invalid hex encoding"); 665 return array; 666} 667 668/** 669 * Gets the next token from a tokenizer and decodes it as base32. 670 * @param b32 The base32 context to decode with. 671 * @return The byte array containing the decoded string. 672 * @throws TextParseException The input was invalid. 673 * @throws IOException An I/O error occurred. 674 */ 675public byte [] 676getBase32String(base32 b32) throws IOException { 677 String next = _getIdentifier("a base32 string"); 678 byte [] array = b32.fromString(next); 679 if (array == null) 680 throw exception("invalid base32 encoding"); 681 return array; 682} 683 684/** 685 * Creates an exception which includes the current state in the error message 686 * @param s The error message to include. 687 * @return The exception to be thrown 688 */ 689public TextParseException 690exception(String s) { 691 return new TokenizerException(filename, line, s); 692} 693 694/** 695 * Closes any files opened by this tokenizer. 696 */ 697public void 698close() { 699 if (wantClose) { 700 try { 701 is.close(); 702 } 703 catch (IOException e) { 704 } 705 } 706} 707 708protected void 709finalize() { 710 close(); 711} 712 713} 714