/*
* Conditions Of Use
*
* This software was developed by employees of the National Institute of
* Standards and Technology (NIST), an agency of the Federal Government.
* Pursuant to title 15 United States Code Section 105, works of NIST
* employees are not subject to copyright protection in the United States
* and are considered to be in the public domain.  As a result, a formal
* license is not needed to use the software.
*
* This software is provided by NIST as a service and is expressly
* provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
* OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
* AND DATA ACCURACY.  NIST does not warrant or make any representations
* regarding the use of the software or the results thereof, including but
* not limited to the correctness, accuracy, reliability or usefulness of
* the software.
*
* Permission to use this software is contingent upon your acceptance
* of the terms of this agreement
*
* .
*
*/
26package gov.nist.core;
27
28import java.text.ParseException;
29import java.util.Hashtable;
30
31/** A lexical analyzer that is used by all parsers in our implementation.
32 *
33 *@version 1.2
34 *@since 1.1
35 *
36 *@author M. Ranganathan
37 */
38public class LexerCore extends StringTokenizer {
39
40    // IMPORTANT - All keyword matches should be between START and END
41    public static final int START = 2048;
42    public static final int END = START + 2048;
43    // IMPORTANT -- This should be < END
44    public static final int ID = END - 1;
45    public static final int SAFE = END - 2;
46    // Individial token classes.
47    public static final int WHITESPACE = END + 1;
48    public static final int DIGIT = END + 2;
49    public static final int ALPHA = END + 3;
50    public static final int BACKSLASH = (int) '\\';
51    public static final int QUOTE = (int) '\'';
52    public static final int AT = (int) '@';
53    public static final int SP = (int) ' ';
54    public static final int HT = (int) '\t';
55    public static final int COLON = (int) ':';
56    public static final int STAR = (int) '*';
57    public static final int DOLLAR = (int) '$';
58    public static final int PLUS = (int) '+';
59    public static final int POUND = (int) '#';
60    public static final int MINUS = (int) '-';
61    public static final int DOUBLEQUOTE = (int) '\"';
62    public static final int TILDE = (int) '~';
63    public static final int BACK_QUOTE = (int) '`';
64    public static final int NULL = (int) '\0';
65    public static final int EQUALS = (int) '=';
66    public static final int SEMICOLON = (int) ';';
67    public static final int SLASH = (int) '/';
68    public static final int L_SQUARE_BRACKET = (int) '[';
69    public static final int R_SQUARE_BRACKET = (int) ']';
70    public static final int R_CURLY = (int) '}';
71    public static final int L_CURLY = (int) '{';
72    public static final int HAT = (int) '^';
73    public static final int BAR = (int) '|';
74    public static final int DOT = (int) '.';
75    public static final int EXCLAMATION = (int) '!';
76    public static final int LPAREN = (int) '(';
77    public static final int RPAREN = (int) ')';
78    public static final int GREATER_THAN = (int) '>';
79    public static final int LESS_THAN = (int) '<';
80    public static final int PERCENT = (int) '%';
81    public static final int QUESTION = (int) '?';
82    public static final int AND = (int) '&';
83    public static final int UNDERSCORE = (int) '_';
84
85    protected static final Hashtable globalSymbolTable;
86    protected static final Hashtable lexerTables;
87    protected Hashtable currentLexer;
88    protected String currentLexerName;
89    protected Token currentMatch;
90
91    static {
92        globalSymbolTable = new Hashtable();
93        lexerTables = new Hashtable();
94    }
95
96    protected void addKeyword(String name, int value) {
97        // System.out.println("addKeyword " + name + " value = " + value);
98        // new Exception().printStackTrace();
99        Integer val = Integer.valueOf(value);
100        currentLexer.put(name, val);
101        if (!globalSymbolTable.containsKey(val))
102            globalSymbolTable.put(val, name);
103    }
104
105    public String lookupToken(int value) {
106        if (value > START) {
107            return (String) globalSymbolTable.get(Integer.valueOf(value));
108        } else {
109            Character ch = Character.valueOf((char) value);
110            return ch.toString();
111        }
112    }
113
114    protected Hashtable addLexer(String lexerName) {
115        currentLexer = (Hashtable) lexerTables.get(lexerName);
116        if (currentLexer == null) {
117            currentLexer = new Hashtable();
118            lexerTables.put(lexerName, currentLexer);
119        }
120        return currentLexer;
121    }
122
123    //public abstract void selectLexer(String lexerName);
124
125    public void selectLexer(String lexerName) {
126        this.currentLexerName = lexerName;
127    }
128
129    protected LexerCore() {
130        this.currentLexer = new Hashtable();
131        this.currentLexerName = "charLexer";
132    }
133
134    /** Initialize the lexer with a buffer.
135     */
136    public LexerCore(String lexerName, String buffer) {
137        super(buffer);
138        this.currentLexerName = lexerName;
139    }
140
141    /** Peek the next id but dont move the buffer pointer forward.
142     */
143
144    public String peekNextId() {
145        int oldPtr = ptr;
146        String retval = ttoken();
147        savedPtr = ptr;
148        ptr = oldPtr;
149        return retval;
150    }
151
152    /** Get the next id.
153     */
154    public String getNextId() {
155        return ttoken();
156    }
157
158    // call this after you call match
159    public Token getNextToken() {
160        return this.currentMatch;
161
162    }
163
164    /** Look ahead for one token.
165     */
166    public Token peekNextToken() throws ParseException {
167        return (Token) peekNextToken(1)[0];
168    }
169
170    public Token[] peekNextToken(int ntokens) throws ParseException {
171        int old = ptr;
172        Token[] retval = new Token[ntokens];
173        for (int i = 0; i < ntokens; i++) {
174            Token tok = new Token();
175            if (startsId()) {
176                String id = ttoken();
177                tok.tokenValue = id;
178                String idUppercase = id.toUpperCase();
179                if (currentLexer.containsKey(idUppercase)) {
180                    Integer type = (Integer) currentLexer.get(idUppercase);
181                    tok.tokenType = type.intValue();
182                } else
183                    tok.tokenType = ID;
184            } else {
185                char nextChar = getNextChar();
186                tok.tokenValue = String.valueOf(nextChar);
187                if (isAlpha(nextChar)) {
188                    tok.tokenType = ALPHA;
189                } else if (isDigit(nextChar)) {
190                    tok.tokenType = DIGIT;
191                } else
192                    tok.tokenType = (int) nextChar;
193            }
194            retval[i] = tok;
195        }
196        savedPtr = ptr;
197        ptr = old;
198        return retval;
199    }
200
201    /** Match the given token or throw an exception if no such token
202     * can be matched.
203     */
204    public Token match(int tok) throws ParseException {
205        if (Debug.parserDebug) {
206            Debug.println("match " + tok);
207        }
208        if (tok > START && tok < END) {
209            if (tok == ID) {
210                // Generic ID sought.
211                if (!startsId())
212                    throw new ParseException(buffer + "\nID expected", ptr);
213                String id = getNextId();
214                this.currentMatch = new Token();
215                this.currentMatch.tokenValue = id;
216                this.currentMatch.tokenType = ID;
217            } else if (tok == SAFE) {
218                if (!startsSafeToken())
219                    throw new ParseException(buffer + "\nID expected", ptr);
220                String id = ttokenSafe();
221                this.currentMatch = new Token();
222                this.currentMatch.tokenValue = id;
223                this.currentMatch.tokenType = SAFE;
224            } else {
225                String nexttok = getNextId();
226                Integer cur = (Integer) currentLexer.get(nexttok.toUpperCase());
227
228                if (cur == null || cur.intValue() != tok)
229                    throw new ParseException(
230                        buffer + "\nUnexpected Token : " + nexttok,
231                        ptr);
232                this.currentMatch = new Token();
233                this.currentMatch.tokenValue = nexttok;
234                this.currentMatch.tokenType = tok;
235            }
236        } else if (tok > END) {
237            // Character classes.
238            char next = lookAhead(0);
239            if (tok == DIGIT) {
240                if (!isDigit(next))
241                    throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
242                this.currentMatch = new Token();
243                this.currentMatch.tokenValue =
244                    String.valueOf(next);
245                this.currentMatch.tokenType = tok;
246                consume(1);
247
248            } else if (tok == ALPHA) {
249                if (!isAlpha(next))
250                    throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
251                this.currentMatch = new Token();
252                this.currentMatch.tokenValue =
253                    String.valueOf(next);
254                this.currentMatch.tokenType = tok;
255                consume(1);
256
257            }
258
259        } else {
260            // This is a direct character spec.
261            char ch = (char) tok;
262            char next = lookAhead(0);
263            if (next == ch) {
264                /*this.currentMatch = new Token();
265                this.currentMatch.tokenValue =
266                    String.valueOf(ch);
267                this.currentMatch.tokenType = tok;*/
268                consume(1);
269            } else
270                throw new ParseException(
271                    buffer + "\nExpecting  >>>" + ch + "<<< got >>>"
272                    + next + "<<<", ptr);
273        }
274        return this.currentMatch;
275    }
276
277    public void SPorHT() {
278        try {
279            char c = lookAhead(0);
280            while (c == ' ' || c == '\t') {
281                consume(1);
282                c = lookAhead(0);
283            }
284        } catch (ParseException ex) {
285            // Ignore
286        }
287    }
288
289    /**
290     * JvB: utility function added to validate tokens
291     *
292     * @see RFC3261 section 25.1:
293     * token       =  1*(alphanum / "-" / "." / "!" / "%" / "*"
294                     / "_" / "+" / "`" / "'" / "~" )
295
296     * @param c - character to check
297     * @return true iff character c is a valid token character as per RFC3261
298     */
299    public static final boolean isTokenChar( char c ) {
300        if ( isAlphaDigit(c) ) return true;
301        else switch (c)
302        {
303            case '-':
304            case '.':
305            case '!':
306            case '%':
307            case '*':
308            case '_':
309            case '+':
310            case '`':
311            case '\'':
312            case '~':
313                return true;
314            default:
315                return false;
316        }
317    }
318
319
320    public boolean startsId() {
321        try {
322            char nextChar = lookAhead(0);
323            return isTokenChar(nextChar);
324        } catch (ParseException ex) {
325            return false;
326        }
327    }
328
329    public boolean startsSafeToken() {
330        try {
331            char nextChar = lookAhead(0);
332            if (isAlphaDigit(nextChar)) {
333                return true;
334            }
335            else {
336                switch (nextChar) {
337                    case '_':
338                    case '+':
339                    case '-':
340                    case '!':
341                    case '`':
342                    case '\'':
343                    case '.':
344                    case '/':
345                    case '}':
346                    case '{':
347                    case ']':
348                    case '[':
349                    case '^':
350                    case '|':
351                    case '~':
352                    case '%': // bug fix by Bruno Konik, JvB copied here
353                    case '#':
354                    case '@':
355                    case '$':
356                    case ':':
357                    case ';':
358                    case '?':
359                    case '\"':
360                    case '*':
361                    case '=': // Issue 155 on java.net
362                        return true;
363                    default:
364                        return false;
365                }
366            }
367        } catch (ParseException ex) {
368            return false;
369        }
370    }
371
372    public String ttoken() {
373        int startIdx = ptr;
374        try {
375            while (hasMoreChars()) {
376                char nextChar = lookAhead(0);
377                if ( isTokenChar(nextChar) ) {
378                    consume(1);
379                } else {
380                    break;
381                }
382            }
383            return buffer.substring(startIdx, ptr);
384        } catch (ParseException ex) {
385            return null;
386        }
387    }
388
389    /* JvB: unreferenced
390    public String ttokenAllowSpace() {
391        int startIdx = ptr;
392        try {
393            while (hasMoreChars()) {
394                char nextChar = lookAhead(0);
395                if (isAlphaDigit(nextChar)) {
396                    consume(1);
397                }
398                else {
399                    boolean isValidChar = false;
400                    switch (nextChar) {
401                        case '_':
402                        case '+':
403                        case '-':
404                        case '!':
405                        case '`':
406                        case '\'':
407                        case '~':
408                        case '%': // bug fix by Bruno Konik, JvB copied here
409                        case '.':
410                        case ' ':
411                        case '\t':
412                        case '*':
413                            isValidChar = true;
414                    }
415                    if (isValidChar) {
416                        consume(1);
417                    }
418                    else {
419                        break;
420                    }
421                }
422
423            }
424            return buffer.substring(startIdx, ptr);
425        } catch (ParseException ex) {
426            return null;
427        }
428    }*/
429
430    public String ttokenSafe() {
431        int startIdx = ptr;
432        try {
433            while (hasMoreChars()) {
434                char nextChar = lookAhead(0);
435                if (isAlphaDigit(nextChar)) {
436                    consume(1);
437                }
438                else {
439                    boolean isValidChar = false;
440                    switch (nextChar) {
441                        case '_':
442                        case '+':
443                        case '-':
444                        case '!':
445                        case '`':
446                        case '\'':
447                        case '.':
448                        case '/':
449                        case '}':
450                        case '{':
451                        case ']':
452                        case '[':
453                        case '^':
454                        case '|':
455                        case '~':
456                        case '%': // bug fix by Bruno Konik, JvB copied here
457                        case '#':
458                        case '@':
459                        case '$':
460                        case ':':
461                        case ';':
462                        case '?':
463                        case '\"':
464                        case '*':
465                            isValidChar = true;
466                    }
467                    if (isValidChar) {
468                        consume(1);
469                    }
470                    else {
471                        break;
472                    }
473                }
474            }
475            return buffer.substring(startIdx, ptr);
476        } catch (ParseException ex) {
477            return null;
478        }
479    }
480
481    static final char ALPHA_VALID_CHARS = Character.MAX_VALUE;
482    static final char DIGIT_VALID_CHARS = Character.MAX_VALUE - 1;
483    static final char ALPHADIGIT_VALID_CHARS = Character.MAX_VALUE - 2;
484    public void consumeValidChars(char[] validChars) {
485        int validCharsLength = validChars.length;
486        try {
487            while (hasMoreChars()) {
488                char nextChar = lookAhead(0);
489                boolean isValid = false;
490                for (int i = 0; i < validCharsLength; i++) {
491                    char validChar = validChars[i];
492                    switch(validChar) {
493                        case ALPHA_VALID_CHARS:
494                            isValid = isAlpha(nextChar);
495                            break;
496                        case DIGIT_VALID_CHARS:
497                            isValid = isDigit(nextChar);
498                            break;
499                        case ALPHADIGIT_VALID_CHARS:
500                            isValid = isAlphaDigit(nextChar);
501                            break;
502                        default:
503                            isValid = nextChar == validChar;
504                    }
505                    if (isValid) {
506                        break;
507                    }
508                }
509                if (isValid) {
510                    consume(1);
511                }
512                else {
513                    break;
514                }
515            }
516        } catch (ParseException ex) {
517
518        }
519    }
520
521    /** Parse a comment string cursor is at a ". Leave cursor at closing "
522    *@return the substring containing the quoted string excluding the
523    * closing quote.
524    */
525    public String quotedString() throws ParseException {
526        int startIdx = ptr + 1;
527        if (lookAhead(0) != '\"')
528            return null;
529        consume(1);
530        while (true) {
531            char next = getNextChar();
532            if (next == '\"') {
533                // Got to the terminating quote.
534                break;
535            } else if (next == '\0') {
536                throw new ParseException(
537                    this.buffer + " :unexpected EOL",
538                    this.ptr);
539            } else if (next == '\\') {
540                consume(1);
541            }
542        }
543        return buffer.substring(startIdx, ptr - 1);
544    }
545
546    /** Parse a comment string cursor is at a "(". Leave cursor at )
547    *@return the substring containing the comment excluding the
548    * closing brace.
549    */
550    public String comment() throws ParseException {
551        StringBuffer retval = new StringBuffer();
552        if (lookAhead(0) != '(')
553            return null;
554        consume(1);
555        while (true) {
556            char next = getNextChar();
557            if (next == ')') {
558                break;
559            } else if (next == '\0') {
560                throw new ParseException(
561                    this.buffer + " :unexpected EOL",
562                    this.ptr);
563            } else if (next == '\\') {
564                retval.append(next);
565                next = getNextChar();
566                if (next == '\0')
567                    throw new ParseException(
568                        this.buffer + " : unexpected EOL",
569                        this.ptr);
570                retval.append(next);
571            } else {
572                retval.append(next);
573            }
574        }
575        return retval.toString();
576    }
577
578    /** Return a substring containing no semicolons.
579    *@return a substring containing no semicolons.
580    */
581    public String byteStringNoSemicolon() {
582        StringBuffer retval = new StringBuffer();
583        try {
584            while (true) {
585                char next = lookAhead(0);
586                // bug fix from Ben Evans.
587                if (next == '\0' || next == '\n' || next == ';' || next == ',' ) {
588                    break;
589                } else {
590                    consume(1);
591                    retval.append(next);
592                }
593            }
594        } catch (ParseException ex) {
595            return retval.toString();
596        }
597        return retval.toString();
598    }
599
600    /**
601     * Scan until you see a slash or an EOL.
602     *
603     * @return substring containing no slash.
604     */
605    public String byteStringNoSlash() {
606        StringBuffer retval = new StringBuffer();
607        try {
608            while (true) {
609                char next = lookAhead(0);
610                // bug fix from Ben Evans.
611                if (next == '\0' || next == '\n' || next == '/'  ) {
612                    break;
613                } else {
614                    consume(1);
615                    retval.append(next);
616                }
617            }
618        } catch (ParseException ex) {
619            return retval.toString();
620        }
621        return retval.toString();
622    }
623
624    /** Return a substring containing no commas
625    *@return a substring containing no commas.
626    */
627
628    public String byteStringNoComma() {
629        StringBuffer retval = new StringBuffer();
630        try {
631            while (true) {
632                char next = lookAhead(0);
633                if (next == '\n' || next == ',') {
634                    break;
635                } else {
636                    consume(1);
637                    retval.append(next);
638                }
639            }
640        } catch (ParseException ex) {
641        }
642        return retval.toString();
643    }
644
645    public static String charAsString(char ch) {
646        return String.valueOf(ch);
647    }
648
649    /** Lookahead in the inputBuffer for n chars and return as a string.
650     * Do not consume the input.
651     */
652    public String charAsString(int nchars) {
653        return buffer.substring(ptr, ptr + nchars);
654    }
655
656    /** Get and consume the next number.
657     *@return a substring corresponding to a number
658     *(i.e. sequence of digits).
659     */
660    public String number() throws ParseException {
661
662        int startIdx = ptr;
663        try {
664            if (!isDigit(lookAhead(0))) {
665                throw new ParseException(
666                    buffer + ": Unexpected token at " + lookAhead(0),
667                    ptr);
668            }
669            consume(1);
670            while (true) {
671                char next = lookAhead(0);
672                if (isDigit(next)) {
673                    consume(1);
674                } else
675                    break;
676            }
677            return buffer.substring(startIdx, ptr);
678        } catch (ParseException ex) {
679            return buffer.substring(startIdx, ptr);
680        }
681    }
682
683    /** Mark the position for backtracking.
684     *@return the current location of the pointer.
685     */
686    public int markInputPosition() {
687        return ptr;
688    }
689
690    /** Rewind the input ptr to the marked position.
691     *@param position - the position to rewind the parser to.
692     */
693    public void rewindInputPosition(int position) {
694        this.ptr = position;
695    }
696
697    /** Get the rest of the String
698     * @return rest of the buffer.
699     */
700    public String getRest() {
701        if (ptr >= buffer.length())
702            return null;
703        else
704            return buffer.substring(ptr);
705    }
706
707    /** Get the sub-String until the character is encountered
708     * @param c the character to match
709     * @return the substring that matches.
710     */
711    public String getString(char c) throws ParseException {
712        StringBuffer retval = new StringBuffer();
713        while (true) {
714            char next = lookAhead(0);
715            //System.out.println(" next = [" + next + ']' + "ptr = " + ptr);
716            //System.out.println(next == '\0');
717
718            if (next == '\0') {
719                throw new ParseException(
720                    this.buffer + "unexpected EOL",
721                    this.ptr);
722            } else if (next == c) {
723                consume(1);
724                break;
725            } else if (next == '\\') {
726                consume(1);
727                char nextchar = lookAhead(0);
728                if (nextchar == '\0') {
729                    throw new ParseException(
730                        this.buffer + "unexpected EOL",
731                        this.ptr);
732                } else {
733                    consume(1);
734                    retval.append(nextchar);
735                }
736            } else {
737                consume(1);
738                retval.append(next);
739            }
740        }
741        return retval.toString();
742    }
743
744    /** Get the read pointer.
745     */
746    public int getPtr() {
747        return this.ptr;
748    }
749
750    /** Get the buffer.
751     */
752    public String getBuffer() {
753        return this.buffer;
754    }
755
756    /** Create a parse exception.
757     */
758    public ParseException createParseException() {
759        return new ParseException(this.buffer, this.ptr);
760    }
761}
762