1/*
2* Conditions Of Use
3*
4* This software was developed by employees of the National Institute of
5* Standards and Technology (NIST), an agency of the Federal Government.
* Pursuant to title 15 United States Code Section 105, works of NIST
7* employees are not subject to copyright protection in the United States
8* and are considered to be in the public domain.  As a result, a formal
9* license is not needed to use the software.
10*
11* This software is provided by NIST as a service and is expressly
12* provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
13* OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
14* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
15* AND DATA ACCURACY.  NIST does not warrant or make any representations
16* regarding the use of the software or the results thereof, including but
17* not limited to the correctness, accuracy, reliability or usefulness of
18* the software.
19*
20* Permission to use this software is contingent upon your acceptance
21* of the terms of this agreement
22*
23* .
24*
25*/
26package gov.nist.javax.sip.parser;
27import gov.nist.core.HostNameParser;
28import gov.nist.core.HostPort;
29import gov.nist.core.NameValue;
30import gov.nist.core.NameValueList;
31import gov.nist.core.Token;
32import gov.nist.javax.sip.address.GenericURI;
33import gov.nist.javax.sip.address.SipUri;
34import gov.nist.javax.sip.address.TelURLImpl;
35import gov.nist.javax.sip.address.TelephoneNumber;
36import java.text.ParseException;
37
38/**
39 * Parser For SIP and Tel URLs. Other kinds of URL's are handled by the
40 * J2SE 1.4 URL class.
41 * @version 1.2 $Revision: 1.27 $ $Date: 2009/10/22 10:27:39 $
42 *
43 * @author M. Ranganathan   <br/>
44 *
45 *
46 */
47public class URLParser extends Parser {
48
49    public URLParser(String url) {
50        this.lexer = new Lexer("sip_urlLexer", url);
51    }
52
53    // public tag added - issued by Miguel Freitas
54    public URLParser(Lexer lexer) {
55        this.lexer = lexer;
56        this.lexer.selectLexer("sip_urlLexer");
57    }
58    protected static boolean isMark(char next) {
59        switch (next) {
60            case '-':
61            case '_':
62            case '.':
63            case '!':
64            case '~':
65            case '*':
66            case '\'':
67            case '(':
68            case ')':
69                return true;
70            default:
71                return false;
72        }
73    }
74
75    protected static boolean isUnreserved(char next) {
76        return Lexer.isAlphaDigit(next) || isMark(next);
77    }
78
79    protected static boolean isReservedNoSlash(char next) {
80        switch (next) {
81            case ';':
82            case '?':
83            case ':':
84            case '@':
85            case '&':
86            case '+':
87            case '$':
88            case ',':
89                return true;
90            default:
91                return false;
92        }
93    }
94
95    // Missing '=' bug in character set - discovered by interop testing
96    // at SIPIT 13 by Bob Johnson and Scott Holben.
97    // change . to ; by Bruno Konik
98    protected static boolean isUserUnreserved(char la) {
99        switch (la) {
100            case '&':
101            case '?':
102            case '+':
103            case '$':
104            case '#':
105            case '/':
106            case ',':
107            case ';':
108            case '=':
109                return true;
110            default:
111                return false;
112        }
113    }
114
115    protected String unreserved() throws ParseException {
116        char next = lexer.lookAhead(0);
117        if (isUnreserved(next)) {
118            lexer.consume(1);
119            return String.valueOf(next);
120        } else
121            throw createParseException("unreserved");
122
123    }
124
125    /** Name or value of a parameter.
126     */
127    protected String paramNameOrValue() throws ParseException {
128        int startIdx = lexer.getPtr();
129        while (lexer.hasMoreChars()) {
130            char next = lexer.lookAhead(0);
131            boolean isValidChar = false;
132            switch (next) {
133                case '[':
134                case ']':// JvB: fixed this one
135                case '/':
136                case ':':
137                case '&':
138                case '+':
139                case '$':
140                    isValidChar = true;
141            }
142            if (isValidChar || isUnreserved(next)) {
143                lexer.consume(1);
144            } else if (isEscaped()) {
145                lexer.consume(3);
146            } else
147                break;
148        }
149        return lexer.getBuffer().substring(startIdx, lexer.getPtr());
150    }
151
152    private NameValue uriParam() throws ParseException {
153        if (debug)
154            dbg_enter("uriParam");
155        try {
156            String pvalue = "";
157            String pname = paramNameOrValue();
158            char next = lexer.lookAhead(0);
159            boolean isFlagParam = true;
160            if (next == '=') {
161                lexer.consume(1);
162                pvalue = paramNameOrValue();
163                isFlagParam = false;
164            }
165            if (pname.length() == 0 &&
166                ( pvalue == null ||
167                pvalue.length() == 0))
168                return null;
169            else return new NameValue(pname, pvalue, isFlagParam);
170        } finally {
171            if (debug)
172                dbg_leave("uriParam");
173        }
174    }
175
176    protected static boolean isReserved(char next) {
177        switch (next) {
178            case ';':
179            case '/':
180            case '?':
181            case ':':
182            case '=': // Bug fix by Bruno Konik
183            case '@':
184            case '&':
185            case '+':
186            case '$':
187            case ',':
188                return true;
189            default:
190                return false;
191        }
192    }
193
194    protected String reserved() throws ParseException {
195        char next = lexer.lookAhead(0);
196        if (isReserved(next)) {
197            lexer.consume(1);
198            return new StringBuffer().append(next).toString();
199        } else
200            throw createParseException("reserved");
201    }
202
203    protected boolean isEscaped() {
204        try {
205            return lexer.lookAhead(0) == '%' &&
206                Lexer.isHexDigit(lexer.lookAhead(1)) &&
207                Lexer.isHexDigit(lexer.lookAhead(2));
208        } catch (Exception ex) {
209            return false;
210        }
211    }
212
213    protected String escaped() throws ParseException {
214        if (debug)
215            dbg_enter("escaped");
216        try {
217            StringBuffer retval = new StringBuffer();
218            char next = lexer.lookAhead(0);
219            char next1 = lexer.lookAhead(1);
220            char next2 = lexer.lookAhead(2);
221            if (next == '%'
222                && Lexer.isHexDigit(next1)
223                && Lexer.isHexDigit(next2)) {
224                lexer.consume(3);
225                retval.append(next);
226                retval.append(next1);
227                retval.append(next2);
228            } else
229                throw createParseException("escaped");
230            return retval.toString();
231        } finally {
232            if (debug)
233                dbg_leave("escaped");
234        }
235    }
236
237    protected String mark() throws ParseException {
238        if (debug)
239            dbg_enter("mark");
240        try {
241            char next = lexer.lookAhead(0);
242            if (isMark(next)) {
243                lexer.consume(1);
244                return new String( new char[]{next} );
245            } else
246                throw createParseException("mark");
247        } finally {
248            if (debug)
249                dbg_leave("mark");
250        }
251    }
252
253    protected String uric() {
254        if (debug)
255            dbg_enter("uric");
256        try {
257            try {
258                char la = lexer.lookAhead(0);
259                if (isUnreserved(la)) {
260                    lexer.consume(1);
261                    return Lexer.charAsString(la);
262                } else if (isReserved(la)) {
263                    lexer.consume(1);
264                    return Lexer.charAsString(la);
265                } else if (isEscaped()) {
266                    String retval = lexer.charAsString(3);
267                    lexer.consume(3);
268                    return retval;
269                } else
270                    return null;
271            } catch (Exception ex) {
272                return null;
273            }
274        } finally {
275            if (debug)
276                dbg_leave("uric");
277        }
278
279    }
280
281    protected String uricNoSlash() {
282        if (debug)
283            dbg_enter("uricNoSlash");
284        try {
285            try {
286                char la = lexer.lookAhead(0);
287                if (isEscaped()) {
288                    String retval = lexer.charAsString(3);
289                    lexer.consume(3);
290                    return retval;
291                } else if (isUnreserved(la)) {
292                    lexer.consume(1);
293                    return Lexer.charAsString(la);
294                } else if (isReservedNoSlash(la)) {
295                    lexer.consume(1);
296                    return Lexer.charAsString(la);
297                } else
298                    return null;
299            } catch (ParseException ex) {
300                return null;
301            }
302        } finally {
303            if (debug)
304                dbg_leave("uricNoSlash");
305        }
306    }
307
308    protected String uricString() throws ParseException {
309        StringBuffer retval = new StringBuffer();
310        while (true) {
311            String next = uric();
312            if (next == null) {
313                char la = lexer.lookAhead(0);
314                // JvB: allow IPv6 addresses in generic URI strings
315                // e.g. http://[::1]
316                if ( la == '[' ) {
317                    HostNameParser hnp = new HostNameParser(this.getLexer());
318                    HostPort hp = hnp.hostPort( false );
319                    retval.append(hp.toString());
320                    continue;
321                }
322                break;
323            }
324            retval.append(next);
325        }
326        return retval.toString();
327    }
328
329    /**
330     * Parse and return a structure for a generic URL.
331     * Note that non SIP URLs are just stored as a string (not parsed).
332     * @return URI is a URL structure for a SIP url.
333     * @throws ParseException if there was a problem parsing.
334     */
335    public GenericURI uriReference( boolean inBrackets ) throws ParseException {
336        if (debug)
337            dbg_enter("uriReference");
338        GenericURI retval = null;
339        Token[] tokens = lexer.peekNextToken(2);
340        Token t1 = (Token) tokens[0];
341        Token t2 = (Token) tokens[1];
342        try {
343
344            if (t1.getTokenType() == TokenTypes.SIP ||
345                    t1.getTokenType() == TokenTypes.SIPS) {
346                if (t2.getTokenType() == ':')
347                    retval = sipURL( inBrackets );
348                else
349                    throw createParseException("Expecting \':\'");
350            } else if (t1.getTokenType() == TokenTypes.TEL) {
351                if (t2.getTokenType() == ':') {
352                    retval = telURL( inBrackets );
353                } else
354                    throw createParseException("Expecting \':\'");
355            } else {
356                String urlString = uricString();
357                try {
358                    retval = new GenericURI(urlString);
359                } catch (ParseException ex) {
360                    throw createParseException(ex.getMessage());
361                }
362            }
363        } finally {
364            if (debug)
365                dbg_leave("uriReference");
366        }
367        return retval;
368    }
369
370    /**
371     * Parser for the base phone number.
372     */
373    private String base_phone_number() throws ParseException {
374        StringBuffer s = new StringBuffer();
375
376        if (debug)
377            dbg_enter("base_phone_number");
378        try {
379            int lc = 0;
380            while (lexer.hasMoreChars()) {
381                char w = lexer.lookAhead(0);
382                if (Lexer.isDigit(w)
383                    || w == '-'
384                    || w == '.'
385                    || w == '('
386                    || w == ')') {
387                    lexer.consume(1);
388                    s.append(w);
389                    lc++;
390                } else if (lc > 0)
391                    break;
392                else
393                    throw createParseException("unexpected " + w);
394            }
395            return s.toString();
396        } finally {
397            if (debug)
398                dbg_leave("base_phone_number");
399        }
400
401    }
402
403    /**
404     * Parser for the local phone #.
405     */
406    private String local_number() throws ParseException {
407        StringBuffer s = new StringBuffer();
408        if (debug)
409            dbg_enter("local_number");
410        try {
411            int lc = 0;
412            while (lexer.hasMoreChars()) {
413                char la = lexer.lookAhead(0);
414                if (la == '*'
415                    || la == '#'
416                    || la == '-'
417                    || la == '.'
418                    || la == '('
419                    || la == ')'
420                        // JvB: allow 'A'..'F', should be uppercase
421                    || Lexer.isHexDigit(la)) {
422                    lexer.consume(1);
423                    s.append(la);
424                    lc++;
425                } else if (lc > 0)
426                    break;
427                else
428                    throw createParseException("unexepcted " + la);
429            }
430            return s.toString();
431        } finally {
432            if (debug)
433                dbg_leave("local_number");
434        }
435
436    }
437
438    /**
439     * Parser for telephone subscriber.
440     *
441     * @return the parsed telephone number.
442     */
443    public final TelephoneNumber parseTelephoneNumber( boolean inBrackets )
444    	throws ParseException {
445        TelephoneNumber tn;
446
447        if (debug)
448            dbg_enter("telephone_subscriber");
449        lexer.selectLexer("charLexer");
450        try {
451            char c = lexer.lookAhead(0);
452            if (c == '+')
453                tn = global_phone_number( inBrackets );
454            else if (
455                Lexer.isHexDigit(c)// see RFC3966
456                    || c == '#'
457                    || c == '*'
458                    || c == '-'
459                    || c == '.'
460                    || c == '('
461                    || c == ')' ) {
462                tn = local_phone_number( inBrackets );
463            } else
464                throw createParseException("unexpected char " + c);
465            return tn;
466        } finally {
467            if (debug)
468                dbg_leave("telephone_subscriber");
469        }
470
471    }
472
473    private final TelephoneNumber global_phone_number( boolean inBrackets ) throws ParseException {
474        if (debug)
475            dbg_enter("global_phone_number");
476        try {
477            TelephoneNumber tn = new TelephoneNumber();
478            tn.setGlobal(true);
479            NameValueList nv = null;
480            this.lexer.match(PLUS);
481            String b = base_phone_number();
482            tn.setPhoneNumber(b);
483            if (lexer.hasMoreChars()) {
484                char tok = lexer.lookAhead(0);
485                if (tok == ';' && inBrackets) {
486                    this.lexer.consume(1);
487                    nv = tel_parameters();
488                    tn.setParameters(nv);
489                }
490            }
491            return tn;
492        } finally {
493            if (debug)
494                dbg_leave("global_phone_number");
495        }
496    }
497
498    private TelephoneNumber local_phone_number( boolean inBrackets ) throws ParseException {
499        if (debug)
500            dbg_enter("local_phone_number");
501        TelephoneNumber tn = new TelephoneNumber();
502        tn.setGlobal(false);
503        NameValueList nv = null;
504        String b = null;
505        try {
506            b = local_number();
507            tn.setPhoneNumber(b);
508            if (lexer.hasMoreChars()) {
509                Token tok = this.lexer.peekNextToken();
510                switch (tok.getTokenType()) {
511                    case SEMICOLON:
512                        {
513                        	if (inBrackets) {
514                        		this.lexer.consume(1);
515                        		nv = tel_parameters();
516                        		tn.setParameters(nv);
517                        	}
518                            break;
519                        }
520                    default :
521                        {
522                            break;
523                        }
524                }
525            }
526        } finally {
527            if (debug)
528                dbg_leave("local_phone_number");
529        }
530        return tn;
531    }
532
533    private NameValueList tel_parameters() throws ParseException {
534        NameValueList nvList = new NameValueList();
535
536        // JvB: Need to handle 'phone-context' specially
537        // 'isub' (or 'ext') MUST appear first, but we accept any order here
538        NameValue nv;
539        while ( true ) {
540            String pname = paramNameOrValue();
541
542            // Handle 'phone-context' specially, it may start with '+'
543            if ( pname.equalsIgnoreCase("phone-context")) {
544                nv = phone_context();
545            } else {
546                if (lexer.lookAhead(0) == '=') {
547                    lexer.consume(1);
548                    String value = paramNameOrValue();
549                    nv = new NameValue( pname, value, false );
550                } else {
551                    nv = new NameValue( pname, "", true );// flag param
552                }
553            }
554            nvList.set( nv );
555
556            if ( lexer.lookAhead(0) == ';' ) {
557                lexer.consume(1);
558            } else {
559                return nvList;
560            }
561        }
562
563    }
564
565    /**
566     * Parses the 'phone-context' parameter in tel: URLs
567     * @throws ParseException
568     */
569    private NameValue phone_context() throws ParseException {
570        lexer.match('=');
571
572        char la = lexer.lookAhead(0);
573        Object value;
574        if (la=='+') {// global-number-digits
575            lexer.consume(1);// skip '+'
576            value = "+" + base_phone_number();
577        } else if ( Lexer.isAlphaDigit(la) ) {
578            Token t = lexer.match( Lexer.ID );// more broad than allowed
579            value = t.getTokenValue();
580        } else {
581            throw new ParseException( "Invalid phone-context:" + la , -1 );
582        }
583        return new NameValue( "phone-context", value, false );
584    }
585
586    /**
587     * Parse and return a structure for a Tel URL.
588     * @return a parsed tel url structure.
589     */
590    public TelURLImpl telURL( boolean inBrackets ) throws ParseException {
591        lexer.match(TokenTypes.TEL);
592        lexer.match(':');
593        TelephoneNumber tn = this.parseTelephoneNumber(inBrackets);
594        TelURLImpl telUrl = new TelURLImpl();
595        telUrl.setTelephoneNumber(tn);
596        return telUrl;
597
598    }
599
600    /**
601     * Parse and return a structure for a SIP URL.
602     * @return a URL structure for a SIP url.
603     * @throws ParseException if there was a problem parsing.
604     */
605    public SipUri sipURL( boolean inBrackets ) throws ParseException {
606        if (debug)
607            dbg_enter("sipURL");
608        SipUri retval = new SipUri();
609        // pmusgrave - handle sips case
610        Token nextToken = lexer.peekNextToken();
611        int sipOrSips = TokenTypes.SIP;
612        String scheme = TokenNames.SIP;
613        if ( nextToken.getTokenType() == TokenTypes.SIPS)
614        {
615            sipOrSips = TokenTypes.SIPS;
616            scheme = TokenNames.SIPS;
617        }
618
619        try {
620            lexer.match(sipOrSips);
621            lexer.match(':');
622            retval.setScheme(scheme);
623            int startOfUser = lexer.markInputPosition();
624            String userOrHost = user();// Note: user may contain ';', host may not...
625            String passOrPort = null;
626
627            // name:password or host:port
628            if ( lexer.lookAhead() == ':' ) {
629                lexer.consume(1);
630                passOrPort = password();
631            }
632
633            // name@hostPort
634            if ( lexer.lookAhead() == '@' ) {
635                lexer.consume(1);
636                retval.setUser( userOrHost );
637                if (passOrPort!=null) retval.setUserPassword( passOrPort );
638            } else {
639                // then userOrHost was a host, backtrack just in case a ';' was eaten...
640                lexer.rewindInputPosition( startOfUser );
641            }
642
643            HostNameParser hnp = new HostNameParser(this.getLexer());
644            HostPort hp = hnp.hostPort( false );
645            retval.setHostPort(hp);
646
647            lexer.selectLexer("charLexer");
648            while (lexer.hasMoreChars()) {
649            	// If the URI is not enclosed in brackets, parameters belong to header
650                if (lexer.lookAhead(0) != ';' || !inBrackets)
651                    break;
652                lexer.consume(1);
653                NameValue parms = uriParam();
654                if (parms != null) retval.setUriParameter(parms);
655            }
656
657            if (lexer.hasMoreChars() && lexer.lookAhead(0) == '?') {
658                lexer.consume(1);
659                while (lexer.hasMoreChars()) {
660                    NameValue parms = qheader();
661                    retval.setQHeader(parms);
662                    if (lexer.hasMoreChars() && lexer.lookAhead(0) != '&')
663                        break;
664                    else
665                        lexer.consume(1);
666                }
667            }
668            return retval;
669        // BEGIN android-added
670        } catch (RuntimeException e) {
671            throw new ParseException("Invalid URL: " + lexer.getBuffer(), -1);
672        // END android-added
673        } finally {
674            if (debug)
675                dbg_leave("sipURL");
676        }
677    }
678
679    public String peekScheme() throws ParseException {
680        Token[] tokens = lexer.peekNextToken(1);
681        if (tokens.length == 0)
682            return null;
683        String scheme = ((Token) tokens[0]).getTokenValue();
684        return scheme;
685    }
686
687    /**
688     * Get a name value for a given query header (ie one that comes
689     * after the ?).
690     */
691    protected NameValue qheader() throws ParseException {
692        String name = lexer.getNextToken('=');
693        lexer.consume(1);
694        String value = hvalue();
695        return new NameValue(name, value, false);
696
697    }
698
699    protected String hvalue() throws ParseException {
700        StringBuffer retval = new StringBuffer();
701        while (lexer.hasMoreChars()) {
702            char la = lexer.lookAhead(0);
703            // Look for a character that can terminate a URL.
704            boolean isValidChar = false;
705            switch (la) {
706                case '+':
707                case '?':
708                case ':':
709                case '[':
710                case ']':
711                case '/':
712                case '$':
713                case '_':
714                case '-':
715                case '"':
716                case '!':
717                case '~':
718                case '*':
719                case '.':
720                case '(':
721                case ')':
722                    isValidChar = true;
723            }
724            if (isValidChar || Lexer.isAlphaDigit(la)) {
725                lexer.consume(1);
726                retval.append(la);
727            } else if (la == '%') {
728                retval.append(escaped());
729            } else
730                break;
731        }
732        return retval.toString();
733    }
734
735    /**
736     * Scan forward until you hit a terminating character for a URL.
737     * We do not handle non sip urls in this implementation.
738     * @return the string that takes us to the end of this URL (i.e. to
739     * the next delimiter).
740     */
741    protected String urlString() throws ParseException {
742        StringBuffer retval = new StringBuffer();
743        lexer.selectLexer("charLexer");
744
745        while (lexer.hasMoreChars()) {
746            char la = lexer.lookAhead(0);
747            // Look for a character that can terminate a URL.
748            if (la == ' '
749                || la == '\t'
750                || la == '\n'
751                || la == '>'
752                || la == '<')
753                break;
754            lexer.consume(0);
755            retval.append(la);
756        }
757        return retval.toString();
758    }
759
760    protected String user() throws ParseException {
761        if (debug)
762            dbg_enter("user");
763        try {
764            int startIdx = lexer.getPtr();
765            while (lexer.hasMoreChars()) {
766                char la = lexer.lookAhead(0);
767                if (isUnreserved(la) || isUserUnreserved(la)) {
768                    lexer.consume(1);
769                } else if (isEscaped()) {
770                    lexer.consume(3);
771                } else
772                    break;
773            }
774            return lexer.getBuffer().substring(startIdx, lexer.getPtr());
775        } finally {
776            if (debug)
777                dbg_leave("user");
778        }
779
780    }
781
782    protected String password() throws ParseException {
783        int startIdx = lexer.getPtr();
784        while (true) {
785            char la = lexer.lookAhead(0);
786            boolean isValidChar = false;
787            switch (la) {
788                case '&':
789                case '=':
790                case '+':
791                case '$':
792                case ',':
793                    isValidChar = true;
794            }
795            if (isValidChar || isUnreserved(la)) {
796                lexer.consume(1);
797            } else if (isEscaped()) {
798                lexer.consume(3); // bug reported by
799                                // Jeff Haynie
800            } else
801                break;
802
803        }
804        return lexer.getBuffer().substring(startIdx, lexer.getPtr());
805    }
806
807    /**
808     * Default parse method. This method just calls uriReference.
809     */
810    public GenericURI parse() throws ParseException {
811        return uriReference( true );
812    }
813
814    // quick test routine for debugging type assignment
815    public static void main(String[] args) throws ParseException
816    {
817        // quick test for sips parsing
818        String[] test = { "sip:alice@example.com",
819                    "sips:alice@examples.com" ,
820                    "sip:3Zqkv5dajqaaas0tCjCxT0xH2ZEuEMsFl0xoasip%3A%2B3519116786244%40siplab.domain.com@213.0.115.163:7070"};
821
822        for ( int i = 0; i < test.length; i++)
823        {
824            URLParser p  = new URLParser(test[i]);
825
826                GenericURI uri = p.parse();
827                System.out.println("uri type returned " + uri.getClass().getName());
828                System.out.println(test[i] + " is SipUri? " + uri.isSipURI()
829                        + ">" + uri.encode());
830        }
831    }
832
833    /**
834
835    **/
836}
837
838