1//
2//  ========================================================================
3//  Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd.
4//  ------------------------------------------------------------------------
5//  All rights reserved. This program and the accompanying materials
6//  are made available under the terms of the Eclipse Public License v1.0
7//  and Apache License v2.0 which accompanies this distribution.
8//
9//      The Eclipse Public License is available at
10//      http://www.eclipse.org/legal/epl-v10.html
11//
12//      The Apache License v2.0 is available at
13//      http://www.opensource.org/licenses/apache2.0.php
14//
15//  You may elect to redistribute this code under either of these licenses.
16//  ========================================================================
17//
18
19package org.eclipse.jetty.http;
20
21import java.io.UnsupportedEncodingException;
22import java.net.URI;
23
24import org.eclipse.jetty.util.MultiMap;
25import org.eclipse.jetty.util.StringUtil;
26import org.eclipse.jetty.util.TypeUtil;
27import org.eclipse.jetty.util.URIUtil;
28import org.eclipse.jetty.util.UrlEncoded;
29import org.eclipse.jetty.util.Utf8StringBuilder;
30
31
32/* ------------------------------------------------------------ */
/** Http URI.
 * Parses an HTTP URI from a string or byte array.  Given a URI
 * <code>http://user@host:port/path/info;param?query#fragment</code>
 * this class will split it into the following undecoded optional elements:<ul>
 * <li>{@link #getScheme()} - http</li>
 * <li>{@link #getAuthority()} - //user@host:port</li>
 * <li>{@link #getHost()} - host</li>
 * <li>{@link #getPort()} - port</li>
 * <li>{@link #getPath()} - /path/info</li>
 * <li>{@link #getParam()} - param</li>
 * <li>{@link #getQuery()} - query</li>
 * <li>{@link #getFragment()} - fragment</li>
 * </ul>
 *
 */
48public class HttpURI
49{
    /** Shared empty buffer used as the initial and the cleared value of {@link #_raw}. */
    private static final byte[] __empty={};
    /** Parser states used by the scanning loops of parse2 and parseConnect (the value 3 is not used). */
    private final static int
    START=0,
    AUTH_OR_PATH=1,
    SCHEME_OR_PATH=2,
    AUTH=4,
    IPV6=5,
    PORT=6,
    PATH=7,
    PARAM=8,
    QUERY=9,
    ASTERISK=10;

    /** When true, a leading "//" is parsed as an authority even if no scheme preceded it. */
    boolean _partial=false;
    /** Buffer holding the undecoded URI bytes; all the indexes below point into it. */
    byte[] _raw=__empty;
    /** Cached string form of the URI; null means toString() rebuilds it from {@link #_raw}. */
    String _rawString;
    /** Index of the first byte of the URI (start of the scheme, if there is one). */
    int _scheme;
    /** Index where the authority starts; equal to {@link #_scheme} when there is no scheme. */
    int _authority;
    /** Index where the host starts. */
    int _host;
    /** Index where the host ends; when a port is present this is the index of its ':' separator. */
    int _port;
    /** Parsed port number, or -1 when the parser found none. */
    int _portValue;
    /** Index where the path starts. */
    int _path;
    /** Index of the ';' that starts the path parameter, or the start of the next component if none. */
    int _param;
    /** Index of the '?' that starts the query, or the start of the next component if none. */
    int _query;
    /** Index of the '#' that starts the fragment, or {@link #_end} if none. */
    int _fragment;
    /** Index one past the last byte of the URI. */
    int _end;
    /** Set by parse2 when a '%' is seen while scanning in the PATH state. */
    boolean _encoded=false;

    /** Reusable builder used to turn byte ranges of {@link #_raw} into strings. */
    final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
79
80    public HttpURI()
81    {
82
83    }
84
85    /* ------------------------------------------------------------ */
86    /**
87     * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
88     */
89    public HttpURI(boolean parsePartialAuth)
90    {
91        _partial=parsePartialAuth;
92    }
93
94    public HttpURI(String raw)
95    {
96        _rawString=raw;
97        byte[] b;
98        try
99        {
100            b = raw.getBytes(StringUtil.__UTF8);
101        }
102        catch (UnsupportedEncodingException e)
103        {
104           throw new RuntimeException(e.getMessage());
105        }
106        parse(b,0,b.length);
107    }
108
109    public HttpURI(byte[] raw,int offset, int length)
110    {
111        parse2(raw,offset,length);
112    }
113
114    public HttpURI(URI uri)
115    {
116        parse(uri.toASCIIString());
117    }
118
119    public void parse(String raw)
120    {
121        byte[] b = raw.getBytes();
122        parse2(b,0,b.length);
123        _rawString=raw;
124    }
125
126    public void parse(byte[] raw,int offset, int length)
127    {
128        _rawString=null;
129        parse2(raw,offset,length);
130    }
131
132
133    public void parseConnect(byte[] raw,int offset, int length)
134    {
135        _rawString=null;
136        _encoded=false;
137        _raw=raw;
138        int i=offset;
139        int e=offset+length;
140        int state=AUTH;
141        _end=offset+length;
142        _scheme=offset;
143        _authority=offset;
144        _host=offset;
145        _port=_end;
146        _portValue=-1;
147        _path=_end;
148        _param=_end;
149        _query=_end;
150        _fragment=_end;
151
152        loop: while (i<e)
153        {
154            char c=(char)(0xff&_raw[i]);
155            int s=i++;
156
157            switch (state)
158            {
159                case AUTH:
160                {
161                    switch (c)
162                    {
163                        case ':':
164                        {
165                            _port = s;
166                            break loop;
167                        }
168                        case '[':
169                        {
170                            state = IPV6;
171                            break;
172                        }
173                    }
174                    continue;
175                }
176
177                case IPV6:
178                {
179                    switch (c)
180                    {
181                        case '/':
182                        {
183                            throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
184                        }
185                        case ']':
186                        {
187                            state = AUTH;
188                            break;
189                        }
190                    }
191
192                    continue;
193                }
194            }
195        }
196
197        if (_port<_path)
198            _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
199        else
200            throw new IllegalArgumentException("No port");
201        _path=offset;
202    }
203
204
205    private void parse2(byte[] raw,int offset, int length)
206    {
207        _encoded=false;
208        _raw=raw;
209        int i=offset;
210        int e=offset+length;
211        int state=START;
212        int m=offset;
213        _end=offset+length;
214        _scheme=offset;
215        _authority=offset;
216        _host=offset;
217        _port=offset;
218        _portValue=-1;
219        _path=offset;
220        _param=_end;
221        _query=_end;
222        _fragment=_end;
223        while (i<e)
224        {
225            char c=(char)(0xff&_raw[i]);
226            int s=i++;
227
228            state: switch (state)
229            {
230                case START:
231                {
232                    m=s;
233                    switch(c)
234                    {
235                        case '/':
236                            state=AUTH_OR_PATH;
237                            break;
238                        case ';':
239                            _param=s;
240                            state=PARAM;
241                            break;
242                        case '?':
243                            _param=s;
244                            _query=s;
245                            state=QUERY;
246                            break;
247                        case '#':
248                            _param=s;
249                            _query=s;
250                            _fragment=s;
251                            break;
252                        case '*':
253                            _path=s;
254                            state=ASTERISK;
255                            break;
256
257                        default:
258                            state=SCHEME_OR_PATH;
259                    }
260
261                    continue;
262                }
263
264                case AUTH_OR_PATH:
265                {
266                    if ((_partial||_scheme!=_authority) && c=='/')
267                    {
268                        _host=i;
269                        _port=_end;
270                        _path=_end;
271                        state=AUTH;
272                    }
273                    else if (c==';' || c=='?' || c=='#')
274                    {
275                        i--;
276                        state=PATH;
277                    }
278                    else
279                    {
280                        _host=m;
281                        _port=m;
282                        state=PATH;
283                    }
284                    continue;
285                }
286
287                case SCHEME_OR_PATH:
288                {
289                    // short cut for http and https
290                    if (length>6 && c=='t')
291                    {
292                        if (_raw[offset+3]==':')
293                        {
294                            s=offset+3;
295                            i=offset+4;
296                            c=':';
297                        }
298                        else if (_raw[offset+4]==':')
299                        {
300                            s=offset+4;
301                            i=offset+5;
302                            c=':';
303                        }
304                        else if (_raw[offset+5]==':')
305                        {
306                            s=offset+5;
307                            i=offset+6;
308                            c=':';
309                        }
310                    }
311
312                    switch (c)
313                    {
314                        case ':':
315                        {
316                            m = i++;
317                            _authority = m;
318                            _path = m;
319                            c = (char)(0xff & _raw[i]);
320                            if (c == '/')
321                                state = AUTH_OR_PATH;
322                            else
323                            {
324                                _host = m;
325                                _port = m;
326                                state = PATH;
327                            }
328                            break;
329                        }
330
331                        case '/':
332                        {
333                            state = PATH;
334                            break;
335                        }
336
337                        case ';':
338                        {
339                            _param = s;
340                            state = PARAM;
341                            break;
342                        }
343
344                        case '?':
345                        {
346                            _param = s;
347                            _query = s;
348                            state = QUERY;
349                            break;
350                        }
351
352                        case '#':
353                        {
354                            _param = s;
355                            _query = s;
356                            _fragment = s;
357                            break;
358                        }
359                    }
360                    continue;
361                }
362
363                case AUTH:
364                {
365                    switch (c)
366                    {
367
368                        case '/':
369                        {
370                            m = s;
371                            _path = m;
372                            _port = _path;
373                            state = PATH;
374                            break;
375                        }
376                        case '@':
377                        {
378                            _host = i;
379                            break;
380                        }
381                        case ':':
382                        {
383                            _port = s;
384                            state = PORT;
385                            break;
386                        }
387                        case '[':
388                        {
389                            state = IPV6;
390                            break;
391                        }
392                    }
393                    continue;
394                }
395
396                case IPV6:
397                {
398                    switch (c)
399                    {
400                        case '/':
401                        {
402                            throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
403                        }
404                        case ']':
405                        {
406                            state = AUTH;
407                            break;
408                        }
409                    }
410
411                    continue;
412                }
413
414                case PORT:
415                {
416                    if (c=='/')
417                    {
418                        m=s;
419                        _path=m;
420                        if (_port<=_authority)
421                            _port=_path;
422                        state=PATH;
423                    }
424                    continue;
425                }
426
427                case PATH:
428                {
429                    switch (c)
430                    {
431                        case ';':
432                        {
433                            _param = s;
434                            state = PARAM;
435                            break;
436                        }
437                        case '?':
438                        {
439                            _param = s;
440                            _query = s;
441                            state = QUERY;
442                            break;
443                        }
444                        case '#':
445                        {
446                            _param = s;
447                            _query = s;
448                            _fragment = s;
449                            break state;
450                        }
451                        case '%':
452                        {
453                            _encoded=true;
454                        }
455                    }
456                    continue;
457                }
458
459                case PARAM:
460                {
461                    switch (c)
462                    {
463                        case '?':
464                        {
465                            _query = s;
466                            state = QUERY;
467                            break;
468                        }
469                        case '#':
470                        {
471                            _query = s;
472                            _fragment = s;
473                            break state;
474                        }
475                    }
476                    continue;
477                }
478
479                case QUERY:
480                {
481                    if (c=='#')
482                    {
483                        _fragment=s;
484                        break state;
485                    }
486                    continue;
487                }
488
489                case ASTERISK:
490                {
491                    throw new IllegalArgumentException("only '*'");
492                }
493            }
494        }
495
496        if (_port<_path)
497            _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
498    }
499
500    private String toUtf8String(int offset,int length)
501    {
502        _utf8b.reset();
503        _utf8b.append(_raw,offset,length);
504        return _utf8b.toString();
505    }
506
507    public String getScheme()
508    {
509        if (_scheme==_authority)
510            return null;
511        int l=_authority-_scheme;
512        if (l==5 &&
513            _raw[_scheme]=='h' &&
514            _raw[_scheme+1]=='t' &&
515            _raw[_scheme+2]=='t' &&
516            _raw[_scheme+3]=='p' )
517            return HttpSchemes.HTTP;
518        if (l==6 &&
519            _raw[_scheme]=='h' &&
520            _raw[_scheme+1]=='t' &&
521            _raw[_scheme+2]=='t' &&
522            _raw[_scheme+3]=='p' &&
523            _raw[_scheme+4]=='s' )
524            return HttpSchemes.HTTPS;
525
526        return toUtf8String(_scheme,_authority-_scheme-1);
527    }
528
529    public String getAuthority()
530    {
531        if (_authority==_path)
532            return null;
533        return toUtf8String(_authority,_path-_authority);
534    }
535
536    public String getHost()
537    {
538        if (_host==_port)
539            return null;
540        return toUtf8String(_host,_port-_host);
541    }
542
543    public int getPort()
544    {
545        return _portValue;
546    }
547
548    public String getPath()
549    {
550        if (_path==_param)
551            return null;
552        return toUtf8String(_path,_param-_path);
553    }
554
555    public String getDecodedPath()
556    {
557        if (_path==_param)
558            return null;
559
560        int length = _param-_path;
561        boolean decoding=false;
562
563        for (int i=_path;i<_param;i++)
564        {
565            byte b = _raw[i];
566
567            if (b=='%')
568            {
569                if (!decoding)
570                {
571                    _utf8b.reset();
572                    _utf8b.append(_raw,_path,i-_path);
573                    decoding=true;
574                }
575
576                if ((i+2)>=_param)
577                    throw new IllegalArgumentException("Bad % encoding: "+this);
578                if (_raw[i+1]=='u')
579                {
580                    if ((i+5)>=_param)
581                        throw new IllegalArgumentException("Bad %u encoding: "+this);
582                    try
583                    {
584                        String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
585                        _utf8b.getStringBuilder().append(unicode);
586                        i+=5;
587                    }
588                    catch(Exception e)
589                    {
590                        throw new RuntimeException(e);
591                    }
592                }
593                else
594                {
595                    b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
596                    _utf8b.append(b);
597                    i+=2;
598                }
599                continue;
600            }
601            else if (decoding)
602            {
603                _utf8b.append(b);
604            }
605        }
606
607        if (!decoding)
608            return toUtf8String(_path,length);
609        return _utf8b.toString();
610    }
611
612    public String getDecodedPath(String encoding)
613    {
614        if (_path==_param)
615            return null;
616
617        int length = _param-_path;
618        byte[] bytes=null;
619        int n=0;
620
621        for (int i=_path;i<_param;i++)
622        {
623            byte b = _raw[i];
624
625            if (b=='%')
626            {
627                if (bytes==null)
628                {
629                    bytes=new byte[length];
630                    System.arraycopy(_raw,_path,bytes,0,n);
631                }
632
633                if ((i+2)>=_param)
634                    throw new IllegalArgumentException("Bad % encoding: "+this);
635                if (_raw[i+1]=='u')
636                {
637                    if ((i+5)>=_param)
638                        throw new IllegalArgumentException("Bad %u encoding: "+this);
639
640                    try
641                    {
642                        String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
643                        byte[] encoded = unicode.getBytes(encoding);
644                        System.arraycopy(encoded,0,bytes,n,encoded.length);
645                        n+=encoded.length;
646                        i+=5;
647                    }
648                    catch(Exception e)
649                    {
650                        throw new RuntimeException(e);
651                    }
652                }
653                else
654                {
655                    b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
656                    bytes[n++]=b;
657                    i+=2;
658                }
659                continue;
660            }
661            else if (bytes==null)
662            {
663                n++;
664                continue;
665            }
666
667            bytes[n++]=b;
668        }
669
670
671        if (bytes==null)
672            return StringUtil.toString(_raw,_path,_param-_path,encoding);
673
674        return StringUtil.toString(bytes,0,n,encoding);
675    }
676
677
678
679
680
681
682
683    public String getPathAndParam()
684    {
685        if (_path==_query)
686            return null;
687        return toUtf8String(_path,_query-_path);
688    }
689
690    public String getCompletePath()
691    {
692        if (_path==_end)
693            return null;
694        return toUtf8String(_path,_end-_path);
695    }
696
697    public String getParam()
698    {
699        if (_param==_query)
700            return null;
701        return toUtf8String(_param+1,_query-_param-1);
702    }
703
704    public String getQuery()
705    {
706        if (_query==_fragment)
707            return null;
708        return toUtf8String(_query+1,_fragment-_query-1);
709    }
710
711    public String getQuery(String encoding)
712    {
713        if (_query==_fragment)
714            return null;
715        return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
716    }
717
718    public boolean hasQuery()
719    {
720        return (_fragment>_query);
721    }
722
723    public String getFragment()
724    {
725        if (_fragment==_end)
726            return null;
727        return toUtf8String(_fragment+1,_end-_fragment-1);
728    }
729
730    public void decodeQueryTo(MultiMap parameters)
731    {
732        if (_query==_fragment)
733            return;
734        _utf8b.reset();
735        UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
736    }
737
738    public void decodeQueryTo(MultiMap parameters, String encoding)
739        throws UnsupportedEncodingException
740    {
741        if (_query==_fragment)
742            return;
743
744        if (encoding==null || StringUtil.isUTF8(encoding))
745            UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
746        else
747            UrlEncoded.decodeTo(StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding),parameters,encoding);
748    }
749
750    public void clear()
751    {
752        _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
753        _raw=__empty;
754        _rawString="";
755        _encoded=false;
756    }
757
758    @Override
759    public String toString()
760    {
761        if (_rawString==null)
762            _rawString=toUtf8String(_scheme,_end-_scheme);
763        return _rawString;
764    }
765
766    public void writeTo(Utf8StringBuilder buf)
767    {
768        buf.append(_raw,_scheme,_end-_scheme);
769    }
770
771}
772