1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
21static void xmlCleanURI(xmlURIPtr uri);
22
/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy macros this one takes a pointer to the
 * character to test, not the character itself.
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences
 *
 * @p must be a modifiable pointer lvalue. A '%' is assumed to start a
 * three character escape; the two following characters are not checked
 * here, so the caller has to validate the escape first.
 */

#define NEXT(p) ((*(p) == '%')? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup(), with the
 * result cast to char *.
 */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
108
109/************************************************************************
110 *									*
111 *                         RFC 3986 parser				*
112 *									*
113 ************************************************************************/
114
/*
 * All the ISA_xxx macros take a pointer to the character to test.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||	\
       ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||		\
       ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first mismatch, so nothing is read past
 * the terminating 0 of the string.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
164
165/**
166 * xmlParse3986Scheme:
167 * @uri:  pointer to an URI structure
168 * @str:  pointer to the string to analyze
169 *
170 * Parse an URI scheme
171 *
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173 *
174 * Returns 0 or the error code
175 */
176static int
177xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178    const char *cur;
179
180    if (str == NULL)
181	return(-1);
182
183    cur = *str;
184    if (!ISA_ALPHA(cur))
185	return(2);
186    cur++;
187    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189    if (uri != NULL) {
190	if (uri->scheme != NULL) xmlFree(uri->scheme);
191	uri->scheme = STRNDUP(*str, cur - *str);
192    }
193    *str = cur;
194    return(0);
195}
196
197/**
198 * xmlParse3986Fragment:
199 * @uri:  pointer to an URI structure
200 * @str:  pointer to the string to analyze
201 *
202 * Parse the query part of an URI
203 *
204 * fragment      = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 *       in the fragment identifier but this is used very broadly for
207 *       xpointer scheme selection, so we are allowing it here to not break
208 *       for example all the DocBook processing chains.
209 *
210 * Returns 0 or the error code
211 */
212static int
213xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214{
215    const char *cur;
216
217    if (str == NULL)
218        return (-1);
219
220    cur = *str;
221
222    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223           (*cur == '[') || (*cur == ']') ||
224           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225        NEXT(cur);
226    if (uri != NULL) {
227        if (uri->fragment != NULL)
228            xmlFree(uri->fragment);
229	if (uri->cleanup & 2)
230	    uri->fragment = STRNDUP(*str, cur - *str);
231	else
232	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233    }
234    *str = cur;
235    return (0);
236}
237
238/**
239 * xmlParse3986Query:
240 * @uri:  pointer to an URI structure
241 * @str:  pointer to the string to analyze
242 *
243 * Parse the query part of an URI
244 *
245 * query = *uric
246 *
247 * Returns 0 or the error code
248 */
249static int
250xmlParse3986Query(xmlURIPtr uri, const char **str)
251{
252    const char *cur;
253
254    if (str == NULL)
255        return (-1);
256
257    cur = *str;
258
259    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261        NEXT(cur);
262    if (uri != NULL) {
263        if (uri->query != NULL)
264            xmlFree(uri->query);
265	if (uri->cleanup & 2)
266	    uri->query = STRNDUP(*str, cur - *str);
267	else
268	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270	/* Save the raw bytes of the query as well.
271	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272	 */
273	if (uri->query_raw != NULL)
274	    xmlFree (uri->query_raw);
275	uri->query_raw = STRNDUP (*str, cur - *str);
276    }
277    *str = cur;
278    return (0);
279}
280
281/**
282 * xmlParse3986Port:
283 * @uri:  pointer to an URI structure
284 * @str:  the string to analyze
285 *
286 * Parse a port  part and fills in the appropriate fields
287 * of the @uri structure
288 *
289 * port          = *DIGIT
290 *
291 * Returns 0 or the error code
292 */
293static int
294xmlParse3986Port(xmlURIPtr uri, const char **str)
295{
296    const char *cur = *str;
297
298    if (ISA_DIGIT(cur)) {
299	if (uri != NULL)
300	    uri->port = 0;
301	while (ISA_DIGIT(cur)) {
302	    if (uri != NULL)
303		uri->port = uri->port * 10 + (*cur - '0');
304	    cur++;
305	}
306	*str = cur;
307	return(0);
308    }
309    return(1);
310}
311
312/**
313 * xmlParse3986Userinfo:
314 * @uri:  pointer to an URI structure
315 * @str:  the string to analyze
316 *
317 * Parse an user informations part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326{
327    const char *cur;
328
329    cur = *str;
330    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331           ISA_SUB_DELIM(cur) || (*cur == ':'))
332	NEXT(cur);
333    if (*cur == '@') {
334	if (uri != NULL) {
335	    if (uri->user != NULL) xmlFree(uri->user);
336	    if (uri->cleanup & 2)
337		uri->user = STRNDUP(*str, cur - *str);
338	    else
339		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340	}
341	*str = cur;
342	return(0);
343    }
344    return(1);
345}
346
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. Numbers with a leading zero ("01") and three digit
 * numbers above 255 are refused. At most three digits are consumed; what
 * follows them is left for the caller to check.
 *
 * Returns 0 if found and skipped, 1 otherwise (*@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    char d0, d1, d2;

    d0 = cur[0];
    if ((d0 < '0') || (d0 > '9'))
        return(1);

    /* a lone digit: 0-9 */
    d1 = cur[1];
    if ((d1 < '0') || (d1 > '9')) {
        *str = cur + 1;
        return(0);
    }

    /*
     * cur[1] is a digit, so cur[2] is at worst the terminating 0.
     * Exactly two digits: 10-99, a leading zero is not allowed.
     */
    d2 = cur[2];
    if ((d2 < '0') || (d2 > '9')) {
        if (d0 == '0')
            return(1);
        *str = cur + 2;
        return(0);
    }

    /* three digits: 100-199, 200-249 or 250-255 */
    if (d0 == '1') {
        /* any two following digits are fine */
    } else if ((d0 == '2') && (d1 <= '4')) {
        /* 200-249 */
    } else if ((d0 == '2') && (d1 == '5') && (d2 <= '5')) {
        /* 250-255 */
    } else {
        return(1);
    }
    *str = cur + 3;
    return(0);
}
384/**
385 * xmlParse3986Host:
386 * @uri:  pointer to an URI structure
387 * @str:  the string to analyze
388 *
389 * Parse an host part and fills in the appropriate fields
390 * of the @uri structure
391 *
392 * host          = IP-literal / IPv4address / reg-name
393 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
394 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name      = *( unreserved / pct-encoded / sub-delims )
396 *
397 * Returns 0 or the error code
398 */
399static int
400xmlParse3986Host(xmlURIPtr uri, const char **str)
401{
402    const char *cur = *str;
403    const char *host;
404
405    host = cur;
406    /*
407     * IPv6 and future adressing scheme are enclosed between brackets
408     */
409    if (*cur == '[') {
410        cur++;
411	while ((*cur != ']') && (*cur != 0))
412	    cur++;
413	if (*cur != ']')
414	    return(1);
415	cur++;
416	goto found;
417    }
418    /*
419     * try to parse an IPv4
420     */
421    if (ISA_DIGIT(cur)) {
422        if (xmlParse3986DecOctet(&cur) != 0)
423	    goto not_ipv4;
424	if (*cur != '.')
425	    goto not_ipv4;
426	cur++;
427        if (xmlParse3986DecOctet(&cur) != 0)
428	    goto not_ipv4;
429	if (*cur != '.')
430	    goto not_ipv4;
431        if (xmlParse3986DecOctet(&cur) != 0)
432	    goto not_ipv4;
433	if (*cur != '.')
434	    goto not_ipv4;
435        if (xmlParse3986DecOctet(&cur) != 0)
436	    goto not_ipv4;
437	goto found;
438not_ipv4:
439        cur = *str;
440    }
441    /*
442     * then this should be a hostname which can be empty
443     */
444    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445        NEXT(cur);
446found:
447    if (uri != NULL) {
448	if (uri->authority != NULL) xmlFree(uri->authority);
449	uri->authority = NULL;
450	if (uri->server != NULL) xmlFree(uri->server);
451	if (cur != host) {
452	    if (uri->cleanup & 2)
453		uri->server = STRNDUP(host, cur - host);
454	    else
455		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456	} else
457	    uri->server = NULL;
458    }
459    *str = cur;
460    return(0);
461}
462
463/**
464 * xmlParse3986Authority:
465 * @uri:  pointer to an URI structure
466 * @str:  the string to analyze
467 *
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
470 *
471 * authority     = [ userinfo "@" ] host [ ":" port ]
472 *
473 * Returns 0 or the error code
474 */
475static int
476xmlParse3986Authority(xmlURIPtr uri, const char **str)
477{
478    const char *cur;
479    int ret;
480
481    cur = *str;
482    /*
483     * try to parse an userinfo and check for the trailing @
484     */
485    ret = xmlParse3986Userinfo(uri, &cur);
486    if ((ret != 0) || (*cur != '@'))
487        cur = *str;
488    else
489        cur++;
490    ret = xmlParse3986Host(uri, &cur);
491    if (ret != 0) return(ret);
492    if (*cur == ':') {
493        cur++;
494        ret = xmlParse3986Port(uri, &cur);
495	if (ret != 0) return(ret);
496    }
497    *str = cur;
498    return(0);
499}
500
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping at the first character which is not
 * a pchar or which is the @forbid one.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success with *@str moved past the segment, 1 if the
 * string does not start with a pchar and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    /* nothing to skip: the outcome only depends on @empty */
    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    while ((*end != forbid) && ISA_PCHAR(end))
        NEXT(end);

    *str = end;
    return (0);
}
533
534/**
535 * xmlParse3986PathAbEmpty:
536 * @uri:  pointer to an URI structure
537 * @str:  the string to analyze
538 *
539 * Parse an path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
541 *
542 * path-abempty  = *( "/" segment )
543 *
544 * Returns 0 or the error code
545 */
546static int
547xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548{
549    const char *cur;
550    int ret;
551
552    cur = *str;
553
554    while (*cur == '/') {
555        cur++;
556	ret = xmlParse3986Segment(&cur, 0, 1);
557	if (ret != 0) return(ret);
558    }
559    if (uri != NULL) {
560	if (uri->path != NULL) xmlFree(uri->path);
561        if (*str != cur) {
562            if (uri->cleanup & 2)
563                uri->path = STRNDUP(*str, cur - *str);
564            else
565                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
566        } else {
567            uri->path = NULL;
568        }
569    }
570    *str = cur;
571    return (0);
572}
573
574/**
575 * xmlParse3986PathAbsolute:
576 * @uri:  pointer to an URI structure
577 * @str:  the string to analyze
578 *
579 * Parse an path absolute and fills in the appropriate fields
580 * of the @uri structure
581 *
582 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
583 *
584 * Returns 0 or the error code
585 */
586static int
587xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
588{
589    const char *cur;
590    int ret;
591
592    cur = *str;
593
594    if (*cur != '/')
595        return(1);
596    cur++;
597    ret = xmlParse3986Segment(&cur, 0, 0);
598    if (ret == 0) {
599	while (*cur == '/') {
600	    cur++;
601	    ret = xmlParse3986Segment(&cur, 0, 1);
602	    if (ret != 0) return(ret);
603	}
604    }
605    if (uri != NULL) {
606	if (uri->path != NULL) xmlFree(uri->path);
607        if (cur != *str) {
608            if (uri->cleanup & 2)
609                uri->path = STRNDUP(*str, cur - *str);
610            else
611                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
612        } else {
613            uri->path = NULL;
614        }
615    }
616    *str = cur;
617    return (0);
618}
619
620/**
621 * xmlParse3986PathRootless:
622 * @uri:  pointer to an URI structure
623 * @str:  the string to analyze
624 *
625 * Parse an path without root and fills in the appropriate fields
626 * of the @uri structure
627 *
628 * path-rootless = segment-nz *( "/" segment )
629 *
630 * Returns 0 or the error code
631 */
632static int
633xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
634{
635    const char *cur;
636    int ret;
637
638    cur = *str;
639
640    ret = xmlParse3986Segment(&cur, 0, 0);
641    if (ret != 0) return(ret);
642    while (*cur == '/') {
643        cur++;
644	ret = xmlParse3986Segment(&cur, 0, 1);
645	if (ret != 0) return(ret);
646    }
647    if (uri != NULL) {
648	if (uri->path != NULL) xmlFree(uri->path);
649        if (cur != *str) {
650            if (uri->cleanup & 2)
651                uri->path = STRNDUP(*str, cur - *str);
652            else
653                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
654        } else {
655            uri->path = NULL;
656        }
657    }
658    *str = cur;
659    return (0);
660}
661
662/**
663 * xmlParse3986PathNoScheme:
664 * @uri:  pointer to an URI structure
665 * @str:  the string to analyze
666 *
667 * Parse an path which is not a scheme and fills in the appropriate fields
668 * of the @uri structure
669 *
670 * path-noscheme = segment-nz-nc *( "/" segment )
671 *
672 * Returns 0 or the error code
673 */
674static int
675xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
676{
677    const char *cur;
678    int ret;
679
680    cur = *str;
681
682    ret = xmlParse3986Segment(&cur, ':', 0);
683    if (ret != 0) return(ret);
684    while (*cur == '/') {
685        cur++;
686	ret = xmlParse3986Segment(&cur, 0, 1);
687	if (ret != 0) return(ret);
688    }
689    if (uri != NULL) {
690	if (uri->path != NULL) xmlFree(uri->path);
691        if (cur != *str) {
692            if (uri->cleanup & 2)
693                uri->path = STRNDUP(*str, cur - *str);
694            else
695                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
696        } else {
697            uri->path = NULL;
698        }
699    }
700    *str = cur;
701    return (0);
702}
703
704/**
705 * xmlParse3986HierPart:
706 * @uri:  pointer to an URI structure
707 * @str:  the string to analyze
708 *
709 * Parse an hierarchical part and fills in the appropriate fields
710 * of the @uri structure
711 *
712 * hier-part     = "//" authority path-abempty
713 *                / path-absolute
714 *                / path-rootless
715 *                / path-empty
716 *
717 * Returns 0 or the error code
718 */
719static int
720xmlParse3986HierPart(xmlURIPtr uri, const char **str)
721{
722    const char *cur;
723    int ret;
724
725    cur = *str;
726
727    if ((*cur == '/') && (*(cur + 1) == '/')) {
728        cur += 2;
729	ret = xmlParse3986Authority(uri, &cur);
730	if (ret != 0) return(ret);
731	ret = xmlParse3986PathAbEmpty(uri, &cur);
732	if (ret != 0) return(ret);
733	*str = cur;
734	return(0);
735    } else if (*cur == '/') {
736        ret = xmlParse3986PathAbsolute(uri, &cur);
737	if (ret != 0) return(ret);
738    } else if (ISA_PCHAR(cur)) {
739        ret = xmlParse3986PathRootless(uri, &cur);
740	if (ret != 0) return(ret);
741    } else {
742	/* path-empty is effectively empty */
743	if (uri != NULL) {
744	    if (uri->path != NULL) xmlFree(uri->path);
745	    uri->path = NULL;
746	}
747    }
748    *str = cur;
749    return (0);
750}
751
752/**
753 * xmlParse3986RelativeRef:
754 * @uri:  pointer to an URI structure
755 * @str:  the string to analyze
756 *
757 * Parse an URI string and fills in the appropriate fields
758 * of the @uri structure
759 *
760 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
761 * relative-part = "//" authority path-abempty
762 *               / path-absolute
763 *               / path-noscheme
764 *               / path-empty
765 *
766 * Returns 0 or the error code
767 */
768static int
769xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
770    int ret;
771
772    if ((*str == '/') && (*(str + 1) == '/')) {
773        str += 2;
774	ret = xmlParse3986Authority(uri, &str);
775	if (ret != 0) return(ret);
776	ret = xmlParse3986PathAbEmpty(uri, &str);
777	if (ret != 0) return(ret);
778    } else if (*str == '/') {
779	ret = xmlParse3986PathAbsolute(uri, &str);
780	if (ret != 0) return(ret);
781    } else if (ISA_PCHAR(str)) {
782        ret = xmlParse3986PathNoScheme(uri, &str);
783	if (ret != 0) return(ret);
784    } else {
785	/* path-empty is effectively empty */
786	if (uri != NULL) {
787	    if (uri->path != NULL) xmlFree(uri->path);
788	    uri->path = NULL;
789	}
790    }
791
792    if (*str == '?') {
793	str++;
794	ret = xmlParse3986Query(uri, &str);
795	if (ret != 0) return(ret);
796    }
797    if (*str == '#') {
798	str++;
799	ret = xmlParse3986Fragment(uri, &str);
800	if (ret != 0) return(ret);
801    }
802    if (*str != 0) {
803	xmlCleanURI(uri);
804	return(1);
805    }
806    return(0);
807}
808
809
810/**
811 * xmlParse3986URI:
812 * @uri:  pointer to an URI structure
813 * @str:  the string to analyze
814 *
815 * Parse an URI string and fills in the appropriate fields
816 * of the @uri structure
817 *
818 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
819 *
820 * Returns 0 or the error code
821 */
822static int
823xmlParse3986URI(xmlURIPtr uri, const char *str) {
824    int ret;
825
826    ret = xmlParse3986Scheme(uri, &str);
827    if (ret != 0) return(ret);
828    if (*str != ':') {
829	return(1);
830    }
831    str++;
832    ret = xmlParse3986HierPart(uri, &str);
833    if (ret != 0) return(ret);
834    if (*str == '?') {
835	str++;
836	ret = xmlParse3986Query(uri, &str);
837	if (ret != 0) return(ret);
838    }
839    if (*str == '#') {
840	str++;
841	ret = xmlParse3986Fragment(uri, &str);
842	if (ret != 0) return(ret);
843    }
844    if (*str != 0) {
845	xmlCleanURI(uri);
846	return(1);
847    }
848    return(0);
849}
850
851/**
852 * xmlParse3986URIReference:
853 * @uri:  pointer to an URI structure
854 * @str:  the string to analyze
855 *
856 * Parse an URI reference string and fills in the appropriate fields
857 * of the @uri structure
858 *
859 * URI-reference = URI / relative-ref
860 *
861 * Returns 0 or the error code
862 */
863static int
864xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
865    int ret;
866
867    if (str == NULL)
868	return(-1);
869    xmlCleanURI(uri);
870
871    /*
872     * Try first to parse absolute refs, then fallback to relative if
873     * it fails.
874     */
875    ret = xmlParse3986URI(uri, str);
876    if (ret != 0) {
877	xmlCleanURI(uri);
878        ret = xmlParse3986RelativeRef(uri, str);
879	if (ret != 0) {
880	    xmlCleanURI(uri);
881	    return(ret);
882	}
883    }
884    return(0);
885}
886
887/**
888 * xmlParseURI:
889 * @str:  the URI string to analyze
890 *
891 * Parse an URI based on RFC 3986
892 *
893 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
894 *
895 * Returns a newly built xmlURIPtr or NULL in case of error
896 */
897xmlURIPtr
898xmlParseURI(const char *str) {
899    xmlURIPtr uri;
900    int ret;
901
902    if (str == NULL)
903	return(NULL);
904    uri = xmlCreateURI();
905    if (uri != NULL) {
906	ret = xmlParse3986URIReference(uri, str);
907        if (ret) {
908	    xmlFreeURI(uri);
909	    return(NULL);
910	}
911    }
912    return(uri);
913}
914
915/**
916 * xmlParseURIReference:
917 * @uri:  pointer to an URI structure
918 * @str:  the string to analyze
919 *
920 * Parse an URI reference string based on RFC 3986 and fills in the
921 * appropriate fields of the @uri structure
922 *
923 * URI-reference = URI / relative-ref
924 *
925 * Returns 0 or the error code
926 */
927int
928xmlParseURIReference(xmlURIPtr uri, const char *str) {
929    return(xmlParse3986URIReference(uri, str));
930}
931
932/**
933 * xmlParseURIRaw:
934 * @str:  the URI string to analyze
935 * @raw:  if 1 unescaping of URI pieces are disabled
936 *
937 * Parse an URI but allows to keep intact the original fragments.
938 *
939 * URI-reference = URI / relative-ref
940 *
941 * Returns a newly built xmlURIPtr or NULL in case of error
942 */
943xmlURIPtr
944xmlParseURIRaw(const char *str, int raw) {
945    xmlURIPtr uri;
946    int ret;
947
948    if (str == NULL)
949	return(NULL);
950    uri = xmlCreateURI();
951    if (uri != NULL) {
952        if (raw) {
953	    uri->cleanup |= 2;
954	}
955	ret = xmlParseURIReference(uri, str);
956        if (ret) {
957	    xmlFreeURI(uri);
958	    return(NULL);
959	}
960    }
961    return(uri);
962}
963
964/************************************************************************
965 *									*
966 *			Generic URI structure functions			*
967 *									*
968 ************************************************************************/
969
970/**
971 * xmlCreateURI:
972 *
973 * Simply creates an empty xmlURI
974 *
975 * Returns the new structure or NULL in case of error
976 */
977xmlURIPtr
978xmlCreateURI(void) {
979    xmlURIPtr ret;
980
981    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
982    if (ret == NULL) {
983	xmlGenericError(xmlGenericErrorContext,
984		"xmlCreateURI: out of memory\n");
985	return(NULL);
986    }
987    memset(ret, 0, sizeof(xmlURI));
988    return(ret);
989}
990
991/**
992 * xmlSaveUri:
993 * @uri:  pointer to an xmlURI
994 *
995 * Save the URI as an escaped string
996 *
997 * Returns a new string (to be deallocated by caller)
998 */
999xmlChar *
1000xmlSaveUri(xmlURIPtr uri) {
1001    xmlChar *ret = NULL;
1002    xmlChar *temp;
1003    const char *p;
1004    int len;
1005    int max;
1006
1007    if (uri == NULL) return(NULL);
1008
1009
1010    max = 80;
1011    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1012    if (ret == NULL) {
1013	xmlGenericError(xmlGenericErrorContext,
1014		"xmlSaveUri: out of memory\n");
1015	return(NULL);
1016    }
1017    len = 0;
1018
1019    if (uri->scheme != NULL) {
1020	p = uri->scheme;
1021	while (*p != 0) {
1022	    if (len >= max) {
1023		max *= 2;
1024		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1025		if (temp == NULL) {
1026		    xmlGenericError(xmlGenericErrorContext,
1027			    "xmlSaveUri: out of memory\n");
1028		    xmlFree(ret);
1029		    return(NULL);
1030		}
1031		ret = temp;
1032	    }
1033	    ret[len++] = *p++;
1034	}
1035	if (len >= max) {
1036	    max *= 2;
1037	    temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038	    if (temp == NULL) {
1039		xmlGenericError(xmlGenericErrorContext,
1040			"xmlSaveUri: out of memory\n");
1041		xmlFree(ret);
1042		return(NULL);
1043	    }
1044	    ret = temp;
1045	}
1046	ret[len++] = ':';
1047    }
1048    if (uri->opaque != NULL) {
1049	p = uri->opaque;
1050	while (*p != 0) {
1051	    if (len + 3 >= max) {
1052		max *= 2;
1053		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1054		if (temp == NULL) {
1055		    xmlGenericError(xmlGenericErrorContext,
1056			    "xmlSaveUri: out of memory\n");
1057		    xmlFree(ret);
1058		    return(NULL);
1059		}
1060		ret = temp;
1061	    }
1062	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1063		ret[len++] = *p++;
1064	    else {
1065		int val = *(unsigned char *)p++;
1066		int hi = val / 0x10, lo = val % 0x10;
1067		ret[len++] = '%';
1068		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1069		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1070	    }
1071	}
1072    } else {
1073	if (uri->server != NULL) {
1074	    if (len + 3 >= max) {
1075		max *= 2;
1076		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1077		if (temp == NULL) {
1078		    xmlGenericError(xmlGenericErrorContext,
1079			    "xmlSaveUri: out of memory\n");
1080                  xmlFree(ret);
1081		    return(NULL);
1082		}
1083		ret = temp;
1084	    }
1085	    ret[len++] = '/';
1086	    ret[len++] = '/';
1087	    if (uri->user != NULL) {
1088		p = uri->user;
1089		while (*p != 0) {
1090		    if (len + 3 >= max) {
1091			max *= 2;
1092			temp = (xmlChar *) xmlRealloc(ret,
1093				(max + 1) * sizeof(xmlChar));
1094			if (temp == NULL) {
1095			    xmlGenericError(xmlGenericErrorContext,
1096				    "xmlSaveUri: out of memory\n");
1097			    xmlFree(ret);
1098			    return(NULL);
1099			}
1100			ret = temp;
1101		    }
1102		    if ((IS_UNRESERVED(*(p))) ||
1103			((*(p) == ';')) || ((*(p) == ':')) ||
1104			((*(p) == '&')) || ((*(p) == '=')) ||
1105			((*(p) == '+')) || ((*(p) == '$')) ||
1106			((*(p) == ',')))
1107			ret[len++] = *p++;
1108		    else {
1109			int val = *(unsigned char *)p++;
1110			int hi = val / 0x10, lo = val % 0x10;
1111			ret[len++] = '%';
1112			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1113			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1114		    }
1115		}
1116		if (len + 3 >= max) {
1117		    max *= 2;
1118		    temp = (xmlChar *) xmlRealloc(ret,
1119			    (max + 1) * sizeof(xmlChar));
1120		    if (temp == NULL) {
1121			xmlGenericError(xmlGenericErrorContext,
1122				"xmlSaveUri: out of memory\n");
1123			xmlFree(ret);
1124			return(NULL);
1125		    }
1126		    ret = temp;
1127		}
1128		ret[len++] = '@';
1129	    }
1130	    p = uri->server;
1131	    while (*p != 0) {
1132		if (len >= max) {
1133		    max *= 2;
1134		    temp = (xmlChar *) xmlRealloc(ret,
1135			    (max + 1) * sizeof(xmlChar));
1136		    if (temp == NULL) {
1137			xmlGenericError(xmlGenericErrorContext,
1138				"xmlSaveUri: out of memory\n");
1139			xmlFree(ret);
1140			return(NULL);
1141		    }
1142		    ret = temp;
1143		}
1144		ret[len++] = *p++;
1145	    }
1146	    if (uri->port > 0) {
1147		if (len + 10 >= max) {
1148		    max *= 2;
1149		    temp = (xmlChar *) xmlRealloc(ret,
1150			    (max + 1) * sizeof(xmlChar));
1151		    if (temp == NULL) {
1152			xmlGenericError(xmlGenericErrorContext,
1153				"xmlSaveUri: out of memory\n");
1154                     xmlFree(ret);
1155			return(NULL);
1156		    }
1157		    ret = temp;
1158		}
1159		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1160	    }
1161	} else if (uri->authority != NULL) {
1162	    if (len + 3 >= max) {
1163		max *= 2;
1164		temp = (xmlChar *) xmlRealloc(ret,
1165			(max + 1) * sizeof(xmlChar));
1166		if (temp == NULL) {
1167			xmlGenericError(xmlGenericErrorContext,
1168				"xmlSaveUri: out of memory\n");
1169                     xmlFree(ret);
1170			return(NULL);
1171		    }
1172		    ret = temp;
1173	    }
1174	    ret[len++] = '/';
1175	    ret[len++] = '/';
1176	    p = uri->authority;
1177	    while (*p != 0) {
1178		if (len + 3 >= max) {
1179		    max *= 2;
1180		    temp = (xmlChar *) xmlRealloc(ret,
1181			    (max + 1) * sizeof(xmlChar));
1182		    if (temp == NULL) {
1183			xmlGenericError(xmlGenericErrorContext,
1184				"xmlSaveUri: out of memory\n");
1185                     xmlFree(ret);
1186			return(NULL);
1187		    }
1188		    ret = temp;
1189		}
1190		if ((IS_UNRESERVED(*(p))) ||
1191                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1192                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1193                    ((*(p) == '=')) || ((*(p) == '+')))
1194		    ret[len++] = *p++;
1195		else {
1196		    int val = *(unsigned char *)p++;
1197		    int hi = val / 0x10, lo = val % 0x10;
1198		    ret[len++] = '%';
1199		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1200		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1201		}
1202	    }
1203	} else if (uri->scheme != NULL) {
1204	    if (len + 3 >= max) {
1205		max *= 2;
1206		temp = (xmlChar *) xmlRealloc(ret,
1207			(max + 1) * sizeof(xmlChar));
1208		if (temp == NULL) {
1209			xmlGenericError(xmlGenericErrorContext,
1210				"xmlSaveUri: out of memory\n");
1211                     xmlFree(ret);
1212			return(NULL);
1213		    }
1214		    ret = temp;
1215	    }
1216	    ret[len++] = '/';
1217	    ret[len++] = '/';
1218	}
1219	if (uri->path != NULL) {
1220	    p = uri->path;
1221	    /*
1222	     * the colon in file:///d: should not be escaped or
1223	     * Windows accesses fail later.
1224	     */
1225	    if ((uri->scheme != NULL) &&
1226		(p[0] == '/') &&
1227		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1228		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1229		(p[2] == ':') &&
1230	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1231		if (len + 3 >= max) {
1232		    max *= 2;
1233		    ret = (xmlChar *) xmlRealloc(ret,
1234			    (max + 1) * sizeof(xmlChar));
1235		    if (ret == NULL) {
1236			xmlGenericError(xmlGenericErrorContext,
1237				"xmlSaveUri: out of memory\n");
1238			return(NULL);
1239		    }
1240		}
1241		ret[len++] = *p++;
1242		ret[len++] = *p++;
1243		ret[len++] = *p++;
1244	    }
1245	    while (*p != 0) {
1246		if (len + 3 >= max) {
1247		    max *= 2;
1248		    temp = (xmlChar *) xmlRealloc(ret,
1249			    (max + 1) * sizeof(xmlChar));
1250		    if (temp == NULL) {
1251			xmlGenericError(xmlGenericErrorContext,
1252				"xmlSaveUri: out of memory\n");
1253                     xmlFree(ret);
1254			return(NULL);
1255		    }
1256		    ret = temp;
1257		}
1258		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1259                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1260	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1261	            ((*(p) == ',')))
1262		    ret[len++] = *p++;
1263		else {
1264		    int val = *(unsigned char *)p++;
1265		    int hi = val / 0x10, lo = val % 0x10;
1266		    ret[len++] = '%';
1267		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1268		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1269		}
1270	    }
1271	}
1272	if (uri->query_raw != NULL) {
1273	    if (len + 1 >= max) {
1274		max *= 2;
1275		temp = (xmlChar *) xmlRealloc(ret,
1276			(max + 1) * sizeof(xmlChar));
1277		if (temp == NULL) {
1278			xmlGenericError(xmlGenericErrorContext,
1279				"xmlSaveUri: out of memory\n");
1280                     xmlFree(ret);
1281			return(NULL);
1282		    }
1283		    ret = temp;
1284	    }
1285	    ret[len++] = '?';
1286	    p = uri->query_raw;
1287	    while (*p != 0) {
1288		if (len + 1 >= max) {
1289		    max *= 2;
1290		    temp = (xmlChar *) xmlRealloc(ret,
1291			    (max + 1) * sizeof(xmlChar));
1292		    if (temp == NULL) {
1293			xmlGenericError(xmlGenericErrorContext,
1294				"xmlSaveUri: out of memory\n");
1295                     xmlFree(ret);
1296			return(NULL);
1297		    }
1298		    ret = temp;
1299		}
1300		ret[len++] = *p++;
1301	    }
1302	} else if (uri->query != NULL) {
1303	    if (len + 3 >= max) {
1304		max *= 2;
1305		temp = (xmlChar *) xmlRealloc(ret,
1306			(max + 1) * sizeof(xmlChar));
1307		if (temp == NULL) {
1308			xmlGenericError(xmlGenericErrorContext,
1309				"xmlSaveUri: out of memory\n");
1310                     xmlFree(ret);
1311			return(NULL);
1312		    }
1313		    ret = temp;
1314	    }
1315	    ret[len++] = '?';
1316	    p = uri->query;
1317	    while (*p != 0) {
1318		if (len + 3 >= max) {
1319		    max *= 2;
1320		    temp = (xmlChar *) xmlRealloc(ret,
1321			    (max + 1) * sizeof(xmlChar));
1322		    if (temp == NULL) {
1323			xmlGenericError(xmlGenericErrorContext,
1324				"xmlSaveUri: out of memory\n");
1325                     xmlFree(ret);
1326			return(NULL);
1327		    }
1328		    ret = temp;
1329		}
1330		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1331		    ret[len++] = *p++;
1332		else {
1333		    int val = *(unsigned char *)p++;
1334		    int hi = val / 0x10, lo = val % 0x10;
1335		    ret[len++] = '%';
1336		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1337		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1338		}
1339	    }
1340	}
1341    }
1342    if (uri->fragment != NULL) {
1343	if (len + 3 >= max) {
1344	    max *= 2;
1345	    temp = (xmlChar *) xmlRealloc(ret,
1346		    (max + 1) * sizeof(xmlChar));
1347	    if (temp == NULL) {
1348			xmlGenericError(xmlGenericErrorContext,
1349				"xmlSaveUri: out of memory\n");
1350                     xmlFree(ret);
1351			return(NULL);
1352		    }
1353		    ret = temp;
1354	}
1355	ret[len++] = '#';
1356	p = uri->fragment;
1357	while (*p != 0) {
1358	    if (len + 3 >= max) {
1359		max *= 2;
1360		temp = (xmlChar *) xmlRealloc(ret,
1361			(max + 1) * sizeof(xmlChar));
1362		if (temp == NULL) {
1363			xmlGenericError(xmlGenericErrorContext,
1364				"xmlSaveUri: out of memory\n");
1365                     xmlFree(ret);
1366			return(NULL);
1367		    }
1368		    ret = temp;
1369	    }
1370	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1371		ret[len++] = *p++;
1372	    else {
1373		int val = *(unsigned char *)p++;
1374		int hi = val / 0x10, lo = val % 0x10;
1375		ret[len++] = '%';
1376		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1377		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1378	    }
1379	}
1380    }
1381    if (len >= max) {
1382	max *= 2;
1383	temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1384	if (temp == NULL) {
1385			xmlGenericError(xmlGenericErrorContext,
1386				"xmlSaveUri: out of memory\n");
1387                     xmlFree(ret);
1388			return(NULL);
1389		    }
1390		    ret = temp;
1391    }
1392    ret[len] = 0;
1393    return(ret);
1394}
1395
1396/**
1397 * xmlPrintURI:
1398 * @stream:  a FILE* for the output
1399 * @uri:  pointer to an xmlURI
1400 *
1401 * Prints the URI in the stream @stream.
1402 */
1403void
1404xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1405    xmlChar *out;
1406
1407    out = xmlSaveUri(uri);
1408    if (out != NULL) {
1409	fprintf(stream, "%s", (char *) out);
1410	xmlFree(out);
1411    }
1412}
1413
1414/**
1415 * xmlCleanURI:
1416 * @uri:  pointer to an xmlURI
1417 *
1418 * Make sure the xmlURI struct is free of content
1419 */
1420static void
1421xmlCleanURI(xmlURIPtr uri) {
1422    if (uri == NULL) return;
1423
1424    if (uri->scheme != NULL) xmlFree(uri->scheme);
1425    uri->scheme = NULL;
1426    if (uri->server != NULL) xmlFree(uri->server);
1427    uri->server = NULL;
1428    if (uri->user != NULL) xmlFree(uri->user);
1429    uri->user = NULL;
1430    if (uri->path != NULL) xmlFree(uri->path);
1431    uri->path = NULL;
1432    if (uri->fragment != NULL) xmlFree(uri->fragment);
1433    uri->fragment = NULL;
1434    if (uri->opaque != NULL) xmlFree(uri->opaque);
1435    uri->opaque = NULL;
1436    if (uri->authority != NULL) xmlFree(uri->authority);
1437    uri->authority = NULL;
1438    if (uri->query != NULL) xmlFree(uri->query);
1439    uri->query = NULL;
1440    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1441    uri->query_raw = NULL;
1442}
1443
1444/**
1445 * xmlFreeURI:
1446 * @uri:  pointer to an xmlURI
1447 *
1448 * Free up the xmlURI struct
1449 */
1450void
1451xmlFreeURI(xmlURIPtr uri) {
1452    if (uri == NULL) return;
1453
1454    if (uri->scheme != NULL) xmlFree(uri->scheme);
1455    if (uri->server != NULL) xmlFree(uri->server);
1456    if (uri->user != NULL) xmlFree(uri->user);
1457    if (uri->path != NULL) xmlFree(uri->path);
1458    if (uri->fragment != NULL) xmlFree(uri->fragment);
1459    if (uri->opaque != NULL) xmlFree(uri->opaque);
1460    if (uri->authority != NULL) xmlFree(uri->authority);
1461    if (uri->query != NULL) xmlFree(uri->query);
1462    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1463    xmlFree(uri);
1464}
1465
1466/************************************************************************
1467 *									*
1468 *			Helper functions				*
1469 *									*
1470 ************************************************************************/
1471
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of "/" between segments are
 * collapsed to a single "/" as part of the first pass.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is the read position, "out" the write position; at the top of
     * the loop "cur" always sits on the first character of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so strcpy() must not be
         * used here; copy byte by byte, terminator included.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the very start of the buffer, or
         * when only "/" characters precede it.  Testing the character the
         * backward scan stopped on (rather than comparing the pointer with
         * "path") matters for a one-character first segment such as the
         * "a" of "a/b/../..": the scan stops on path[0], which is a real
         * segment that must be re-examined.
         */
        if (cur == path)
            continue;
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still points inside the original buffer (the string can only
     * have shrunk since the first pass), so this store is harmless.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1659
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f or A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return((c >= 'A') && (c <= 'F'));
}
1667
1668/**
1669 * xmlURIUnescapeString:
1670 * @str:  the string to unescape
1671 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
1672 * @target:  optional destination buffer
1673 *
1674 * Unescaping routine, but does not check that the string is an URI. The
1675 * output is a direct unsigned char translation of %XX values (no encoding)
1676 * Note that the length of the result can only be smaller or same size as
1677 * the input string.
1678 *
1679 * Returns a copy of the string, but unescaped, will return NULL only in case
1680 * of error
1681 */
1682char *
1683xmlURIUnescapeString(const char *str, int len, char *target) {
1684    char *ret, *out;
1685    const char *in;
1686
1687    if (str == NULL)
1688	return(NULL);
1689    if (len <= 0) len = strlen(str);
1690    if (len < 0) return(NULL);
1691
1692    if (target == NULL) {
1693	ret = (char *) xmlMallocAtomic(len + 1);
1694	if (ret == NULL) {
1695	    xmlGenericError(xmlGenericErrorContext,
1696		    "xmlURIUnescapeString: out of memory\n");
1697	    return(NULL);
1698	}
1699    } else
1700	ret = target;
1701    in = str;
1702    out = ret;
1703    while(len > 0) {
1704	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1705	    in++;
1706	    if ((*in >= '0') && (*in <= '9'))
1707	        *out = (*in - '0');
1708	    else if ((*in >= 'a') && (*in <= 'f'))
1709	        *out = (*in - 'a') + 10;
1710	    else if ((*in >= 'A') && (*in <= 'F'))
1711	        *out = (*in - 'A') + 10;
1712	    in++;
1713	    if ((*in >= '0') && (*in <= '9'))
1714	        *out = *out * 16 + (*in - '0');
1715	    else if ((*in >= 'a') && (*in <= 'f'))
1716	        *out = *out * 16 + (*in - 'a') + 10;
1717	    else if ((*in >= 'A') && (*in <= 'F'))
1718	        *out = *out * 16 + (*in - 'A') + 10;
1719	    in++;
1720	    len -= 3;
1721	    out++;
1722	} else {
1723	    *out++ = *in++;
1724	    len--;
1725	}
1726    }
1727    *out = 0;
1728    return(ret);
1729}
1730
1731/**
1732 * xmlURIEscapeStr:
1733 * @str:  string to escape
1734 * @list: exception list string of chars not to escape
1735 *
1736 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1737 * and the characters in the exception list.
1738 *
1739 * Returns a new escaped string or NULL in case of error.
1740 */
1741xmlChar *
1742xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1743    xmlChar *ret, ch;
1744    xmlChar *temp;
1745    const xmlChar *in;
1746
1747    unsigned int len, out;
1748
1749    if (str == NULL)
1750	return(NULL);
1751    if (str[0] == 0)
1752	return(xmlStrdup(str));
1753    len = xmlStrlen(str);
1754    if (!(len > 0)) return(NULL);
1755
1756    len += 20;
1757    ret = (xmlChar *) xmlMallocAtomic(len);
1758    if (ret == NULL) {
1759	xmlGenericError(xmlGenericErrorContext,
1760		"xmlURIEscapeStr: out of memory\n");
1761	return(NULL);
1762    }
1763    in = (const xmlChar *) str;
1764    out = 0;
1765    while(*in != 0) {
1766	if (len - out <= 3) {
1767	    len += 20;
1768	    temp = (xmlChar *) xmlRealloc(ret, len);
1769	    if (temp == NULL) {
1770		xmlGenericError(xmlGenericErrorContext,
1771			"xmlURIEscapeStr: out of memory\n");
1772		xmlFree(ret);
1773		return(NULL);
1774	    }
1775	    ret = temp;
1776	}
1777
1778	ch = *in;
1779
1780	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1781	    unsigned char val;
1782	    ret[out++] = '%';
1783	    val = ch >> 4;
1784	    if (val <= 9)
1785		ret[out++] = '0' + val;
1786	    else
1787		ret[out++] = 'A' + val - 0xA;
1788	    val = ch & 0xF;
1789	    if (val <= 9)
1790		ret[out++] = '0' + val;
1791	    else
1792		ret[out++] = 'A' + val - 0xA;
1793	    in++;
1794	} else {
1795	    ret[out++] = *in++;
1796	}
1797
1798    }
1799    ret[out] = 0;
1800    return(ret);
1801}
1802
1803/**
1804 * xmlURIEscape:
1805 * @str:  the string of the URI to escape
1806 *
1807 * Escaping routine, does not do validity checks !
1808 * It will try to escape the chars needing this, but this is heuristic
1809 * based it's impossible to be sure.
1810 *
1811 * Returns an copy of the string, but escaped
1812 *
1813 * 25 May 2001
1814 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1815 * according to RFC2396.
1816 *   - Carl Douglas
1817 */
1818xmlChar *
1819xmlURIEscape(const xmlChar * str)
1820{
1821    xmlChar *ret, *segment = NULL;
1822    xmlURIPtr uri;
1823    int ret2;
1824
1825#define NULLCHK(p) if(!p) { \
1826                   xmlGenericError(xmlGenericErrorContext, \
1827                        "xmlURIEscape: out of memory\n"); \
1828                        xmlFreeURI(uri); \
1829                        return NULL; } \
1830
1831    if (str == NULL)
1832        return (NULL);
1833
1834    uri = xmlCreateURI();
1835    if (uri != NULL) {
1836	/*
1837	 * Allow escaping errors in the unescaped form
1838	 */
1839        uri->cleanup = 1;
1840        ret2 = xmlParseURIReference(uri, (const char *)str);
1841        if (ret2) {
1842            xmlFreeURI(uri);
1843            return (NULL);
1844        }
1845    }
1846
1847    if (!uri)
1848        return NULL;
1849
1850    ret = NULL;
1851
1852    if (uri->scheme) {
1853        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1854        NULLCHK(segment)
1855        ret = xmlStrcat(ret, segment);
1856        ret = xmlStrcat(ret, BAD_CAST ":");
1857        xmlFree(segment);
1858    }
1859
1860    if (uri->authority) {
1861        segment =
1862            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1863        NULLCHK(segment)
1864        ret = xmlStrcat(ret, BAD_CAST "//");
1865        ret = xmlStrcat(ret, segment);
1866        xmlFree(segment);
1867    }
1868
1869    if (uri->user) {
1870        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1871        NULLCHK(segment)
1872		ret = xmlStrcat(ret,BAD_CAST "//");
1873        ret = xmlStrcat(ret, segment);
1874        ret = xmlStrcat(ret, BAD_CAST "@");
1875        xmlFree(segment);
1876    }
1877
1878    if (uri->server) {
1879        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1880        NULLCHK(segment)
1881		if (uri->user == NULL)
1882		ret = xmlStrcat(ret, BAD_CAST "//");
1883        ret = xmlStrcat(ret, segment);
1884        xmlFree(segment);
1885    }
1886
1887    if (uri->port) {
1888        xmlChar port[10];
1889
1890        snprintf((char *) port, 10, "%d", uri->port);
1891        ret = xmlStrcat(ret, BAD_CAST ":");
1892        ret = xmlStrcat(ret, port);
1893    }
1894
1895    if (uri->path) {
1896        segment =
1897            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1898        NULLCHK(segment)
1899        ret = xmlStrcat(ret, segment);
1900        xmlFree(segment);
1901    }
1902
1903    if (uri->query_raw) {
1904        ret = xmlStrcat(ret, BAD_CAST "?");
1905        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1906    }
1907    else if (uri->query) {
1908        segment =
1909            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1910        NULLCHK(segment)
1911        ret = xmlStrcat(ret, BAD_CAST "?");
1912        ret = xmlStrcat(ret, segment);
1913        xmlFree(segment);
1914    }
1915
1916    if (uri->opaque) {
1917        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1918        NULLCHK(segment)
1919        ret = xmlStrcat(ret, segment);
1920        xmlFree(segment);
1921    }
1922
1923    if (uri->fragment) {
1924        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1925        NULLCHK(segment)
1926        ret = xmlStrcat(ret, BAD_CAST "#");
1927        ret = xmlStrcat(ret, segment);
1928        xmlFree(segment);
1929    }
1930
1931    xmlFreeURI(uri);
1932#undef NULLCHK
1933
1934    return (ret);
1935}
1936
1937/************************************************************************
1938 *									*
1939 *			Public functions				*
1940 *									*
1941 ************************************************************************/
1942
1943/**
1944 * xmlBuildURI:
1945 * @URI:  the URI instance found in the document
1946 * @base:  the base value
1947 *
1948 * Computes he final URI of the reference done by checking that
1949 * the given URI is valid, and building the final URI using the
1950 * base URI. This is processed according to section 5.2 of the
1951 * RFC 2396
1952 *
1953 * 5.2. Resolving Relative References to Absolute Form
1954 *
1955 * Returns a new URI string (to be freed by the caller) or NULL in case
1956 *         of error.
1957 */
1958xmlChar *
1959xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1960    xmlChar *val = NULL;
1961    int ret, len, indx, cur, out;
1962    xmlURIPtr ref = NULL;
1963    xmlURIPtr bas = NULL;
1964    xmlURIPtr res = NULL;
1965
1966    /*
1967     * 1) The URI reference is parsed into the potential four components and
1968     *    fragment identifier, as described in Section 4.3.
1969     *
1970     *    NOTE that a completely empty URI is treated by modern browsers
1971     *    as a reference to "." rather than as a synonym for the current
1972     *    URI.  Should we do that here?
1973     */
1974    if (URI == NULL)
1975	ret = -1;
1976    else {
1977	if (*URI) {
1978	    ref = xmlCreateURI();
1979	    if (ref == NULL)
1980		goto done;
1981	    ret = xmlParseURIReference(ref, (const char *) URI);
1982	}
1983	else
1984	    ret = 0;
1985    }
1986    if (ret != 0)
1987	goto done;
1988    if ((ref != NULL) && (ref->scheme != NULL)) {
1989	/*
1990	 * The URI is absolute don't modify.
1991	 */
1992	val = xmlStrdup(URI);
1993	goto done;
1994    }
1995    if (base == NULL)
1996	ret = -1;
1997    else {
1998	bas = xmlCreateURI();
1999	if (bas == NULL)
2000	    goto done;
2001	ret = xmlParseURIReference(bas, (const char *) base);
2002    }
2003    if (ret != 0) {
2004	if (ref)
2005	    val = xmlSaveUri(ref);
2006	goto done;
2007    }
2008    if (ref == NULL) {
2009	/*
2010	 * the base fragment must be ignored
2011	 */
2012	if (bas->fragment != NULL) {
2013	    xmlFree(bas->fragment);
2014	    bas->fragment = NULL;
2015	}
2016	val = xmlSaveUri(bas);
2017	goto done;
2018    }
2019
2020    /*
2021     * 2) If the path component is empty and the scheme, authority, and
2022     *    query components are undefined, then it is a reference to the
2023     *    current document and we are done.  Otherwise, the reference URI's
2024     *    query and fragment components are defined as found (or not found)
2025     *    within the URI reference and not inherited from the base URI.
2026     *
2027     *    NOTE that in modern browsers, the parsing differs from the above
2028     *    in the following aspect:  the query component is allowed to be
2029     *    defined while still treating this as a reference to the current
2030     *    document.
2031     */
2032    res = xmlCreateURI();
2033    if (res == NULL)
2034	goto done;
2035    if ((ref->scheme == NULL) && (ref->path == NULL) &&
2036	((ref->authority == NULL) && (ref->server == NULL))) {
2037	if (bas->scheme != NULL)
2038	    res->scheme = xmlMemStrdup(bas->scheme);
2039	if (bas->authority != NULL)
2040	    res->authority = xmlMemStrdup(bas->authority);
2041	else if (bas->server != NULL) {
2042	    res->server = xmlMemStrdup(bas->server);
2043	    if (bas->user != NULL)
2044		res->user = xmlMemStrdup(bas->user);
2045	    res->port = bas->port;
2046	}
2047	if (bas->path != NULL)
2048	    res->path = xmlMemStrdup(bas->path);
2049	if (ref->query_raw != NULL)
2050	    res->query_raw = xmlMemStrdup (ref->query_raw);
2051	else if (ref->query != NULL)
2052	    res->query = xmlMemStrdup(ref->query);
2053	else if (bas->query_raw != NULL)
2054	    res->query_raw = xmlMemStrdup(bas->query_raw);
2055	else if (bas->query != NULL)
2056	    res->query = xmlMemStrdup(bas->query);
2057	if (ref->fragment != NULL)
2058	    res->fragment = xmlMemStrdup(ref->fragment);
2059	goto step_7;
2060    }
2061
2062    /*
2063     * 3) If the scheme component is defined, indicating that the reference
2064     *    starts with a scheme name, then the reference is interpreted as an
2065     *    absolute URI and we are done.  Otherwise, the reference URI's
2066     *    scheme is inherited from the base URI's scheme component.
2067     */
2068    if (ref->scheme != NULL) {
2069	val = xmlSaveUri(ref);
2070	goto done;
2071    }
2072    if (bas->scheme != NULL)
2073	res->scheme = xmlMemStrdup(bas->scheme);
2074
2075    if (ref->query_raw != NULL)
2076	res->query_raw = xmlMemStrdup(ref->query_raw);
2077    else if (ref->query != NULL)
2078	res->query = xmlMemStrdup(ref->query);
2079    if (ref->fragment != NULL)
2080	res->fragment = xmlMemStrdup(ref->fragment);
2081
2082    /*
2083     * 4) If the authority component is defined, then the reference is a
2084     *    network-path and we skip to step 7.  Otherwise, the reference
2085     *    URI's authority is inherited from the base URI's authority
2086     *    component, which will also be undefined if the URI scheme does not
2087     *    use an authority component.
2088     */
2089    if ((ref->authority != NULL) || (ref->server != NULL)) {
2090	if (ref->authority != NULL)
2091	    res->authority = xmlMemStrdup(ref->authority);
2092	else {
2093	    res->server = xmlMemStrdup(ref->server);
2094	    if (ref->user != NULL)
2095		res->user = xmlMemStrdup(ref->user);
2096            res->port = ref->port;
2097	}
2098	if (ref->path != NULL)
2099	    res->path = xmlMemStrdup(ref->path);
2100	goto step_7;
2101    }
2102    if (bas->authority != NULL)
2103	res->authority = xmlMemStrdup(bas->authority);
2104    else if (bas->server != NULL) {
2105	res->server = xmlMemStrdup(bas->server);
2106	if (bas->user != NULL)
2107	    res->user = xmlMemStrdup(bas->user);
2108	res->port = bas->port;
2109    }
2110
2111    /*
2112     * 5) If the path component begins with a slash character ("/"), then
2113     *    the reference is an absolute-path and we skip to step 7.
2114     */
2115    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2116	res->path = xmlMemStrdup(ref->path);
2117	goto step_7;
2118    }
2119
2120
2121    /*
2122     * 6) If this step is reached, then we are resolving a relative-path
2123     *    reference.  The relative path needs to be merged with the base
2124     *    URI's path.  Although there are many ways to do this, we will
2125     *    describe a simple method using a separate string buffer.
2126     *
2127     * Allocate a buffer large enough for the result string.
2128     */
2129    len = 2; /* extra / and 0 */
2130    if (ref->path != NULL)
2131	len += strlen(ref->path);
2132    if (bas->path != NULL)
2133	len += strlen(bas->path);
2134    res->path = (char *) xmlMallocAtomic(len);
2135    if (res->path == NULL) {
2136	xmlGenericError(xmlGenericErrorContext,
2137		"xmlBuildURI: out of memory\n");
2138	goto done;
2139    }
2140    res->path[0] = 0;
2141
2142    /*
2143     * a) All but the last segment of the base URI's path component is
2144     *    copied to the buffer.  In other words, any characters after the
2145     *    last (right-most) slash character, if any, are excluded.
2146     */
2147    cur = 0;
2148    out = 0;
2149    if (bas->path != NULL) {
2150	while (bas->path[cur] != 0) {
2151	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2152		cur++;
2153	    if (bas->path[cur] == 0)
2154		break;
2155
2156	    cur++;
2157	    while (out < cur) {
2158		res->path[out] = bas->path[out];
2159		out++;
2160	    }
2161	}
2162    }
2163    res->path[out] = 0;
2164
2165    /*
2166     * b) The reference's path component is appended to the buffer
2167     *    string.
2168     */
2169    if (ref->path != NULL && ref->path[0] != 0) {
2170	indx = 0;
2171	/*
2172	 * Ensure the path includes a '/'
2173	 */
2174	if ((out == 0) && (bas->server != NULL))
2175	    res->path[out++] = '/';
2176	while (ref->path[indx] != 0) {
2177	    res->path[out++] = ref->path[indx++];
2178	}
2179    }
2180    res->path[out] = 0;
2181
2182    /*
2183     * Steps c) to h) are really path normalization steps
2184     */
2185    xmlNormalizeURIPath(res->path);
2186
2187step_7:
2188
2189    /*
2190     * 7) The resulting URI components, including any inherited from the
2191     *    base URI, are recombined to give the absolute form of the URI
2192     *    reference.
2193     */
2194    val = xmlSaveUri(res);
2195
2196done:
2197    if (ref != NULL)
2198	xmlFreeURI(ref);
2199    if (bas != NULL)
2200	xmlFreeURI(bas);
2201    if (res != NULL)
2202	xmlFreeURI(res);
2203    return(val);
2204}
2205
2206/**
2207 * xmlBuildRelativeURI:
2208 * @URI:  the URI reference under consideration
2209 * @base:  the base value
2210 *
2211 * Expresses the URI of the reference in terms relative to the
2212 * base.  Some examples of this operation include:
2213 *     base = "http://site1.com/docs/book1.html"
2214 *        URI input                        URI returned
2215 *     docs/pic1.gif                    pic1.gif
2216 *     docs/img/pic1.gif                img/pic1.gif
2217 *     img/pic1.gif                     ../img/pic1.gif
2218 *     http://site1.com/docs/pic1.gif   pic1.gif
2219 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2220 *
2221 *     base = "docs/book1.html"
2222 *        URI input                        URI returned
2223 *     docs/pic1.gif                    pic1.gif
2224 *     docs/img/pic1.gif                img/pic1.gif
2225 *     img/pic1.gif                     ../img/pic1.gif
2226 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2227 *
2228 *
2229 * Note: if the URI reference is really wierd or complicated, it may be
2230 *       worthwhile to first convert it into a "nice" one by calling
2231 *       xmlBuildURI (using 'base') before calling this routine,
2232 *       since this routine (for reasonable efficiency) assumes URI has
2233 *       already been through some validation.
2234 *
2235 * Returns a new URI string (to be freed by the caller) or NULL in case
2236 * error.
2237 */
2238xmlChar *
2239xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2240{
2241    xmlChar *val = NULL;
2242    int ret;
2243    int ix;
2244    int pos = 0;
2245    int nbslash = 0;
2246    int len;
2247    xmlURIPtr ref = NULL;
2248    xmlURIPtr bas = NULL;
2249    xmlChar *bptr, *uptr, *vptr;
2250    int remove_path = 0;
2251
2252    if ((URI == NULL) || (*URI == 0))
2253	return NULL;
2254
2255    /*
2256     * First parse URI into a standard form
2257     */
2258    ref = xmlCreateURI ();
2259    if (ref == NULL)
2260	return NULL;
2261    /* If URI not already in "relative" form */
2262    if (URI[0] != '.') {
2263	ret = xmlParseURIReference (ref, (const char *) URI);
2264	if (ret != 0)
2265	    goto done;		/* Error in URI, return NULL */
2266    } else
2267	ref->path = (char *)xmlStrdup(URI);
2268
2269    /*
2270     * Next parse base into the same standard form
2271     */
2272    if ((base == NULL) || (*base == 0)) {
2273	val = xmlStrdup (URI);
2274	goto done;
2275    }
2276    bas = xmlCreateURI ();
2277    if (bas == NULL)
2278	goto done;
2279    if (base[0] != '.') {
2280	ret = xmlParseURIReference (bas, (const char *) base);
2281	if (ret != 0)
2282	    goto done;		/* Error in base, return NULL */
2283    } else
2284	bas->path = (char *)xmlStrdup(base);
2285
2286    /*
2287     * If the scheme / server on the URI differs from the base,
2288     * just return the URI
2289     */
2290    if ((ref->scheme != NULL) &&
2291	((bas->scheme == NULL) ||
2292	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2293	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2294	val = xmlStrdup (URI);
2295	goto done;
2296    }
2297    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2298	val = xmlStrdup(BAD_CAST "");
2299	goto done;
2300    }
2301    if (bas->path == NULL) {
2302	val = xmlStrdup((xmlChar *)ref->path);
2303	goto done;
2304    }
2305    if (ref->path == NULL) {
2306        ref->path = (char *) "/";
2307	remove_path = 1;
2308    }
2309
2310    /*
2311     * At this point (at last!) we can compare the two paths
2312     *
2313     * First we take care of the special case where either of the
2314     * two path components may be missing (bug 316224)
2315     */
2316    if (bas->path == NULL) {
2317	if (ref->path != NULL) {
2318	    uptr = (xmlChar *) ref->path;
2319	    if (*uptr == '/')
2320		uptr++;
2321	    /* exception characters from xmlSaveUri */
2322	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2323	}
2324	goto done;
2325    }
2326    bptr = (xmlChar *)bas->path;
2327    if (ref->path == NULL) {
2328	for (ix = 0; bptr[ix] != 0; ix++) {
2329	    if (bptr[ix] == '/')
2330		nbslash++;
2331	}
2332	uptr = NULL;
2333	len = 1;	/* this is for a string terminator only */
2334    } else {
2335    /*
2336     * Next we compare the two strings and find where they first differ
2337     */
2338	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2339            pos += 2;
2340	if ((*bptr == '.') && (bptr[1] == '/'))
2341            bptr += 2;
2342	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2343	    bptr++;
2344	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2345	    pos++;
2346
2347	if (bptr[pos] == ref->path[pos]) {
2348	    val = xmlStrdup(BAD_CAST "");
2349	    goto done;		/* (I can't imagine why anyone would do this) */
2350	}
2351
2352	/*
2353	 * In URI, "back up" to the last '/' encountered.  This will be the
2354	 * beginning of the "unique" suffix of URI
2355	 */
2356	ix = pos;
2357	if ((ref->path[ix] == '/') && (ix > 0))
2358	    ix--;
2359	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2360	    ix -= 2;
2361	for (; ix > 0; ix--) {
2362	    if (ref->path[ix] == '/')
2363		break;
2364	}
2365	if (ix == 0) {
2366	    uptr = (xmlChar *)ref->path;
2367	} else {
2368	    ix++;
2369	    uptr = (xmlChar *)&ref->path[ix];
2370	}
2371
2372	/*
2373	 * In base, count the number of '/' from the differing point
2374	 */
2375	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2376	    for (; bptr[ix] != 0; ix++) {
2377		if (bptr[ix] == '/')
2378		    nbslash++;
2379	    }
2380	}
2381	len = xmlStrlen (uptr) + 1;
2382    }
2383
2384    if (nbslash == 0) {
2385	if (uptr != NULL)
2386	    /* exception characters from xmlSaveUri */
2387	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2388	goto done;
2389    }
2390
2391    /*
2392     * Allocate just enough space for the returned string -
2393     * length of the remainder of the URI, plus enough space
2394     * for the "../" groups, plus one for the terminator
2395     */
2396    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2397    if (val == NULL) {
2398	xmlGenericError(xmlGenericErrorContext,
2399		"xmlBuildRelativeURI: out of memory\n");
2400	goto done;
2401    }
2402    vptr = val;
2403    /*
2404     * Put in as many "../" as needed
2405     */
2406    for (; nbslash>0; nbslash--) {
2407	*vptr++ = '.';
2408	*vptr++ = '.';
2409	*vptr++ = '/';
2410    }
2411    /*
2412     * Finish up with the end of the URI
2413     */
2414    if (uptr != NULL) {
2415        if ((vptr > val) && (len > 0) &&
2416	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2417	    memcpy (vptr, uptr + 1, len - 1);
2418	    vptr[len - 2] = 0;
2419	} else {
2420	    memcpy (vptr, uptr, len);
2421	    vptr[len - 1] = 0;
2422	}
2423    } else {
2424	vptr[len - 1] = 0;
2425    }
2426
2427    /* escape the freshly-built path */
2428    vptr = val;
2429	/* exception characters from xmlSaveUri */
2430    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2431    xmlFree(vptr);
2432
2433done:
2434    /*
2435     * Free the working variables
2436     */
2437    if (remove_path != 0)
2438        ref->path = NULL;
2439    if (ref != NULL)
2440	xmlFreeURI (ref);
2441    if (bas != NULL)
2442	xmlFreeURI (bas);
2443
2444    return val;
2445}
2446
2447/**
2448 * xmlCanonicPath:
2449 * @path:  the resource locator in a filesystem notation
2450 *
2451 * Constructs a canonic path from the specified path.
2452 *
2453 * Returns a new canonic path, or a duplicate of the path parameter if the
2454 * construction fails. The caller is responsible for freeing the memory occupied
2455 * by the returned string. If there is insufficient memory available, or the
2456 * argument is NULL, the function returns NULL.
2457 */
2458#define IS_WINDOWS_PATH(p) 					\
2459	((p != NULL) &&						\
2460	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2461	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2462	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2463xmlChar *
2464xmlCanonicPath(const xmlChar *path)
2465{
2466/*
2467 * For Windows implementations, additional work needs to be done to
2468 * replace backslashes in pathnames with "forward slashes"
2469 */
2470#if defined(_WIN32) && !defined(__CYGWIN__)
2471    int len = 0;
2472    int i = 0;
2473    xmlChar *p = NULL;
2474#endif
2475    xmlURIPtr uri;
2476    xmlChar *ret;
2477    const xmlChar *absuri;
2478
2479    if (path == NULL)
2480	return(NULL);
2481
2482    /* sanitize filename starting with // so it can be used as URI */
2483    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2484        path++;
2485
2486    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2487	xmlFreeURI(uri);
2488	return xmlStrdup(path);
2489    }
2490
2491    /* Check if this is an "absolute uri" */
2492    absuri = xmlStrstr(path, BAD_CAST "://");
2493    if (absuri != NULL) {
2494        int l, j;
2495	unsigned char c;
2496	xmlChar *escURI;
2497
2498        /*
2499	 * this looks like an URI where some parts have not been
2500	 * escaped leading to a parsing problem.  Check that the first
2501	 * part matches a protocol.
2502	 */
2503	l = absuri - path;
2504	/* Bypass if first part (part before the '://') is > 20 chars */
2505	if ((l <= 0) || (l > 20))
2506	    goto path_processing;
2507	/* Bypass if any non-alpha characters are present in first part */
2508	for (j = 0;j < l;j++) {
2509	    c = path[j];
2510	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2511	        goto path_processing;
2512	}
2513
2514	/* Escape all except the characters specified in the supplied path */
2515        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2516	if (escURI != NULL) {
2517	    /* Try parsing the escaped path */
2518	    uri = xmlParseURI((const char *) escURI);
2519	    /* If successful, return the escaped string */
2520	    if (uri != NULL) {
2521	        xmlFreeURI(uri);
2522		return escURI;
2523	    }
2524	}
2525    }
2526
2527path_processing:
2528/* For Windows implementations, replace backslashes with 'forward slashes' */
2529#if defined(_WIN32) && !defined(__CYGWIN__)
2530    /*
2531     * Create a URI structure
2532     */
2533    uri = xmlCreateURI();
2534    if (uri == NULL) {		/* Guard against 'out of memory' */
2535        return(NULL);
2536    }
2537
2538    len = xmlStrlen(path);
2539    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2540        /* make the scheme 'file' */
2541	uri->scheme = xmlStrdup(BAD_CAST "file");
2542	/* allocate space for leading '/' + path + string terminator */
2543	uri->path = xmlMallocAtomic(len + 2);
2544	if (uri->path == NULL) {
2545	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2546	    return(NULL);
2547	}
2548	/* Put in leading '/' plus path */
2549	uri->path[0] = '/';
2550	p = uri->path + 1;
2551	strncpy(p, path, len + 1);
2552    } else {
2553	uri->path = xmlStrdup(path);
2554	if (uri->path == NULL) {
2555	    xmlFreeURI(uri);
2556	    return(NULL);
2557	}
2558	p = uri->path;
2559    }
2560    /* Now change all occurences of '\' to '/' */
2561    while (*p != '\0') {
2562	if (*p == '\\')
2563	    *p = '/';
2564	p++;
2565    }
2566
2567    if (uri->scheme == NULL) {
2568	ret = xmlStrdup((const xmlChar *) uri->path);
2569    } else {
2570	ret = xmlSaveUri(uri);
2571    }
2572
2573    xmlFreeURI(uri);
2574#else
2575    ret = xmlStrdup((const xmlChar *) path);
2576#endif
2577    return(ret);
2578}
2579
2580/**
2581 * xmlPathToURI:
2582 * @path:  the resource locator in a filesystem notation
2583 *
2584 * Constructs an URI expressing the existing path
2585 *
2586 * Returns a new URI, or a duplicate of the path parameter if the
2587 * construction fails. The caller is responsible for freeing the memory
2588 * occupied by the returned string. If there is insufficient memory available,
2589 * or the argument is NULL, the function returns NULL.
2590 */
2591xmlChar *
2592xmlPathToURI(const xmlChar *path)
2593{
2594    xmlURIPtr uri;
2595    xmlURI temp;
2596    xmlChar *ret, *cal;
2597
2598    if (path == NULL)
2599        return(NULL);
2600
2601    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2602	xmlFreeURI(uri);
2603	return xmlStrdup(path);
2604    }
2605    cal = xmlCanonicPath(path);
2606    if (cal == NULL)
2607        return(NULL);
2608#if defined(_WIN32) && !defined(__CYGWIN__)
2609    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2610       If 'cal' is a valid URI allready then we are done here, as continuing would make
2611       it invalid. */
2612    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2613	xmlFreeURI(uri);
2614	return cal;
2615    }
2616    /* 'cal' can contain a relative path with backslashes. If that is processed
2617       by xmlSaveURI, they will be escaped and the external entity loader machinery
2618       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2619    ret = cal;
2620    while (*ret != '\0') {
2621	if (*ret == '\\')
2622	    *ret = '/';
2623	ret++;
2624    }
2625#endif
2626    memset(&temp, 0, sizeof(temp));
2627    temp.path = (char *) cal;
2628    ret = xmlSaveUri(&temp);
2629    xmlFree(cal);
2630    return(ret);
2631}
2632#define bottom_uri
2633#include "elfgcchack.h"
2634