1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
12#include "libxml.h"
13
14#include <string.h>
15#include <limits.h>
16
17#include <libxml/xmlmemory.h>
18#include <libxml/uri.h>
19#include <libxml/globals.h>
20#include <libxml/xmlerror.h>
21
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value when used inside a larger arithmetic expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)
35
36static void
37xmlURIErrMemory(const char *extra)
38{
39    if (extra)
40        __xmlRaiseError(NULL, NULL, NULL,
41                        NULL, NULL, XML_FROM_URI,
42                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43                        extra, NULL, NULL, 0, 0,
44                        "Memory allocation failed : %s\n", extra);
45    else
46        __xmlRaiseError(NULL, NULL, NULL,
47                        NULL, NULL, XML_FROM_URI,
48                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49                        NULL, NULL, NULL, 0, 0,
50                        "Memory allocation failed\n");
51}
52
53static void xmlCleanURI(xmlURIPtr uri);
54
/*
 * Character classes from RFC 2396, kept for the legacy handling code.
 * Except for IS_UNWISE, every macro takes a character value; note that
 * the argument is evaluated more than once.
 */

/*
 * lowalpha = "a" .. "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" .. "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" .. "9"
 *
 * Any IS_DIGIT already defined at this point is replaced by the
 * URI specific one.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * This macro takes a POINTER to the character, and in addition to the
 * RFC 2396 list it also accepts "[" and "]".
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
122
/*
 * Skip to next pointer char, handle escaped sequences: when the current
 * character is '%' the pointer is advanced by three (the '%' and two
 * more characters), otherwise by one. The caller is responsible for
 * having checked that a '%' is really followed by two characters.
 *
 * The argument is fully parenthesized so that any lvalue expression
 * (for instance *pp) can be passed; it is evaluated twice.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s with xmlStrndup and hand the result
 * back as a plain char pointer.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140
141/************************************************************************
142 *									*
143 *                         RFC 3986 parser				*
144 *									*
145 ************************************************************************/
146
/*
 * Character classes from RFC 3986. Every macro takes a POINTER to the
 * character to test and evaluates its argument more than once.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read when the first one is
 * '%'. The argument is parenthesized before the offset is added so
 * that compound pointer expressions can be passed safely.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
196
197/**
198 * xmlParse3986Scheme:
199 * @uri:  pointer to an URI structure
200 * @str:  pointer to the string to analyze
201 *
202 * Parse an URI scheme
203 *
204 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 *
206 * Returns 0 or the error code
207 */
208static int
209xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210    const char *cur;
211
212    if (str == NULL)
213	return(-1);
214
215    cur = *str;
216    if (!ISA_ALPHA(cur))
217	return(2);
218    cur++;
219    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221    if (uri != NULL) {
222	if (uri->scheme != NULL) xmlFree(uri->scheme);
223	uri->scheme = STRNDUP(*str, cur - *str);
224    }
225    *str = cur;
226    return(0);
227}
228
229/**
230 * xmlParse3986Fragment:
231 * @uri:  pointer to an URI structure
232 * @str:  pointer to the string to analyze
233 *
234 * Parse the query part of an URI
235 *
236 * fragment      = *( pchar / "/" / "?" )
237 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238 *       in the fragment identifier but this is used very broadly for
239 *       xpointer scheme selection, so we are allowing it here to not break
240 *       for example all the DocBook processing chains.
241 *
242 * Returns 0 or the error code
243 */
244static int
245xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246{
247    const char *cur;
248
249    if (str == NULL)
250        return (-1);
251
252    cur = *str;
253
254    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255           (*cur == '[') || (*cur == ']') ||
256           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257        NEXT(cur);
258    if (uri != NULL) {
259        if (uri->fragment != NULL)
260            xmlFree(uri->fragment);
261	if (uri->cleanup & 2)
262	    uri->fragment = STRNDUP(*str, cur - *str);
263	else
264	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265    }
266    *str = cur;
267    return (0);
268}
269
270/**
271 * xmlParse3986Query:
272 * @uri:  pointer to an URI structure
273 * @str:  pointer to the string to analyze
274 *
275 * Parse the query part of an URI
276 *
277 * query = *uric
278 *
279 * Returns 0 or the error code
280 */
281static int
282xmlParse3986Query(xmlURIPtr uri, const char **str)
283{
284    const char *cur;
285
286    if (str == NULL)
287        return (-1);
288
289    cur = *str;
290
291    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293        NEXT(cur);
294    if (uri != NULL) {
295        if (uri->query != NULL)
296            xmlFree(uri->query);
297	if (uri->cleanup & 2)
298	    uri->query = STRNDUP(*str, cur - *str);
299	else
300	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301
302	/* Save the raw bytes of the query as well.
303	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304	 */
305	if (uri->query_raw != NULL)
306	    xmlFree (uri->query_raw);
307	uri->query_raw = STRNDUP (*str, cur - *str);
308    }
309    *str = cur;
310    return (0);
311}
312
313/**
314 * xmlParse3986Port:
315 * @uri:  pointer to an URI structure
316 * @str:  the string to analyze
317 *
318 * Parse a port part and fills in the appropriate fields
319 * of the @uri structure
320 *
321 * port          = *DIGIT
322 *
323 * Returns 0 or the error code
324 */
325static int
326xmlParse3986Port(xmlURIPtr uri, const char **str)
327{
328    const char *cur = *str;
329    unsigned port = 0; /* unsigned for defined overflow behavior */
330
331    if (ISA_DIGIT(cur)) {
332	while (ISA_DIGIT(cur)) {
333	    port = port * 10 + (*cur - '0');
334
335	    cur++;
336	}
337	if (uri != NULL)
338	    uri->port = port & USHRT_MAX; /* port value modulo INT_MAX+1 */
339	*str = cur;
340	return(0);
341    }
342    return(1);
343}
344
345/**
346 * xmlParse3986Userinfo:
347 * @uri:  pointer to an URI structure
348 * @str:  the string to analyze
349 *
350 * Parse an user informations part and fills in the appropriate fields
351 * of the @uri structure
352 *
353 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
354 *
355 * Returns 0 or the error code
356 */
357static int
358xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
359{
360    const char *cur;
361
362    cur = *str;
363    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
364           ISA_SUB_DELIM(cur) || (*cur == ':'))
365	NEXT(cur);
366    if (*cur == '@') {
367	if (uri != NULL) {
368	    if (uri->user != NULL) xmlFree(uri->user);
369	    if (uri->cleanup & 2)
370		uri->user = STRNDUP(*str, cur - *str);
371	    else
372		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
373	}
374	*str = cur;
375	return(0);
376    }
377    return(1);
378}
379
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A one or two digit octet must not be followed by
 * another digit, and a two digit octet must not start with '0'.
 * The last rule checks the THIRD digit against 0-5, so that 256-259
 * are rejected.
 *
 * Returns 0 if found and skipped, 1 otherwise (*@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* cur[1] is a digit so cur[2] is readable; two digits: 10-99 */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255 */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
417/**
418 * xmlParse3986Host:
419 * @uri:  pointer to an URI structure
420 * @str:  the string to analyze
421 *
422 * Parse an host part and fills in the appropriate fields
423 * of the @uri structure
424 *
425 * host          = IP-literal / IPv4address / reg-name
426 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
427 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
428 * reg-name      = *( unreserved / pct-encoded / sub-delims )
429 *
430 * Returns 0 or the error code
431 */
432static int
433xmlParse3986Host(xmlURIPtr uri, const char **str)
434{
435    const char *cur = *str;
436    const char *host;
437
438    host = cur;
439    /*
440     * IPv6 and future adressing scheme are enclosed between brackets
441     */
442    if (*cur == '[') {
443        cur++;
444	while ((*cur != ']') && (*cur != 0))
445	    cur++;
446	if (*cur != ']')
447	    return(1);
448	cur++;
449	goto found;
450    }
451    /*
452     * try to parse an IPv4
453     */
454    if (ISA_DIGIT(cur)) {
455        if (xmlParse3986DecOctet(&cur) != 0)
456	    goto not_ipv4;
457	if (*cur != '.')
458	    goto not_ipv4;
459	cur++;
460        if (xmlParse3986DecOctet(&cur) != 0)
461	    goto not_ipv4;
462	if (*cur != '.')
463	    goto not_ipv4;
464        if (xmlParse3986DecOctet(&cur) != 0)
465	    goto not_ipv4;
466	if (*cur != '.')
467	    goto not_ipv4;
468        if (xmlParse3986DecOctet(&cur) != 0)
469	    goto not_ipv4;
470	goto found;
471not_ipv4:
472        cur = *str;
473    }
474    /*
475     * then this should be a hostname which can be empty
476     */
477    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
478        NEXT(cur);
479found:
480    if (uri != NULL) {
481	if (uri->authority != NULL) xmlFree(uri->authority);
482	uri->authority = NULL;
483	if (uri->server != NULL) xmlFree(uri->server);
484	if (cur != host) {
485	    if (uri->cleanup & 2)
486		uri->server = STRNDUP(host, cur - host);
487	    else
488		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
489	} else
490	    uri->server = NULL;
491    }
492    *str = cur;
493    return(0);
494}
495
496/**
497 * xmlParse3986Authority:
498 * @uri:  pointer to an URI structure
499 * @str:  the string to analyze
500 *
501 * Parse an authority part and fills in the appropriate fields
502 * of the @uri structure
503 *
504 * authority     = [ userinfo "@" ] host [ ":" port ]
505 *
506 * Returns 0 or the error code
507 */
508static int
509xmlParse3986Authority(xmlURIPtr uri, const char **str)
510{
511    const char *cur;
512    int ret;
513
514    cur = *str;
515    /*
516     * try to parse an userinfo and check for the trailing @
517     */
518    ret = xmlParse3986Userinfo(uri, &cur);
519    if ((ret != 0) || (*cur != '@'))
520        cur = *str;
521    else
522        cur++;
523    ret = xmlParse3986Host(uri, &cur);
524    if (ret != 0) return(ret);
525    if (*cur == ':') {
526        cur++;
527        ret = xmlParse3986Port(uri, &cur);
528	if (ret != 0) return(ret);
529    }
530    *str = cur;
531    return(0);
532}
533
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * is equal to @forbid.
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 * @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    for (; ISA_PCHAR(pos) && (*pos != forbid); )
        NEXT(pos);

    *str = pos;
    return (0);
}
566
567/**
568 * xmlParse3986PathAbEmpty:
569 * @uri:  pointer to an URI structure
570 * @str:  the string to analyze
571 *
572 * Parse an path absolute or empty and fills in the appropriate fields
573 * of the @uri structure
574 *
575 * path-abempty  = *( "/" segment )
576 *
577 * Returns 0 or the error code
578 */
579static int
580xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
581{
582    const char *cur;
583    int ret;
584
585    cur = *str;
586
587    while (*cur == '/') {
588        cur++;
589	ret = xmlParse3986Segment(&cur, 0, 1);
590	if (ret != 0) return(ret);
591    }
592    if (uri != NULL) {
593	if (uri->path != NULL) xmlFree(uri->path);
594        if (*str != cur) {
595            if (uri->cleanup & 2)
596                uri->path = STRNDUP(*str, cur - *str);
597            else
598                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
599        } else {
600            uri->path = NULL;
601        }
602    }
603    *str = cur;
604    return (0);
605}
606
607/**
608 * xmlParse3986PathAbsolute:
609 * @uri:  pointer to an URI structure
610 * @str:  the string to analyze
611 *
612 * Parse an path absolute and fills in the appropriate fields
613 * of the @uri structure
614 *
615 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
616 *
617 * Returns 0 or the error code
618 */
619static int
620xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
621{
622    const char *cur;
623    int ret;
624
625    cur = *str;
626
627    if (*cur != '/')
628        return(1);
629    cur++;
630    ret = xmlParse3986Segment(&cur, 0, 0);
631    if (ret == 0) {
632	while (*cur == '/') {
633	    cur++;
634	    ret = xmlParse3986Segment(&cur, 0, 1);
635	    if (ret != 0) return(ret);
636	}
637    }
638    if (uri != NULL) {
639	if (uri->path != NULL) xmlFree(uri->path);
640        if (cur != *str) {
641            if (uri->cleanup & 2)
642                uri->path = STRNDUP(*str, cur - *str);
643            else
644                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645        } else {
646            uri->path = NULL;
647        }
648    }
649    *str = cur;
650    return (0);
651}
652
653/**
654 * xmlParse3986PathRootless:
655 * @uri:  pointer to an URI structure
656 * @str:  the string to analyze
657 *
658 * Parse an path without root and fills in the appropriate fields
659 * of the @uri structure
660 *
661 * path-rootless = segment-nz *( "/" segment )
662 *
663 * Returns 0 or the error code
664 */
665static int
666xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
667{
668    const char *cur;
669    int ret;
670
671    cur = *str;
672
673    ret = xmlParse3986Segment(&cur, 0, 0);
674    if (ret != 0) return(ret);
675    while (*cur == '/') {
676        cur++;
677	ret = xmlParse3986Segment(&cur, 0, 1);
678	if (ret != 0) return(ret);
679    }
680    if (uri != NULL) {
681	if (uri->path != NULL) xmlFree(uri->path);
682        if (cur != *str) {
683            if (uri->cleanup & 2)
684                uri->path = STRNDUP(*str, cur - *str);
685            else
686                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
687        } else {
688            uri->path = NULL;
689        }
690    }
691    *str = cur;
692    return (0);
693}
694
695/**
696 * xmlParse3986PathNoScheme:
697 * @uri:  pointer to an URI structure
698 * @str:  the string to analyze
699 *
700 * Parse an path which is not a scheme and fills in the appropriate fields
701 * of the @uri structure
702 *
703 * path-noscheme = segment-nz-nc *( "/" segment )
704 *
705 * Returns 0 or the error code
706 */
707static int
708xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
709{
710    const char *cur;
711    int ret;
712
713    cur = *str;
714
715    ret = xmlParse3986Segment(&cur, ':', 0);
716    if (ret != 0) return(ret);
717    while (*cur == '/') {
718        cur++;
719	ret = xmlParse3986Segment(&cur, 0, 1);
720	if (ret != 0) return(ret);
721    }
722    if (uri != NULL) {
723	if (uri->path != NULL) xmlFree(uri->path);
724        if (cur != *str) {
725            if (uri->cleanup & 2)
726                uri->path = STRNDUP(*str, cur - *str);
727            else
728                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
729        } else {
730            uri->path = NULL;
731        }
732    }
733    *str = cur;
734    return (0);
735}
736
737/**
738 * xmlParse3986HierPart:
739 * @uri:  pointer to an URI structure
740 * @str:  the string to analyze
741 *
742 * Parse an hierarchical part and fills in the appropriate fields
743 * of the @uri structure
744 *
745 * hier-part     = "//" authority path-abempty
746 *                / path-absolute
747 *                / path-rootless
748 *                / path-empty
749 *
750 * Returns 0 or the error code
751 */
752static int
753xmlParse3986HierPart(xmlURIPtr uri, const char **str)
754{
755    const char *cur;
756    int ret;
757
758    cur = *str;
759
760    if ((*cur == '/') && (*(cur + 1) == '/')) {
761        cur += 2;
762	ret = xmlParse3986Authority(uri, &cur);
763	if (ret != 0) return(ret);
764	if (uri->server == NULL)
765	    uri->port = -1;
766	ret = xmlParse3986PathAbEmpty(uri, &cur);
767	if (ret != 0) return(ret);
768	*str = cur;
769	return(0);
770    } else if (*cur == '/') {
771        ret = xmlParse3986PathAbsolute(uri, &cur);
772	if (ret != 0) return(ret);
773    } else if (ISA_PCHAR(cur)) {
774        ret = xmlParse3986PathRootless(uri, &cur);
775	if (ret != 0) return(ret);
776    } else {
777	/* path-empty is effectively empty */
778	if (uri != NULL) {
779	    if (uri->path != NULL) xmlFree(uri->path);
780	    uri->path = NULL;
781	}
782    }
783    *str = cur;
784    return (0);
785}
786
787/**
788 * xmlParse3986RelativeRef:
789 * @uri:  pointer to an URI structure
790 * @str:  the string to analyze
791 *
792 * Parse an URI string and fills in the appropriate fields
793 * of the @uri structure
794 *
795 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
796 * relative-part = "//" authority path-abempty
797 *               / path-absolute
798 *               / path-noscheme
799 *               / path-empty
800 *
801 * Returns 0 or the error code
802 */
803static int
804xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
805    int ret;
806
807    if ((*str == '/') && (*(str + 1) == '/')) {
808        str += 2;
809	ret = xmlParse3986Authority(uri, &str);
810	if (ret != 0) return(ret);
811	ret = xmlParse3986PathAbEmpty(uri, &str);
812	if (ret != 0) return(ret);
813    } else if (*str == '/') {
814	ret = xmlParse3986PathAbsolute(uri, &str);
815	if (ret != 0) return(ret);
816    } else if (ISA_PCHAR(str)) {
817        ret = xmlParse3986PathNoScheme(uri, &str);
818	if (ret != 0) return(ret);
819    } else {
820	/* path-empty is effectively empty */
821	if (uri != NULL) {
822	    if (uri->path != NULL) xmlFree(uri->path);
823	    uri->path = NULL;
824	}
825    }
826
827    if (*str == '?') {
828	str++;
829	ret = xmlParse3986Query(uri, &str);
830	if (ret != 0) return(ret);
831    }
832    if (*str == '#') {
833	str++;
834	ret = xmlParse3986Fragment(uri, &str);
835	if (ret != 0) return(ret);
836    }
837    if (*str != 0) {
838	xmlCleanURI(uri);
839	return(1);
840    }
841    return(0);
842}
843
844
845/**
846 * xmlParse3986URI:
847 * @uri:  pointer to an URI structure
848 * @str:  the string to analyze
849 *
850 * Parse an URI string and fills in the appropriate fields
851 * of the @uri structure
852 *
853 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
854 *
855 * Returns 0 or the error code
856 */
857static int
858xmlParse3986URI(xmlURIPtr uri, const char *str) {
859    int ret;
860
861    ret = xmlParse3986Scheme(uri, &str);
862    if (ret != 0) return(ret);
863    if (*str != ':') {
864	return(1);
865    }
866    str++;
867    ret = xmlParse3986HierPart(uri, &str);
868    if (ret != 0) return(ret);
869    if (*str == '?') {
870	str++;
871	ret = xmlParse3986Query(uri, &str);
872	if (ret != 0) return(ret);
873    }
874    if (*str == '#') {
875	str++;
876	ret = xmlParse3986Fragment(uri, &str);
877	if (ret != 0) return(ret);
878    }
879    if (*str != 0) {
880	xmlCleanURI(uri);
881	return(1);
882    }
883    return(0);
884}
885
886/**
887 * xmlParse3986URIReference:
888 * @uri:  pointer to an URI structure
889 * @str:  the string to analyze
890 *
891 * Parse an URI reference string and fills in the appropriate fields
892 * of the @uri structure
893 *
894 * URI-reference = URI / relative-ref
895 *
896 * Returns 0 or the error code
897 */
898static int
899xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
900    int ret;
901
902    if (str == NULL)
903	return(-1);
904    xmlCleanURI(uri);
905
906    /*
907     * Try first to parse absolute refs, then fallback to relative if
908     * it fails.
909     */
910    ret = xmlParse3986URI(uri, str);
911    if (ret != 0) {
912	xmlCleanURI(uri);
913        ret = xmlParse3986RelativeRef(uri, str);
914	if (ret != 0) {
915	    xmlCleanURI(uri);
916	    return(ret);
917	}
918    }
919    return(0);
920}
921
922/**
923 * xmlParseURI:
924 * @str:  the URI string to analyze
925 *
926 * Parse an URI based on RFC 3986
927 *
928 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
929 *
930 * Returns a newly built xmlURIPtr or NULL in case of error
931 */
932xmlURIPtr
933xmlParseURI(const char *str) {
934    xmlURIPtr uri;
935    int ret;
936
937    if (str == NULL)
938	return(NULL);
939    uri = xmlCreateURI();
940    if (uri != NULL) {
941	ret = xmlParse3986URIReference(uri, str);
942        if (ret) {
943	    xmlFreeURI(uri);
944	    return(NULL);
945	}
946    }
947    return(uri);
948}
949
950/**
951 * xmlParseURIReference:
952 * @uri:  pointer to an URI structure
953 * @str:  the string to analyze
954 *
955 * Parse an URI reference string based on RFC 3986 and fills in the
956 * appropriate fields of the @uri structure
957 *
958 * URI-reference = URI / relative-ref
959 *
960 * Returns 0 or the error code
961 */
962int
963xmlParseURIReference(xmlURIPtr uri, const char *str) {
964    return(xmlParse3986URIReference(uri, str));
965}
966
967/**
968 * xmlParseURIRaw:
969 * @str:  the URI string to analyze
970 * @raw:  if 1 unescaping of URI pieces are disabled
971 *
972 * Parse an URI but allows to keep intact the original fragments.
973 *
974 * URI-reference = URI / relative-ref
975 *
976 * Returns a newly built xmlURIPtr or NULL in case of error
977 */
978xmlURIPtr
979xmlParseURIRaw(const char *str, int raw) {
980    xmlURIPtr uri;
981    int ret;
982
983    if (str == NULL)
984	return(NULL);
985    uri = xmlCreateURI();
986    if (uri != NULL) {
987        if (raw) {
988	    uri->cleanup |= 2;
989	}
990	ret = xmlParseURIReference(uri, str);
991        if (ret) {
992	    xmlFreeURI(uri);
993	    return(NULL);
994	}
995    }
996    return(uri);
997}
998
999/************************************************************************
1000 *									*
1001 *			Generic URI structure functions			*
1002 *									*
1003 ************************************************************************/
1004
1005/**
1006 * xmlCreateURI:
1007 *
1008 * Simply creates an empty xmlURI
1009 *
1010 * Returns the new structure or NULL in case of error
1011 */
1012xmlURIPtr
1013xmlCreateURI(void) {
1014    xmlURIPtr ret;
1015
1016    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1017    if (ret == NULL) {
1018        xmlURIErrMemory("creating URI structure\n");
1019	return(NULL);
1020    }
1021    memset(ret, 0, sizeof(xmlURI));
1022    return(ret);
1023}
1024
1025/**
1026 * xmlSaveUriRealloc:
1027 *
1028 * Function to handle properly a reallocation when saving an URI
1029 * Also imposes some limit on the length of an URI string output
1030 */
1031static xmlChar *
1032xmlSaveUriRealloc(xmlChar *ret, int *max) {
1033    xmlChar *temp;
1034    int tmp;
1035
1036    if (*max > MAX_URI_LENGTH) {
1037        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1038        return(NULL);
1039    }
1040    tmp = *max * 2;
1041    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1042    if (temp == NULL) {
1043        xmlURIErrMemory("saving URI\n");
1044        return(NULL);
1045    }
1046    *max = tmp;
1047    return(temp);
1048}
1049
1050/**
1051 * xmlSaveUri:
1052 * @uri:  pointer to an xmlURI
1053 *
1054 * Save the URI as an escaped string
1055 *
1056 * Returns a new string (to be deallocated by caller)
1057 */
1058xmlChar *
1059xmlSaveUri(xmlURIPtr uri) {
1060    xmlChar *ret = NULL;
1061    xmlChar *temp;
1062    const char *p;
1063    int len;
1064    int max;
1065
1066    if (uri == NULL) return(NULL);
1067
1068
1069    max = 80;
1070    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1071    if (ret == NULL) {
1072        xmlURIErrMemory("saving URI\n");
1073	return(NULL);
1074    }
1075    len = 0;
1076
1077    if (uri->scheme != NULL) {
1078	p = uri->scheme;
1079	while (*p != 0) {
1080	    if (len >= max) {
1081                temp = xmlSaveUriRealloc(ret, &max);
1082                if (temp == NULL) goto mem_error;
1083		ret = temp;
1084	    }
1085	    ret[len++] = *p++;
1086	}
1087	if (len >= max) {
1088            temp = xmlSaveUriRealloc(ret, &max);
1089            if (temp == NULL) goto mem_error;
1090            ret = temp;
1091	}
1092	ret[len++] = ':';
1093    }
1094    if (uri->opaque != NULL) {
1095	p = uri->opaque;
1096	while (*p != 0) {
1097	    if (len + 3 >= max) {
1098                temp = xmlSaveUriRealloc(ret, &max);
1099                if (temp == NULL) goto mem_error;
1100                ret = temp;
1101	    }
1102	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1103		ret[len++] = *p++;
1104	    else {
1105		int val = *(unsigned char *)p++;
1106		int hi = val / 0x10, lo = val % 0x10;
1107		ret[len++] = '%';
1108		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1109		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1110	    }
1111	}
1112    } else {
1113	if ((uri->server != NULL) || (uri->port == -1)) {
1114	    if (len + 3 >= max) {
1115                temp = xmlSaveUriRealloc(ret, &max);
1116                if (temp == NULL) goto mem_error;
1117                ret = temp;
1118	    }
1119	    ret[len++] = '/';
1120	    ret[len++] = '/';
1121	    if (uri->user != NULL) {
1122		p = uri->user;
1123		while (*p != 0) {
1124		    if (len + 3 >= max) {
1125                        temp = xmlSaveUriRealloc(ret, &max);
1126                        if (temp == NULL) goto mem_error;
1127                        ret = temp;
1128		    }
1129		    if ((IS_UNRESERVED(*(p))) ||
1130			((*(p) == ';')) || ((*(p) == ':')) ||
1131			((*(p) == '&')) || ((*(p) == '=')) ||
1132			((*(p) == '+')) || ((*(p) == '$')) ||
1133			((*(p) == ',')))
1134			ret[len++] = *p++;
1135		    else {
1136			int val = *(unsigned char *)p++;
1137			int hi = val / 0x10, lo = val % 0x10;
1138			ret[len++] = '%';
1139			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1140			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1141		    }
1142		}
1143		if (len + 3 >= max) {
1144                    temp = xmlSaveUriRealloc(ret, &max);
1145                    if (temp == NULL) goto mem_error;
1146                    ret = temp;
1147		}
1148		ret[len++] = '@';
1149	    }
1150	    if (uri->server != NULL) {
1151		p = uri->server;
1152		while (*p != 0) {
1153		    if (len >= max) {
1154			temp = xmlSaveUriRealloc(ret, &max);
1155			if (temp == NULL) goto mem_error;
1156			ret = temp;
1157		    }
1158		    ret[len++] = *p++;
1159		}
1160		if (uri->port > 0) {
1161		    if (len + 10 >= max) {
1162			temp = xmlSaveUriRealloc(ret, &max);
1163			if (temp == NULL) goto mem_error;
1164			ret = temp;
1165		    }
1166		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1167		}
1168	    }
1169	} else if (uri->authority != NULL) {
1170	    if (len + 3 >= max) {
1171                temp = xmlSaveUriRealloc(ret, &max);
1172                if (temp == NULL) goto mem_error;
1173                ret = temp;
1174	    }
1175	    ret[len++] = '/';
1176	    ret[len++] = '/';
1177	    p = uri->authority;
1178	    while (*p != 0) {
1179		if (len + 3 >= max) {
1180                    temp = xmlSaveUriRealloc(ret, &max);
1181                    if (temp == NULL) goto mem_error;
1182                    ret = temp;
1183		}
1184		if ((IS_UNRESERVED(*(p))) ||
1185                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1186                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1187                    ((*(p) == '=')) || ((*(p) == '+')))
1188		    ret[len++] = *p++;
1189		else {
1190		    int val = *(unsigned char *)p++;
1191		    int hi = val / 0x10, lo = val % 0x10;
1192		    ret[len++] = '%';
1193		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1194		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1195		}
1196	    }
1197	} else if (uri->scheme != NULL) {
1198	    if (len + 3 >= max) {
1199                temp = xmlSaveUriRealloc(ret, &max);
1200                if (temp == NULL) goto mem_error;
1201                ret = temp;
1202	    }
1203	}
1204	if (uri->path != NULL) {
1205	    p = uri->path;
1206	    /*
1207	     * the colon in file:///d: should not be escaped or
1208	     * Windows accesses fail later.
1209	     */
1210	    if ((uri->scheme != NULL) &&
1211		(p[0] == '/') &&
1212		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1213		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1214		(p[2] == ':') &&
1215	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1216		if (len + 3 >= max) {
1217                    temp = xmlSaveUriRealloc(ret, &max);
1218                    if (temp == NULL) goto mem_error;
1219                    ret = temp;
1220		}
1221		ret[len++] = *p++;
1222		ret[len++] = *p++;
1223		ret[len++] = *p++;
1224	    }
1225	    while (*p != 0) {
1226		if (len + 3 >= max) {
1227                    temp = xmlSaveUriRealloc(ret, &max);
1228                    if (temp == NULL) goto mem_error;
1229                    ret = temp;
1230		}
1231		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1232                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1233	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1234	            ((*(p) == ',')))
1235		    ret[len++] = *p++;
1236		else {
1237		    int val = *(unsigned char *)p++;
1238		    int hi = val / 0x10, lo = val % 0x10;
1239		    ret[len++] = '%';
1240		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1241		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1242		}
1243	    }
1244	}
1245	if (uri->query_raw != NULL) {
1246	    if (len + 1 >= max) {
1247                temp = xmlSaveUriRealloc(ret, &max);
1248                if (temp == NULL) goto mem_error;
1249                ret = temp;
1250	    }
1251	    ret[len++] = '?';
1252	    p = uri->query_raw;
1253	    while (*p != 0) {
1254		if (len + 1 >= max) {
1255                    temp = xmlSaveUriRealloc(ret, &max);
1256                    if (temp == NULL) goto mem_error;
1257                    ret = temp;
1258		}
1259		ret[len++] = *p++;
1260	    }
1261	} else if (uri->query != NULL) {
1262	    if (len + 3 >= max) {
1263                temp = xmlSaveUriRealloc(ret, &max);
1264                if (temp == NULL) goto mem_error;
1265                ret = temp;
1266	    }
1267	    ret[len++] = '?';
1268	    p = uri->query;
1269	    while (*p != 0) {
1270		if (len + 3 >= max) {
1271                    temp = xmlSaveUriRealloc(ret, &max);
1272                    if (temp == NULL) goto mem_error;
1273                    ret = temp;
1274		}
1275		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1276		    ret[len++] = *p++;
1277		else {
1278		    int val = *(unsigned char *)p++;
1279		    int hi = val / 0x10, lo = val % 0x10;
1280		    ret[len++] = '%';
1281		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1282		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1283		}
1284	    }
1285	}
1286    }
1287    if (uri->fragment != NULL) {
1288	if (len + 3 >= max) {
1289            temp = xmlSaveUriRealloc(ret, &max);
1290            if (temp == NULL) goto mem_error;
1291            ret = temp;
1292	}
1293	ret[len++] = '#';
1294	p = uri->fragment;
1295	while (*p != 0) {
1296	    if (len + 3 >= max) {
1297                temp = xmlSaveUriRealloc(ret, &max);
1298                if (temp == NULL) goto mem_error;
1299                ret = temp;
1300	    }
1301	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1302		ret[len++] = *p++;
1303	    else {
1304		int val = *(unsigned char *)p++;
1305		int hi = val / 0x10, lo = val % 0x10;
1306		ret[len++] = '%';
1307		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1308		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1309	    }
1310	}
1311    }
1312    if (len >= max) {
1313        temp = xmlSaveUriRealloc(ret, &max);
1314        if (temp == NULL) goto mem_error;
1315        ret = temp;
1316    }
1317    ret[len] = 0;
1318    return(ret);
1319
1320mem_error:
1321    xmlFree(ret);
1322    return(NULL);
1323}
1324
1325/**
1326 * xmlPrintURI:
1327 * @stream:  a FILE* for the output
1328 * @uri:  pointer to an xmlURI
1329 *
1330 * Prints the URI in the stream @stream.
1331 */
1332void
1333xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1334    xmlChar *out;
1335
1336    out = xmlSaveUri(uri);
1337    if (out != NULL) {
1338	fprintf(stream, "%s", (char *) out);
1339	xmlFree(out);
1340    }
1341}
1342
1343/**
1344 * xmlCleanURI:
1345 * @uri:  pointer to an xmlURI
1346 *
1347 * Make sure the xmlURI struct is free of content
1348 */
1349static void
1350xmlCleanURI(xmlURIPtr uri) {
1351    if (uri == NULL) return;
1352
1353    if (uri->scheme != NULL) xmlFree(uri->scheme);
1354    uri->scheme = NULL;
1355    if (uri->server != NULL) xmlFree(uri->server);
1356    uri->server = NULL;
1357    if (uri->user != NULL) xmlFree(uri->user);
1358    uri->user = NULL;
1359    if (uri->path != NULL) xmlFree(uri->path);
1360    uri->path = NULL;
1361    if (uri->fragment != NULL) xmlFree(uri->fragment);
1362    uri->fragment = NULL;
1363    if (uri->opaque != NULL) xmlFree(uri->opaque);
1364    uri->opaque = NULL;
1365    if (uri->authority != NULL) xmlFree(uri->authority);
1366    uri->authority = NULL;
1367    if (uri->query != NULL) xmlFree(uri->query);
1368    uri->query = NULL;
1369    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1370    uri->query_raw = NULL;
1371}
1372
1373/**
1374 * xmlFreeURI:
1375 * @uri:  pointer to an xmlURI
1376 *
1377 * Free up the xmlURI struct
1378 */
1379void
1380xmlFreeURI(xmlURIPtr uri) {
1381    if (uri == NULL) return;
1382
1383    if (uri->scheme != NULL) xmlFree(uri->scheme);
1384    if (uri->server != NULL) xmlFree(uri->server);
1385    if (uri->user != NULL) xmlFree(uri->user);
1386    if (uri->path != NULL) xmlFree(uri->path);
1387    if (uri->fragment != NULL) xmlFree(uri->fragment);
1388    if (uri->opaque != NULL) xmlFree(uri->opaque);
1389    if (uri->authority != NULL) xmlFree(uri->authority);
1390    if (uri->query != NULL) xmlFree(uri->query);
1391    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1392    xmlFree(uri);
1393}
1394
1395/************************************************************************
1396 *									*
1397 *			Helper functions				*
1398 *									*
1399 ************************************************************************/
1400
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" inside the path are
 * collapsed to a single "/" as part of the first pass; leading "/"
 * characters are left untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /*
     * Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * Invariant: "cur" sits on the first character of a segment and
     * "out" never runs ahead of "cur".
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while ((cur[0] != '/') && (cur[0] != '\0'))
            (out++)[0] = (cur++)[0];
        if (cur[0] == '\0')
            break;

        /* normalize "//": keep only the last slash of a run */
        while (cur[1] == '/')
            cur++;
        (out++)[0] = (cur++)[0];
    }
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /*
         * At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /*
         * If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /*
         * If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /*
         * If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }

        /* Source and destination overlap: memmove, never strcpy. */
        segp += 3;
        memmove(cur, segp, strlen(segp) + 1);

        /* If there are no previous segments, then keep going from here.  */
        if (cur == path)
            continue;

        /*
         * "cur" starts a segment, so cur[-1] is a '/'.  Walk back over the
         * separator(s).  If we reach the start of the buffer and are still
         * on a '/', only leading slashes precede "cur" and there is no
         * previous segment.  Testing the character (rather than the
         * position) matters: a one-character first segment such as the
         * "a" in "a/b/../.." also ends at "path" and must be revisited.
         */
        segp = cur - 1;
        while ((segp > path) && (segp[0] == '/'))
            --segp;
        if (segp[0] == '/')
            continue;

        /*
         * "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path)
            memmove(path, cur, strlen(cur) + 1);
    }

    return(0);
}
1589
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1597
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI.
 * Every "%XX" sequence with two hexadecimal digits is replaced by the
 * single byte it encodes (no character-encoding conversion is done);
 * all other bytes, including malformed or truncated escapes, are copied
 * unchanged. The result is therefore never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result; otherwise the result is written into @target.
 *
 * Returns the unescaped, NUL-terminated string (@target or the new
 * allocation), or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    out = ret;
    for (in = str; len > 0; ) {
        /* An escape needs all three bytes to lie within the length. */
        if ((len > 2) && (in[0] == '%') &&
            (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* Fold the two hex digits, high nibble first. */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];
                int nibble;

                if ((digit >= '0') && (digit <= '9'))
                    nibble = digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    nibble = (digit - 'a') + 10;
                else
                    nibble = (digit - 'A') + 10;
                value = value * 16 + nibble;
            }
            *out++ = value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1659
1660/**
1661 * xmlURIEscapeStr:
1662 * @str:  string to escape
1663 * @list: exception list string of chars not to escape
1664 *
1665 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1666 * and the characters in the exception list.
1667 *
1668 * Returns a new escaped string or NULL in case of error.
1669 */
1670xmlChar *
1671xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1672    xmlChar *ret, ch;
1673    xmlChar *temp;
1674    const xmlChar *in;
1675    int len, out;
1676
1677    if (str == NULL)
1678	return(NULL);
1679    if (str[0] == 0)
1680	return(xmlStrdup(str));
1681    len = xmlStrlen(str);
1682    if (!(len > 0)) return(NULL);
1683
1684    len += 20;
1685    ret = (xmlChar *) xmlMallocAtomic(len);
1686    if (ret == NULL) {
1687        xmlURIErrMemory("escaping URI value\n");
1688	return(NULL);
1689    }
1690    in = (const xmlChar *) str;
1691    out = 0;
1692    while(*in != 0) {
1693	if (len - out <= 3) {
1694            temp = xmlSaveUriRealloc(ret, &len);
1695	    if (temp == NULL) {
1696                xmlURIErrMemory("escaping URI value\n");
1697		xmlFree(ret);
1698		return(NULL);
1699	    }
1700	    ret = temp;
1701	}
1702
1703	ch = *in;
1704
1705	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1706	    unsigned char val;
1707	    ret[out++] = '%';
1708	    val = ch >> 4;
1709	    if (val <= 9)
1710		ret[out++] = '0' + val;
1711	    else
1712		ret[out++] = 'A' + val - 0xA;
1713	    val = ch & 0xF;
1714	    if (val <= 9)
1715		ret[out++] = '0' + val;
1716	    else
1717		ret[out++] = 'A' + val - 0xA;
1718	    in++;
1719	} else {
1720	    ret[out++] = *in++;
1721	}
1722
1723    }
1724    ret[out] = 0;
1725    return(ret);
1726}
1727
1728/**
1729 * xmlURIEscape:
1730 * @str:  the string of the URI to escape
1731 *
1732 * Escaping routine, does not do validity checks !
1733 * It will try to escape the chars needing this, but this is heuristic
1734 * based it's impossible to be sure.
1735 *
1736 * Returns an copy of the string, but escaped
1737 *
1738 * 25 May 2001
1739 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1740 * according to RFC2396.
1741 *   - Carl Douglas
1742 */
1743xmlChar *
1744xmlURIEscape(const xmlChar * str)
1745{
1746    xmlChar *ret, *segment = NULL;
1747    xmlURIPtr uri;
1748    int ret2;
1749
1750#define NULLCHK(p) if(!p) { \
1751         xmlURIErrMemory("escaping URI value\n"); \
1752         xmlFreeURI(uri); \
1753         return NULL; } \
1754
1755    if (str == NULL)
1756        return (NULL);
1757
1758    uri = xmlCreateURI();
1759    if (uri != NULL) {
1760	/*
1761	 * Allow escaping errors in the unescaped form
1762	 */
1763        uri->cleanup = 1;
1764        ret2 = xmlParseURIReference(uri, (const char *)str);
1765        if (ret2) {
1766            xmlFreeURI(uri);
1767            return (NULL);
1768        }
1769    }
1770
1771    if (!uri)
1772        return NULL;
1773
1774    ret = NULL;
1775
1776    if (uri->scheme) {
1777        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1778        NULLCHK(segment)
1779        ret = xmlStrcat(ret, segment);
1780        ret = xmlStrcat(ret, BAD_CAST ":");
1781        xmlFree(segment);
1782    }
1783
1784    if (uri->authority) {
1785        segment =
1786            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1787        NULLCHK(segment)
1788        ret = xmlStrcat(ret, BAD_CAST "//");
1789        ret = xmlStrcat(ret, segment);
1790        xmlFree(segment);
1791    }
1792
1793    if (uri->user) {
1794        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1795        NULLCHK(segment)
1796		ret = xmlStrcat(ret,BAD_CAST "//");
1797        ret = xmlStrcat(ret, segment);
1798        ret = xmlStrcat(ret, BAD_CAST "@");
1799        xmlFree(segment);
1800    }
1801
1802    if (uri->server) {
1803        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1804        NULLCHK(segment)
1805		if (uri->user == NULL)
1806		ret = xmlStrcat(ret, BAD_CAST "//");
1807        ret = xmlStrcat(ret, segment);
1808        xmlFree(segment);
1809    }
1810
1811    if (uri->port) {
1812        xmlChar port[10];
1813
1814        snprintf((char *) port, 10, "%d", uri->port);
1815        ret = xmlStrcat(ret, BAD_CAST ":");
1816        ret = xmlStrcat(ret, port);
1817    }
1818
1819    if (uri->path) {
1820        segment =
1821            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1822        NULLCHK(segment)
1823        ret = xmlStrcat(ret, segment);
1824        xmlFree(segment);
1825    }
1826
1827    if (uri->query_raw) {
1828        ret = xmlStrcat(ret, BAD_CAST "?");
1829        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1830    }
1831    else if (uri->query) {
1832        segment =
1833            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1834        NULLCHK(segment)
1835        ret = xmlStrcat(ret, BAD_CAST "?");
1836        ret = xmlStrcat(ret, segment);
1837        xmlFree(segment);
1838    }
1839
1840    if (uri->opaque) {
1841        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1842        NULLCHK(segment)
1843        ret = xmlStrcat(ret, segment);
1844        xmlFree(segment);
1845    }
1846
1847    if (uri->fragment) {
1848        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1849        NULLCHK(segment)
1850        ret = xmlStrcat(ret, BAD_CAST "#");
1851        ret = xmlStrcat(ret, segment);
1852        xmlFree(segment);
1853    }
1854
1855    xmlFreeURI(uri);
1856#undef NULLCHK
1857
1858    return (ret);
1859}
1860
1861/************************************************************************
1862 *									*
1863 *			Public functions				*
1864 *									*
1865 ************************************************************************/
1866
1867/**
1868 * xmlBuildURI:
1869 * @URI:  the URI instance found in the document
1870 * @base:  the base value
1871 *
1872 * Computes he final URI of the reference done by checking that
1873 * the given URI is valid, and building the final URI using the
1874 * base URI. This is processed according to section 5.2 of the
1875 * RFC 2396
1876 *
1877 * 5.2. Resolving Relative References to Absolute Form
1878 *
1879 * Returns a new URI string (to be freed by the caller) or NULL in case
1880 *         of error.
1881 */
1882xmlChar *
1883xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1884    xmlChar *val = NULL;
1885    int ret, len, indx, cur, out;
1886    xmlURIPtr ref = NULL;
1887    xmlURIPtr bas = NULL;
1888    xmlURIPtr res = NULL;
1889
1890    /*
1891     * 1) The URI reference is parsed into the potential four components and
1892     *    fragment identifier, as described in Section 4.3.
1893     *
1894     *    NOTE that a completely empty URI is treated by modern browsers
1895     *    as a reference to "." rather than as a synonym for the current
1896     *    URI.  Should we do that here?
1897     */
1898    if (URI == NULL)
1899	ret = -1;
1900    else {
1901	if (*URI) {
1902	    ref = xmlCreateURI();
1903	    if (ref == NULL)
1904		goto done;
1905	    ret = xmlParseURIReference(ref, (const char *) URI);
1906	}
1907	else
1908	    ret = 0;
1909    }
1910    if (ret != 0)
1911	goto done;
1912    if ((ref != NULL) && (ref->scheme != NULL)) {
1913	/*
1914	 * The URI is absolute don't modify.
1915	 */
1916	val = xmlStrdup(URI);
1917	goto done;
1918    }
1919    if (base == NULL)
1920	ret = -1;
1921    else {
1922	bas = xmlCreateURI();
1923	if (bas == NULL)
1924	    goto done;
1925	ret = xmlParseURIReference(bas, (const char *) base);
1926    }
1927    if (ret != 0) {
1928	if (ref)
1929	    val = xmlSaveUri(ref);
1930	goto done;
1931    }
1932    if (ref == NULL) {
1933	/*
1934	 * the base fragment must be ignored
1935	 */
1936	if (bas->fragment != NULL) {
1937	    xmlFree(bas->fragment);
1938	    bas->fragment = NULL;
1939	}
1940	val = xmlSaveUri(bas);
1941	goto done;
1942    }
1943
1944    /*
1945     * 2) If the path component is empty and the scheme, authority, and
1946     *    query components are undefined, then it is a reference to the
1947     *    current document and we are done.  Otherwise, the reference URI's
1948     *    query and fragment components are defined as found (or not found)
1949     *    within the URI reference and not inherited from the base URI.
1950     *
1951     *    NOTE that in modern browsers, the parsing differs from the above
1952     *    in the following aspect:  the query component is allowed to be
1953     *    defined while still treating this as a reference to the current
1954     *    document.
1955     */
1956    res = xmlCreateURI();
1957    if (res == NULL)
1958	goto done;
1959    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1960	((ref->authority == NULL) && (ref->server == NULL))) {
1961	if (bas->scheme != NULL)
1962	    res->scheme = xmlMemStrdup(bas->scheme);
1963	if (bas->authority != NULL)
1964	    res->authority = xmlMemStrdup(bas->authority);
1965	else if (bas->server != NULL) {
1966	    res->server = xmlMemStrdup(bas->server);
1967	    if (bas->user != NULL)
1968		res->user = xmlMemStrdup(bas->user);
1969	    res->port = bas->port;
1970	}
1971	if (bas->path != NULL)
1972	    res->path = xmlMemStrdup(bas->path);
1973	if (ref->query_raw != NULL)
1974	    res->query_raw = xmlMemStrdup (ref->query_raw);
1975	else if (ref->query != NULL)
1976	    res->query = xmlMemStrdup(ref->query);
1977	else if (bas->query_raw != NULL)
1978	    res->query_raw = xmlMemStrdup(bas->query_raw);
1979	else if (bas->query != NULL)
1980	    res->query = xmlMemStrdup(bas->query);
1981	if (ref->fragment != NULL)
1982	    res->fragment = xmlMemStrdup(ref->fragment);
1983	goto step_7;
1984    }
1985
1986    /*
1987     * 3) If the scheme component is defined, indicating that the reference
1988     *    starts with a scheme name, then the reference is interpreted as an
1989     *    absolute URI and we are done.  Otherwise, the reference URI's
1990     *    scheme is inherited from the base URI's scheme component.
1991     */
1992    if (ref->scheme != NULL) {
1993	val = xmlSaveUri(ref);
1994	goto done;
1995    }
1996    if (bas->scheme != NULL)
1997	res->scheme = xmlMemStrdup(bas->scheme);
1998
1999    if (ref->query_raw != NULL)
2000	res->query_raw = xmlMemStrdup(ref->query_raw);
2001    else if (ref->query != NULL)
2002	res->query = xmlMemStrdup(ref->query);
2003    if (ref->fragment != NULL)
2004	res->fragment = xmlMemStrdup(ref->fragment);
2005
2006    /*
2007     * 4) If the authority component is defined, then the reference is a
2008     *    network-path and we skip to step 7.  Otherwise, the reference
2009     *    URI's authority is inherited from the base URI's authority
2010     *    component, which will also be undefined if the URI scheme does not
2011     *    use an authority component.
2012     */
2013    if ((ref->authority != NULL) || (ref->server != NULL)) {
2014	if (ref->authority != NULL)
2015	    res->authority = xmlMemStrdup(ref->authority);
2016	else {
2017	    res->server = xmlMemStrdup(ref->server);
2018	    if (ref->user != NULL)
2019		res->user = xmlMemStrdup(ref->user);
2020            res->port = ref->port;
2021	}
2022	if (ref->path != NULL)
2023	    res->path = xmlMemStrdup(ref->path);
2024	goto step_7;
2025    }
2026    if (bas->authority != NULL)
2027	res->authority = xmlMemStrdup(bas->authority);
2028    else if (bas->server != NULL) {
2029	res->server = xmlMemStrdup(bas->server);
2030	if (bas->user != NULL)
2031	    res->user = xmlMemStrdup(bas->user);
2032	res->port = bas->port;
2033    }
2034
2035    /*
2036     * 5) If the path component begins with a slash character ("/"), then
2037     *    the reference is an absolute-path and we skip to step 7.
2038     */
2039    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2040	res->path = xmlMemStrdup(ref->path);
2041	goto step_7;
2042    }
2043
2044
2045    /*
2046     * 6) If this step is reached, then we are resolving a relative-path
2047     *    reference.  The relative path needs to be merged with the base
2048     *    URI's path.  Although there are many ways to do this, we will
2049     *    describe a simple method using a separate string buffer.
2050     *
2051     * Allocate a buffer large enough for the result string.
2052     */
2053    len = 2; /* extra / and 0 */
2054    if (ref->path != NULL)
2055	len += strlen(ref->path);
2056    if (bas->path != NULL)
2057	len += strlen(bas->path);
2058    res->path = (char *) xmlMallocAtomic(len);
2059    if (res->path == NULL) {
2060        xmlURIErrMemory("resolving URI against base\n");
2061	goto done;
2062    }
2063    res->path[0] = 0;
2064
2065    /*
2066     * a) All but the last segment of the base URI's path component is
2067     *    copied to the buffer.  In other words, any characters after the
2068     *    last (right-most) slash character, if any, are excluded.
2069     */
2070    cur = 0;
2071    out = 0;
2072    if (bas->path != NULL) {
2073	while (bas->path[cur] != 0) {
2074	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2075		cur++;
2076	    if (bas->path[cur] == 0)
2077		break;
2078
2079	    cur++;
2080	    while (out < cur) {
2081		res->path[out] = bas->path[out];
2082		out++;
2083	    }
2084	}
2085    }
2086    res->path[out] = 0;
2087
2088    /*
2089     * b) The reference's path component is appended to the buffer
2090     *    string.
2091     */
2092    if (ref->path != NULL && ref->path[0] != 0) {
2093	indx = 0;
2094	/*
2095	 * Ensure the path includes a '/'
2096	 */
2097	if ((out == 0) && (bas->server != NULL))
2098	    res->path[out++] = '/';
2099	while (ref->path[indx] != 0) {
2100	    res->path[out++] = ref->path[indx++];
2101	}
2102    }
2103    res->path[out] = 0;
2104
2105    /*
2106     * Steps c) to h) are really path normalization steps
2107     */
2108    xmlNormalizeURIPath(res->path);
2109
2110step_7:
2111
2112    /*
2113     * 7) The resulting URI components, including any inherited from the
2114     *    base URI, are recombined to give the absolute form of the URI
2115     *    reference.
2116     */
2117    val = xmlSaveUri(res);
2118
2119done:
2120    if (ref != NULL)
2121	xmlFreeURI(ref);
2122    if (bas != NULL)
2123	xmlFreeURI(bas);
2124    if (res != NULL)
2125	xmlFreeURI(res);
2126    return(val);
2127}
2128
2129/**
2130 * xmlBuildRelativeURI:
2131 * @URI:  the URI reference under consideration
2132 * @base:  the base value
2133 *
2134 * Expresses the URI of the reference in terms relative to the
2135 * base.  Some examples of this operation include:
2136 *     base = "http://site1.com/docs/book1.html"
2137 *        URI input                        URI returned
2138 *     docs/pic1.gif                    pic1.gif
2139 *     docs/img/pic1.gif                img/pic1.gif
2140 *     img/pic1.gif                     ../img/pic1.gif
2141 *     http://site1.com/docs/pic1.gif   pic1.gif
2142 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2143 *
2144 *     base = "docs/book1.html"
2145 *        URI input                        URI returned
2146 *     docs/pic1.gif                    pic1.gif
2147 *     docs/img/pic1.gif                img/pic1.gif
2148 *     img/pic1.gif                     ../img/pic1.gif
2149 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2150 *
2151 *
 * Note: if the URI reference is really weird or complicated, it may be
2153 *       worthwhile to first convert it into a "nice" one by calling
2154 *       xmlBuildURI (using 'base') before calling this routine,
2155 *       since this routine (for reasonable efficiency) assumes URI has
2156 *       already been through some validation.
2157 *
 * Returns a new URI string (to be freed by the caller) or NULL in case
 * of error.
2160 */
2161xmlChar *
2162xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2163{
2164    xmlChar *val = NULL;
2165    int ret;
2166    int ix;
2167    int pos = 0;
2168    int nbslash = 0;
2169    int len;
2170    xmlURIPtr ref = NULL;
2171    xmlURIPtr bas = NULL;
2172    xmlChar *bptr, *uptr, *vptr;
2173    int remove_path = 0;
2174
2175    if ((URI == NULL) || (*URI == 0))
2176	return NULL;
2177
2178    /*
2179     * First parse URI into a standard form
2180     */
2181    ref = xmlCreateURI ();
2182    if (ref == NULL)
2183	return NULL;
2184    /* If URI not already in "relative" form */
2185    if (URI[0] != '.') {
2186	ret = xmlParseURIReference (ref, (const char *) URI);
2187	if (ret != 0)
2188	    goto done;		/* Error in URI, return NULL */
2189    } else
2190	ref->path = (char *)xmlStrdup(URI);
2191
2192    /*
2193     * Next parse base into the same standard form
2194     */
2195    if ((base == NULL) || (*base == 0)) {
2196	val = xmlStrdup (URI);
2197	goto done;
2198    }
2199    bas = xmlCreateURI ();
2200    if (bas == NULL)
2201	goto done;
2202    if (base[0] != '.') {
2203	ret = xmlParseURIReference (bas, (const char *) base);
2204	if (ret != 0)
2205	    goto done;		/* Error in base, return NULL */
2206    } else
2207	bas->path = (char *)xmlStrdup(base);
2208
2209    /*
2210     * If the scheme / server on the URI differs from the base,
2211     * just return the URI
2212     */
2213    if ((ref->scheme != NULL) &&
2214	((bas->scheme == NULL) ||
2215	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217	val = xmlStrdup (URI);
2218	goto done;
2219    }
2220    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221	val = xmlStrdup(BAD_CAST "");
2222	goto done;
2223    }
2224    if (bas->path == NULL) {
2225	val = xmlStrdup((xmlChar *)ref->path);
2226	goto done;
2227    }
2228    if (ref->path == NULL) {
2229        ref->path = (char *) "/";
2230	remove_path = 1;
2231    }
2232
2233    /*
2234     * At this point (at last!) we can compare the two paths
2235     *
2236     * First we take care of the special case where either of the
2237     * two path components may be missing (bug 316224)
2238     */
2239    if (bas->path == NULL) {
2240	if (ref->path != NULL) {
2241	    uptr = (xmlChar *) ref->path;
2242	    if (*uptr == '/')
2243		uptr++;
2244	    /* exception characters from xmlSaveUri */
2245	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246	}
2247	goto done;
2248    }
2249    bptr = (xmlChar *)bas->path;
2250    if (ref->path == NULL) {
2251	for (ix = 0; bptr[ix] != 0; ix++) {
2252	    if (bptr[ix] == '/')
2253		nbslash++;
2254	}
2255	uptr = NULL;
2256	len = 1;	/* this is for a string terminator only */
2257    } else {
2258    /*
2259     * Next we compare the two strings and find where they first differ
2260     */
2261	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2262            pos += 2;
2263	if ((*bptr == '.') && (bptr[1] == '/'))
2264            bptr += 2;
2265	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2266	    bptr++;
2267	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2268	    pos++;
2269
2270	if (bptr[pos] == ref->path[pos]) {
2271	    val = xmlStrdup(BAD_CAST "");
2272	    goto done;		/* (I can't imagine why anyone would do this) */
2273	}
2274
2275	/*
2276	 * In URI, "back up" to the last '/' encountered.  This will be the
2277	 * beginning of the "unique" suffix of URI
2278	 */
2279	ix = pos;
2280	if ((ref->path[ix] == '/') && (ix > 0))
2281	    ix--;
2282	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2283	    ix -= 2;
2284	for (; ix > 0; ix--) {
2285	    if (ref->path[ix] == '/')
2286		break;
2287	}
2288	if (ix == 0) {
2289	    uptr = (xmlChar *)ref->path;
2290	} else {
2291	    ix++;
2292	    uptr = (xmlChar *)&ref->path[ix];
2293	}
2294
2295	/*
2296	 * In base, count the number of '/' from the differing point
2297	 */
2298	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2299	    for (; bptr[ix] != 0; ix++) {
2300		if (bptr[ix] == '/')
2301		    nbslash++;
2302	    }
2303	}
2304	len = xmlStrlen (uptr) + 1;
2305    }
2306
2307    if (nbslash == 0) {
2308	if (uptr != NULL)
2309	    /* exception characters from xmlSaveUri */
2310	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2311	goto done;
2312    }
2313
2314    /*
2315     * Allocate just enough space for the returned string -
2316     * length of the remainder of the URI, plus enough space
2317     * for the "../" groups, plus one for the terminator
2318     */
2319    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2320    if (val == NULL) {
2321        xmlURIErrMemory("building relative URI\n");
2322	goto done;
2323    }
2324    vptr = val;
2325    /*
2326     * Put in as many "../" as needed
2327     */
2328    for (; nbslash>0; nbslash--) {
2329	*vptr++ = '.';
2330	*vptr++ = '.';
2331	*vptr++ = '/';
2332    }
2333    /*
2334     * Finish up with the end of the URI
2335     */
2336    if (uptr != NULL) {
2337        if ((vptr > val) && (len > 0) &&
2338	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2339	    memcpy (vptr, uptr + 1, len - 1);
2340	    vptr[len - 2] = 0;
2341	} else {
2342	    memcpy (vptr, uptr, len);
2343	    vptr[len - 1] = 0;
2344	}
2345    } else {
2346	vptr[len - 1] = 0;
2347    }
2348
2349    /* escape the freshly-built path */
2350    vptr = val;
2351	/* exception characters from xmlSaveUri */
2352    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2353    xmlFree(vptr);
2354
2355done:
2356    /*
2357     * Free the working variables
2358     */
2359    if (remove_path != 0)
2360        ref->path = NULL;
2361    if (ref != NULL)
2362	xmlFreeURI (ref);
2363    if (bas != NULL)
2364	xmlFreeURI (bas);
2365
2366    return val;
2367}
2368
2369/**
2370 * xmlCanonicPath:
2371 * @path:  the resource locator in a filesystem notation
2372 *
2373 * Constructs a canonic path from the specified path.
2374 *
2375 * Returns a new canonic path, or a duplicate of the path parameter if the
2376 * construction fails. The caller is responsible for freeing the memory occupied
2377 * by the returned string. If there is insufficient memory available, or the
2378 * argument is NULL, the function returns NULL.
2379 */
2380#define IS_WINDOWS_PATH(p)					\
2381	((p != NULL) &&						\
2382	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2383	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2384	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2385xmlChar *
2386xmlCanonicPath(const xmlChar *path)
2387{
2388/*
2389 * For Windows implementations, additional work needs to be done to
2390 * replace backslashes in pathnames with "forward slashes"
2391 */
2392#if defined(_WIN32) && !defined(__CYGWIN__)
2393    int len = 0;
2394    int i = 0;
2395    xmlChar *p = NULL;
2396#endif
2397    xmlURIPtr uri;
2398    xmlChar *ret;
2399    const xmlChar *absuri;
2400
2401    if (path == NULL)
2402	return(NULL);
2403
2404#if defined(_WIN32)
2405    /*
2406     * We must not change the backslashes to slashes if the the path
2407     * starts with \\?\
2408     * Those paths can be up to 32k characters long.
2409     * Was added specifically for OpenOffice, those paths can't be converted
2410     * to URIs anyway.
2411     */
2412    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2413        (path[3] == '\\') )
2414	return xmlStrdup((const xmlChar *) path);
2415#endif
2416
2417	/* sanitize filename starting with // so it can be used as URI */
2418    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2419        path++;
2420
2421    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2422	xmlFreeURI(uri);
2423	return xmlStrdup(path);
2424    }
2425
2426    /* Check if this is an "absolute uri" */
2427    absuri = xmlStrstr(path, BAD_CAST "://");
2428    if (absuri != NULL) {
2429        int l, j;
2430	unsigned char c;
2431	xmlChar *escURI;
2432
2433        /*
2434	 * this looks like an URI where some parts have not been
2435	 * escaped leading to a parsing problem.  Check that the first
2436	 * part matches a protocol.
2437	 */
2438	l = absuri - path;
2439	/* Bypass if first part (part before the '://') is > 20 chars */
2440	if ((l <= 0) || (l > 20))
2441	    goto path_processing;
2442	/* Bypass if any non-alpha characters are present in first part */
2443	for (j = 0;j < l;j++) {
2444	    c = path[j];
2445	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2446	        goto path_processing;
2447	}
2448
2449	/* Escape all except the characters specified in the supplied path */
2450        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2451	if (escURI != NULL) {
2452	    /* Try parsing the escaped path */
2453	    uri = xmlParseURI((const char *) escURI);
2454	    /* If successful, return the escaped string */
2455	    if (uri != NULL) {
2456	        xmlFreeURI(uri);
2457		return escURI;
2458	    }
2459	}
2460    }
2461
2462path_processing:
2463/* For Windows implementations, replace backslashes with 'forward slashes' */
2464#if defined(_WIN32) && !defined(__CYGWIN__)
2465    /*
2466     * Create a URI structure
2467     */
2468    uri = xmlCreateURI();
2469    if (uri == NULL) {		/* Guard against 'out of memory' */
2470        return(NULL);
2471    }
2472
2473    len = xmlStrlen(path);
2474    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2475        /* make the scheme 'file' */
2476	uri->scheme = xmlStrdup(BAD_CAST "file");
2477	/* allocate space for leading '/' + path + string terminator */
2478	uri->path = xmlMallocAtomic(len + 2);
2479	if (uri->path == NULL) {
2480	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2481	    return(NULL);
2482	}
2483	/* Put in leading '/' plus path */
2484	uri->path[0] = '/';
2485	p = uri->path + 1;
2486	strncpy(p, path, len + 1);
2487    } else {
2488	uri->path = xmlStrdup(path);
2489	if (uri->path == NULL) {
2490	    xmlFreeURI(uri);
2491	    return(NULL);
2492	}
2493	p = uri->path;
2494    }
2495    /* Now change all occurences of '\' to '/' */
2496    while (*p != '\0') {
2497	if (*p == '\\')
2498	    *p = '/';
2499	p++;
2500    }
2501
2502    if (uri->scheme == NULL) {
2503	ret = xmlStrdup((const xmlChar *) uri->path);
2504    } else {
2505	ret = xmlSaveUri(uri);
2506    }
2507
2508    xmlFreeURI(uri);
2509#else
2510    ret = xmlStrdup((const xmlChar *) path);
2511#endif
2512    return(ret);
2513}
2514
2515/**
2516 * xmlPathToURI:
2517 * @path:  the resource locator in a filesystem notation
2518 *
2519 * Constructs an URI expressing the existing path
2520 *
2521 * Returns a new URI, or a duplicate of the path parameter if the
2522 * construction fails. The caller is responsible for freeing the memory
2523 * occupied by the returned string. If there is insufficient memory available,
2524 * or the argument is NULL, the function returns NULL.
2525 */
2526xmlChar *
2527xmlPathToURI(const xmlChar *path)
2528{
2529    xmlURIPtr uri;
2530    xmlURI temp;
2531    xmlChar *ret, *cal;
2532
2533    if (path == NULL)
2534        return(NULL);
2535
2536    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2537	xmlFreeURI(uri);
2538	return xmlStrdup(path);
2539    }
2540    cal = xmlCanonicPath(path);
2541    if (cal == NULL)
2542        return(NULL);
2543#if defined(_WIN32) && !defined(__CYGWIN__)
2544    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2545       If 'cal' is a valid URI allready then we are done here, as continuing would make
2546       it invalid. */
2547    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2548	xmlFreeURI(uri);
2549	return cal;
2550    }
2551    /* 'cal' can contain a relative path with backslashes. If that is processed
2552       by xmlSaveURI, they will be escaped and the external entity loader machinery
2553       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2554    ret = cal;
2555    while (*ret != '\0') {
2556	if (*ret == '\\')
2557	    *ret = '/';
2558	ret++;
2559    }
2560#endif
2561    memset(&temp, 0, sizeof(temp));
2562    temp.path = (char *) cal;
2563    ret = xmlSaveUri(&temp);
2564    xmlFree(cal);
2565    return(ret);
2566}
2567#define bottom_uri
2568#include "elfgcchack.h"
2569