uri.c revision 3c908dca479ed50dca24b8593bca90e40dbde6b8
1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
12#include "libxml.h"
13
14#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
18#include <libxml/globals.h>
19#include <libxml/xmlerror.h>
20
21/************************************************************************
22 *									*
23 *		Macros to differentiate various character type		*
24 *			directly extracted from RFC 2396		*
25 *									*
26 ************************************************************************/
27
/*
 * Character classification macros, transcribed from the RFC 2396 grammar.
 *
 * Macros taking "x" classify a single character value; macros taking "p"
 * take a pointer into a zero terminated string because they may need to
 * look at the two characters following a "%".
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
	((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The && chain stops at the first mismatch, so the string terminator is
 * never stepped over.
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                        "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	        ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	        ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
	        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
	        ((*(p) == ',')))

/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
          ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	  ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	  ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * Matches the characters allowed after the leading alpha.
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
       ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	            (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Advance the pointer lvalue "p" past one URI character: three bytes when
 * it sits on a "%" (escape sequence), one byte otherwise.
 *
 * The argument is fully parenthesized so that any lvalue expression, not
 * only a plain identifier, is advanced correctly.  The macro does not check
 * that two bytes follow the "%"; callers are expected to have matched
 * IS_ESCAPED() first.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
173
174/*
175 * Productions from the spec.
176 *
177 *    authority     = server | reg_name
178 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
179 *                        ";" | ":" | "@" | "&" | "=" | "+" )
180 *
181 * path          = [ abs_path | opaque_part ]
182 */
183
184/************************************************************************
185 *									*
186 *			Generic URI structure functions			*
187 *									*
188 ************************************************************************/
189
190/**
191 * xmlCreateURI:
192 *
193 * Simply creates an empty xmlURI
194 *
195 * Returns the new structure or NULL in case of error
196 */
197xmlURIPtr
198xmlCreateURI(void) {
199    xmlURIPtr ret;
200
201    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
202    if (ret == NULL) {
203	xmlGenericError(xmlGenericErrorContext,
204		"xmlCreateURI: out of memory\n");
205	return(NULL);
206    }
207    memset(ret, 0, sizeof(xmlURI));
208    return(ret);
209}
210
211/**
212 * xmlSaveUri:
213 * @uri:  pointer to an xmlURI
214 *
215 * Save the URI as an escaped string
216 *
217 * Returns a new string (to be deallocated by caller)
218 */
219xmlChar *
220xmlSaveUri(xmlURIPtr uri) {
221    xmlChar *ret = NULL;
222    const char *p;
223    int len;
224    int max;
225
226    if (uri == NULL) return(NULL);
227
228
229    max = 80;
230    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
231    if (ret == NULL) {
232	xmlGenericError(xmlGenericErrorContext,
233		"xmlSaveUri: out of memory\n");
234	return(NULL);
235    }
236    len = 0;
237
238    if (uri->scheme != NULL) {
239	p = uri->scheme;
240	while (*p != 0) {
241	    if (len >= max) {
242		max *= 2;
243		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
244		if (ret == NULL) {
245		    xmlGenericError(xmlGenericErrorContext,
246			    "xmlSaveUri: out of memory\n");
247		    return(NULL);
248		}
249	    }
250	    ret[len++] = *p++;
251	}
252	if (len >= max) {
253	    max *= 2;
254	    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
255	    if (ret == NULL) {
256		xmlGenericError(xmlGenericErrorContext,
257			"xmlSaveUri: out of memory\n");
258		return(NULL);
259	    }
260	}
261	ret[len++] = ':';
262    }
263    if (uri->opaque != NULL) {
264	p = uri->opaque;
265	while (*p != 0) {
266	    if (len + 3 >= max) {
267		max *= 2;
268		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
269		if (ret == NULL) {
270		    xmlGenericError(xmlGenericErrorContext,
271			    "xmlSaveUri: out of memory\n");
272		    return(NULL);
273		}
274	    }
275	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
276		ret[len++] = *p++;
277	    else {
278		int val = *(unsigned char *)p++;
279		int hi = val / 0x10, lo = val % 0x10;
280		ret[len++] = '%';
281		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
282		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
283	    }
284	}
285    } else {
286	if (uri->server != NULL) {
287	    if (len + 3 >= max) {
288		max *= 2;
289		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
290		if (ret == NULL) {
291		    xmlGenericError(xmlGenericErrorContext,
292			    "xmlSaveUri: out of memory\n");
293		    return(NULL);
294		}
295	    }
296	    ret[len++] = '/';
297	    ret[len++] = '/';
298	    if (uri->user != NULL) {
299		p = uri->user;
300		while (*p != 0) {
301		    if (len + 3 >= max) {
302			max *= 2;
303			ret = (xmlChar *) xmlRealloc(ret,
304				(max + 1) * sizeof(xmlChar));
305			if (ret == NULL) {
306			    xmlGenericError(xmlGenericErrorContext,
307				    "xmlSaveUri: out of memory\n");
308			    return(NULL);
309			}
310		    }
311		    if ((IS_UNRESERVED(*(p))) ||
312			((*(p) == ';')) || ((*(p) == ':')) ||
313			((*(p) == '&')) || ((*(p) == '=')) ||
314			((*(p) == '+')) || ((*(p) == '$')) ||
315			((*(p) == ',')))
316			ret[len++] = *p++;
317		    else {
318			int val = *(unsigned char *)p++;
319			int hi = val / 0x10, lo = val % 0x10;
320			ret[len++] = '%';
321			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
322			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
323		    }
324		}
325		if (len + 3 >= max) {
326		    max *= 2;
327		    ret = (xmlChar *) xmlRealloc(ret,
328			    (max + 1) * sizeof(xmlChar));
329		    if (ret == NULL) {
330			xmlGenericError(xmlGenericErrorContext,
331				"xmlSaveUri: out of memory\n");
332			return(NULL);
333		    }
334		}
335		ret[len++] = '@';
336	    }
337	    p = uri->server;
338	    while (*p != 0) {
339		if (len >= max) {
340		    max *= 2;
341		    ret = (xmlChar *) xmlRealloc(ret,
342			    (max + 1) * sizeof(xmlChar));
343		    if (ret == NULL) {
344			xmlGenericError(xmlGenericErrorContext,
345				"xmlSaveUri: out of memory\n");
346			return(NULL);
347		    }
348		}
349		ret[len++] = *p++;
350	    }
351	    if (uri->port > 0) {
352		if (len + 10 >= max) {
353		    max *= 2;
354		    ret = (xmlChar *) xmlRealloc(ret,
355			    (max + 1) * sizeof(xmlChar));
356		    if (ret == NULL) {
357			xmlGenericError(xmlGenericErrorContext,
358				"xmlSaveUri: out of memory\n");
359			return(NULL);
360		    }
361		}
362		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
363	    }
364	} else if (uri->authority != NULL) {
365	    if (len + 3 >= max) {
366		max *= 2;
367		ret = (xmlChar *) xmlRealloc(ret,
368			(max + 1) * sizeof(xmlChar));
369		if (ret == NULL) {
370		    xmlGenericError(xmlGenericErrorContext,
371			    "xmlSaveUri: out of memory\n");
372		    return(NULL);
373		}
374	    }
375	    ret[len++] = '/';
376	    ret[len++] = '/';
377	    p = uri->authority;
378	    while (*p != 0) {
379		if (len + 3 >= max) {
380		    max *= 2;
381		    ret = (xmlChar *) xmlRealloc(ret,
382			    (max + 1) * sizeof(xmlChar));
383		    if (ret == NULL) {
384			xmlGenericError(xmlGenericErrorContext,
385				"xmlSaveUri: out of memory\n");
386			return(NULL);
387		    }
388		}
389		if ((IS_UNRESERVED(*(p))) ||
390                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
391                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
392                    ((*(p) == '=')) || ((*(p) == '+')))
393		    ret[len++] = *p++;
394		else {
395		    int val = *(unsigned char *)p++;
396		    int hi = val / 0x10, lo = val % 0x10;
397		    ret[len++] = '%';
398		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
399		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
400		}
401	    }
402	} else if (uri->scheme != NULL) {
403	    if (len + 3 >= max) {
404		max *= 2;
405		ret = (xmlChar *) xmlRealloc(ret,
406			(max + 1) * sizeof(xmlChar));
407		if (ret == NULL) {
408		    xmlGenericError(xmlGenericErrorContext,
409			    "xmlSaveUri: out of memory\n");
410		    return(NULL);
411		}
412	    }
413	    ret[len++] = '/';
414	    ret[len++] = '/';
415	}
416	if (uri->path != NULL) {
417	    p = uri->path;
418	    while (*p != 0) {
419		if (len + 3 >= max) {
420		    max *= 2;
421		    ret = (xmlChar *) xmlRealloc(ret,
422			    (max + 1) * sizeof(xmlChar));
423		    if (ret == NULL) {
424			xmlGenericError(xmlGenericErrorContext,
425				"xmlSaveUri: out of memory\n");
426			return(NULL);
427		    }
428		}
429		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
430                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
431	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
432	            ((*(p) == ',')))
433		    ret[len++] = *p++;
434		else {
435		    int val = *(unsigned char *)p++;
436		    int hi = val / 0x10, lo = val % 0x10;
437		    ret[len++] = '%';
438		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
439		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
440		}
441	    }
442	}
443	if (uri->query != NULL) {
444	    if (len + 3 >= max) {
445		max *= 2;
446		ret = (xmlChar *) xmlRealloc(ret,
447			(max + 1) * sizeof(xmlChar));
448		if (ret == NULL) {
449		    xmlGenericError(xmlGenericErrorContext,
450			    "xmlSaveUri: out of memory\n");
451		    return(NULL);
452		}
453	    }
454	    ret[len++] = '?';
455	    p = uri->query;
456	    while (*p != 0) {
457		if (len + 3 >= max) {
458		    max *= 2;
459		    ret = (xmlChar *) xmlRealloc(ret,
460			    (max + 1) * sizeof(xmlChar));
461		    if (ret == NULL) {
462			xmlGenericError(xmlGenericErrorContext,
463				"xmlSaveUri: out of memory\n");
464			return(NULL);
465		    }
466		}
467		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
468		    ret[len++] = *p++;
469		else {
470		    int val = *(unsigned char *)p++;
471		    int hi = val / 0x10, lo = val % 0x10;
472		    ret[len++] = '%';
473		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
474		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
475		}
476	    }
477	}
478    }
479    if (uri->fragment != NULL) {
480	if (len + 3 >= max) {
481	    max *= 2;
482	    ret = (xmlChar *) xmlRealloc(ret,
483		    (max + 1) * sizeof(xmlChar));
484	    if (ret == NULL) {
485		xmlGenericError(xmlGenericErrorContext,
486			"xmlSaveUri: out of memory\n");
487		return(NULL);
488	    }
489	}
490	ret[len++] = '#';
491	p = uri->fragment;
492	while (*p != 0) {
493	    if (len + 3 >= max) {
494		max *= 2;
495		ret = (xmlChar *) xmlRealloc(ret,
496			(max + 1) * sizeof(xmlChar));
497		if (ret == NULL) {
498		    xmlGenericError(xmlGenericErrorContext,
499			    "xmlSaveUri: out of memory\n");
500		    return(NULL);
501		}
502	    }
503	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
504		ret[len++] = *p++;
505	    else {
506		int val = *(unsigned char *)p++;
507		int hi = val / 0x10, lo = val % 0x10;
508		ret[len++] = '%';
509		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
510		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
511	    }
512	}
513    }
514    if (len >= max) {
515	max *= 2;
516	ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
517	if (ret == NULL) {
518	    xmlGenericError(xmlGenericErrorContext,
519		    "xmlSaveUri: out of memory\n");
520	    return(NULL);
521	}
522    }
523    ret[len++] = 0;
524    return(ret);
525}
526
527/**
528 * xmlPrintURI:
529 * @stream:  a FILE* for the output
530 * @uri:  pointer to an xmlURI
531 *
532 * Prints the URI in the stream @steam.
533 */
534void
535xmlPrintURI(FILE *stream, xmlURIPtr uri) {
536    xmlChar *out;
537
538    out = xmlSaveUri(uri);
539    if (out != NULL) {
540	fprintf(stream, "%s", (char *) out);
541	xmlFree(out);
542    }
543}
544
545/**
546 * xmlCleanURI:
547 * @uri:  pointer to an xmlURI
548 *
549 * Make sure the xmlURI struct is free of content
550 */
551static void
552xmlCleanURI(xmlURIPtr uri) {
553    if (uri == NULL) return;
554
555    if (uri->scheme != NULL) xmlFree(uri->scheme);
556    uri->scheme = NULL;
557    if (uri->server != NULL) xmlFree(uri->server);
558    uri->server = NULL;
559    if (uri->user != NULL) xmlFree(uri->user);
560    uri->user = NULL;
561    if (uri->path != NULL) xmlFree(uri->path);
562    uri->path = NULL;
563    if (uri->fragment != NULL) xmlFree(uri->fragment);
564    uri->fragment = NULL;
565    if (uri->opaque != NULL) xmlFree(uri->opaque);
566    uri->opaque = NULL;
567    if (uri->authority != NULL) xmlFree(uri->authority);
568    uri->authority = NULL;
569    if (uri->query != NULL) xmlFree(uri->query);
570    uri->query = NULL;
571}
572
573/**
574 * xmlFreeURI:
575 * @uri:  pointer to an xmlURI
576 *
577 * Free up the xmlURI struct
578 */
579void
580xmlFreeURI(xmlURIPtr uri) {
581    if (uri == NULL) return;
582
583    if (uri->scheme != NULL) xmlFree(uri->scheme);
584    if (uri->server != NULL) xmlFree(uri->server);
585    if (uri->user != NULL) xmlFree(uri->user);
586    if (uri->path != NULL) xmlFree(uri->path);
587    if (uri->fragment != NULL) xmlFree(uri->fragment);
588    if (uri->opaque != NULL) xmlFree(uri->opaque);
589    if (uri->authority != NULL) xmlFree(uri->authority);
590    if (uri->query != NULL) xmlFree(uri->query);
591    xmlFree(uri);
592}
593
594/************************************************************************
595 *									*
596 *			Helper functions				*
597 *									*
598 ************************************************************************/
599
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.  Leading "/" characters are
 * left untouched; repeated "/" inside the path are collapsed to one.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).  "cur" reads
     * and "out" writes; out never gets ahead of cur.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": skip all but the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used. */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* Step back over the "/" separating us from the previous segment.
         * If only "/" characters precede "cur" there is no previous
         * segment, so keep going from here.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == path) && (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment (possibly a
         * one character segment starting at "path"); find its start.  We
         * need to back up to the previous segment and start over with that
         * to handle things like "foo/bar/../..".  If we don't do this, then
         * on the first pass we'll remove the "bar/..", but be pointing at
         * the second ".." so we won't realize we can also remove the
         * "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path, but only for paths starting
     * with "/".
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
783
784/**
785 * xmlURIUnescapeString:
786 * @str:  the string to unescape
787 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
788 * @target:  optional destination buffer
789 *
790 * Unescaping routine, does not do validity checks !
791 * Output is direct unsigned char translation of %XX values (no encoding)
792 *
793 * Returns an copy of the string, but unescaped
794 */
795char *
796xmlURIUnescapeString(const char *str, int len, char *target) {
797    char *ret, *out;
798    const char *in;
799
800    if (str == NULL)
801	return(NULL);
802    if (len <= 0) len = strlen(str);
803    if (len < 0) return(NULL);
804
805    if (target == NULL) {
806	ret = (char *) xmlMallocAtomic(len + 1);
807	if (ret == NULL) {
808	    xmlGenericError(xmlGenericErrorContext,
809		    "xmlURIUnescapeString: out of memory\n");
810	    return(NULL);
811	}
812    } else
813	ret = target;
814    in = str;
815    out = ret;
816    while(len > 0) {
817	if (*in == '%') {
818	    in++;
819	    if ((*in >= '0') && (*in <= '9'))
820	        *out = (*in - '0');
821	    else if ((*in >= 'a') && (*in <= 'f'))
822	        *out = (*in - 'a') + 10;
823	    else if ((*in >= 'A') && (*in <= 'F'))
824	        *out = (*in - 'A') + 10;
825	    in++;
826	    if ((*in >= '0') && (*in <= '9'))
827	        *out = *out * 16 + (*in - '0');
828	    else if ((*in >= 'a') && (*in <= 'f'))
829	        *out = *out * 16 + (*in - 'a') + 10;
830	    else if ((*in >= 'A') && (*in <= 'F'))
831	        *out = *out * 16 + (*in - 'A') + 10;
832	    in++;
833	    len -= 3;
834	    out++;
835	} else {
836	    *out++ = *in++;
837	    len--;
838	}
839    }
840    *out = 0;
841    return(ret);
842}
843
844/**
845 * xmlURIEscapeStr:
846 * @str:  string to escape
847 * @list: exception list string of chars not to escape
848 *
849 * This routine escapes a string to hex, ignoring reserved characters (a-z)
850 * and the characters in the exception list.
851 *
852 * Returns a new escaped string or NULL in case of error.
853 */
854xmlChar *
855xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
856    xmlChar *ret, ch;
857    const xmlChar *in;
858
859    unsigned int len, out;
860
861    if (str == NULL)
862	return(NULL);
863    len = xmlStrlen(str);
864    if (!(len > 0)) return(NULL);
865
866    len += 20;
867    ret = (xmlChar *) xmlMallocAtomic(len);
868    if (ret == NULL) {
869	xmlGenericError(xmlGenericErrorContext,
870		"xmlURIEscapeStr: out of memory\n");
871	return(NULL);
872    }
873    in = (const xmlChar *) str;
874    out = 0;
875    while(*in != 0) {
876	if (len - out <= 3) {
877	    len += 20;
878	    ret = (xmlChar *) xmlRealloc(ret, len);
879	    if (ret == NULL) {
880		xmlGenericError(xmlGenericErrorContext,
881			"xmlURIEscapeStr: out of memory\n");
882		return(NULL);
883	    }
884	}
885
886	ch = *in;
887
888	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
889	    unsigned char val;
890	    ret[out++] = '%';
891	    val = ch >> 4;
892	    if (val <= 9)
893		ret[out++] = '0' + val;
894	    else
895		ret[out++] = 'A' + val - 0xA;
896	    val = ch & 0xF;
897	    if (val <= 9)
898		ret[out++] = '0' + val;
899	    else
900		ret[out++] = 'A' + val - 0xA;
901	    in++;
902	} else {
903	    ret[out++] = *in++;
904	}
905
906    }
907    ret[out] = 0;
908    return(ret);
909}
910
911/**
912 * xmlURIEscape:
913 * @str:  the string of the URI to escape
914 *
915 * Escaping routine, does not do validity checks !
916 * It will try to escape the chars needing this, but this is heuristic
917 * based it's impossible to be sure.
918 *
919 * Returns an copy of the string, but escaped
920 *
921 * 25 May 2001
922 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
923 * according to RFC2396.
924 *   - Carl Douglas
925 */
926xmlChar *
927xmlURIEscape(const xmlChar * str)
928{
929    xmlChar *ret, *segment = NULL;
930    xmlURIPtr uri;
931    int ret2;
932
933#define NULLCHK(p) if(!p) { \
934                   xmlGenericError(xmlGenericErrorContext, \
935                        "xmlURIEscape: out of memory\n"); \
936                   return NULL; }
937
938    if (str == NULL)
939        return (NULL);
940
941    uri = xmlCreateURI();
942    if (uri != NULL) {
943	/*
944	 * Allow escaping errors in the unescaped form
945	 */
946        uri->cleanup = 1;
947        ret2 = xmlParseURIReference(uri, (const char *)str);
948        if (ret2) {
949            xmlFreeURI(uri);
950            return (NULL);
951        }
952    }
953
954    if (!uri)
955        return NULL;
956
957    ret = NULL;
958
959    if (uri->scheme) {
960        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
961        NULLCHK(segment)
962        ret = xmlStrcat(ret, segment);
963        ret = xmlStrcat(ret, BAD_CAST ":");
964        xmlFree(segment);
965    }
966
967    if (uri->authority) {
968        segment =
969            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
970        NULLCHK(segment)
971        ret = xmlStrcat(ret, BAD_CAST "//");
972        ret = xmlStrcat(ret, segment);
973        xmlFree(segment);
974    }
975
976    if (uri->user) {
977        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
978        NULLCHK(segment)
979        ret = xmlStrcat(ret, segment);
980        ret = xmlStrcat(ret, BAD_CAST "@");
981        xmlFree(segment);
982    }
983
984    if (uri->server) {
985        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
986        NULLCHK(segment)
987        ret = xmlStrcat(ret, BAD_CAST "//");
988        ret = xmlStrcat(ret, segment);
989        xmlFree(segment);
990    }
991
992    if (uri->port) {
993        xmlChar port[10];
994
995        snprintf((char *) port, 10, "%d", uri->port);
996        ret = xmlStrcat(ret, BAD_CAST ":");
997        ret = xmlStrcat(ret, port);
998    }
999
1000    if (uri->path) {
1001        segment =
1002            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1003        NULLCHK(segment)
1004        ret = xmlStrcat(ret, segment);
1005        xmlFree(segment);
1006    }
1007
1008    if (uri->query) {
1009        segment =
1010            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1011        NULLCHK(segment)
1012        ret = xmlStrcat(ret, BAD_CAST "?");
1013        ret = xmlStrcat(ret, segment);
1014        xmlFree(segment);
1015    }
1016
1017    if (uri->opaque) {
1018        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1019        NULLCHK(segment)
1020        ret = xmlStrcat(ret, segment);
1021        xmlFree(segment);
1022    }
1023
1024    if (uri->fragment) {
1025        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1026        NULLCHK(segment)
1027        ret = xmlStrcat(ret, BAD_CAST "#");
1028        ret = xmlStrcat(ret, segment);
1029        xmlFree(segment);
1030    }
1031
1032    xmlFreeURI(uri);
1033#undef NULLCHK
1034
1035    return (ret);
1036}
1037
1038/************************************************************************
1039 *									*
1040 *			Escaped URI parsing				*
1041 *									*
1042 ************************************************************************/
1043
1044/**
1045 * xmlParseURIFragment:
1046 * @uri:  pointer to an URI structure
1047 * @str:  pointer to the string to analyze
1048 *
1049 * Parse an URI fragment string and fills in the appropriate fields
1050 * of the @uri structure.
1051 *
1052 * fragment = *uric
1053 *
1054 * Returns 0 or the error code
1055 */
1056static int
1057xmlParseURIFragment(xmlURIPtr uri, const char **str)
1058{
1059    const char *cur = *str;
1060
1061    if (str == NULL)
1062        return (-1);
1063
1064    while (IS_URIC(cur) || IS_UNWISE(cur))
1065        NEXT(cur);
1066    if (uri != NULL) {
1067        if (uri->fragment != NULL)
1068            xmlFree(uri->fragment);
1069        uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1070    }
1071    *str = cur;
1072    return (0);
1073}
1074
1075/**
1076 * xmlParseURIQuery:
1077 * @uri:  pointer to an URI structure
1078 * @str:  pointer to the string to analyze
1079 *
1080 * Parse the query part of an URI
1081 *
1082 * query = *uric
1083 *
1084 * Returns 0 or the error code
1085 */
1086static int
1087xmlParseURIQuery(xmlURIPtr uri, const char **str)
1088{
1089    const char *cur = *str;
1090
1091    if (str == NULL)
1092        return (-1);
1093
1094    while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1095        NEXT(cur);
1096    if (uri != NULL) {
1097        if (uri->query != NULL)
1098            xmlFree(uri->query);
1099        uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1100    }
1101    *str = cur;
1102    return (0);
1103}
1104
1105/**
1106 * xmlParseURIScheme:
1107 * @uri:  pointer to an URI structure
1108 * @str:  pointer to the string to analyze
1109 *
1110 * Parse an URI scheme
1111 *
1112 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1113 *
1114 * Returns 0 or the error code
1115 */
1116static int
1117xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1118    const char *cur;
1119
1120    if (str == NULL)
1121	return(-1);
1122
1123    cur = *str;
1124    if (!IS_ALPHA(*cur))
1125	return(2);
1126    cur++;
1127    while (IS_SCHEME(*cur)) cur++;
1128    if (uri != NULL) {
1129	if (uri->scheme != NULL) xmlFree(uri->scheme);
1130	/* !!! strndup */
1131	uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1132    }
1133    *str = cur;
1134    return(0);
1135}
1136
1137/**
1138 * xmlParseURIOpaquePart:
1139 * @uri:  pointer to an URI structure
1140 * @str:  pointer to the string to analyze
1141 *
1142 * Parse an URI opaque part
1143 *
1144 * opaque_part = uric_no_slash *uric
1145 *
1146 * Returns 0 or the error code
1147 */
1148static int
1149xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1150{
1151    const char *cur;
1152
1153    if (str == NULL)
1154        return (-1);
1155
1156    cur = *str;
1157    if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
1158        return (3);
1159    }
1160    NEXT(cur);
1161    while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1162        NEXT(cur);
1163    if (uri != NULL) {
1164        if (uri->opaque != NULL)
1165            xmlFree(uri->opaque);
1166        uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1167    }
1168    *str = cur;
1169    return (0);
1170}
1171
1172/**
1173 * xmlParseURIServer:
1174 * @uri:  pointer to an URI structure
1175 * @str:  pointer to the string to analyze
1176 *
1177 * Parse a server subpart of an URI, it's a finer grain analysis
1178 * of the authority part.
1179 *
1180 * server        = [ [ userinfo "@" ] hostport ]
1181 * userinfo      = *( unreserved | escaped |
1182 *                       ";" | ":" | "&" | "=" | "+" | "$" | "," )
1183 * hostport      = host [ ":" port ]
1184 * host          = hostname | IPv4address
1185 * hostname      = *( domainlabel "." ) toplabel [ "." ]
1186 * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1187 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1188 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1189 * port          = *digit
1190 *
1191 * Returns 0 or the error code
1192 */
1193static int
1194xmlParseURIServer(xmlURIPtr uri, const char **str) {
1195    const char *cur;
1196    const char *host, *tmp;
1197    const int IPmax = 4;
1198    int oct;
1199
1200    if (str == NULL)
1201	return(-1);
1202
1203    cur = *str;
1204
1205    /*
1206     * is there an userinfo ?
1207     */
1208    while (IS_USERINFO(cur)) NEXT(cur);
1209    if (*cur == '@') {
1210	if (uri != NULL) {
1211	    if (uri->user != NULL) xmlFree(uri->user);
1212	    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1213	}
1214	cur++;
1215    } else {
1216	if (uri != NULL) {
1217	    if (uri->user != NULL) xmlFree(uri->user);
1218	    uri->user = NULL;
1219	}
1220        cur = *str;
1221    }
1222    /*
1223     * This can be empty in the case where there is no server
1224     */
1225    host = cur;
1226    if (*cur == '/') {
1227	if (uri != NULL) {
1228	    if (uri->authority != NULL) xmlFree(uri->authority);
1229	    uri->authority = NULL;
1230	    if (uri->server != NULL) xmlFree(uri->server);
1231	    uri->server = NULL;
1232	    uri->port = 0;
1233	}
1234	return(0);
1235    }
1236    /*
1237     * host part of hostport can derive either an IPV4 address
1238     * or an unresolved name. Check the IP first, it easier to detect
1239     * errors if wrong one
1240     */
1241    for (oct = 0; oct < IPmax; ++oct) {
1242        if (*cur == '.')
1243            return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1244        while(IS_DIGIT(*cur)) cur++;
1245        if (oct == (IPmax-1))
1246            continue;
1247        if (*cur != '.')
1248	    break;
1249        cur++;
1250    }
1251    if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) {
1252        /* maybe host_name */
1253        if (!IS_ALPHANUM(*cur))
1254            return(4); /* e.g. http://xml.$oft */
1255        do {
1256            do ++cur; while (IS_ALPHANUM(*cur));
1257            if (*cur == '-') {
1258	        --cur;
1259                if (*cur == '.')
1260                    return(5); /* e.g. http://xml.-soft */
1261	        ++cur;
1262		continue;
1263            }
1264    	    if (*cur == '.') {
1265	        --cur;
1266                if (*cur == '-')
1267                    return(6); /* e.g. http://xml-.soft */
1268                if (*cur == '.')
1269                    return(7); /* e.g. http://xml..soft */
1270	        ++cur;
1271		continue;
1272            }
1273	    break;
1274        } while (1);
1275        tmp = cur;
1276        if (tmp[-1] == '.')
1277            --tmp; /* e.g. http://xml.$Oft/ */
1278        do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1279        if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1280            return(8); /* e.g. http://xmlsOft.0rg/ */
1281    }
1282    if (uri != NULL) {
1283	if (uri->authority != NULL) xmlFree(uri->authority);
1284	uri->authority = NULL;
1285	if (uri->server != NULL) xmlFree(uri->server);
1286	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1287    }
1288    /*
1289     * finish by checking for a port presence.
1290     */
1291    if (*cur == ':') {
1292        cur++;
1293	if (IS_DIGIT(*cur)) {
1294	    if (uri != NULL)
1295	        uri->port = 0;
1296	    while (IS_DIGIT(*cur)) {
1297	        if (uri != NULL)
1298		    uri->port = uri->port * 10 + (*cur - '0');
1299		cur++;
1300	    }
1301	}
1302    }
1303    *str = cur;
1304    return(0);
1305}
1306
1307/**
1308 * xmlParseURIRelSegment:
1309 * @uri:  pointer to an URI structure
1310 * @str:  pointer to the string to analyze
1311 *
1312 * Parse an URI relative segment
1313 *
1314 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1315 *                          "+" | "$" | "," )
1316 *
1317 * Returns 0 or the error code
1318 */
1319static int
1320xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1321{
1322    const char *cur;
1323
1324    if (str == NULL)
1325        return (-1);
1326
1327    cur = *str;
1328    if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
1329        return (3);
1330    }
1331    NEXT(cur);
1332    while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1333        NEXT(cur);
1334    if (uri != NULL) {
1335        if (uri->path != NULL)
1336            xmlFree(uri->path);
1337        uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1338    }
1339    *str = cur;
1340    return (0);
1341}
1342
1343/**
1344 * xmlParseURIPathSegments:
1345 * @uri:  pointer to an URI structure
1346 * @str:  pointer to the string to analyze
1347 * @slash:  should we add a leading slash
1348 *
1349 * Parse an URI set of path segments
1350 *
1351 * path_segments = segment *( "/" segment )
1352 * segment       = *pchar *( ";" param )
1353 * param         = *pchar
1354 *
1355 * Returns 0 or the error code
1356 */
1357static int
1358xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1359{
1360    const char *cur;
1361
1362    if (str == NULL)
1363        return (-1);
1364
1365    cur = *str;
1366
1367    do {
1368        while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1369            NEXT(cur);
1370        while (*cur == ';') {
1371            cur++;
1372            while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
1373                NEXT(cur);
1374        }
1375        if (*cur != '/')
1376            break;
1377        cur++;
1378    } while (1);
1379    if (uri != NULL) {
1380        int len, len2 = 0;
1381        char *path;
1382
1383        /*
1384         * Concat the set of path segments to the current path
1385         */
1386        len = cur - *str;
1387        if (slash)
1388            len++;
1389
1390        if (uri->path != NULL) {
1391            len2 = strlen(uri->path);
1392            len += len2;
1393        }
1394        path = (char *) xmlMallocAtomic(len + 1);
1395        if (path == NULL) {
1396            xmlGenericError(xmlGenericErrorContext,
1397                            "xmlParseURIPathSegments: out of memory\n");
1398            *str = cur;
1399            return (-1);
1400        }
1401        if (uri->path != NULL)
1402            memcpy(path, uri->path, len2);
1403        if (slash) {
1404            path[len2] = '/';
1405            len2++;
1406        }
1407        path[len2] = 0;
1408        if (cur - *str > 0)
1409            xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1410        if (uri->path != NULL)
1411            xmlFree(uri->path);
1412        uri->path = path;
1413    }
1414    *str = cur;
1415    return (0);
1416}
1417
1418/**
1419 * xmlParseURIAuthority:
1420 * @uri:  pointer to an URI structure
1421 * @str:  pointer to the string to analyze
1422 *
1423 * Parse the authority part of an URI.
1424 *
1425 * authority = server | reg_name
1426 * server    = [ [ userinfo "@" ] hostport ]
1427 * reg_name  = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1428 *                        "@" | "&" | "=" | "+" )
1429 *
1430 * Note : this is completely ambiguous since reg_name is allowed to
1431 *        use the full set of chars in use by server:
1432 *
1433 *        3.2.1. Registry-based Naming Authority
1434 *
1435 *        The structure of a registry-based naming authority is specific
1436 *        to the URI scheme, but constrained to the allowed characters
1437 *        for an authority component.
1438 *
1439 * Returns 0 or the error code
1440 */
1441static int
1442xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1443    const char *cur;
1444    int ret;
1445
1446    if (str == NULL)
1447	return(-1);
1448
1449    cur = *str;
1450
1451    /*
1452     * try first to parse it as a server string.
1453     */
1454    ret = xmlParseURIServer(uri, str);
1455    if ((ret == 0) && (*str != NULL) &&
1456	((**str == 0) || (**str == '/') || (**str == '?')))
1457        return(0);
1458    *str = cur;
1459
1460    /*
1461     * failed, fallback to reg_name
1462     */
1463    if (!IS_REG_NAME(cur)) {
1464	return(5);
1465    }
1466    NEXT(cur);
1467    while (IS_REG_NAME(cur)) NEXT(cur);
1468    if (uri != NULL) {
1469	if (uri->server != NULL) xmlFree(uri->server);
1470	uri->server = NULL;
1471	if (uri->user != NULL) xmlFree(uri->user);
1472	uri->user = NULL;
1473	if (uri->authority != NULL) xmlFree(uri->authority);
1474	uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1475    }
1476    *str = cur;
1477    return(0);
1478}
1479
1480/**
1481 * xmlParseURIHierPart:
1482 * @uri:  pointer to an URI structure
1483 * @str:  pointer to the string to analyze
1484 *
1485 * Parse an URI hierarchical part
1486 *
1487 * hier_part = ( net_path | abs_path ) [ "?" query ]
1488 * abs_path = "/"  path_segments
1489 * net_path = "//" authority [ abs_path ]
1490 *
1491 * Returns 0 or the error code
1492 */
1493static int
1494xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1495    int ret;
1496    const char *cur;
1497
1498    if (str == NULL)
1499	return(-1);
1500
1501    cur = *str;
1502
1503    if ((cur[0] == '/') && (cur[1] == '/')) {
1504	cur += 2;
1505	ret = xmlParseURIAuthority(uri, &cur);
1506	if (ret != 0)
1507	    return(ret);
1508	if (cur[0] == '/') {
1509	    cur++;
1510	    ret = xmlParseURIPathSegments(uri, &cur, 1);
1511	}
1512    } else if (cur[0] == '/') {
1513	cur++;
1514	ret = xmlParseURIPathSegments(uri, &cur, 1);
1515    } else {
1516	return(4);
1517    }
1518    if (ret != 0)
1519	return(ret);
1520    if (*cur == '?') {
1521	cur++;
1522	ret = xmlParseURIQuery(uri, &cur);
1523	if (ret != 0)
1524	    return(ret);
1525    }
1526    *str = cur;
1527    return(0);
1528}
1529
1530/**
1531 * xmlParseAbsoluteURI:
1532 * @uri:  pointer to an URI structure
1533 * @str:  pointer to the string to analyze
1534 *
1535 * Parse an URI reference string and fills in the appropriate fields
1536 * of the @uri structure
1537 *
1538 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1539 *
1540 * Returns 0 or the error code
1541 */
1542static int
1543xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1544    int ret;
1545    const char *cur;
1546
1547    if (str == NULL)
1548	return(-1);
1549
1550    cur = *str;
1551
1552    ret = xmlParseURIScheme(uri, str);
1553    if (ret != 0) return(ret);
1554    if (**str != ':') {
1555	*str = cur;
1556	return(1);
1557    }
1558    (*str)++;
1559    if (**str == '/')
1560	return(xmlParseURIHierPart(uri, str));
1561    return(xmlParseURIOpaquePart(uri, str));
1562}
1563
1564/**
1565 * xmlParseRelativeURI:
1566 * @uri:  pointer to an URI structure
1567 * @str:  pointer to the string to analyze
1568 *
1569 * Parse an relative URI string and fills in the appropriate fields
1570 * of the @uri structure
1571 *
1572 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1573 * abs_path = "/"  path_segments
1574 * net_path = "//" authority [ abs_path ]
1575 * rel_path = rel_segment [ abs_path ]
1576 *
1577 * Returns 0 or the error code
1578 */
1579static int
1580xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1581    int ret = 0;
1582    const char *cur;
1583
1584    if (str == NULL)
1585	return(-1);
1586
1587    cur = *str;
1588    if ((cur[0] == '/') && (cur[1] == '/')) {
1589	cur += 2;
1590	ret = xmlParseURIAuthority(uri, &cur);
1591	if (ret != 0)
1592	    return(ret);
1593	if (cur[0] == '/') {
1594	    cur++;
1595	    ret = xmlParseURIPathSegments(uri, &cur, 1);
1596	}
1597    } else if (cur[0] == '/') {
1598	cur++;
1599	ret = xmlParseURIPathSegments(uri, &cur, 1);
1600    } else if (cur[0] != '#' && cur[0] != '?') {
1601	ret = xmlParseURIRelSegment(uri, &cur);
1602	if (ret != 0)
1603	    return(ret);
1604	if (cur[0] == '/') {
1605	    cur++;
1606	    ret = xmlParseURIPathSegments(uri, &cur, 1);
1607	}
1608    }
1609    if (ret != 0)
1610	return(ret);
1611    if (*cur == '?') {
1612	cur++;
1613	ret = xmlParseURIQuery(uri, &cur);
1614	if (ret != 0)
1615	    return(ret);
1616    }
1617    *str = cur;
1618    return(ret);
1619}
1620
1621/**
1622 * xmlParseURIReference:
1623 * @uri:  pointer to an URI structure
1624 * @str:  the string to analyze
1625 *
1626 * Parse an URI reference string and fills in the appropriate fields
1627 * of the @uri structure
1628 *
1629 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1630 *
1631 * Returns 0 or the error code
1632 */
1633int
1634xmlParseURIReference(xmlURIPtr uri, const char *str) {
1635    int ret;
1636    const char *tmp = str;
1637
1638    if (str == NULL)
1639	return(-1);
1640    xmlCleanURI(uri);
1641
1642    /*
1643     * Try first to parse absolute refs, then fallback to relative if
1644     * it fails.
1645     */
1646    ret = xmlParseAbsoluteURI(uri, &str);
1647    if (ret != 0) {
1648	xmlCleanURI(uri);
1649	str = tmp;
1650        ret = xmlParseRelativeURI(uri, &str);
1651    }
1652    if (ret != 0) {
1653	xmlCleanURI(uri);
1654	return(ret);
1655    }
1656
1657    if (*str == '#') {
1658	str++;
1659	ret = xmlParseURIFragment(uri, &str);
1660	if (ret != 0) return(ret);
1661    }
1662    if (*str != 0) {
1663	xmlCleanURI(uri);
1664	return(1);
1665    }
1666    return(0);
1667}
1668
1669/**
1670 * xmlParseURI:
1671 * @str:  the URI string to analyze
1672 *
1673 * Parse an URI
1674 *
1675 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1676 *
1677 * Returns a newly build xmlURIPtr or NULL in case of error
1678 */
1679xmlURIPtr
1680xmlParseURI(const char *str) {
1681    xmlURIPtr uri;
1682    int ret;
1683
1684    if (str == NULL)
1685	return(NULL);
1686    uri = xmlCreateURI();
1687    if (uri != NULL) {
1688	ret = xmlParseURIReference(uri, str);
1689        if (ret) {
1690	    xmlFreeURI(uri);
1691	    return(NULL);
1692	}
1693    }
1694    return(uri);
1695}
1696
1697/************************************************************************
1698 *									*
1699 *			Public functions				*
1700 *									*
1701 ************************************************************************/
1702
1703/**
1704 * xmlBuildURI:
1705 * @URI:  the URI instance found in the document
1706 * @base:  the base value
1707 *
1708 * Computes he final URI of the reference done by checking that
1709 * the given URI is valid, and building the final URI using the
1710 * base URI. This is processed according to section 5.2 of the
1711 * RFC 2396
1712 *
1713 * 5.2. Resolving Relative References to Absolute Form
1714 *
1715 * Returns a new URI string (to be freed by the caller) or NULL in case
1716 *         of error.
1717 */
1718xmlChar *
1719xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1720    xmlChar *val = NULL;
1721    int ret, len, indx, cur, out;
1722    xmlURIPtr ref = NULL;
1723    xmlURIPtr bas = NULL;
1724    xmlURIPtr res = NULL;
1725
1726    /*
1727     * 1) The URI reference is parsed into the potential four components and
1728     *    fragment identifier, as described in Section 4.3.
1729     *
1730     *    NOTE that a completely empty URI is treated by modern browsers
1731     *    as a reference to "." rather than as a synonym for the current
1732     *    URI.  Should we do that here?
1733     */
1734    if (URI == NULL)
1735	ret = -1;
1736    else {
1737	if (*URI) {
1738	    ref = xmlCreateURI();
1739	    if (ref == NULL)
1740		goto done;
1741	    ret = xmlParseURIReference(ref, (const char *) URI);
1742	}
1743	else
1744	    ret = 0;
1745    }
1746    if (ret != 0)
1747	goto done;
1748    if ((ref != NULL) && (ref->scheme != NULL)) {
1749	/*
1750	 * The URI is absolute don't modify.
1751	 */
1752	val = xmlStrdup(URI);
1753	goto done;
1754    }
1755    if (base == NULL)
1756	ret = -1;
1757    else {
1758	bas = xmlCreateURI();
1759	if (bas == NULL)
1760	    goto done;
1761	ret = xmlParseURIReference(bas, (const char *) base);
1762    }
1763    if (ret != 0) {
1764	if (ref)
1765	    val = xmlSaveUri(ref);
1766	goto done;
1767    }
1768    if (ref == NULL) {
1769	/*
1770	 * the base fragment must be ignored
1771	 */
1772	if (bas->fragment != NULL) {
1773	    xmlFree(bas->fragment);
1774	    bas->fragment = NULL;
1775	}
1776	val = xmlSaveUri(bas);
1777	goto done;
1778    }
1779
1780    /*
1781     * 2) If the path component is empty and the scheme, authority, and
1782     *    query components are undefined, then it is a reference to the
1783     *    current document and we are done.  Otherwise, the reference URI's
1784     *    query and fragment components are defined as found (or not found)
1785     *    within the URI reference and not inherited from the base URI.
1786     *
1787     *    NOTE that in modern browsers, the parsing differs from the above
1788     *    in the following aspect:  the query component is allowed to be
1789     *    defined while still treating this as a reference to the current
1790     *    document.
1791     */
1792    res = xmlCreateURI();
1793    if (res == NULL)
1794	goto done;
1795    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1796	((ref->authority == NULL) && (ref->server == NULL))) {
1797	if (bas->scheme != NULL)
1798	    res->scheme = xmlMemStrdup(bas->scheme);
1799	if (bas->authority != NULL)
1800	    res->authority = xmlMemStrdup(bas->authority);
1801	else if (bas->server != NULL) {
1802	    res->server = xmlMemStrdup(bas->server);
1803	    if (bas->user != NULL)
1804		res->user = xmlMemStrdup(bas->user);
1805	    res->port = bas->port;
1806	}
1807	if (bas->path != NULL)
1808	    res->path = xmlMemStrdup(bas->path);
1809	if (ref->query != NULL)
1810	    res->query = xmlMemStrdup(ref->query);
1811	else if (bas->query != NULL)
1812	    res->query = xmlMemStrdup(bas->query);
1813	if (ref->fragment != NULL)
1814	    res->fragment = xmlMemStrdup(ref->fragment);
1815	goto step_7;
1816    }
1817
1818    /*
1819     * 3) If the scheme component is defined, indicating that the reference
1820     *    starts with a scheme name, then the reference is interpreted as an
1821     *    absolute URI and we are done.  Otherwise, the reference URI's
1822     *    scheme is inherited from the base URI's scheme component.
1823     */
1824    if (ref->scheme != NULL) {
1825	val = xmlSaveUri(ref);
1826	goto done;
1827    }
1828    if (bas->scheme != NULL)
1829	res->scheme = xmlMemStrdup(bas->scheme);
1830
1831    if (ref->query != NULL)
1832	res->query = xmlMemStrdup(ref->query);
1833    if (ref->fragment != NULL)
1834	res->fragment = xmlMemStrdup(ref->fragment);
1835
1836    /*
1837     * 4) If the authority component is defined, then the reference is a
1838     *    network-path and we skip to step 7.  Otherwise, the reference
1839     *    URI's authority is inherited from the base URI's authority
1840     *    component, which will also be undefined if the URI scheme does not
1841     *    use an authority component.
1842     */
1843    if ((ref->authority != NULL) || (ref->server != NULL)) {
1844	if (ref->authority != NULL)
1845	    res->authority = xmlMemStrdup(ref->authority);
1846	else {
1847	    res->server = xmlMemStrdup(ref->server);
1848	    if (ref->user != NULL)
1849		res->user = xmlMemStrdup(ref->user);
1850            res->port = ref->port;
1851	}
1852	if (ref->path != NULL)
1853	    res->path = xmlMemStrdup(ref->path);
1854	goto step_7;
1855    }
1856    if (bas->authority != NULL)
1857	res->authority = xmlMemStrdup(bas->authority);
1858    else if (bas->server != NULL) {
1859	res->server = xmlMemStrdup(bas->server);
1860	if (bas->user != NULL)
1861	    res->user = xmlMemStrdup(bas->user);
1862	res->port = bas->port;
1863    }
1864
1865    /*
1866     * 5) If the path component begins with a slash character ("/"), then
1867     *    the reference is an absolute-path and we skip to step 7.
1868     */
1869    if ((ref->path != NULL) && (ref->path[0] == '/')) {
1870	res->path = xmlMemStrdup(ref->path);
1871	goto step_7;
1872    }
1873
1874
1875    /*
1876     * 6) If this step is reached, then we are resolving a relative-path
1877     *    reference.  The relative path needs to be merged with the base
1878     *    URI's path.  Although there are many ways to do this, we will
1879     *    describe a simple method using a separate string buffer.
1880     *
1881     * Allocate a buffer large enough for the result string.
1882     */
1883    len = 2; /* extra / and 0 */
1884    if (ref->path != NULL)
1885	len += strlen(ref->path);
1886    if (bas->path != NULL)
1887	len += strlen(bas->path);
1888    res->path = (char *) xmlMallocAtomic(len);
1889    if (res->path == NULL) {
1890	xmlGenericError(xmlGenericErrorContext,
1891		"xmlBuildURI: out of memory\n");
1892	goto done;
1893    }
1894    res->path[0] = 0;
1895
1896    /*
1897     * a) All but the last segment of the base URI's path component is
1898     *    copied to the buffer.  In other words, any characters after the
1899     *    last (right-most) slash character, if any, are excluded.
1900     */
1901    cur = 0;
1902    out = 0;
1903    if (bas->path != NULL) {
1904	while (bas->path[cur] != 0) {
1905	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1906		cur++;
1907	    if (bas->path[cur] == 0)
1908		break;
1909
1910	    cur++;
1911	    while (out < cur) {
1912		res->path[out] = bas->path[out];
1913		out++;
1914	    }
1915	}
1916    }
1917    res->path[out] = 0;
1918
1919    /*
1920     * b) The reference's path component is appended to the buffer
1921     *    string.
1922     */
1923    if (ref->path != NULL && ref->path[0] != 0) {
1924	indx = 0;
1925	/*
1926	 * Ensure the path includes a '/'
1927	 */
1928	if ((out == 0) && (bas->server != NULL))
1929	    res->path[out++] = '/';
1930	while (ref->path[indx] != 0) {
1931	    res->path[out++] = ref->path[indx++];
1932	}
1933    }
1934    res->path[out] = 0;
1935
1936    /*
1937     * Steps c) to h) are really path normalization steps
1938     */
1939    xmlNormalizeURIPath(res->path);
1940
1941step_7:
1942
1943    /*
1944     * 7) The resulting URI components, including any inherited from the
1945     *    base URI, are recombined to give the absolute form of the URI
1946     *    reference.
1947     */
1948    val = xmlSaveUri(res);
1949
1950done:
1951    if (ref != NULL)
1952	xmlFreeURI(ref);
1953    if (bas != NULL)
1954	xmlFreeURI(bas);
1955    if (res != NULL)
1956	xmlFreeURI(res);
1957    return(val);
1958}
1959
1960/**
1961 * xmlCanonicPath:
1962 * @path:  the resource locator in a filesystem notation
1963 *
1964 * Constructs a canonic path from the specified path.
1965 *
1966 * Returns a new canonic path, or a duplicate of the path parameter if the
1967 * construction fails. The caller is responsible for freeing the memory occupied
1968 * by the returned string. If there is insufficient memory available, or the
1969 * argument is NULL, the function returns NULL.
1970 */
1971#define IS_WINDOWS_PATH(p) 					\
1972	((p != NULL) &&						\
1973	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
1974	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
1975	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
1976xmlChar*
1977xmlCanonicPath(const xmlChar *path)
1978{
1979#if defined(_WIN32) && !defined(__CYGWIN__)
1980    int len = 0;
1981    int i = 0;
1982    xmlChar *p = NULL;
1983#endif
1984    xmlChar *ret;
1985    xmlURIPtr uri;
1986
1987    if (path == NULL)
1988	return(NULL);
1989    if ((uri = xmlParseURI((const char *) path)) != NULL) {
1990	xmlFreeURI(uri);
1991	return xmlStrdup(path);
1992    }
1993
1994    uri = xmlCreateURI();
1995
1996#if defined(_WIN32) && !defined(__CYGWIN__)
1997    len = xmlStrlen(path);
1998    if ((len > 2) && IS_WINDOWS_PATH(path)) {
1999	uri->scheme = xmlStrdup(BAD_CAST "file");
2000	uri->path = xmlMallocAtomic(len + 2);
2001	uri->path[0] = '/';
2002	p = uri->path + 1;
2003	strncpy(p, path, len + 1);
2004    } else {
2005	uri->path = xmlStrdup(path);
2006	p = uri->path;
2007    }
2008    while (*p != '\0') {
2009	if (*p == '\\')
2010	    *p = '/';
2011	p++;
2012    }
2013#else
2014    uri->path = (char *) xmlStrdup((const xmlChar *) path);
2015#endif
2016
2017    ret = xmlSaveUri(uri);
2018    xmlFreeURI(uri);
2019    return(ret);
2020}
2021
2022