1/** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFCs 3986, 2732 and 2373 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel@veillard.com 9 */ 10 11#define IN_LIBXML 12#include "libxml.h" 13 14#include <string.h> 15#include <limits.h> 16 17#include <libxml/xmlmemory.h> 18#include <libxml/uri.h> 19#include <libxml/globals.h> 20#include <libxml/xmlerror.h> 21 22/** 23 * MAX_URI_LENGTH: 24 * 25 * The definition of the URI regexp in the above RFC has no size limit 26 * In practice they are usually relativey short except for the 27 * data URI scheme as defined in RFC 2397. Even for data URI the usual 28 * maximum size before hitting random practical limits is around 64 KB 29 * and 4KB is usually a maximum admitted limit for proper operations. 30 * The value below is more a security limit than anything else and 31 * really should never be hit by 'normal' operations 32 * Set to 1 MByte in 2012, this is only enforced on output 33 */ 34#define MAX_URI_LENGTH 1024 * 1024 35 36static void 37xmlURIErrMemory(const char *extra) 38{ 39 if (extra) 40 __xmlRaiseError(NULL, NULL, NULL, 41 NULL, NULL, XML_FROM_URI, 42 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 43 extra, NULL, NULL, 0, 0, 44 "Memory allocation failed : %s\n", extra); 45 else 46 __xmlRaiseError(NULL, NULL, NULL, 47 NULL, NULL, XML_FROM_URI, 48 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 49 NULL, NULL, NULL, 0, 0, 50 "Memory allocation failed\n"); 51} 52 53static void xmlCleanURI(xmlURIPtr uri); 54 55/* 56 * Old rule from 2396 used in legacy handling code 57 * alpha = lowalpha | upalpha 58 */ 59#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 60 61 62/* 63 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 64 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 65 * "u" | "v" | "w" | "x" | "y" | "z" 66 */ 67 68#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 69 70/* 71 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 
72 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 73 * "U" | "V" | "W" | "X" | "Y" | "Z" 74 */ 75#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 76 77#ifdef IS_DIGIT 78#undef IS_DIGIT 79#endif 80/* 81 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 82 */ 83#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 84 85/* 86 * alphanum = alpha | digit 87 */ 88 89#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 90 91/* 92 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 93 */ 94 95#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 96 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 97 ((x) == '(') || ((x) == ')')) 98 99/* 100 * unwise = "{" | "}" | "|" | "\" | "^" | "`" 101 */ 102 103#define IS_UNWISE(p) \ 104 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 105 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 106 ((*(p) == ']')) || ((*(p) == '`'))) 107/* 108 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 109 * "[" | "]" 110 */ 111 112#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 113 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 114 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 115 ((x) == ']')) 116 117/* 118 * unreserved = alphanum | mark 119 */ 120 121#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 122 123/* 124 * Skip to next pointer char, handle escaped sequences 125 */ 126 127#define NEXT(p) ((*p == '%')? p += 3 : p++) 128 129/* 130 * Productions from the spec. 
131 * 132 * authority = server | reg_name 133 * reg_name = 1*( unreserved | escaped | "$" | "," | 134 * ";" | ":" | "@" | "&" | "=" | "+" ) 135 * 136 * path = [ abs_path | opaque_part ] 137 */ 138 139#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 140 141/************************************************************************ 142 * * 143 * RFC 3986 parser * 144 * * 145 ************************************************************************/ 146 147#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 148#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 149 ((*(p) >= 'A') && (*(p) <= 'Z'))) 150#define ISA_HEXDIG(p) \ 151 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 152 ((*(p) >= 'A') && (*(p) <= 'F'))) 153 154/* 155 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 156 * / "*" / "+" / "," / ";" / "=" 157 */ 158#define ISA_SUB_DELIM(p) \ 159 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 160 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 161 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 162 ((*(p) == '=')) || ((*(p) == '\''))) 163 164/* 165 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 166 */ 167#define ISA_GEN_DELIM(p) \ 168 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 169 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 170 ((*(p) == '@'))) 171 172/* 173 * reserved = gen-delims / sub-delims 174 */ 175#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 176 177/* 178 * unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" 179 */ 180#define ISA_UNRESERVED(p) \ 181 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 182 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 183 184/* 185 * pct-encoded = "%" HEXDIG HEXDIG 186 */ 187#define ISA_PCT_ENCODED(p) \ 188 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 189 190/* 191 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 192 */ 193#define ISA_PCHAR(p) \ 194 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 195 ((*(p) == ':')) || ((*(p) == '@'))) 196 197/** 198 * xmlParse3986Scheme: 199 * @uri: pointer to an URI structure 200 * @str: pointer to the string to analyze 201 * 202 * Parse an URI scheme 203 * 204 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 205 * 206 * Returns 0 or the error code 207 */ 208static int 209xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 210 const char *cur; 211 212 if (str == NULL) 213 return(-1); 214 215 cur = *str; 216 if (!ISA_ALPHA(cur)) 217 return(2); 218 cur++; 219 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 220 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 221 if (uri != NULL) { 222 if (uri->scheme != NULL) xmlFree(uri->scheme); 223 uri->scheme = STRNDUP(*str, cur - *str); 224 } 225 *str = cur; 226 return(0); 227} 228 229/** 230 * xmlParse3986Fragment: 231 * @uri: pointer to an URI structure 232 * @str: pointer to the string to analyze 233 * 234 * Parse the query part of an URI 235 * 236 * fragment = *( pchar / "/" / "?" ) 237 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 238 * in the fragment identifier but this is used very broadly for 239 * xpointer scheme selection, so we are allowing it here to not break 240 * for example all the DocBook processing chains. 
241 * 242 * Returns 0 or the error code 243 */ 244static int 245xmlParse3986Fragment(xmlURIPtr uri, const char **str) 246{ 247 const char *cur; 248 249 if (str == NULL) 250 return (-1); 251 252 cur = *str; 253 254 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 255 (*cur == '[') || (*cur == ']') || 256 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 257 NEXT(cur); 258 if (uri != NULL) { 259 if (uri->fragment != NULL) 260 xmlFree(uri->fragment); 261 if (uri->cleanup & 2) 262 uri->fragment = STRNDUP(*str, cur - *str); 263 else 264 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 265 } 266 *str = cur; 267 return (0); 268} 269 270/** 271 * xmlParse3986Query: 272 * @uri: pointer to an URI structure 273 * @str: pointer to the string to analyze 274 * 275 * Parse the query part of an URI 276 * 277 * query = *uric 278 * 279 * Returns 0 or the error code 280 */ 281static int 282xmlParse3986Query(xmlURIPtr uri, const char **str) 283{ 284 const char *cur; 285 286 if (str == NULL) 287 return (-1); 288 289 cur = *str; 290 291 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 292 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 293 NEXT(cur); 294 if (uri != NULL) { 295 if (uri->query != NULL) 296 xmlFree(uri->query); 297 if (uri->cleanup & 2) 298 uri->query = STRNDUP(*str, cur - *str); 299 else 300 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 301 302 /* Save the raw bytes of the query as well. 
303 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 304 */ 305 if (uri->query_raw != NULL) 306 xmlFree (uri->query_raw); 307 uri->query_raw = STRNDUP (*str, cur - *str); 308 } 309 *str = cur; 310 return (0); 311} 312 313/** 314 * xmlParse3986Port: 315 * @uri: pointer to an URI structure 316 * @str: the string to analyze 317 * 318 * Parse a port part and fills in the appropriate fields 319 * of the @uri structure 320 * 321 * port = *DIGIT 322 * 323 * Returns 0 or the error code 324 */ 325static int 326xmlParse3986Port(xmlURIPtr uri, const char **str) 327{ 328 const char *cur = *str; 329 unsigned port = 0; /* unsigned for defined overflow behavior */ 330 331 if (ISA_DIGIT(cur)) { 332 while (ISA_DIGIT(cur)) { 333 port = port * 10 + (*cur - '0'); 334 335 cur++; 336 } 337 if (uri != NULL) 338 uri->port = port & USHRT_MAX; /* port value modulo INT_MAX+1 */ 339 *str = cur; 340 return(0); 341 } 342 return(1); 343} 344 345/** 346 * xmlParse3986Userinfo: 347 * @uri: pointer to an URI structure 348 * @str: the string to analyze 349 * 350 * Parse an user informations part and fills in the appropriate fields 351 * of the @uri structure 352 * 353 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 354 * 355 * Returns 0 or the error code 356 */ 357static int 358xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 359{ 360 const char *cur; 361 362 cur = *str; 363 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 364 ISA_SUB_DELIM(cur) || (*cur == ':')) 365 NEXT(cur); 366 if (*cur == '@') { 367 if (uri != NULL) { 368 if (uri->user != NULL) xmlFree(uri->user); 369 if (uri->cleanup & 2) 370 uri->user = STRNDUP(*str, cur - *str); 371 else 372 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 373 } 374 *str = cur; 375 return(0); 376 } 377 return(1); 378} 379 380/** 381 * xmlParse3986DecOctet: 382 * @str: the string to analyze 383 * 384 * dec-octet = DIGIT ; 0-9 385 * / %x31-39 DIGIT ; 10-99 386 * / "1" 2DIGIT ; 100-199 387 * / "2" %x30-34 
DIGIT ; 200-249 388 * / "25" %x30-35 ; 250-255 389 * 390 * Skip a dec-octet. 391 * 392 * Returns 0 if found and skipped, 1 otherwise 393 */ 394static int 395xmlParse3986DecOctet(const char **str) { 396 const char *cur = *str; 397 398 if (!(ISA_DIGIT(cur))) 399 return(1); 400 if (!ISA_DIGIT(cur+1)) 401 cur++; 402 else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) 403 cur += 2; 404 else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) 405 cur += 3; 406 else if ((*cur == '2') && (*(cur + 1) >= '0') && 407 (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) 408 cur += 3; 409 else if ((*cur == '2') && (*(cur + 1) == '5') && 410 (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) 411 cur += 3; 412 else 413 return(1); 414 *str = cur; 415 return(0); 416} 417/** 418 * xmlParse3986Host: 419 * @uri: pointer to an URI structure 420 * @str: the string to analyze 421 * 422 * Parse an host part and fills in the appropriate fields 423 * of the @uri structure 424 * 425 * host = IP-literal / IPv4address / reg-name 426 * IP-literal = "[" ( IPv6address / IPvFuture ) "]" 427 * IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet 428 * reg-name = *( unreserved / pct-encoded / sub-delims ) 429 * 430 * Returns 0 or the error code 431 */ 432static int 433xmlParse3986Host(xmlURIPtr uri, const char **str) 434{ 435 const char *cur = *str; 436 const char *host; 437 438 host = cur; 439 /* 440 * IPv6 and future adressing scheme are enclosed between brackets 441 */ 442 if (*cur == '[') { 443 cur++; 444 while ((*cur != ']') && (*cur != 0)) 445 cur++; 446 if (*cur != ']') 447 return(1); 448 cur++; 449 goto found; 450 } 451 /* 452 * try to parse an IPv4 453 */ 454 if (ISA_DIGIT(cur)) { 455 if (xmlParse3986DecOctet(&cur) != 0) 456 goto not_ipv4; 457 if (*cur != '.') 458 goto not_ipv4; 459 cur++; 460 if (xmlParse3986DecOctet(&cur) != 0) 461 goto not_ipv4; 462 if (*cur != '.') 463 goto not_ipv4; 464 if (xmlParse3986DecOctet(&cur) != 0) 465 goto not_ipv4; 466 if (*cur != '.') 467 goto not_ipv4; 468 if (xmlParse3986DecOctet(&cur) != 0) 469 goto not_ipv4; 470 goto found; 471not_ipv4: 472 cur = *str; 473 } 474 /* 475 * then this should be a hostname which can be empty 476 */ 477 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 478 NEXT(cur); 479found: 480 if (uri != NULL) { 481 if (uri->authority != NULL) xmlFree(uri->authority); 482 uri->authority = NULL; 483 if (uri->server != NULL) xmlFree(uri->server); 484 if (cur != host) { 485 if (uri->cleanup & 2) 486 uri->server = STRNDUP(host, cur - host); 487 else 488 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 489 } else 490 uri->server = NULL; 491 } 492 *str = cur; 493 return(0); 494} 495 496/** 497 * xmlParse3986Authority: 498 * @uri: pointer to an URI structure 499 * @str: the string to analyze 500 * 501 * Parse an authority part and fills in the appropriate fields 502 * of the @uri structure 503 * 504 * authority = [ userinfo "@" ] host [ ":" port ] 505 * 506 * Returns 0 or the error code 507 */ 508static int 509xmlParse3986Authority(xmlURIPtr uri, const char **str) 510{ 511 const char *cur; 512 int ret; 513 514 
cur = *str; 515 /* 516 * try to parse an userinfo and check for the trailing @ 517 */ 518 ret = xmlParse3986Userinfo(uri, &cur); 519 if ((ret != 0) || (*cur != '@')) 520 cur = *str; 521 else 522 cur++; 523 ret = xmlParse3986Host(uri, &cur); 524 if (ret != 0) return(ret); 525 if (*cur == ':') { 526 cur++; 527 ret = xmlParse3986Port(uri, &cur); 528 if (ret != 0) return(ret); 529 } 530 *str = cur; 531 return(0); 532} 533 534/** 535 * xmlParse3986Segment: 536 * @str: the string to analyze 537 * @forbid: an optional forbidden character 538 * @empty: allow an empty segment 539 * 540 * Parse a segment and fills in the appropriate fields 541 * of the @uri structure 542 * 543 * segment = *pchar 544 * segment-nz = 1*pchar 545 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 546 * ; non-zero-length segment without any colon ":" 547 * 548 * Returns 0 or the error code 549 */ 550static int 551xmlParse3986Segment(const char **str, char forbid, int empty) 552{ 553 const char *cur; 554 555 cur = *str; 556 if (!ISA_PCHAR(cur)) { 557 if (empty) 558 return(0); 559 return(1); 560 } 561 while (ISA_PCHAR(cur) && (*cur != forbid)) 562 NEXT(cur); 563 *str = cur; 564 return (0); 565} 566 567/** 568 * xmlParse3986PathAbEmpty: 569 * @uri: pointer to an URI structure 570 * @str: the string to analyze 571 * 572 * Parse an path absolute or empty and fills in the appropriate fields 573 * of the @uri structure 574 * 575 * path-abempty = *( "/" segment ) 576 * 577 * Returns 0 or the error code 578 */ 579static int 580xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 581{ 582 const char *cur; 583 int ret; 584 585 cur = *str; 586 587 while (*cur == '/') { 588 cur++; 589 ret = xmlParse3986Segment(&cur, 0, 1); 590 if (ret != 0) return(ret); 591 } 592 if (uri != NULL) { 593 if (uri->path != NULL) xmlFree(uri->path); 594 if (*str != cur) { 595 if (uri->cleanup & 2) 596 uri->path = STRNDUP(*str, cur - *str); 597 else 598 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 
599 } else { 600 uri->path = NULL; 601 } 602 } 603 *str = cur; 604 return (0); 605} 606 607/** 608 * xmlParse3986PathAbsolute: 609 * @uri: pointer to an URI structure 610 * @str: the string to analyze 611 * 612 * Parse an path absolute and fills in the appropriate fields 613 * of the @uri structure 614 * 615 * path-absolute = "/" [ segment-nz *( "/" segment ) ] 616 * 617 * Returns 0 or the error code 618 */ 619static int 620xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 621{ 622 const char *cur; 623 int ret; 624 625 cur = *str; 626 627 if (*cur != '/') 628 return(1); 629 cur++; 630 ret = xmlParse3986Segment(&cur, 0, 0); 631 if (ret == 0) { 632 while (*cur == '/') { 633 cur++; 634 ret = xmlParse3986Segment(&cur, 0, 1); 635 if (ret != 0) return(ret); 636 } 637 } 638 if (uri != NULL) { 639 if (uri->path != NULL) xmlFree(uri->path); 640 if (cur != *str) { 641 if (uri->cleanup & 2) 642 uri->path = STRNDUP(*str, cur - *str); 643 else 644 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 645 } else { 646 uri->path = NULL; 647 } 648 } 649 *str = cur; 650 return (0); 651} 652 653/** 654 * xmlParse3986PathRootless: 655 * @uri: pointer to an URI structure 656 * @str: the string to analyze 657 * 658 * Parse an path without root and fills in the appropriate fields 659 * of the @uri structure 660 * 661 * path-rootless = segment-nz *( "/" segment ) 662 * 663 * Returns 0 or the error code 664 */ 665static int 666xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 667{ 668 const char *cur; 669 int ret; 670 671 cur = *str; 672 673 ret = xmlParse3986Segment(&cur, 0, 0); 674 if (ret != 0) return(ret); 675 while (*cur == '/') { 676 cur++; 677 ret = xmlParse3986Segment(&cur, 0, 1); 678 if (ret != 0) return(ret); 679 } 680 if (uri != NULL) { 681 if (uri->path != NULL) xmlFree(uri->path); 682 if (cur != *str) { 683 if (uri->cleanup & 2) 684 uri->path = STRNDUP(*str, cur - *str); 685 else 686 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 687 } else { 
688 uri->path = NULL; 689 } 690 } 691 *str = cur; 692 return (0); 693} 694 695/** 696 * xmlParse3986PathNoScheme: 697 * @uri: pointer to an URI structure 698 * @str: the string to analyze 699 * 700 * Parse an path which is not a scheme and fills in the appropriate fields 701 * of the @uri structure 702 * 703 * path-noscheme = segment-nz-nc *( "/" segment ) 704 * 705 * Returns 0 or the error code 706 */ 707static int 708xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 709{ 710 const char *cur; 711 int ret; 712 713 cur = *str; 714 715 ret = xmlParse3986Segment(&cur, ':', 0); 716 if (ret != 0) return(ret); 717 while (*cur == '/') { 718 cur++; 719 ret = xmlParse3986Segment(&cur, 0, 1); 720 if (ret != 0) return(ret); 721 } 722 if (uri != NULL) { 723 if (uri->path != NULL) xmlFree(uri->path); 724 if (cur != *str) { 725 if (uri->cleanup & 2) 726 uri->path = STRNDUP(*str, cur - *str); 727 else 728 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 729 } else { 730 uri->path = NULL; 731 } 732 } 733 *str = cur; 734 return (0); 735} 736 737/** 738 * xmlParse3986HierPart: 739 * @uri: pointer to an URI structure 740 * @str: the string to analyze 741 * 742 * Parse an hierarchical part and fills in the appropriate fields 743 * of the @uri structure 744 * 745 * hier-part = "//" authority path-abempty 746 * / path-absolute 747 * / path-rootless 748 * / path-empty 749 * 750 * Returns 0 or the error code 751 */ 752static int 753xmlParse3986HierPart(xmlURIPtr uri, const char **str) 754{ 755 const char *cur; 756 int ret; 757 758 cur = *str; 759 760 if ((*cur == '/') && (*(cur + 1) == '/')) { 761 cur += 2; 762 ret = xmlParse3986Authority(uri, &cur); 763 if (ret != 0) return(ret); 764 if (uri->server == NULL) 765 uri->port = -1; 766 ret = xmlParse3986PathAbEmpty(uri, &cur); 767 if (ret != 0) return(ret); 768 *str = cur; 769 return(0); 770 } else if (*cur == '/') { 771 ret = xmlParse3986PathAbsolute(uri, &cur); 772 if (ret != 0) return(ret); 773 } else if (ISA_PCHAR(cur)) 
{ 774 ret = xmlParse3986PathRootless(uri, &cur); 775 if (ret != 0) return(ret); 776 } else { 777 /* path-empty is effectively empty */ 778 if (uri != NULL) { 779 if (uri->path != NULL) xmlFree(uri->path); 780 uri->path = NULL; 781 } 782 } 783 *str = cur; 784 return (0); 785} 786 787/** 788 * xmlParse3986RelativeRef: 789 * @uri: pointer to an URI structure 790 * @str: the string to analyze 791 * 792 * Parse an URI string and fills in the appropriate fields 793 * of the @uri structure 794 * 795 * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 796 * relative-part = "//" authority path-abempty 797 * / path-absolute 798 * / path-noscheme 799 * / path-empty 800 * 801 * Returns 0 or the error code 802 */ 803static int 804xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 805 int ret; 806 807 if ((*str == '/') && (*(str + 1) == '/')) { 808 str += 2; 809 ret = xmlParse3986Authority(uri, &str); 810 if (ret != 0) return(ret); 811 ret = xmlParse3986PathAbEmpty(uri, &str); 812 if (ret != 0) return(ret); 813 } else if (*str == '/') { 814 ret = xmlParse3986PathAbsolute(uri, &str); 815 if (ret != 0) return(ret); 816 } else if (ISA_PCHAR(str)) { 817 ret = xmlParse3986PathNoScheme(uri, &str); 818 if (ret != 0) return(ret); 819 } else { 820 /* path-empty is effectively empty */ 821 if (uri != NULL) { 822 if (uri->path != NULL) xmlFree(uri->path); 823 uri->path = NULL; 824 } 825 } 826 827 if (*str == '?') { 828 str++; 829 ret = xmlParse3986Query(uri, &str); 830 if (ret != 0) return(ret); 831 } 832 if (*str == '#') { 833 str++; 834 ret = xmlParse3986Fragment(uri, &str); 835 if (ret != 0) return(ret); 836 } 837 if (*str != 0) { 838 xmlCleanURI(uri); 839 return(1); 840 } 841 return(0); 842} 843 844 845/** 846 * xmlParse3986URI: 847 * @uri: pointer to an URI structure 848 * @str: the string to analyze 849 * 850 * Parse an URI string and fills in the appropriate fields 851 * of the @uri structure 852 * 853 * scheme ":" hier-part [ "?" 
query ] [ "#" fragment ] 854 * 855 * Returns 0 or the error code 856 */ 857static int 858xmlParse3986URI(xmlURIPtr uri, const char *str) { 859 int ret; 860 861 ret = xmlParse3986Scheme(uri, &str); 862 if (ret != 0) return(ret); 863 if (*str != ':') { 864 return(1); 865 } 866 str++; 867 ret = xmlParse3986HierPart(uri, &str); 868 if (ret != 0) return(ret); 869 if (*str == '?') { 870 str++; 871 ret = xmlParse3986Query(uri, &str); 872 if (ret != 0) return(ret); 873 } 874 if (*str == '#') { 875 str++; 876 ret = xmlParse3986Fragment(uri, &str); 877 if (ret != 0) return(ret); 878 } 879 if (*str != 0) { 880 xmlCleanURI(uri); 881 return(1); 882 } 883 return(0); 884} 885 886/** 887 * xmlParse3986URIReference: 888 * @uri: pointer to an URI structure 889 * @str: the string to analyze 890 * 891 * Parse an URI reference string and fills in the appropriate fields 892 * of the @uri structure 893 * 894 * URI-reference = URI / relative-ref 895 * 896 * Returns 0 or the error code 897 */ 898static int 899xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 900 int ret; 901 902 if (str == NULL) 903 return(-1); 904 xmlCleanURI(uri); 905 906 /* 907 * Try first to parse absolute refs, then fallback to relative if 908 * it fails. 
909 */ 910 ret = xmlParse3986URI(uri, str); 911 if (ret != 0) { 912 xmlCleanURI(uri); 913 ret = xmlParse3986RelativeRef(uri, str); 914 if (ret != 0) { 915 xmlCleanURI(uri); 916 return(ret); 917 } 918 } 919 return(0); 920} 921 922/** 923 * xmlParseURI: 924 * @str: the URI string to analyze 925 * 926 * Parse an URI based on RFC 3986 927 * 928 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 929 * 930 * Returns a newly built xmlURIPtr or NULL in case of error 931 */ 932xmlURIPtr 933xmlParseURI(const char *str) { 934 xmlURIPtr uri; 935 int ret; 936 937 if (str == NULL) 938 return(NULL); 939 uri = xmlCreateURI(); 940 if (uri != NULL) { 941 ret = xmlParse3986URIReference(uri, str); 942 if (ret) { 943 xmlFreeURI(uri); 944 return(NULL); 945 } 946 } 947 return(uri); 948} 949 950/** 951 * xmlParseURIReference: 952 * @uri: pointer to an URI structure 953 * @str: the string to analyze 954 * 955 * Parse an URI reference string based on RFC 3986 and fills in the 956 * appropriate fields of the @uri structure 957 * 958 * URI-reference = URI / relative-ref 959 * 960 * Returns 0 or the error code 961 */ 962int 963xmlParseURIReference(xmlURIPtr uri, const char *str) { 964 return(xmlParse3986URIReference(uri, str)); 965} 966 967/** 968 * xmlParseURIRaw: 969 * @str: the URI string to analyze 970 * @raw: if 1 unescaping of URI pieces are disabled 971 * 972 * Parse an URI but allows to keep intact the original fragments. 
973 * 974 * URI-reference = URI / relative-ref 975 * 976 * Returns a newly built xmlURIPtr or NULL in case of error 977 */ 978xmlURIPtr 979xmlParseURIRaw(const char *str, int raw) { 980 xmlURIPtr uri; 981 int ret; 982 983 if (str == NULL) 984 return(NULL); 985 uri = xmlCreateURI(); 986 if (uri != NULL) { 987 if (raw) { 988 uri->cleanup |= 2; 989 } 990 ret = xmlParseURIReference(uri, str); 991 if (ret) { 992 xmlFreeURI(uri); 993 return(NULL); 994 } 995 } 996 return(uri); 997} 998 999/************************************************************************ 1000 * * 1001 * Generic URI structure functions * 1002 * * 1003 ************************************************************************/ 1004 1005/** 1006 * xmlCreateURI: 1007 * 1008 * Simply creates an empty xmlURI 1009 * 1010 * Returns the new structure or NULL in case of error 1011 */ 1012xmlURIPtr 1013xmlCreateURI(void) { 1014 xmlURIPtr ret; 1015 1016 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 1017 if (ret == NULL) { 1018 xmlURIErrMemory("creating URI structure\n"); 1019 return(NULL); 1020 } 1021 memset(ret, 0, sizeof(xmlURI)); 1022 return(ret); 1023} 1024 1025/** 1026 * xmlSaveUriRealloc: 1027 * 1028 * Function to handle properly a reallocation when saving an URI 1029 * Also imposes some limit on the length of an URI string output 1030 */ 1031static xmlChar * 1032xmlSaveUriRealloc(xmlChar *ret, int *max) { 1033 xmlChar *temp; 1034 int tmp; 1035 1036 if (*max > MAX_URI_LENGTH) { 1037 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n"); 1038 return(NULL); 1039 } 1040 tmp = *max * 2; 1041 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1)); 1042 if (temp == NULL) { 1043 xmlURIErrMemory("saving URI\n"); 1044 return(NULL); 1045 } 1046 *max = tmp; 1047 return(temp); 1048} 1049 1050/** 1051 * xmlSaveUri: 1052 * @uri: pointer to an xmlURI 1053 * 1054 * Save the URI as an escaped string 1055 * 1056 * Returns a new string (to be deallocated by caller) 1057 */ 1058xmlChar * 1059xmlSaveUri(xmlURIPtr uri) { 1060 
xmlChar *ret = NULL; 1061 xmlChar *temp; 1062 const char *p; 1063 int len; 1064 int max; 1065 1066 if (uri == NULL) return(NULL); 1067 1068 1069 max = 80; 1070 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1071 if (ret == NULL) { 1072 xmlURIErrMemory("saving URI\n"); 1073 return(NULL); 1074 } 1075 len = 0; 1076 1077 if (uri->scheme != NULL) { 1078 p = uri->scheme; 1079 while (*p != 0) { 1080 if (len >= max) { 1081 temp = xmlSaveUriRealloc(ret, &max); 1082 if (temp == NULL) goto mem_error; 1083 ret = temp; 1084 } 1085 ret[len++] = *p++; 1086 } 1087 if (len >= max) { 1088 temp = xmlSaveUriRealloc(ret, &max); 1089 if (temp == NULL) goto mem_error; 1090 ret = temp; 1091 } 1092 ret[len++] = ':'; 1093 } 1094 if (uri->opaque != NULL) { 1095 p = uri->opaque; 1096 while (*p != 0) { 1097 if (len + 3 >= max) { 1098 temp = xmlSaveUriRealloc(ret, &max); 1099 if (temp == NULL) goto mem_error; 1100 ret = temp; 1101 } 1102 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1103 ret[len++] = *p++; 1104 else { 1105 int val = *(unsigned char *)p++; 1106 int hi = val / 0x10, lo = val % 0x10; 1107 ret[len++] = '%'; 1108 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1109 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1110 } 1111 } 1112 } else { 1113 if ((uri->server != NULL) || (uri->port == -1)) { 1114 if (len + 3 >= max) { 1115 temp = xmlSaveUriRealloc(ret, &max); 1116 if (temp == NULL) goto mem_error; 1117 ret = temp; 1118 } 1119 ret[len++] = '/'; 1120 ret[len++] = '/'; 1121 if (uri->user != NULL) { 1122 p = uri->user; 1123 while (*p != 0) { 1124 if (len + 3 >= max) { 1125 temp = xmlSaveUriRealloc(ret, &max); 1126 if (temp == NULL) goto mem_error; 1127 ret = temp; 1128 } 1129 if ((IS_UNRESERVED(*(p))) || 1130 ((*(p) == ';')) || ((*(p) == ':')) || 1131 ((*(p) == '&')) || ((*(p) == '=')) || 1132 ((*(p) == '+')) || ((*(p) == '$')) || 1133 ((*(p) == ','))) 1134 ret[len++] = *p++; 1135 else { 1136 int val = *(unsigned char *)p++; 1137 int hi = val / 0x10, lo = val % 0x10; 1138 ret[len++] = '%'; 1139 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1140 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1141 } 1142 } 1143 if (len + 3 >= max) { 1144 temp = xmlSaveUriRealloc(ret, &max); 1145 if (temp == NULL) goto mem_error; 1146 ret = temp; 1147 } 1148 ret[len++] = '@'; 1149 } 1150 if (uri->server != NULL) { 1151 p = uri->server; 1152 while (*p != 0) { 1153 if (len >= max) { 1154 temp = xmlSaveUriRealloc(ret, &max); 1155 if (temp == NULL) goto mem_error; 1156 ret = temp; 1157 } 1158 ret[len++] = *p++; 1159 } 1160 if (uri->port > 0) { 1161 if (len + 10 >= max) { 1162 temp = xmlSaveUriRealloc(ret, &max); 1163 if (temp == NULL) goto mem_error; 1164 ret = temp; 1165 } 1166 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1167 } 1168 } 1169 } else if (uri->authority != NULL) { 1170 if (len + 3 >= max) { 1171 temp = xmlSaveUriRealloc(ret, &max); 1172 if (temp == NULL) goto mem_error; 1173 ret = temp; 1174 } 1175 ret[len++] = '/'; 1176 ret[len++] = '/'; 1177 p = uri->authority; 1178 while (*p != 0) { 1179 if (len + 3 >= max) { 1180 temp = xmlSaveUriRealloc(ret, &max); 1181 if (temp == NULL) goto mem_error; 1182 ret = temp; 1183 } 1184 if ((IS_UNRESERVED(*(p))) || 1185 
((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 1186 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1187 ((*(p) == '=')) || ((*(p) == '+'))) 1188 ret[len++] = *p++; 1189 else { 1190 int val = *(unsigned char *)p++; 1191 int hi = val / 0x10, lo = val % 0x10; 1192 ret[len++] = '%'; 1193 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1194 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1195 } 1196 } 1197 } else if (uri->scheme != NULL) { 1198 if (len + 3 >= max) { 1199 temp = xmlSaveUriRealloc(ret, &max); 1200 if (temp == NULL) goto mem_error; 1201 ret = temp; 1202 } 1203 } 1204 if (uri->path != NULL) { 1205 p = uri->path; 1206 /* 1207 * the colon in file:///d: should not be escaped or 1208 * Windows accesses fail later. 1209 */ 1210 if ((uri->scheme != NULL) && 1211 (p[0] == '/') && 1212 (((p[1] >= 'a') && (p[1] <= 'z')) || 1213 ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1214 (p[2] == ':') && 1215 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1216 if (len + 3 >= max) { 1217 temp = xmlSaveUriRealloc(ret, &max); 1218 if (temp == NULL) goto mem_error; 1219 ret = temp; 1220 } 1221 ret[len++] = *p++; 1222 ret[len++] = *p++; 1223 ret[len++] = *p++; 1224 } 1225 while (*p != 0) { 1226 if (len + 3 >= max) { 1227 temp = xmlSaveUriRealloc(ret, &max); 1228 if (temp == NULL) goto mem_error; 1229 ret = temp; 1230 } 1231 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1232 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1233 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 1234 ((*(p) == ','))) 1235 ret[len++] = *p++; 1236 else { 1237 int val = *(unsigned char *)p++; 1238 int hi = val / 0x10, lo = val % 0x10; 1239 ret[len++] = '%'; 1240 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1241 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1242 } 1243 } 1244 } 1245 if (uri->query_raw != NULL) { 1246 if (len + 1 >= max) { 1247 temp = xmlSaveUriRealloc(ret, &max); 1248 if (temp == NULL) goto mem_error; 1249 ret = temp; 1250 } 1251 ret[len++] = '?'; 1252 p = uri->query_raw; 1253 while (*p != 0) { 1254 if (len + 1 >= max) { 1255 temp = xmlSaveUriRealloc(ret, &max); 1256 if (temp == NULL) goto mem_error; 1257 ret = temp; 1258 } 1259 ret[len++] = *p++; 1260 } 1261 } else if (uri->query != NULL) { 1262 if (len + 3 >= max) { 1263 temp = xmlSaveUriRealloc(ret, &max); 1264 if (temp == NULL) goto mem_error; 1265 ret = temp; 1266 } 1267 ret[len++] = '?'; 1268 p = uri->query; 1269 while (*p != 0) { 1270 if (len + 3 >= max) { 1271 temp = xmlSaveUriRealloc(ret, &max); 1272 if (temp == NULL) goto mem_error; 1273 ret = temp; 1274 } 1275 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1276 ret[len++] = *p++; 1277 else { 1278 int val = *(unsigned char *)p++; 1279 int hi = val / 0x10, lo = val % 0x10; 1280 ret[len++] = '%'; 1281 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1282 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1283 } 1284 } 1285 } 1286 } 1287 if (uri->fragment != NULL) { 1288 if (len + 3 >= max) { 1289 temp = xmlSaveUriRealloc(ret, &max); 1290 if (temp == NULL) goto mem_error; 1291 ret = temp; 1292 } 1293 ret[len++] = '#'; 1294 p = uri->fragment; 1295 while (*p != 0) { 1296 if (len + 3 >= max) { 1297 temp = xmlSaveUriRealloc(ret, &max); 1298 if (temp == NULL) goto mem_error; 1299 ret = temp; 1300 } 1301 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1302 ret[len++] = *p++; 1303 else { 1304 int val = *(unsigned char *)p++; 1305 int hi = val / 0x10, lo = val % 0x10; 1306 ret[len++] = '%'; 1307 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1308 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1309 } 1310 } 1311 } 1312 if (len >= max) { 1313 temp = xmlSaveUriRealloc(ret, &max); 1314 if (temp == NULL) goto mem_error; 1315 ret = temp; 1316 } 1317 ret[len] = 0; 1318 return(ret); 1319 1320mem_error: 1321 xmlFree(ret); 1322 return(NULL); 1323} 1324 1325/** 1326 * xmlPrintURI: 1327 * @stream: a FILE* for the output 1328 * @uri: pointer to an xmlURI 1329 * 1330 * Prints the URI in the stream @stream. 1331 */ 1332void 1333xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1334 xmlChar *out; 1335 1336 out = xmlSaveUri(uri); 1337 if (out != NULL) { 1338 fprintf(stream, "%s", (char *) out); 1339 xmlFree(out); 1340 } 1341} 1342 1343/** 1344 * xmlCleanURI: 1345 * @uri: pointer to an xmlURI 1346 * 1347 * Make sure the xmlURI struct is free of content 1348 */ 1349static void 1350xmlCleanURI(xmlURIPtr uri) { 1351 if (uri == NULL) return; 1352 1353 if (uri->scheme != NULL) xmlFree(uri->scheme); 1354 uri->scheme = NULL; 1355 if (uri->server != NULL) xmlFree(uri->server); 1356 uri->server = NULL; 1357 if (uri->user != NULL) xmlFree(uri->user); 1358 uri->user = NULL; 1359 if (uri->path != NULL) xmlFree(uri->path); 1360 uri->path = NULL; 1361 if (uri->fragment != NULL) xmlFree(uri->fragment); 1362 uri->fragment = NULL; 1363 if (uri->opaque != NULL) xmlFree(uri->opaque); 1364 uri->opaque = NULL; 1365 if (uri->authority != NULL) xmlFree(uri->authority); 1366 uri->authority = NULL; 1367 if (uri->query != NULL) xmlFree(uri->query); 1368 uri->query = NULL; 1369 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1370 uri->query_raw = NULL; 1371} 1372 1373/** 1374 * xmlFreeURI: 1375 * @uri: pointer to an xmlURI 1376 * 1377 * Free up the xmlURI struct 1378 */ 1379void 1380xmlFreeURI(xmlURIPtr uri) { 1381 if (uri == NULL) return; 1382 1383 if (uri->scheme != NULL) xmlFree(uri->scheme); 1384 if (uri->server != NULL) xmlFree(uri->server); 1385 if (uri->user != NULL) xmlFree(uri->user); 1386 if (uri->path != NULL) xmlFree(uri->path); 1387 if (uri->fragment != NULL) 
xmlFree(uri->fragment); 1388 if (uri->opaque != NULL) xmlFree(uri->opaque); 1389 if (uri->authority != NULL) xmlFree(uri->authority); 1390 if (uri->query != NULL) xmlFree(uri->query); 1391 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1392 xmlFree(uri); 1393} 1394 1395/************************************************************************ 1396 * * 1397 * Helper functions * 1398 * * 1399 ************************************************************************/ 1400 1401/** 1402 * xmlNormalizeURIPath: 1403 * @path: pointer to the path string 1404 * 1405 * Applies the 5 normalization steps to a path string--that is, RFC 2396 1406 * Section 5.2, steps 6.c through 6.g. 1407 * 1408 * Normalization occurs directly on the string, no new allocation is done 1409 * 1410 * Returns 0 or an error code 1411 */ 1412int 1413xmlNormalizeURIPath(char *path) { 1414 char *cur, *out; 1415 1416 if (path == NULL) 1417 return(-1); 1418 1419 /* Skip all initial "/" chars. We want to get to the beginning of the 1420 * first non-empty segment. 1421 */ 1422 cur = path; 1423 while (cur[0] == '/') 1424 ++cur; 1425 if (cur[0] == '\0') 1426 return(0); 1427 1428 /* Keep everything we've seen so far. */ 1429 out = cur; 1430 1431 /* 1432 * Analyze each segment in sequence for cases (c) and (d). 1433 */ 1434 while (cur[0] != '\0') { 1435 /* 1436 * c) All occurrences of "./", where "." is a complete path segment, 1437 * are removed from the buffer string. 1438 */ 1439 if ((cur[0] == '.') && (cur[1] == '/')) { 1440 cur += 2; 1441 /* '//' normalization should be done at this point too */ 1442 while (cur[0] == '/') 1443 cur++; 1444 continue; 1445 } 1446 1447 /* 1448 * d) If the buffer string ends with "." as a complete path segment, 1449 * that "." is removed. 1450 */ 1451 if ((cur[0] == '.') && (cur[1] == '\0')) 1452 break; 1453 1454 /* Otherwise keep the segment. 
*/ 1455 while (cur[0] != '/') { 1456 if (cur[0] == '\0') 1457 goto done_cd; 1458 (out++)[0] = (cur++)[0]; 1459 } 1460 /* nomalize // */ 1461 while ((cur[0] == '/') && (cur[1] == '/')) 1462 cur++; 1463 1464 (out++)[0] = (cur++)[0]; 1465 } 1466 done_cd: 1467 out[0] = '\0'; 1468 1469 /* Reset to the beginning of the first segment for the next sequence. */ 1470 cur = path; 1471 while (cur[0] == '/') 1472 ++cur; 1473 if (cur[0] == '\0') 1474 return(0); 1475 1476 /* 1477 * Analyze each segment in sequence for cases (e) and (f). 1478 * 1479 * e) All occurrences of "<segment>/../", where <segment> is a 1480 * complete path segment not equal to "..", are removed from the 1481 * buffer string. Removal of these path segments is performed 1482 * iteratively, removing the leftmost matching pattern on each 1483 * iteration, until no matching pattern remains. 1484 * 1485 * f) If the buffer string ends with "<segment>/..", where <segment> 1486 * is a complete path segment not equal to "..", that 1487 * "<segment>/.." is removed. 1488 * 1489 * To satisfy the "iterative" clause in (e), we need to collapse the 1490 * string every time we find something that needs to be removed. Thus, 1491 * we don't need to keep two pointers into the string: we only need a 1492 * "current position" pointer. 1493 */ 1494 while (1) { 1495 char *segp, *tmp; 1496 1497 /* At the beginning of each iteration of this loop, "cur" points to 1498 * the first character of the segment we want to examine. 1499 */ 1500 1501 /* Find the end of the current segment. */ 1502 segp = cur; 1503 while ((segp[0] != '/') && (segp[0] != '\0')) 1504 ++segp; 1505 1506 /* If this is the last segment, we're done (we need at least two 1507 * segments to meet the criteria for the (e) and (f) cases). 1508 */ 1509 if (segp[0] == '\0') 1510 break; 1511 1512 /* If the first segment is "..", or if the next segment _isn't_ "..", 1513 * keep this segment and try the next one. 
1514 */ 1515 ++segp; 1516 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) 1517 || ((segp[0] != '.') || (segp[1] != '.') 1518 || ((segp[2] != '/') && (segp[2] != '\0')))) { 1519 cur = segp; 1520 continue; 1521 } 1522 1523 /* If we get here, remove this segment and the next one and back up 1524 * to the previous segment (if there is one), to implement the 1525 * "iteratively" clause. It's pretty much impossible to back up 1526 * while maintaining two pointers into the buffer, so just compact 1527 * the whole buffer now. 1528 */ 1529 1530 /* If this is the end of the buffer, we're done. */ 1531 if (segp[2] == '\0') { 1532 cur[0] = '\0'; 1533 break; 1534 } 1535 /* Valgrind complained, strcpy(cur, segp + 3); */ 1536 /* string will overlap, do not use strcpy */ 1537 tmp = cur; 1538 segp += 3; 1539 while ((*tmp++ = *segp++) != 0) 1540 ; 1541 1542 /* If there are no previous segments, then keep going from here. */ 1543 segp = cur; 1544 while ((segp > path) && ((--segp)[0] == '/')) 1545 ; 1546 if (segp == path) 1547 continue; 1548 1549 /* "segp" is pointing to the end of a previous segment; find it's 1550 * start. We need to back up to the previous segment and start 1551 * over with that to handle things like "foo/bar/../..". If we 1552 * don't do this, then on the first pass we'll remove the "bar/..", 1553 * but be pointing at the second ".." so we won't realize we can also 1554 * remove the "foo/..". 1555 */ 1556 cur = segp; 1557 while ((cur > path) && (cur[-1] != '/')) 1558 --cur; 1559 } 1560 out[0] = '\0'; 1561 1562 /* 1563 * g) If the resulting buffer string still begins with one or more 1564 * complete path segments of "..", then the reference is 1565 * considered to be in error. 
Implementations may handle this 1566 * error by retaining these components in the resolved path (i.e., 1567 * treating them as part of the final URI), by removing them from 1568 * the resolved path (i.e., discarding relative levels above the 1569 * root), or by avoiding traversal of the reference. 1570 * 1571 * We discard them from the final path. 1572 */ 1573 if (path[0] == '/') { 1574 cur = path; 1575 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') 1576 && ((cur[3] == '/') || (cur[3] == '\0'))) 1577 cur += 3; 1578 1579 if (cur != path) { 1580 out = path; 1581 while (cur[0] != '\0') 1582 (out++)[0] = (cur++)[0]; 1583 out[0] = 0; 1584 } 1585 } 1586 1587 return(0); 1588} 1589 1590static int is_hex(char c) { 1591 if (((c >= '0') && (c <= '9')) || 1592 ((c >= 'a') && (c <= 'f')) || 1593 ((c >= 'A') && (c <= 'F'))) 1594 return(1); 1595 return(0); 1596} 1597 1598/** 1599 * xmlURIUnescapeString: 1600 * @str: the string to unescape 1601 * @len: the length in bytes to unescape (or <= 0 to indicate full string) 1602 * @target: optional destination buffer 1603 * 1604 * Unescaping routine, but does not check that the string is an URI. The 1605 * output is a direct unsigned char translation of %XX values (no encoding) 1606 * Note that the length of the result can only be smaller or same size as 1607 * the input string. 
1608 * 1609 * Returns a copy of the string, but unescaped, will return NULL only in case 1610 * of error 1611 */ 1612char * 1613xmlURIUnescapeString(const char *str, int len, char *target) { 1614 char *ret, *out; 1615 const char *in; 1616 1617 if (str == NULL) 1618 return(NULL); 1619 if (len <= 0) len = strlen(str); 1620 if (len < 0) return(NULL); 1621 1622 if (target == NULL) { 1623 ret = (char *) xmlMallocAtomic(len + 1); 1624 if (ret == NULL) { 1625 xmlURIErrMemory("unescaping URI value\n"); 1626 return(NULL); 1627 } 1628 } else 1629 ret = target; 1630 in = str; 1631 out = ret; 1632 while(len > 0) { 1633 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1634 in++; 1635 if ((*in >= '0') && (*in <= '9')) 1636 *out = (*in - '0'); 1637 else if ((*in >= 'a') && (*in <= 'f')) 1638 *out = (*in - 'a') + 10; 1639 else if ((*in >= 'A') && (*in <= 'F')) 1640 *out = (*in - 'A') + 10; 1641 in++; 1642 if ((*in >= '0') && (*in <= '9')) 1643 *out = *out * 16 + (*in - '0'); 1644 else if ((*in >= 'a') && (*in <= 'f')) 1645 *out = *out * 16 + (*in - 'a') + 10; 1646 else if ((*in >= 'A') && (*in <= 'F')) 1647 *out = *out * 16 + (*in - 'A') + 10; 1648 in++; 1649 len -= 3; 1650 out++; 1651 } else { 1652 *out++ = *in++; 1653 len--; 1654 } 1655 } 1656 *out = 0; 1657 return(ret); 1658} 1659 1660/** 1661 * xmlURIEscapeStr: 1662 * @str: string to escape 1663 * @list: exception list string of chars not to escape 1664 * 1665 * This routine escapes a string to hex, ignoring reserved characters (a-z) 1666 * and the characters in the exception list. 1667 * 1668 * Returns a new escaped string or NULL in case of error. 
1669 */ 1670xmlChar * 1671xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1672 xmlChar *ret, ch; 1673 xmlChar *temp; 1674 const xmlChar *in; 1675 int len, out; 1676 1677 if (str == NULL) 1678 return(NULL); 1679 if (str[0] == 0) 1680 return(xmlStrdup(str)); 1681 len = xmlStrlen(str); 1682 if (!(len > 0)) return(NULL); 1683 1684 len += 20; 1685 ret = (xmlChar *) xmlMallocAtomic(len); 1686 if (ret == NULL) { 1687 xmlURIErrMemory("escaping URI value\n"); 1688 return(NULL); 1689 } 1690 in = (const xmlChar *) str; 1691 out = 0; 1692 while(*in != 0) { 1693 if (len - out <= 3) { 1694 temp = xmlSaveUriRealloc(ret, &len); 1695 if (temp == NULL) { 1696 xmlURIErrMemory("escaping URI value\n"); 1697 xmlFree(ret); 1698 return(NULL); 1699 } 1700 ret = temp; 1701 } 1702 1703 ch = *in; 1704 1705 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1706 unsigned char val; 1707 ret[out++] = '%'; 1708 val = ch >> 4; 1709 if (val <= 9) 1710 ret[out++] = '0' + val; 1711 else 1712 ret[out++] = 'A' + val - 0xA; 1713 val = ch & 0xF; 1714 if (val <= 9) 1715 ret[out++] = '0' + val; 1716 else 1717 ret[out++] = 'A' + val - 0xA; 1718 in++; 1719 } else { 1720 ret[out++] = *in++; 1721 } 1722 1723 } 1724 ret[out] = 0; 1725 return(ret); 1726} 1727 1728/** 1729 * xmlURIEscape: 1730 * @str: the string of the URI to escape 1731 * 1732 * Escaping routine, does not do validity checks ! 1733 * It will try to escape the chars needing this, but this is heuristic 1734 * based it's impossible to be sure. 1735 * 1736 * Returns an copy of the string, but escaped 1737 * 1738 * 25 May 2001 1739 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1740 * according to RFC2396. 
1741 * - Carl Douglas 1742 */ 1743xmlChar * 1744xmlURIEscape(const xmlChar * str) 1745{ 1746 xmlChar *ret, *segment = NULL; 1747 xmlURIPtr uri; 1748 int ret2; 1749 1750#define NULLCHK(p) if(!p) { \ 1751 xmlURIErrMemory("escaping URI value\n"); \ 1752 xmlFreeURI(uri); \ 1753 return NULL; } \ 1754 1755 if (str == NULL) 1756 return (NULL); 1757 1758 uri = xmlCreateURI(); 1759 if (uri != NULL) { 1760 /* 1761 * Allow escaping errors in the unescaped form 1762 */ 1763 uri->cleanup = 1; 1764 ret2 = xmlParseURIReference(uri, (const char *)str); 1765 if (ret2) { 1766 xmlFreeURI(uri); 1767 return (NULL); 1768 } 1769 } 1770 1771 if (!uri) 1772 return NULL; 1773 1774 ret = NULL; 1775 1776 if (uri->scheme) { 1777 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1778 NULLCHK(segment) 1779 ret = xmlStrcat(ret, segment); 1780 ret = xmlStrcat(ret, BAD_CAST ":"); 1781 xmlFree(segment); 1782 } 1783 1784 if (uri->authority) { 1785 segment = 1786 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1787 NULLCHK(segment) 1788 ret = xmlStrcat(ret, BAD_CAST "//"); 1789 ret = xmlStrcat(ret, segment); 1790 xmlFree(segment); 1791 } 1792 1793 if (uri->user) { 1794 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1795 NULLCHK(segment) 1796 ret = xmlStrcat(ret,BAD_CAST "//"); 1797 ret = xmlStrcat(ret, segment); 1798 ret = xmlStrcat(ret, BAD_CAST "@"); 1799 xmlFree(segment); 1800 } 1801 1802 if (uri->server) { 1803 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1804 NULLCHK(segment) 1805 if (uri->user == NULL) 1806 ret = xmlStrcat(ret, BAD_CAST "//"); 1807 ret = xmlStrcat(ret, segment); 1808 xmlFree(segment); 1809 } 1810 1811 if (uri->port) { 1812 xmlChar port[10]; 1813 1814 snprintf((char *) port, 10, "%d", uri->port); 1815 ret = xmlStrcat(ret, BAD_CAST ":"); 1816 ret = xmlStrcat(ret, port); 1817 } 1818 1819 if (uri->path) { 1820 segment = 1821 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1822 NULLCHK(segment) 1823 ret 
= xmlStrcat(ret, segment); 1824 xmlFree(segment); 1825 } 1826 1827 if (uri->query_raw) { 1828 ret = xmlStrcat(ret, BAD_CAST "?"); 1829 ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1830 } 1831 else if (uri->query) { 1832 segment = 1833 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1834 NULLCHK(segment) 1835 ret = xmlStrcat(ret, BAD_CAST "?"); 1836 ret = xmlStrcat(ret, segment); 1837 xmlFree(segment); 1838 } 1839 1840 if (uri->opaque) { 1841 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1842 NULLCHK(segment) 1843 ret = xmlStrcat(ret, segment); 1844 xmlFree(segment); 1845 } 1846 1847 if (uri->fragment) { 1848 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1849 NULLCHK(segment) 1850 ret = xmlStrcat(ret, BAD_CAST "#"); 1851 ret = xmlStrcat(ret, segment); 1852 xmlFree(segment); 1853 } 1854 1855 xmlFreeURI(uri); 1856#undef NULLCHK 1857 1858 return (ret); 1859} 1860 1861/************************************************************************ 1862 * * 1863 * Public functions * 1864 * * 1865 ************************************************************************/ 1866 1867/** 1868 * xmlBuildURI: 1869 * @URI: the URI instance found in the document 1870 * @base: the base value 1871 * 1872 * Computes he final URI of the reference done by checking that 1873 * the given URI is valid, and building the final URI using the 1874 * base URI. This is processed according to section 5.2 of the 1875 * RFC 2396 1876 * 1877 * 5.2. Resolving Relative References to Absolute Form 1878 * 1879 * Returns a new URI string (to be freed by the caller) or NULL in case 1880 * of error. 1881 */ 1882xmlChar * 1883xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1884 xmlChar *val = NULL; 1885 int ret, len, indx, cur, out; 1886 xmlURIPtr ref = NULL; 1887 xmlURIPtr bas = NULL; 1888 xmlURIPtr res = NULL; 1889 1890 /* 1891 * 1) The URI reference is parsed into the potential four components and 1892 * fragment identifier, as described in Section 4.3. 
1893 * 1894 * NOTE that a completely empty URI is treated by modern browsers 1895 * as a reference to "." rather than as a synonym for the current 1896 * URI. Should we do that here? 1897 */ 1898 if (URI == NULL) 1899 ret = -1; 1900 else { 1901 if (*URI) { 1902 ref = xmlCreateURI(); 1903 if (ref == NULL) 1904 goto done; 1905 ret = xmlParseURIReference(ref, (const char *) URI); 1906 } 1907 else 1908 ret = 0; 1909 } 1910 if (ret != 0) 1911 goto done; 1912 if ((ref != NULL) && (ref->scheme != NULL)) { 1913 /* 1914 * The URI is absolute don't modify. 1915 */ 1916 val = xmlStrdup(URI); 1917 goto done; 1918 } 1919 if (base == NULL) 1920 ret = -1; 1921 else { 1922 bas = xmlCreateURI(); 1923 if (bas == NULL) 1924 goto done; 1925 ret = xmlParseURIReference(bas, (const char *) base); 1926 } 1927 if (ret != 0) { 1928 if (ref) 1929 val = xmlSaveUri(ref); 1930 goto done; 1931 } 1932 if (ref == NULL) { 1933 /* 1934 * the base fragment must be ignored 1935 */ 1936 if (bas->fragment != NULL) { 1937 xmlFree(bas->fragment); 1938 bas->fragment = NULL; 1939 } 1940 val = xmlSaveUri(bas); 1941 goto done; 1942 } 1943 1944 /* 1945 * 2) If the path component is empty and the scheme, authority, and 1946 * query components are undefined, then it is a reference to the 1947 * current document and we are done. Otherwise, the reference URI's 1948 * query and fragment components are defined as found (or not found) 1949 * within the URI reference and not inherited from the base URI. 1950 * 1951 * NOTE that in modern browsers, the parsing differs from the above 1952 * in the following aspect: the query component is allowed to be 1953 * defined while still treating this as a reference to the current 1954 * document. 
1955 */ 1956 res = xmlCreateURI(); 1957 if (res == NULL) 1958 goto done; 1959 if ((ref->scheme == NULL) && (ref->path == NULL) && 1960 ((ref->authority == NULL) && (ref->server == NULL))) { 1961 if (bas->scheme != NULL) 1962 res->scheme = xmlMemStrdup(bas->scheme); 1963 if (bas->authority != NULL) 1964 res->authority = xmlMemStrdup(bas->authority); 1965 else if (bas->server != NULL) { 1966 res->server = xmlMemStrdup(bas->server); 1967 if (bas->user != NULL) 1968 res->user = xmlMemStrdup(bas->user); 1969 res->port = bas->port; 1970 } 1971 if (bas->path != NULL) 1972 res->path = xmlMemStrdup(bas->path); 1973 if (ref->query_raw != NULL) 1974 res->query_raw = xmlMemStrdup (ref->query_raw); 1975 else if (ref->query != NULL) 1976 res->query = xmlMemStrdup(ref->query); 1977 else if (bas->query_raw != NULL) 1978 res->query_raw = xmlMemStrdup(bas->query_raw); 1979 else if (bas->query != NULL) 1980 res->query = xmlMemStrdup(bas->query); 1981 if (ref->fragment != NULL) 1982 res->fragment = xmlMemStrdup(ref->fragment); 1983 goto step_7; 1984 } 1985 1986 /* 1987 * 3) If the scheme component is defined, indicating that the reference 1988 * starts with a scheme name, then the reference is interpreted as an 1989 * absolute URI and we are done. Otherwise, the reference URI's 1990 * scheme is inherited from the base URI's scheme component. 1991 */ 1992 if (ref->scheme != NULL) { 1993 val = xmlSaveUri(ref); 1994 goto done; 1995 } 1996 if (bas->scheme != NULL) 1997 res->scheme = xmlMemStrdup(bas->scheme); 1998 1999 if (ref->query_raw != NULL) 2000 res->query_raw = xmlMemStrdup(ref->query_raw); 2001 else if (ref->query != NULL) 2002 res->query = xmlMemStrdup(ref->query); 2003 if (ref->fragment != NULL) 2004 res->fragment = xmlMemStrdup(ref->fragment); 2005 2006 /* 2007 * 4) If the authority component is defined, then the reference is a 2008 * network-path and we skip to step 7. 
Otherwise, the reference 2009 * URI's authority is inherited from the base URI's authority 2010 * component, which will also be undefined if the URI scheme does not 2011 * use an authority component. 2012 */ 2013 if ((ref->authority != NULL) || (ref->server != NULL)) { 2014 if (ref->authority != NULL) 2015 res->authority = xmlMemStrdup(ref->authority); 2016 else { 2017 res->server = xmlMemStrdup(ref->server); 2018 if (ref->user != NULL) 2019 res->user = xmlMemStrdup(ref->user); 2020 res->port = ref->port; 2021 } 2022 if (ref->path != NULL) 2023 res->path = xmlMemStrdup(ref->path); 2024 goto step_7; 2025 } 2026 if (bas->authority != NULL) 2027 res->authority = xmlMemStrdup(bas->authority); 2028 else if (bas->server != NULL) { 2029 res->server = xmlMemStrdup(bas->server); 2030 if (bas->user != NULL) 2031 res->user = xmlMemStrdup(bas->user); 2032 res->port = bas->port; 2033 } 2034 2035 /* 2036 * 5) If the path component begins with a slash character ("/"), then 2037 * the reference is an absolute-path and we skip to step 7. 2038 */ 2039 if ((ref->path != NULL) && (ref->path[0] == '/')) { 2040 res->path = xmlMemStrdup(ref->path); 2041 goto step_7; 2042 } 2043 2044 2045 /* 2046 * 6) If this step is reached, then we are resolving a relative-path 2047 * reference. The relative path needs to be merged with the base 2048 * URI's path. Although there are many ways to do this, we will 2049 * describe a simple method using a separate string buffer. 2050 * 2051 * Allocate a buffer large enough for the result string. 2052 */ 2053 len = 2; /* extra / and 0 */ 2054 if (ref->path != NULL) 2055 len += strlen(ref->path); 2056 if (bas->path != NULL) 2057 len += strlen(bas->path); 2058 res->path = (char *) xmlMallocAtomic(len); 2059 if (res->path == NULL) { 2060 xmlURIErrMemory("resolving URI against base\n"); 2061 goto done; 2062 } 2063 res->path[0] = 0; 2064 2065 /* 2066 * a) All but the last segment of the base URI's path component is 2067 * copied to the buffer. 
In other words, any characters after the 2068 * last (right-most) slash character, if any, are excluded. 2069 */ 2070 cur = 0; 2071 out = 0; 2072 if (bas->path != NULL) { 2073 while (bas->path[cur] != 0) { 2074 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2075 cur++; 2076 if (bas->path[cur] == 0) 2077 break; 2078 2079 cur++; 2080 while (out < cur) { 2081 res->path[out] = bas->path[out]; 2082 out++; 2083 } 2084 } 2085 } 2086 res->path[out] = 0; 2087 2088 /* 2089 * b) The reference's path component is appended to the buffer 2090 * string. 2091 */ 2092 if (ref->path != NULL && ref->path[0] != 0) { 2093 indx = 0; 2094 /* 2095 * Ensure the path includes a '/' 2096 */ 2097 if ((out == 0) && (bas->server != NULL)) 2098 res->path[out++] = '/'; 2099 while (ref->path[indx] != 0) { 2100 res->path[out++] = ref->path[indx++]; 2101 } 2102 } 2103 res->path[out] = 0; 2104 2105 /* 2106 * Steps c) to h) are really path normalization steps 2107 */ 2108 xmlNormalizeURIPath(res->path); 2109 2110step_7: 2111 2112 /* 2113 * 7) The resulting URI components, including any inherited from the 2114 * base URI, are recombined to give the absolute form of the URI 2115 * reference. 2116 */ 2117 val = xmlSaveUri(res); 2118 2119done: 2120 if (ref != NULL) 2121 xmlFreeURI(ref); 2122 if (bas != NULL) 2123 xmlFreeURI(bas); 2124 if (res != NULL) 2125 xmlFreeURI(res); 2126 return(val); 2127} 2128 2129/** 2130 * xmlBuildRelativeURI: 2131 * @URI: the URI reference under consideration 2132 * @base: the base value 2133 * 2134 * Expresses the URI of the reference in terms relative to the 2135 * base. 
 * Some examples of this operation include:
 *     base = "http://site1.com/docs/book1.html"
 *        URI input                        URI returned
 *     docs/pic1.gif                    pic1.gif
 *     docs/img/pic1.gif                img/pic1.gif
 *     img/pic1.gif                     ../img/pic1.gif
 *     http://site1.com/docs/pic1.gif   pic1.gif
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
 *
 *     base = "docs/book1.html"
 *        URI input                        URI returned
 *     docs/pic1.gif                    pic1.gif
 *     docs/img/pic1.gif                img/pic1.gif
 *     img/pic1.gif                     ../img/pic1.gif
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
 *
 *
 * Note: if the URI reference is really weird or complicated, it may be
 *     worthwhile to first convert it into a "nice" one by calling
 *     xmlBuildURI (using 'base') before calling this routine,
 *     since this routine (for reasonable efficiency) assumes URI has
 *     already been through some validation.
 *
 * Returns a new URI string (to be freed by the caller) or NULL in case of
 * error.
2160 */ 2161xmlChar * 2162xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2163{ 2164 xmlChar *val = NULL; 2165 int ret; 2166 int ix; 2167 int pos = 0; 2168 int nbslash = 0; 2169 int len; 2170 xmlURIPtr ref = NULL; 2171 xmlURIPtr bas = NULL; 2172 xmlChar *bptr, *uptr, *vptr; 2173 int remove_path = 0; 2174 2175 if ((URI == NULL) || (*URI == 0)) 2176 return NULL; 2177 2178 /* 2179 * First parse URI into a standard form 2180 */ 2181 ref = xmlCreateURI (); 2182 if (ref == NULL) 2183 return NULL; 2184 /* If URI not already in "relative" form */ 2185 if (URI[0] != '.') { 2186 ret = xmlParseURIReference (ref, (const char *) URI); 2187 if (ret != 0) 2188 goto done; /* Error in URI, return NULL */ 2189 } else 2190 ref->path = (char *)xmlStrdup(URI); 2191 2192 /* 2193 * Next parse base into the same standard form 2194 */ 2195 if ((base == NULL) || (*base == 0)) { 2196 val = xmlStrdup (URI); 2197 goto done; 2198 } 2199 bas = xmlCreateURI (); 2200 if (bas == NULL) 2201 goto done; 2202 if (base[0] != '.') { 2203 ret = xmlParseURIReference (bas, (const char *) base); 2204 if (ret != 0) 2205 goto done; /* Error in base, return NULL */ 2206 } else 2207 bas->path = (char *)xmlStrdup(base); 2208 2209 /* 2210 * If the scheme / server on the URI differs from the base, 2211 * just return the URI 2212 */ 2213 if ((ref->scheme != NULL) && 2214 ((bas->scheme == NULL) || 2215 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2216 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2217 val = xmlStrdup (URI); 2218 goto done; 2219 } 2220 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2221 val = xmlStrdup(BAD_CAST ""); 2222 goto done; 2223 } 2224 if (bas->path == NULL) { 2225 val = xmlStrdup((xmlChar *)ref->path); 2226 goto done; 2227 } 2228 if (ref->path == NULL) { 2229 ref->path = (char *) "/"; 2230 remove_path = 1; 2231 } 2232 2233 /* 2234 * At this point (at last!) 
we can compare the two paths 2235 * 2236 * First we take care of the special case where either of the 2237 * two path components may be missing (bug 316224) 2238 */ 2239 if (bas->path == NULL) { 2240 if (ref->path != NULL) { 2241 uptr = (xmlChar *) ref->path; 2242 if (*uptr == '/') 2243 uptr++; 2244 /* exception characters from xmlSaveUri */ 2245 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2246 } 2247 goto done; 2248 } 2249 bptr = (xmlChar *)bas->path; 2250 if (ref->path == NULL) { 2251 for (ix = 0; bptr[ix] != 0; ix++) { 2252 if (bptr[ix] == '/') 2253 nbslash++; 2254 } 2255 uptr = NULL; 2256 len = 1; /* this is for a string terminator only */ 2257 } else { 2258 /* 2259 * Next we compare the two strings and find where they first differ 2260 */ 2261 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) 2262 pos += 2; 2263 if ((*bptr == '.') && (bptr[1] == '/')) 2264 bptr += 2; 2265 else if ((*bptr == '/') && (ref->path[pos] != '/')) 2266 bptr++; 2267 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) 2268 pos++; 2269 2270 if (bptr[pos] == ref->path[pos]) { 2271 val = xmlStrdup(BAD_CAST ""); 2272 goto done; /* (I can't imagine why anyone would do this) */ 2273 } 2274 2275 /* 2276 * In URI, "back up" to the last '/' encountered. 
This will be the 2277 * beginning of the "unique" suffix of URI 2278 */ 2279 ix = pos; 2280 if ((ref->path[ix] == '/') && (ix > 0)) 2281 ix--; 2282 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) 2283 ix -= 2; 2284 for (; ix > 0; ix--) { 2285 if (ref->path[ix] == '/') 2286 break; 2287 } 2288 if (ix == 0) { 2289 uptr = (xmlChar *)ref->path; 2290 } else { 2291 ix++; 2292 uptr = (xmlChar *)&ref->path[ix]; 2293 } 2294 2295 /* 2296 * In base, count the number of '/' from the differing point 2297 */ 2298 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ 2299 for (; bptr[ix] != 0; ix++) { 2300 if (bptr[ix] == '/') 2301 nbslash++; 2302 } 2303 } 2304 len = xmlStrlen (uptr) + 1; 2305 } 2306 2307 if (nbslash == 0) { 2308 if (uptr != NULL) 2309 /* exception characters from xmlSaveUri */ 2310 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2311 goto done; 2312 } 2313 2314 /* 2315 * Allocate just enough space for the returned string - 2316 * length of the remainder of the URI, plus enough space 2317 * for the "../" groups, plus one for the terminator 2318 */ 2319 val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2320 if (val == NULL) { 2321 xmlURIErrMemory("building relative URI\n"); 2322 goto done; 2323 } 2324 vptr = val; 2325 /* 2326 * Put in as many "../" as needed 2327 */ 2328 for (; nbslash>0; nbslash--) { 2329 *vptr++ = '.'; 2330 *vptr++ = '.'; 2331 *vptr++ = '/'; 2332 } 2333 /* 2334 * Finish up with the end of the URI 2335 */ 2336 if (uptr != NULL) { 2337 if ((vptr > val) && (len > 0) && 2338 (uptr[0] == '/') && (vptr[-1] == '/')) { 2339 memcpy (vptr, uptr + 1, len - 1); 2340 vptr[len - 2] = 0; 2341 } else { 2342 memcpy (vptr, uptr, len); 2343 vptr[len - 1] = 0; 2344 } 2345 } else { 2346 vptr[len - 1] = 0; 2347 } 2348 2349 /* escape the freshly-built path */ 2350 vptr = val; 2351 /* exception characters from xmlSaveUri */ 2352 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2353 xmlFree(vptr); 2354 2355done: 2356 /* 2357 * 
Free the working variables 2358 */ 2359 if (remove_path != 0) 2360 ref->path = NULL; 2361 if (ref != NULL) 2362 xmlFreeURI (ref); 2363 if (bas != NULL) 2364 xmlFreeURI (bas); 2365 2366 return val; 2367} 2368 2369/** 2370 * xmlCanonicPath: 2371 * @path: the resource locator in a filesystem notation 2372 * 2373 * Constructs a canonic path from the specified path. 2374 * 2375 * Returns a new canonic path, or a duplicate of the path parameter if the 2376 * construction fails. The caller is responsible for freeing the memory occupied 2377 * by the returned string. If there is insufficient memory available, or the 2378 * argument is NULL, the function returns NULL. 2379 */ 2380#define IS_WINDOWS_PATH(p) \ 2381 ((p != NULL) && \ 2382 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2383 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2384 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2385xmlChar * 2386xmlCanonicPath(const xmlChar *path) 2387{ 2388/* 2389 * For Windows implementations, additional work needs to be done to 2390 * replace backslashes in pathnames with "forward slashes" 2391 */ 2392#if defined(_WIN32) && !defined(__CYGWIN__) 2393 int len = 0; 2394 int i = 0; 2395 xmlChar *p = NULL; 2396#endif 2397 xmlURIPtr uri; 2398 xmlChar *ret; 2399 const xmlChar *absuri; 2400 2401 if (path == NULL) 2402 return(NULL); 2403 2404#if defined(_WIN32) 2405 /* 2406 * We must not change the backslashes to slashes if the the path 2407 * starts with \\?\ 2408 * Those paths can be up to 32k characters long. 2409 * Was added specifically for OpenOffice, those paths can't be converted 2410 * to URIs anyway. 
2411 */ 2412 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') && 2413 (path[3] == '\\') ) 2414 return xmlStrdup((const xmlChar *) path); 2415#endif 2416 2417 /* sanitize filename starting with // so it can be used as URI */ 2418 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2419 path++; 2420 2421 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2422 xmlFreeURI(uri); 2423 return xmlStrdup(path); 2424 } 2425 2426 /* Check if this is an "absolute uri" */ 2427 absuri = xmlStrstr(path, BAD_CAST "://"); 2428 if (absuri != NULL) { 2429 int l, j; 2430 unsigned char c; 2431 xmlChar *escURI; 2432 2433 /* 2434 * this looks like an URI where some parts have not been 2435 * escaped leading to a parsing problem. Check that the first 2436 * part matches a protocol. 2437 */ 2438 l = absuri - path; 2439 /* Bypass if first part (part before the '://') is > 20 chars */ 2440 if ((l <= 0) || (l > 20)) 2441 goto path_processing; 2442 /* Bypass if any non-alpha characters are present in first part */ 2443 for (j = 0;j < l;j++) { 2444 c = path[j]; 2445 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2446 goto path_processing; 2447 } 2448 2449 /* Escape all except the characters specified in the supplied path */ 2450 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2451 if (escURI != NULL) { 2452 /* Try parsing the escaped path */ 2453 uri = xmlParseURI((const char *) escURI); 2454 /* If successful, return the escaped string */ 2455 if (uri != NULL) { 2456 xmlFreeURI(uri); 2457 return escURI; 2458 } 2459 } 2460 } 2461 2462path_processing: 2463/* For Windows implementations, replace backslashes with 'forward slashes' */ 2464#if defined(_WIN32) && !defined(__CYGWIN__) 2465 /* 2466 * Create a URI structure 2467 */ 2468 uri = xmlCreateURI(); 2469 if (uri == NULL) { /* Guard against 'out of memory' */ 2470 return(NULL); 2471 } 2472 2473 len = xmlStrlen(path); 2474 if ((len > 2) && IS_WINDOWS_PATH(path)) { 2475 /* make the scheme 'file' 
*/ 2476 uri->scheme = xmlStrdup(BAD_CAST "file"); 2477 /* allocate space for leading '/' + path + string terminator */ 2478 uri->path = xmlMallocAtomic(len + 2); 2479 if (uri->path == NULL) { 2480 xmlFreeURI(uri); /* Guard agains 'out of memory' */ 2481 return(NULL); 2482 } 2483 /* Put in leading '/' plus path */ 2484 uri->path[0] = '/'; 2485 p = uri->path + 1; 2486 strncpy(p, path, len + 1); 2487 } else { 2488 uri->path = xmlStrdup(path); 2489 if (uri->path == NULL) { 2490 xmlFreeURI(uri); 2491 return(NULL); 2492 } 2493 p = uri->path; 2494 } 2495 /* Now change all occurences of '\' to '/' */ 2496 while (*p != '\0') { 2497 if (*p == '\\') 2498 *p = '/'; 2499 p++; 2500 } 2501 2502 if (uri->scheme == NULL) { 2503 ret = xmlStrdup((const xmlChar *) uri->path); 2504 } else { 2505 ret = xmlSaveUri(uri); 2506 } 2507 2508 xmlFreeURI(uri); 2509#else 2510 ret = xmlStrdup((const xmlChar *) path); 2511#endif 2512 return(ret); 2513} 2514 2515/** 2516 * xmlPathToURI: 2517 * @path: the resource locator in a filesystem notation 2518 * 2519 * Constructs an URI expressing the existing path 2520 * 2521 * Returns a new URI, or a duplicate of the path parameter if the 2522 * construction fails. The caller is responsible for freeing the memory 2523 * occupied by the returned string. If there is insufficient memory available, 2524 * or the argument is NULL, the function returns NULL. 2525 */ 2526xmlChar * 2527xmlPathToURI(const xmlChar *path) 2528{ 2529 xmlURIPtr uri; 2530 xmlURI temp; 2531 xmlChar *ret, *cal; 2532 2533 if (path == NULL) 2534 return(NULL); 2535 2536 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2537 xmlFreeURI(uri); 2538 return xmlStrdup(path); 2539 } 2540 cal = xmlCanonicPath(path); 2541 if (cal == NULL) 2542 return(NULL); 2543#if defined(_WIN32) && !defined(__CYGWIN__) 2544 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2545 If 'cal' is a valid URI allready then we are done here, as continuing would make 2546 it invalid. 
*/ 2547 if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2548 xmlFreeURI(uri); 2549 return cal; 2550 } 2551 /* 'cal' can contain a relative path with backslashes. If that is processed 2552 by xmlSaveURI, they will be escaped and the external entity loader machinery 2553 will fail. So convert them to slashes. Misuse 'ret' for walking. */ 2554 ret = cal; 2555 while (*ret != '\0') { 2556 if (*ret == '\\') 2557 *ret = '/'; 2558 ret++; 2559 } 2560#endif 2561 memset(&temp, 0, sizeof(temp)); 2562 temp.path = (char *) cal; 2563 ret = xmlSaveUri(&temp); 2564 xmlFree(cal); 2565 return(ret); 2566} 2567#define bottom_uri 2568#include "elfgcchack.h" 2569