uri.c revision 3c908dca479ed50dca24b8593bca90e40dbde6b8
1/** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFC 2396 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel@veillard.com 9 */ 10 11#define IN_LIBXML 12#include "libxml.h" 13 14#include <string.h> 15 16#include <libxml/xmlmemory.h> 17#include <libxml/uri.h> 18#include <libxml/globals.h> 19#include <libxml/xmlerror.h> 20 21/************************************************************************ 22 * * 23 * Macros to differentiate various character type * 24 * directly extracted from RFC 2396 * 25 * * 26 ************************************************************************/ 27 28/* 29 * alpha = lowalpha | upalpha 30 */ 31#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 32 33 34/* 35 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 36 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 37 * "u" | "v" | "w" | "x" | "y" | "z" 38 */ 39 40#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 41 42/* 43 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 44 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 45 * "U" | "V" | "W" | "X" | "Y" | "Z" 46 */ 47#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 48 49/* 50 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 51 */ 52 53#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 54 55/* 56 * alphanum = alpha | digit 57 */ 58 59#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 60 61/* 62 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | 63 * "a" | "b" | "c" | "d" | "e" | "f" 64 */ 65 66#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \ 67 (((x) >= 'A') && ((x) <= 'F'))) 68 69/* 70 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 71 */ 72 73#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 74 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 75 ((x) == '(') || ((x) == ')')) 76 77 78/* 79 * reserved = ";" | "/" | "?" 
| ":" | "@" | "&" | "=" | "+" | "$" | "," 80 */ 81 82#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 83 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 84 ((x) == '+') || ((x) == '$') || ((x) == ',')) 85 86/* 87 * unreserved = alphanum | mark 88 */ 89 90#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 91 92/* 93 * escaped = "%" hex hex 94 */ 95 96#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \ 97 (IS_HEX((p)[2]))) 98 99/* 100 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | 101 * "&" | "=" | "+" | "$" | "," 102 */ 103#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\ 104 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\ 105 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\ 106 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) 107 108/* 109 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," 110 */ 111#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 112 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\ 113 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\ 114 ((*(p) == ','))) 115 116/* 117 * rel_segment = 1*( unreserved | escaped | 118 * ";" | "@" | "&" | "=" | "+" | "$" | "," ) 119 */ 120 121#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 122 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \ 123 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ 124 ((*(p) == ','))) 125 126/* 127 * scheme = alpha *( alpha | digit | "+" | "-" | "." 
) 128 */ 129 130#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \ 131 ((x) == '+') || ((x) == '-') || ((x) == '.')) 132 133/* 134 * reg_name = 1*( unreserved | escaped | "$" | "," | 135 * ";" | ":" | "@" | "&" | "=" | "+" ) 136 */ 137 138#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 139 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 140 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \ 141 ((*(p) == '=')) || ((*(p) == '+'))) 142 143/* 144 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" | 145 * "+" | "$" | "," ) 146 */ 147#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 148 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \ 149 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ 150 ((*(p) == ','))) 151 152/* 153 * uric = reserved | unreserved | escaped 154 */ 155 156#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 157 (IS_RESERVED(*(p)))) 158 159/* 160* unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" 161*/ 162 163#define IS_UNWISE(p) \ 164 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 165 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 166 ((*(p) == ']')) || ((*(p) == '`'))) 167 168/* 169 * Skip to next pointer char, handle escaped sequences 170 */ 171 172#define NEXT(p) ((*p == '%')? p += 3 : p++) 173 174/* 175 * Productions from the spec. 
176 * 177 * authority = server | reg_name 178 * reg_name = 1*( unreserved | escaped | "$" | "," | 179 * ";" | ":" | "@" | "&" | "=" | "+" ) 180 * 181 * path = [ abs_path | opaque_part ] 182 */ 183 184/************************************************************************ 185 * * 186 * Generic URI structure functions * 187 * * 188 ************************************************************************/ 189 190/** 191 * xmlCreateURI: 192 * 193 * Simply creates an empty xmlURI 194 * 195 * Returns the new structure or NULL in case of error 196 */ 197xmlURIPtr 198xmlCreateURI(void) { 199 xmlURIPtr ret; 200 201 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 202 if (ret == NULL) { 203 xmlGenericError(xmlGenericErrorContext, 204 "xmlCreateURI: out of memory\n"); 205 return(NULL); 206 } 207 memset(ret, 0, sizeof(xmlURI)); 208 return(ret); 209} 210 211/** 212 * xmlSaveUri: 213 * @uri: pointer to an xmlURI 214 * 215 * Save the URI as an escaped string 216 * 217 * Returns a new string (to be deallocated by caller) 218 */ 219xmlChar * 220xmlSaveUri(xmlURIPtr uri) { 221 xmlChar *ret = NULL; 222 const char *p; 223 int len; 224 int max; 225 226 if (uri == NULL) return(NULL); 227 228 229 max = 80; 230 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 231 if (ret == NULL) { 232 xmlGenericError(xmlGenericErrorContext, 233 "xmlSaveUri: out of memory\n"); 234 return(NULL); 235 } 236 len = 0; 237 238 if (uri->scheme != NULL) { 239 p = uri->scheme; 240 while (*p != 0) { 241 if (len >= max) { 242 max *= 2; 243 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 244 if (ret == NULL) { 245 xmlGenericError(xmlGenericErrorContext, 246 "xmlSaveUri: out of memory\n"); 247 return(NULL); 248 } 249 } 250 ret[len++] = *p++; 251 } 252 if (len >= max) { 253 max *= 2; 254 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 255 if (ret == NULL) { 256 xmlGenericError(xmlGenericErrorContext, 257 "xmlSaveUri: out of memory\n"); 258 return(NULL); 259 } 260 } 261 
ret[len++] = ':'; 262 } 263 if (uri->opaque != NULL) { 264 p = uri->opaque; 265 while (*p != 0) { 266 if (len + 3 >= max) { 267 max *= 2; 268 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 269 if (ret == NULL) { 270 xmlGenericError(xmlGenericErrorContext, 271 "xmlSaveUri: out of memory\n"); 272 return(NULL); 273 } 274 } 275 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 276 ret[len++] = *p++; 277 else { 278 int val = *(unsigned char *)p++; 279 int hi = val / 0x10, lo = val % 0x10; 280 ret[len++] = '%'; 281 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 282 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 283 } 284 } 285 } else { 286 if (uri->server != NULL) { 287 if (len + 3 >= max) { 288 max *= 2; 289 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 290 if (ret == NULL) { 291 xmlGenericError(xmlGenericErrorContext, 292 "xmlSaveUri: out of memory\n"); 293 return(NULL); 294 } 295 } 296 ret[len++] = '/'; 297 ret[len++] = '/'; 298 if (uri->user != NULL) { 299 p = uri->user; 300 while (*p != 0) { 301 if (len + 3 >= max) { 302 max *= 2; 303 ret = (xmlChar *) xmlRealloc(ret, 304 (max + 1) * sizeof(xmlChar)); 305 if (ret == NULL) { 306 xmlGenericError(xmlGenericErrorContext, 307 "xmlSaveUri: out of memory\n"); 308 return(NULL); 309 } 310 } 311 if ((IS_UNRESERVED(*(p))) || 312 ((*(p) == ';')) || ((*(p) == ':')) || 313 ((*(p) == '&')) || ((*(p) == '=')) || 314 ((*(p) == '+')) || ((*(p) == '$')) || 315 ((*(p) == ','))) 316 ret[len++] = *p++; 317 else { 318 int val = *(unsigned char *)p++; 319 int hi = val / 0x10, lo = val % 0x10; 320 ret[len++] = '%'; 321 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 322 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 323 } 324 } 325 if (len + 3 >= max) { 326 max *= 2; 327 ret = (xmlChar *) xmlRealloc(ret, 328 (max + 1) * sizeof(xmlChar)); 329 if (ret == NULL) { 330 xmlGenericError(xmlGenericErrorContext, 331 "xmlSaveUri: out of memory\n"); 332 return(NULL); 333 } 334 } 335 ret[len++] = '@'; 336 } 337 p = uri->server; 338 while (*p != 0) { 339 if (len >= max) { 340 max *= 2; 341 ret = (xmlChar *) xmlRealloc(ret, 342 (max + 1) * sizeof(xmlChar)); 343 if (ret == NULL) { 344 xmlGenericError(xmlGenericErrorContext, 345 "xmlSaveUri: out of memory\n"); 346 return(NULL); 347 } 348 } 349 ret[len++] = *p++; 350 } 351 if (uri->port > 0) { 352 if (len + 10 >= max) { 353 max *= 2; 354 ret = (xmlChar *) xmlRealloc(ret, 355 (max + 1) * sizeof(xmlChar)); 356 if (ret == NULL) { 357 xmlGenericError(xmlGenericErrorContext, 358 "xmlSaveUri: out of memory\n"); 359 return(NULL); 360 } 361 } 362 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 363 } 364 } else if (uri->authority != NULL) { 365 if (len + 3 >= max) { 366 max *= 2; 367 ret = (xmlChar *) xmlRealloc(ret, 368 (max + 1) * sizeof(xmlChar)); 369 if (ret == NULL) { 370 xmlGenericError(xmlGenericErrorContext, 371 "xmlSaveUri: out of memory\n"); 372 return(NULL); 373 } 374 } 375 ret[len++] = '/'; 376 ret[len++] = '/'; 377 p = uri->authority; 378 while (*p != 0) { 379 if (len + 3 >= max) { 380 max *= 2; 381 ret = (xmlChar *) xmlRealloc(ret, 382 (max + 1) * sizeof(xmlChar)); 383 if (ret == NULL) { 384 xmlGenericError(xmlGenericErrorContext, 385 "xmlSaveUri: out of memory\n"); 386 return(NULL); 387 } 388 } 389 if ((IS_UNRESERVED(*(p))) || 390 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 391 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 392 ((*(p) == '=')) || ((*(p) == '+'))) 393 ret[len++] = *p++; 394 else { 395 int val = *(unsigned char *)p++; 396 int hi = val / 0x10, lo = val % 0x10; 397 ret[len++] = '%'; 398 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 399 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 400 } 401 } 402 } else if (uri->scheme != NULL) { 403 if (len + 3 >= max) { 404 max *= 2; 405 ret = (xmlChar *) xmlRealloc(ret, 406 (max + 1) * sizeof(xmlChar)); 407 if (ret == NULL) { 408 xmlGenericError(xmlGenericErrorContext, 409 "xmlSaveUri: out of memory\n"); 410 return(NULL); 411 } 412 } 413 ret[len++] = '/'; 414 ret[len++] = '/'; 415 } 416 if (uri->path != NULL) { 417 p = uri->path; 418 while (*p != 0) { 419 if (len + 3 >= max) { 420 max *= 2; 421 ret = (xmlChar *) xmlRealloc(ret, 422 (max + 1) * sizeof(xmlChar)); 423 if (ret == NULL) { 424 xmlGenericError(xmlGenericErrorContext, 425 "xmlSaveUri: out of memory\n"); 426 return(NULL); 427 } 428 } 429 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 430 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 431 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 432 ((*(p) == ','))) 433 ret[len++] = *p++; 434 else { 435 int val = *(unsigned char *)p++; 436 int hi = val / 0x10, lo = val % 0x10; 437 ret[len++] = '%'; 438 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 439 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 440 } 441 } 442 } 443 if (uri->query != NULL) { 444 if (len + 3 >= max) { 445 max *= 2; 446 ret = (xmlChar *) xmlRealloc(ret, 447 (max + 1) * sizeof(xmlChar)); 448 if (ret == NULL) { 449 xmlGenericError(xmlGenericErrorContext, 450 "xmlSaveUri: out of memory\n"); 451 return(NULL); 452 } 453 } 454 ret[len++] = '?'; 455 p = uri->query; 456 while (*p != 0) { 457 if (len + 3 >= max) { 458 max *= 2; 459 ret = (xmlChar *) xmlRealloc(ret, 460 (max + 1) * sizeof(xmlChar)); 461 if (ret == NULL) { 462 xmlGenericError(xmlGenericErrorContext, 463 "xmlSaveUri: out of memory\n"); 464 return(NULL); 465 } 466 } 467 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 468 ret[len++] = *p++; 469 else { 470 int val = *(unsigned char *)p++; 471 int hi = val / 0x10, lo = val % 0x10; 472 ret[len++] = '%'; 473 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 474 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 475 } 476 } 477 } 478 } 479 if (uri->fragment != NULL) { 480 if (len + 3 >= max) { 481 max *= 2; 482 ret = (xmlChar *) xmlRealloc(ret, 483 (max + 1) * sizeof(xmlChar)); 484 if (ret == NULL) { 485 xmlGenericError(xmlGenericErrorContext, 486 "xmlSaveUri: out of memory\n"); 487 return(NULL); 488 } 489 } 490 ret[len++] = '#'; 491 p = uri->fragment; 492 while (*p != 0) { 493 if (len + 3 >= max) { 494 max *= 2; 495 ret = (xmlChar *) xmlRealloc(ret, 496 (max + 1) * sizeof(xmlChar)); 497 if (ret == NULL) { 498 xmlGenericError(xmlGenericErrorContext, 499 "xmlSaveUri: out of memory\n"); 500 return(NULL); 501 } 502 } 503 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 504 ret[len++] = *p++; 505 else { 506 int val = *(unsigned char *)p++; 507 int hi = val / 0x10, lo = val % 0x10; 508 ret[len++] = '%'; 509 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 510 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 511 } 512 } 513 } 514 if (len >= max) { 515 max *= 2; 516 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 517 if (ret == NULL) { 518 xmlGenericError(xmlGenericErrorContext, 519 "xmlSaveUri: out of memory\n"); 520 return(NULL); 521 } 522 } 523 ret[len++] = 0; 524 return(ret); 525} 526 527/** 528 * xmlPrintURI: 529 * @stream: a FILE* for the output 530 * @uri: pointer to an xmlURI 531 * 532 * Prints the URI in the stream @steam. 
533 */ 534void 535xmlPrintURI(FILE *stream, xmlURIPtr uri) { 536 xmlChar *out; 537 538 out = xmlSaveUri(uri); 539 if (out != NULL) { 540 fprintf(stream, "%s", (char *) out); 541 xmlFree(out); 542 } 543} 544 545/** 546 * xmlCleanURI: 547 * @uri: pointer to an xmlURI 548 * 549 * Make sure the xmlURI struct is free of content 550 */ 551static void 552xmlCleanURI(xmlURIPtr uri) { 553 if (uri == NULL) return; 554 555 if (uri->scheme != NULL) xmlFree(uri->scheme); 556 uri->scheme = NULL; 557 if (uri->server != NULL) xmlFree(uri->server); 558 uri->server = NULL; 559 if (uri->user != NULL) xmlFree(uri->user); 560 uri->user = NULL; 561 if (uri->path != NULL) xmlFree(uri->path); 562 uri->path = NULL; 563 if (uri->fragment != NULL) xmlFree(uri->fragment); 564 uri->fragment = NULL; 565 if (uri->opaque != NULL) xmlFree(uri->opaque); 566 uri->opaque = NULL; 567 if (uri->authority != NULL) xmlFree(uri->authority); 568 uri->authority = NULL; 569 if (uri->query != NULL) xmlFree(uri->query); 570 uri->query = NULL; 571} 572 573/** 574 * xmlFreeURI: 575 * @uri: pointer to an xmlURI 576 * 577 * Free up the xmlURI struct 578 */ 579void 580xmlFreeURI(xmlURIPtr uri) { 581 if (uri == NULL) return; 582 583 if (uri->scheme != NULL) xmlFree(uri->scheme); 584 if (uri->server != NULL) xmlFree(uri->server); 585 if (uri->user != NULL) xmlFree(uri->user); 586 if (uri->path != NULL) xmlFree(uri->path); 587 if (uri->fragment != NULL) xmlFree(uri->fragment); 588 if (uri->opaque != NULL) xmlFree(uri->opaque); 589 if (uri->authority != NULL) xmlFree(uri->authority); 590 if (uri->query != NULL) xmlFree(uri->query); 591 xmlFree(uri); 592} 593 594/************************************************************************ 595 * * 596 * Helper functions * 597 * * 598 ************************************************************************/ 599 600/** 601 * xmlNormalizeURIPath: 602 * @path: pointer to the path string 603 * 604 * Applies the 5 normalization steps to a path string--that is, RFC 2396 605 * 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * Runs of '/' following the first segment are collapsed to a single '/'
 * along the way; the leading '/' characters are left untouched.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the input, "out" writes the compacted result; out never
     * gets ahead of cur.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination belong to the same buffer and overlap:
         * strcpy() is undefined in that case, memmove() is not.
         * The "+ 1" moves the terminator too.
         */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end of the string as it was before the (e)/(f)
     * pass. The string can only have shrunk since, so this write lands at
     * or after the current terminator and never changes the result.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
791 * Output is direct unsigned char translation of %XX values (no encoding) 792 * 793 * Returns an copy of the string, but unescaped 794 */ 795char * 796xmlURIUnescapeString(const char *str, int len, char *target) { 797 char *ret, *out; 798 const char *in; 799 800 if (str == NULL) 801 return(NULL); 802 if (len <= 0) len = strlen(str); 803 if (len < 0) return(NULL); 804 805 if (target == NULL) { 806 ret = (char *) xmlMallocAtomic(len + 1); 807 if (ret == NULL) { 808 xmlGenericError(xmlGenericErrorContext, 809 "xmlURIUnescapeString: out of memory\n"); 810 return(NULL); 811 } 812 } else 813 ret = target; 814 in = str; 815 out = ret; 816 while(len > 0) { 817 if (*in == '%') { 818 in++; 819 if ((*in >= '0') && (*in <= '9')) 820 *out = (*in - '0'); 821 else if ((*in >= 'a') && (*in <= 'f')) 822 *out = (*in - 'a') + 10; 823 else if ((*in >= 'A') && (*in <= 'F')) 824 *out = (*in - 'A') + 10; 825 in++; 826 if ((*in >= '0') && (*in <= '9')) 827 *out = *out * 16 + (*in - '0'); 828 else if ((*in >= 'a') && (*in <= 'f')) 829 *out = *out * 16 + (*in - 'a') + 10; 830 else if ((*in >= 'A') && (*in <= 'F')) 831 *out = *out * 16 + (*in - 'A') + 10; 832 in++; 833 len -= 3; 834 out++; 835 } else { 836 *out++ = *in++; 837 len--; 838 } 839 } 840 *out = 0; 841 return(ret); 842} 843 844/** 845 * xmlURIEscapeStr: 846 * @str: string to escape 847 * @list: exception list string of chars not to escape 848 * 849 * This routine escapes a string to hex, ignoring reserved characters (a-z) 850 * and the characters in the exception list. 851 * 852 * Returns a new escaped string or NULL in case of error. 
853 */ 854xmlChar * 855xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 856 xmlChar *ret, ch; 857 const xmlChar *in; 858 859 unsigned int len, out; 860 861 if (str == NULL) 862 return(NULL); 863 len = xmlStrlen(str); 864 if (!(len > 0)) return(NULL); 865 866 len += 20; 867 ret = (xmlChar *) xmlMallocAtomic(len); 868 if (ret == NULL) { 869 xmlGenericError(xmlGenericErrorContext, 870 "xmlURIEscapeStr: out of memory\n"); 871 return(NULL); 872 } 873 in = (const xmlChar *) str; 874 out = 0; 875 while(*in != 0) { 876 if (len - out <= 3) { 877 len += 20; 878 ret = (xmlChar *) xmlRealloc(ret, len); 879 if (ret == NULL) { 880 xmlGenericError(xmlGenericErrorContext, 881 "xmlURIEscapeStr: out of memory\n"); 882 return(NULL); 883 } 884 } 885 886 ch = *in; 887 888 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 889 unsigned char val; 890 ret[out++] = '%'; 891 val = ch >> 4; 892 if (val <= 9) 893 ret[out++] = '0' + val; 894 else 895 ret[out++] = 'A' + val - 0xA; 896 val = ch & 0xF; 897 if (val <= 9) 898 ret[out++] = '0' + val; 899 else 900 ret[out++] = 'A' + val - 0xA; 901 in++; 902 } else { 903 ret[out++] = *in++; 904 } 905 906 } 907 ret[out] = 0; 908 return(ret); 909} 910 911/** 912 * xmlURIEscape: 913 * @str: the string of the URI to escape 914 * 915 * Escaping routine, does not do validity checks ! 916 * It will try to escape the chars needing this, but this is heuristic 917 * based it's impossible to be sure. 918 * 919 * Returns an copy of the string, but escaped 920 * 921 * 25 May 2001 922 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 923 * according to RFC2396. 
924 * - Carl Douglas 925 */ 926xmlChar * 927xmlURIEscape(const xmlChar * str) 928{ 929 xmlChar *ret, *segment = NULL; 930 xmlURIPtr uri; 931 int ret2; 932 933#define NULLCHK(p) if(!p) { \ 934 xmlGenericError(xmlGenericErrorContext, \ 935 "xmlURIEscape: out of memory\n"); \ 936 return NULL; } 937 938 if (str == NULL) 939 return (NULL); 940 941 uri = xmlCreateURI(); 942 if (uri != NULL) { 943 /* 944 * Allow escaping errors in the unescaped form 945 */ 946 uri->cleanup = 1; 947 ret2 = xmlParseURIReference(uri, (const char *)str); 948 if (ret2) { 949 xmlFreeURI(uri); 950 return (NULL); 951 } 952 } 953 954 if (!uri) 955 return NULL; 956 957 ret = NULL; 958 959 if (uri->scheme) { 960 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 961 NULLCHK(segment) 962 ret = xmlStrcat(ret, segment); 963 ret = xmlStrcat(ret, BAD_CAST ":"); 964 xmlFree(segment); 965 } 966 967 if (uri->authority) { 968 segment = 969 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 970 NULLCHK(segment) 971 ret = xmlStrcat(ret, BAD_CAST "//"); 972 ret = xmlStrcat(ret, segment); 973 xmlFree(segment); 974 } 975 976 if (uri->user) { 977 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 978 NULLCHK(segment) 979 ret = xmlStrcat(ret, segment); 980 ret = xmlStrcat(ret, BAD_CAST "@"); 981 xmlFree(segment); 982 } 983 984 if (uri->server) { 985 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 986 NULLCHK(segment) 987 ret = xmlStrcat(ret, BAD_CAST "//"); 988 ret = xmlStrcat(ret, segment); 989 xmlFree(segment); 990 } 991 992 if (uri->port) { 993 xmlChar port[10]; 994 995 snprintf((char *) port, 10, "%d", uri->port); 996 ret = xmlStrcat(ret, BAD_CAST ":"); 997 ret = xmlStrcat(ret, port); 998 } 999 1000 if (uri->path) { 1001 segment = 1002 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1003 NULLCHK(segment) 1004 ret = xmlStrcat(ret, segment); 1005 xmlFree(segment); 1006 } 1007 1008 if (uri->query) { 1009 segment = 1010 xmlURIEscapeStr(BAD_CAST 
uri->query, BAD_CAST ";/?:@&=+,$"); 1011 NULLCHK(segment) 1012 ret = xmlStrcat(ret, BAD_CAST "?"); 1013 ret = xmlStrcat(ret, segment); 1014 xmlFree(segment); 1015 } 1016 1017 if (uri->opaque) { 1018 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1019 NULLCHK(segment) 1020 ret = xmlStrcat(ret, segment); 1021 xmlFree(segment); 1022 } 1023 1024 if (uri->fragment) { 1025 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1026 NULLCHK(segment) 1027 ret = xmlStrcat(ret, BAD_CAST "#"); 1028 ret = xmlStrcat(ret, segment); 1029 xmlFree(segment); 1030 } 1031 1032 xmlFreeURI(uri); 1033#undef NULLCHK 1034 1035 return (ret); 1036} 1037 1038/************************************************************************ 1039 * * 1040 * Escaped URI parsing * 1041 * * 1042 ************************************************************************/ 1043 1044/** 1045 * xmlParseURIFragment: 1046 * @uri: pointer to an URI structure 1047 * @str: pointer to the string to analyze 1048 * 1049 * Parse an URI fragment string and fills in the appropriate fields 1050 * of the @uri structure. 
1051 * 1052 * fragment = *uric 1053 * 1054 * Returns 0 or the error code 1055 */ 1056static int 1057xmlParseURIFragment(xmlURIPtr uri, const char **str) 1058{ 1059 const char *cur = *str; 1060 1061 if (str == NULL) 1062 return (-1); 1063 1064 while (IS_URIC(cur) || IS_UNWISE(cur)) 1065 NEXT(cur); 1066 if (uri != NULL) { 1067 if (uri->fragment != NULL) 1068 xmlFree(uri->fragment); 1069 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 1070 } 1071 *str = cur; 1072 return (0); 1073} 1074 1075/** 1076 * xmlParseURIQuery: 1077 * @uri: pointer to an URI structure 1078 * @str: pointer to the string to analyze 1079 * 1080 * Parse the query part of an URI 1081 * 1082 * query = *uric 1083 * 1084 * Returns 0 or the error code 1085 */ 1086static int 1087xmlParseURIQuery(xmlURIPtr uri, const char **str) 1088{ 1089 const char *cur = *str; 1090 1091 if (str == NULL) 1092 return (-1); 1093 1094 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur)))) 1095 NEXT(cur); 1096 if (uri != NULL) { 1097 if (uri->query != NULL) 1098 xmlFree(uri->query); 1099 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 1100 } 1101 *str = cur; 1102 return (0); 1103} 1104 1105/** 1106 * xmlParseURIScheme: 1107 * @uri: pointer to an URI structure 1108 * @str: pointer to the string to analyze 1109 * 1110 * Parse an URI scheme 1111 * 1112 * scheme = alpha *( alpha | digit | "+" | "-" | "." ) 1113 * 1114 * Returns 0 or the error code 1115 */ 1116static int 1117xmlParseURIScheme(xmlURIPtr uri, const char **str) { 1118 const char *cur; 1119 1120 if (str == NULL) 1121 return(-1); 1122 1123 cur = *str; 1124 if (!IS_ALPHA(*cur)) 1125 return(2); 1126 cur++; 1127 while (IS_SCHEME(*cur)) cur++; 1128 if (uri != NULL) { 1129 if (uri->scheme != NULL) xmlFree(uri->scheme); 1130 /* !!! 
strndup */ 1131 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); 1132 } 1133 *str = cur; 1134 return(0); 1135} 1136 1137/** 1138 * xmlParseURIOpaquePart: 1139 * @uri: pointer to an URI structure 1140 * @str: pointer to the string to analyze 1141 * 1142 * Parse an URI opaque part 1143 * 1144 * opaque_part = uric_no_slash *uric 1145 * 1146 * Returns 0 or the error code 1147 */ 1148static int 1149xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) 1150{ 1151 const char *cur; 1152 1153 if (str == NULL) 1154 return (-1); 1155 1156 cur = *str; 1157 if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) { 1158 return (3); 1159 } 1160 NEXT(cur); 1161 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur)))) 1162 NEXT(cur); 1163 if (uri != NULL) { 1164 if (uri->opaque != NULL) 1165 xmlFree(uri->opaque); 1166 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); 1167 } 1168 *str = cur; 1169 return (0); 1170} 1171 1172/** 1173 * xmlParseURIServer: 1174 * @uri: pointer to an URI structure 1175 * @str: pointer to the string to analyze 1176 * 1177 * Parse a server subpart of an URI, it's a finer grain analysis 1178 * of the authority part. 1179 * 1180 * server = [ [ userinfo "@" ] hostport ] 1181 * userinfo = *( unreserved | escaped | 1182 * ";" | ":" | "&" | "=" | "+" | "$" | "," ) 1183 * hostport = host [ ":" port ] 1184 * host = hostname | IPv4address 1185 * hostname = *( domainlabel "." ) toplabel [ "." ] 1186 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1187 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1188 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 
1*digit 1189 * port = *digit 1190 * 1191 * Returns 0 or the error code 1192 */ 1193static int 1194xmlParseURIServer(xmlURIPtr uri, const char **str) { 1195 const char *cur; 1196 const char *host, *tmp; 1197 const int IPmax = 4; 1198 int oct; 1199 1200 if (str == NULL) 1201 return(-1); 1202 1203 cur = *str; 1204 1205 /* 1206 * is there an userinfo ? 1207 */ 1208 while (IS_USERINFO(cur)) NEXT(cur); 1209 if (*cur == '@') { 1210 if (uri != NULL) { 1211 if (uri->user != NULL) xmlFree(uri->user); 1212 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 1213 } 1214 cur++; 1215 } else { 1216 if (uri != NULL) { 1217 if (uri->user != NULL) xmlFree(uri->user); 1218 uri->user = NULL; 1219 } 1220 cur = *str; 1221 } 1222 /* 1223 * This can be empty in the case where there is no server 1224 */ 1225 host = cur; 1226 if (*cur == '/') { 1227 if (uri != NULL) { 1228 if (uri->authority != NULL) xmlFree(uri->authority); 1229 uri->authority = NULL; 1230 if (uri->server != NULL) xmlFree(uri->server); 1231 uri->server = NULL; 1232 uri->port = 0; 1233 } 1234 return(0); 1235 } 1236 /* 1237 * host part of hostport can derive either an IPV4 address 1238 * or an unresolved name. Check the IP first, it easier to detect 1239 * errors if wrong one 1240 */ 1241 for (oct = 0; oct < IPmax; ++oct) { 1242 if (*cur == '.') 1243 return(3); /* e.g. http://.xml/ or http://18.29..30/ */ 1244 while(IS_DIGIT(*cur)) cur++; 1245 if (oct == (IPmax-1)) 1246 continue; 1247 if (*cur != '.') 1248 break; 1249 cur++; 1250 } 1251 if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) { 1252 /* maybe host_name */ 1253 if (!IS_ALPHANUM(*cur)) 1254 return(4); /* e.g. http://xml.$oft */ 1255 do { 1256 do ++cur; while (IS_ALPHANUM(*cur)); 1257 if (*cur == '-') { 1258 --cur; 1259 if (*cur == '.') 1260 return(5); /* e.g. http://xml.-soft */ 1261 ++cur; 1262 continue; 1263 } 1264 if (*cur == '.') { 1265 --cur; 1266 if (*cur == '-') 1267 return(6); /* e.g. 
http://xml-.soft */ 1268 if (*cur == '.') 1269 return(7); /* e.g. http://xml..soft */ 1270 ++cur; 1271 continue; 1272 } 1273 break; 1274 } while (1); 1275 tmp = cur; 1276 if (tmp[-1] == '.') 1277 --tmp; /* e.g. http://xml.$Oft/ */ 1278 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp)); 1279 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp)) 1280 return(8); /* e.g. http://xmlsOft.0rg/ */ 1281 } 1282 if (uri != NULL) { 1283 if (uri->authority != NULL) xmlFree(uri->authority); 1284 uri->authority = NULL; 1285 if (uri->server != NULL) xmlFree(uri->server); 1286 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 1287 } 1288 /* 1289 * finish by checking for a port presence. 1290 */ 1291 if (*cur == ':') { 1292 cur++; 1293 if (IS_DIGIT(*cur)) { 1294 if (uri != NULL) 1295 uri->port = 0; 1296 while (IS_DIGIT(*cur)) { 1297 if (uri != NULL) 1298 uri->port = uri->port * 10 + (*cur - '0'); 1299 cur++; 1300 } 1301 } 1302 } 1303 *str = cur; 1304 return(0); 1305} 1306 1307/** 1308 * xmlParseURIRelSegment: 1309 * @uri: pointer to an URI structure 1310 * @str: pointer to the string to analyze 1311 * 1312 * Parse an URI relative segment 1313 * 1314 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" | 1315 * "+" | "$" | "," ) 1316 * 1317 * Returns 0 or the error code 1318 */ 1319static int 1320xmlParseURIRelSegment(xmlURIPtr uri, const char **str) 1321{ 1322 const char *cur; 1323 1324 if (str == NULL) 1325 return (-1); 1326 1327 cur = *str; 1328 if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) { 1329 return (3); 1330 } 1331 NEXT(cur); 1332 while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur)))) 1333 NEXT(cur); 1334 if (uri != NULL) { 1335 if (uri->path != NULL) 1336 xmlFree(uri->path); 1337 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 1338 } 1339 *str = cur; 1340 return (0); 1341} 1342 1343/** 1344 * xmlParseURIPathSegments: 1345 * @uri: pointer to an URI structure 1346 * @str: pointer 
to the string to analyze 1347 * @slash: should we add a leading slash 1348 * 1349 * Parse an URI set of path segments 1350 * 1351 * path_segments = segment *( "/" segment ) 1352 * segment = *pchar *( ";" param ) 1353 * param = *pchar 1354 * 1355 * Returns 0 or the error code 1356 */ 1357static int 1358xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) 1359{ 1360 const char *cur; 1361 1362 if (str == NULL) 1363 return (-1); 1364 1365 cur = *str; 1366 1367 do { 1368 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur)))) 1369 NEXT(cur); 1370 while (*cur == ';') { 1371 cur++; 1372 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur)))) 1373 NEXT(cur); 1374 } 1375 if (*cur != '/') 1376 break; 1377 cur++; 1378 } while (1); 1379 if (uri != NULL) { 1380 int len, len2 = 0; 1381 char *path; 1382 1383 /* 1384 * Concat the set of path segments to the current path 1385 */ 1386 len = cur - *str; 1387 if (slash) 1388 len++; 1389 1390 if (uri->path != NULL) { 1391 len2 = strlen(uri->path); 1392 len += len2; 1393 } 1394 path = (char *) xmlMallocAtomic(len + 1); 1395 if (path == NULL) { 1396 xmlGenericError(xmlGenericErrorContext, 1397 "xmlParseURIPathSegments: out of memory\n"); 1398 *str = cur; 1399 return (-1); 1400 } 1401 if (uri->path != NULL) 1402 memcpy(path, uri->path, len2); 1403 if (slash) { 1404 path[len2] = '/'; 1405 len2++; 1406 } 1407 path[len2] = 0; 1408 if (cur - *str > 0) 1409 xmlURIUnescapeString(*str, cur - *str, &path[len2]); 1410 if (uri->path != NULL) 1411 xmlFree(uri->path); 1412 uri->path = path; 1413 } 1414 *str = cur; 1415 return (0); 1416} 1417 1418/** 1419 * xmlParseURIAuthority: 1420 * @uri: pointer to an URI structure 1421 * @str: pointer to the string to analyze 1422 * 1423 * Parse the authority part of an URI. 
1424 * 1425 * authority = server | reg_name 1426 * server = [ [ userinfo "@" ] hostport ] 1427 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" | 1428 * "@" | "&" | "=" | "+" ) 1429 * 1430 * Note : this is completely ambiguous since reg_name is allowed to 1431 * use the full set of chars in use by server: 1432 * 1433 * 3.2.1. Registry-based Naming Authority 1434 * 1435 * The structure of a registry-based naming authority is specific 1436 * to the URI scheme, but constrained to the allowed characters 1437 * for an authority component. 1438 * 1439 * Returns 0 or the error code 1440 */ 1441static int 1442xmlParseURIAuthority(xmlURIPtr uri, const char **str) { 1443 const char *cur; 1444 int ret; 1445 1446 if (str == NULL) 1447 return(-1); 1448 1449 cur = *str; 1450 1451 /* 1452 * try first to parse it as a server string. 1453 */ 1454 ret = xmlParseURIServer(uri, str); 1455 if ((ret == 0) && (*str != NULL) && 1456 ((**str == 0) || (**str == '/') || (**str == '?'))) 1457 return(0); 1458 *str = cur; 1459 1460 /* 1461 * failed, fallback to reg_name 1462 */ 1463 if (!IS_REG_NAME(cur)) { 1464 return(5); 1465 } 1466 NEXT(cur); 1467 while (IS_REG_NAME(cur)) NEXT(cur); 1468 if (uri != NULL) { 1469 if (uri->server != NULL) xmlFree(uri->server); 1470 uri->server = NULL; 1471 if (uri->user != NULL) xmlFree(uri->user); 1472 uri->user = NULL; 1473 if (uri->authority != NULL) xmlFree(uri->authority); 1474 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); 1475 } 1476 *str = cur; 1477 return(0); 1478} 1479 1480/** 1481 * xmlParseURIHierPart: 1482 * @uri: pointer to an URI structure 1483 * @str: pointer to the string to analyze 1484 * 1485 * Parse an URI hierarchical part 1486 * 1487 * hier_part = ( net_path | abs_path ) [ "?" 
query ] 1488 * abs_path = "/" path_segments 1489 * net_path = "//" authority [ abs_path ] 1490 * 1491 * Returns 0 or the error code 1492 */ 1493static int 1494xmlParseURIHierPart(xmlURIPtr uri, const char **str) { 1495 int ret; 1496 const char *cur; 1497 1498 if (str == NULL) 1499 return(-1); 1500 1501 cur = *str; 1502 1503 if ((cur[0] == '/') && (cur[1] == '/')) { 1504 cur += 2; 1505 ret = xmlParseURIAuthority(uri, &cur); 1506 if (ret != 0) 1507 return(ret); 1508 if (cur[0] == '/') { 1509 cur++; 1510 ret = xmlParseURIPathSegments(uri, &cur, 1); 1511 } 1512 } else if (cur[0] == '/') { 1513 cur++; 1514 ret = xmlParseURIPathSegments(uri, &cur, 1); 1515 } else { 1516 return(4); 1517 } 1518 if (ret != 0) 1519 return(ret); 1520 if (*cur == '?') { 1521 cur++; 1522 ret = xmlParseURIQuery(uri, &cur); 1523 if (ret != 0) 1524 return(ret); 1525 } 1526 *str = cur; 1527 return(0); 1528} 1529 1530/** 1531 * xmlParseAbsoluteURI: 1532 * @uri: pointer to an URI structure 1533 * @str: pointer to the string to analyze 1534 * 1535 * Parse an URI reference string and fills in the appropriate fields 1536 * of the @uri structure 1537 * 1538 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 1539 * 1540 * Returns 0 or the error code 1541 */ 1542static int 1543xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) { 1544 int ret; 1545 const char *cur; 1546 1547 if (str == NULL) 1548 return(-1); 1549 1550 cur = *str; 1551 1552 ret = xmlParseURIScheme(uri, str); 1553 if (ret != 0) return(ret); 1554 if (**str != ':') { 1555 *str = cur; 1556 return(1); 1557 } 1558 (*str)++; 1559 if (**str == '/') 1560 return(xmlParseURIHierPart(uri, str)); 1561 return(xmlParseURIOpaquePart(uri, str)); 1562} 1563 1564/** 1565 * xmlParseRelativeURI: 1566 * @uri: pointer to an URI structure 1567 * @str: pointer to the string to analyze 1568 * 1569 * Parse an relative URI string and fills in the appropriate fields 1570 * of the @uri structure 1571 * 1572 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" 
query ] 1573 * abs_path = "/" path_segments 1574 * net_path = "//" authority [ abs_path ] 1575 * rel_path = rel_segment [ abs_path ] 1576 * 1577 * Returns 0 or the error code 1578 */ 1579static int 1580xmlParseRelativeURI(xmlURIPtr uri, const char **str) { 1581 int ret = 0; 1582 const char *cur; 1583 1584 if (str == NULL) 1585 return(-1); 1586 1587 cur = *str; 1588 if ((cur[0] == '/') && (cur[1] == '/')) { 1589 cur += 2; 1590 ret = xmlParseURIAuthority(uri, &cur); 1591 if (ret != 0) 1592 return(ret); 1593 if (cur[0] == '/') { 1594 cur++; 1595 ret = xmlParseURIPathSegments(uri, &cur, 1); 1596 } 1597 } else if (cur[0] == '/') { 1598 cur++; 1599 ret = xmlParseURIPathSegments(uri, &cur, 1); 1600 } else if (cur[0] != '#' && cur[0] != '?') { 1601 ret = xmlParseURIRelSegment(uri, &cur); 1602 if (ret != 0) 1603 return(ret); 1604 if (cur[0] == '/') { 1605 cur++; 1606 ret = xmlParseURIPathSegments(uri, &cur, 1); 1607 } 1608 } 1609 if (ret != 0) 1610 return(ret); 1611 if (*cur == '?') { 1612 cur++; 1613 ret = xmlParseURIQuery(uri, &cur); 1614 if (ret != 0) 1615 return(ret); 1616 } 1617 *str = cur; 1618 return(ret); 1619} 1620 1621/** 1622 * xmlParseURIReference: 1623 * @uri: pointer to an URI structure 1624 * @str: the string to analyze 1625 * 1626 * Parse an URI reference string and fills in the appropriate fields 1627 * of the @uri structure 1628 * 1629 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1630 * 1631 * Returns 0 or the error code 1632 */ 1633int 1634xmlParseURIReference(xmlURIPtr uri, const char *str) { 1635 int ret; 1636 const char *tmp = str; 1637 1638 if (str == NULL) 1639 return(-1); 1640 xmlCleanURI(uri); 1641 1642 /* 1643 * Try first to parse absolute refs, then fallback to relative if 1644 * it fails. 
1645 */ 1646 ret = xmlParseAbsoluteURI(uri, &str); 1647 if (ret != 0) { 1648 xmlCleanURI(uri); 1649 str = tmp; 1650 ret = xmlParseRelativeURI(uri, &str); 1651 } 1652 if (ret != 0) { 1653 xmlCleanURI(uri); 1654 return(ret); 1655 } 1656 1657 if (*str == '#') { 1658 str++; 1659 ret = xmlParseURIFragment(uri, &str); 1660 if (ret != 0) return(ret); 1661 } 1662 if (*str != 0) { 1663 xmlCleanURI(uri); 1664 return(1); 1665 } 1666 return(0); 1667} 1668 1669/** 1670 * xmlParseURI: 1671 * @str: the URI string to analyze 1672 * 1673 * Parse an URI 1674 * 1675 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1676 * 1677 * Returns a newly build xmlURIPtr or NULL in case of error 1678 */ 1679xmlURIPtr 1680xmlParseURI(const char *str) { 1681 xmlURIPtr uri; 1682 int ret; 1683 1684 if (str == NULL) 1685 return(NULL); 1686 uri = xmlCreateURI(); 1687 if (uri != NULL) { 1688 ret = xmlParseURIReference(uri, str); 1689 if (ret) { 1690 xmlFreeURI(uri); 1691 return(NULL); 1692 } 1693 } 1694 return(uri); 1695} 1696 1697/************************************************************************ 1698 * * 1699 * Public functions * 1700 * * 1701 ************************************************************************/ 1702 1703/** 1704 * xmlBuildURI: 1705 * @URI: the URI instance found in the document 1706 * @base: the base value 1707 * 1708 * Computes he final URI of the reference done by checking that 1709 * the given URI is valid, and building the final URI using the 1710 * base URI. This is processed according to section 5.2 of the 1711 * RFC 2396 1712 * 1713 * 5.2. Resolving Relative References to Absolute Form 1714 * 1715 * Returns a new URI string (to be freed by the caller) or NULL in case 1716 * of error. 
1717 */ 1718xmlChar * 1719xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1720 xmlChar *val = NULL; 1721 int ret, len, indx, cur, out; 1722 xmlURIPtr ref = NULL; 1723 xmlURIPtr bas = NULL; 1724 xmlURIPtr res = NULL; 1725 1726 /* 1727 * 1) The URI reference is parsed into the potential four components and 1728 * fragment identifier, as described in Section 4.3. 1729 * 1730 * NOTE that a completely empty URI is treated by modern browsers 1731 * as a reference to "." rather than as a synonym for the current 1732 * URI. Should we do that here? 1733 */ 1734 if (URI == NULL) 1735 ret = -1; 1736 else { 1737 if (*URI) { 1738 ref = xmlCreateURI(); 1739 if (ref == NULL) 1740 goto done; 1741 ret = xmlParseURIReference(ref, (const char *) URI); 1742 } 1743 else 1744 ret = 0; 1745 } 1746 if (ret != 0) 1747 goto done; 1748 if ((ref != NULL) && (ref->scheme != NULL)) { 1749 /* 1750 * The URI is absolute don't modify. 1751 */ 1752 val = xmlStrdup(URI); 1753 goto done; 1754 } 1755 if (base == NULL) 1756 ret = -1; 1757 else { 1758 bas = xmlCreateURI(); 1759 if (bas == NULL) 1760 goto done; 1761 ret = xmlParseURIReference(bas, (const char *) base); 1762 } 1763 if (ret != 0) { 1764 if (ref) 1765 val = xmlSaveUri(ref); 1766 goto done; 1767 } 1768 if (ref == NULL) { 1769 /* 1770 * the base fragment must be ignored 1771 */ 1772 if (bas->fragment != NULL) { 1773 xmlFree(bas->fragment); 1774 bas->fragment = NULL; 1775 } 1776 val = xmlSaveUri(bas); 1777 goto done; 1778 } 1779 1780 /* 1781 * 2) If the path component is empty and the scheme, authority, and 1782 * query components are undefined, then it is a reference to the 1783 * current document and we are done. Otherwise, the reference URI's 1784 * query and fragment components are defined as found (or not found) 1785 * within the URI reference and not inherited from the base URI. 
1786 * 1787 * NOTE that in modern browsers, the parsing differs from the above 1788 * in the following aspect: the query component is allowed to be 1789 * defined while still treating this as a reference to the current 1790 * document. 1791 */ 1792 res = xmlCreateURI(); 1793 if (res == NULL) 1794 goto done; 1795 if ((ref->scheme == NULL) && (ref->path == NULL) && 1796 ((ref->authority == NULL) && (ref->server == NULL))) { 1797 if (bas->scheme != NULL) 1798 res->scheme = xmlMemStrdup(bas->scheme); 1799 if (bas->authority != NULL) 1800 res->authority = xmlMemStrdup(bas->authority); 1801 else if (bas->server != NULL) { 1802 res->server = xmlMemStrdup(bas->server); 1803 if (bas->user != NULL) 1804 res->user = xmlMemStrdup(bas->user); 1805 res->port = bas->port; 1806 } 1807 if (bas->path != NULL) 1808 res->path = xmlMemStrdup(bas->path); 1809 if (ref->query != NULL) 1810 res->query = xmlMemStrdup(ref->query); 1811 else if (bas->query != NULL) 1812 res->query = xmlMemStrdup(bas->query); 1813 if (ref->fragment != NULL) 1814 res->fragment = xmlMemStrdup(ref->fragment); 1815 goto step_7; 1816 } 1817 1818 /* 1819 * 3) If the scheme component is defined, indicating that the reference 1820 * starts with a scheme name, then the reference is interpreted as an 1821 * absolute URI and we are done. Otherwise, the reference URI's 1822 * scheme is inherited from the base URI's scheme component. 1823 */ 1824 if (ref->scheme != NULL) { 1825 val = xmlSaveUri(ref); 1826 goto done; 1827 } 1828 if (bas->scheme != NULL) 1829 res->scheme = xmlMemStrdup(bas->scheme); 1830 1831 if (ref->query != NULL) 1832 res->query = xmlMemStrdup(ref->query); 1833 if (ref->fragment != NULL) 1834 res->fragment = xmlMemStrdup(ref->fragment); 1835 1836 /* 1837 * 4) If the authority component is defined, then the reference is a 1838 * network-path and we skip to step 7. 
Otherwise, the reference 1839 * URI's authority is inherited from the base URI's authority 1840 * component, which will also be undefined if the URI scheme does not 1841 * use an authority component. 1842 */ 1843 if ((ref->authority != NULL) || (ref->server != NULL)) { 1844 if (ref->authority != NULL) 1845 res->authority = xmlMemStrdup(ref->authority); 1846 else { 1847 res->server = xmlMemStrdup(ref->server); 1848 if (ref->user != NULL) 1849 res->user = xmlMemStrdup(ref->user); 1850 res->port = ref->port; 1851 } 1852 if (ref->path != NULL) 1853 res->path = xmlMemStrdup(ref->path); 1854 goto step_7; 1855 } 1856 if (bas->authority != NULL) 1857 res->authority = xmlMemStrdup(bas->authority); 1858 else if (bas->server != NULL) { 1859 res->server = xmlMemStrdup(bas->server); 1860 if (bas->user != NULL) 1861 res->user = xmlMemStrdup(bas->user); 1862 res->port = bas->port; 1863 } 1864 1865 /* 1866 * 5) If the path component begins with a slash character ("/"), then 1867 * the reference is an absolute-path and we skip to step 7. 1868 */ 1869 if ((ref->path != NULL) && (ref->path[0] == '/')) { 1870 res->path = xmlMemStrdup(ref->path); 1871 goto step_7; 1872 } 1873 1874 1875 /* 1876 * 6) If this step is reached, then we are resolving a relative-path 1877 * reference. The relative path needs to be merged with the base 1878 * URI's path. Although there are many ways to do this, we will 1879 * describe a simple method using a separate string buffer. 1880 * 1881 * Allocate a buffer large enough for the result string. 
1882 */ 1883 len = 2; /* extra / and 0 */ 1884 if (ref->path != NULL) 1885 len += strlen(ref->path); 1886 if (bas->path != NULL) 1887 len += strlen(bas->path); 1888 res->path = (char *) xmlMallocAtomic(len); 1889 if (res->path == NULL) { 1890 xmlGenericError(xmlGenericErrorContext, 1891 "xmlBuildURI: out of memory\n"); 1892 goto done; 1893 } 1894 res->path[0] = 0; 1895 1896 /* 1897 * a) All but the last segment of the base URI's path component is 1898 * copied to the buffer. In other words, any characters after the 1899 * last (right-most) slash character, if any, are excluded. 1900 */ 1901 cur = 0; 1902 out = 0; 1903 if (bas->path != NULL) { 1904 while (bas->path[cur] != 0) { 1905 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 1906 cur++; 1907 if (bas->path[cur] == 0) 1908 break; 1909 1910 cur++; 1911 while (out < cur) { 1912 res->path[out] = bas->path[out]; 1913 out++; 1914 } 1915 } 1916 } 1917 res->path[out] = 0; 1918 1919 /* 1920 * b) The reference's path component is appended to the buffer 1921 * string. 1922 */ 1923 if (ref->path != NULL && ref->path[0] != 0) { 1924 indx = 0; 1925 /* 1926 * Ensure the path includes a '/' 1927 */ 1928 if ((out == 0) && (bas->server != NULL)) 1929 res->path[out++] = '/'; 1930 while (ref->path[indx] != 0) { 1931 res->path[out++] = ref->path[indx++]; 1932 } 1933 } 1934 res->path[out] = 0; 1935 1936 /* 1937 * Steps c) to h) are really path normalization steps 1938 */ 1939 xmlNormalizeURIPath(res->path); 1940 1941step_7: 1942 1943 /* 1944 * 7) The resulting URI components, including any inherited from the 1945 * base URI, are recombined to give the absolute form of the URI 1946 * reference. 
1947 */ 1948 val = xmlSaveUri(res); 1949 1950done: 1951 if (ref != NULL) 1952 xmlFreeURI(ref); 1953 if (bas != NULL) 1954 xmlFreeURI(bas); 1955 if (res != NULL) 1956 xmlFreeURI(res); 1957 return(val); 1958} 1959 1960/** 1961 * xmlCanonicPath: 1962 * @path: the resource locator in a filesystem notation 1963 * 1964 * Constructs a canonic path from the specified path. 1965 * 1966 * Returns a new canonic path, or a duplicate of the path parameter if the 1967 * construction fails. The caller is responsible for freeing the memory occupied 1968 * by the returned string. If there is insufficient memory available, or the 1969 * argument is NULL, the function returns NULL. 1970 */ 1971#define IS_WINDOWS_PATH(p) \ 1972 ((p != NULL) && \ 1973 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 1974 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 1975 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 1976xmlChar* 1977xmlCanonicPath(const xmlChar *path) 1978{ 1979#if defined(_WIN32) && !defined(__CYGWIN__) 1980 int len = 0; 1981 int i = 0; 1982 xmlChar *p = NULL; 1983#endif 1984 xmlChar *ret; 1985 xmlURIPtr uri; 1986 1987 if (path == NULL) 1988 return(NULL); 1989 if ((uri = xmlParseURI((const char *) path)) != NULL) { 1990 xmlFreeURI(uri); 1991 return xmlStrdup(path); 1992 } 1993 1994 uri = xmlCreateURI(); 1995 1996#if defined(_WIN32) && !defined(__CYGWIN__) 1997 len = xmlStrlen(path); 1998 if ((len > 2) && IS_WINDOWS_PATH(path)) { 1999 uri->scheme = xmlStrdup(BAD_CAST "file"); 2000 uri->path = xmlMallocAtomic(len + 2); 2001 uri->path[0] = '/'; 2002 p = uri->path + 1; 2003 strncpy(p, path, len + 1); 2004 } else { 2005 uri->path = xmlStrdup(path); 2006 p = uri->path; 2007 } 2008 while (*p != '\0') { 2009 if (*p == '\\') 2010 *p = '/'; 2011 p++; 2012 } 2013#else 2014 uri->path = (char *) xmlStrdup((const xmlChar *) path); 2015#endif 2016 2017 ret = xmlSaveUri(uri); 2018 xmlFreeURI(uri); 2019 return(ret); 2020} 2021 2022