1/* ----------------------------------------------------------------------- * 2 * 3 * Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin 4 * 5 * Permission is hereby granted, free of charge, to any person 6 * obtaining a copy of this software and associated documentation 7 * files (the "Software"), to deal in the Software without 8 * restriction, including without limitation the rights to use, 9 * copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom 11 * the Software is furnished to do so, subject to the following 12 * conditions: 13 * 14 * The above copyright notice and this permission notice shall 15 * be included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 * 26 * ----------------------------------------------------------------------- */ 27 28/* 29 * urlparse.c 30 */ 31 32#include <string.h> 33#include <stdlib.h> 34#include <stdio.h> 35#include "url.h" 36 37/* 38 * Return the type of a URL without modifying the string 39 */ 40enum url_type url_type(const char *url) 41{ 42 const char *q; 43 44 q = strchr(url, ':'); 45 if (!q) 46 return URL_SUFFIX; 47 48 if (q[1] == '/' && q[2] == '/') 49 return URL_NORMAL; 50 51 if (q[1] == ':') 52 return URL_OLD_TFTP; 53 54 return URL_SUFFIX; 55} 56 57/* 58 * Decompose a URL into its components. This is done in-place; 59 * this routine does not allocate any additional storage. Freeing the 60 * original buffer frees all storage used. 
61 */ 62void parse_url(struct url_info *ui, char *url) 63{ 64 char *p = url; 65 char *q, *r, *s; 66 int c; 67 68 memset(ui, 0, sizeof *ui); 69 70 q = strchr(p, ':'); 71 if (q && (q[1] == '/' && q[2] == '/')) { 72 ui->type = URL_NORMAL; 73 74 ui->scheme = p; 75 *q = '\0'; 76 p = q+3; 77 78 q = strchr(p, '/'); 79 if (q) { 80 *q = '\0'; 81 ui->path = q+1; 82 q = strchr(q+1, '#'); 83 if (q) 84 *q = '\0'; 85 } else { 86 ui->path = ""; 87 } 88 89 r = strchr(p, '@'); 90 if (r) { 91 ui->user = p; 92 *r = '\0'; 93 s = strchr(p, ':'); 94 if (s) { 95 *s = '\0'; 96 ui->passwd = s+1; 97 } 98 p = r+1; 99 } 100 101 ui->host = p; 102 r = strchr(p, ':'); 103 if (r) { 104 *r++ = '\0'; 105 ui->port = 0; 106 while ((c = *r++)) { 107 c -= '0'; 108 if (c > 9) 109 break; 110 ui->port = ui->port * 10 + c; 111 } 112 } 113 } else if (q && q[1] == ':') { 114 *q = '\0'; 115 ui->scheme = "tftp"; 116 ui->host = p; 117 ui->path = q+2; 118 ui->type = URL_OLD_TFTP; 119 } else { 120 ui->path = p; 121 ui->type = URL_SUFFIX; 122 } 123} 124 125/* 126 * Escapes unsafe characters in a URL. 127 * This does *not* escape things like query characters! 128 * Returns the number of characters in the total output. 
/*
 * url_escape_unsafe(): copy input to output, replacing every byte that
 * is a space, a control character, DEL or outside 7-bit ASCII
 * (c <= ' ' or c > '~') with a %XX escape using upper-case hex digits.
 * All other characters, including URL metacharacters such as '?', '&'
 * and '%', are copied unchanged.
 *
 * At most bufsize bytes are stored in output, including the terminating
 * NUL, so the result is truncated if it does not fit; when bufsize is 0
 * nothing is written at all.  The return value is the length of the
 * complete escaped string (not counting the NUL), so a return value
 * >= bufsize means the output was truncated.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q;
    size_t n = 0;

    q = output;
    for (p = input; (c = *p); p++) {
        /* n always counts; a byte is stored only while room remains */
        if (c <= ' ' || c > '~') {
            if (++n < bufsize) {
                *q++ = '%';
            }
            if (++n < bufsize) {
                *q++ = uchexchar[c >> 4];
            }
            if (++n < bufsize) {
                *q++ = uchexchar[c & 15];
            }
        } else {
            if (++n < bufsize) {
                *q++ = c;
            }
        }
    }

    /* A zero-sized buffer has no room even for the terminator */
    if (bufsize) {
        *q = '\0';
    }
    return n;
}

/*
 * Value of a hexadecimal digit (either case), or -1 if c is not one.
 */
static int hexdigit(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    c |= 0x20;                  /* Fold upper-case letters to lower case */
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}

/*
 * Unescapes a buffer, optionally ending at an *unescaped* terminator
 * (like ; for TFTP).  The unescaping is done in-place, and the result
 * is always NUL-terminated.
 *
 * A '%' that is not followed by two hex digits is copied literally.
 * A terminator written as an escape (e.g. %3B for ';') is decoded like
 * any other character and does not end the scan.
 *
 * If a terminator is reached, return a pointer to the first character
 * after the terminator; otherwise return NULL.  Passing '\0' as the
 * terminator therefore unescapes the whole string.
 */
char *url_unescape(char *buffer, char terminator)
{
    char *p = buffer;           /* Read position */
    char *q = buffer;           /* Write position, never ahead of p */
    unsigned char c;
    int x, y;

    while ((c = *p++)) {
        /*
         * p has already moved past c, so on a terminator it points at
         * the remainder, which survives the NUL stored at q even when
         * q is the terminator's own position.
         */
        if (c == (unsigned char)terminator) {
            *q = '\0';
            return p;
        }
        if (c == '%') {
            x = hexdigit(p[0]);
            if (x >= 0) {
                /* p[0] is not NUL here, so p[1] is still in the string */
                y = hexdigit(p[1]);
                if (y >= 0) {
                    *q++ = (x << 4) + y;
                    p += 2;
                    continue;
                }
            }
        }
        *q++ = c;
    }
    *q = '\0';
    return NULL;
}

#ifdef URL_TEST

/* Components missing from a URL are left NULL; make them printable */
static const char *str_or_null(const char *s)
{
    return s ? s : "(null)";
}

/* Test driver: parse each command-line argument and dump the result */
int main(int argc, char *argv[])
{
    int i;
    struct url_info url;

    for (i = 1; i < argc; i++) {
        parse_url(&url, argv[i]);
        printf("scheme:  %s\n"
               "user:    %s\n"
               "passwd:  %s\n"
               "host:    %s\n"
               "port:    %d\n"
               "path:    %s\n"
               "type:    %d\n",
               str_or_null(url.scheme), str_or_null(url.user),
               str_or_null(url.passwd), str_or_null(url.host), url.port,
               str_or_null(url.path), url.type);
    }

    return 0;
}

#endif