1/* ----------------------------------------------------------------------- *
2 *
3 *   Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin
4 *
5 *   Permission is hereby granted, free of charge, to any person
6 *   obtaining a copy of this software and associated documentation
7 *   files (the "Software"), to deal in the Software without
8 *   restriction, including without limitation the rights to use,
9 *   copy, modify, merge, publish, distribute, sublicense, and/or
10 *   sell copies of the Software, and to permit persons to whom
11 *   the Software is furnished to do so, subject to the following
12 *   conditions:
13 *
14 *   The above copyright notice and this permission notice shall
15 *   be included in all copies or substantial portions of the Software.
16 *
17 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 *   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 *   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 *   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 *   OTHER DEALINGS IN THE SOFTWARE.
25 *
26 * ----------------------------------------------------------------------- */
27
28/*
29 * urlparse.c
30 */
31
32#include <string.h>
33#include <stdlib.h>
34#include <stdio.h>
35#include "url.h"
36
37/*
38 * Return the type of a URL without modifying the string
39 */
40enum url_type url_type(const char *url)
41{
42    const char *q;
43
44    q = strchr(url, ':');
45    if (!q)
46	return URL_SUFFIX;
47
48    if (q[1] == '/' && q[2] == '/')
49	return URL_NORMAL;
50
51    if (q[1] == ':')
52	return URL_OLD_TFTP;
53
54    return URL_SUFFIX;
55}
56
57/*
58 * Decompose a URL into its components.  This is done in-place;
59 * this routine does not allocate any additional storage.  Freeing the
60 * original buffer frees all storage used.
61 */
62void parse_url(struct url_info *ui, char *url)
63{
64    char *p = url;
65    char *q, *r, *s;
66    int c;
67
68    memset(ui, 0, sizeof *ui);
69
70    q = strchr(p, ':');
71    if (q && (q[1] == '/' && q[2] == '/')) {
72	ui->type = URL_NORMAL;
73
74	ui->scheme = p;
75	*q = '\0';
76	p = q+3;
77
78	q = strchr(p, '/');
79	if (q) {
80	    *q = '\0';
81	    ui->path = q+1;
82	    q = strchr(q+1, '#');
83	    if (q)
84		*q = '\0';
85	} else {
86	    ui->path = "";
87	}
88
89	r = strchr(p, '@');
90	if (r) {
91	    ui->user = p;
92	    *r = '\0';
93	    s = strchr(p, ':');
94	    if (s) {
95		*s = '\0';
96		ui->passwd = s+1;
97	    }
98	    p = r+1;
99	}
100
101	ui->host = p;
102	r = strchr(p, ':');
103	if (r) {
104	    *r++ = '\0';
105	    ui->port = 0;
106	    while ((c = *r++)) {
107		c -= '0';
108		if (c > 9)
109		    break;
110		ui->port = ui->port * 10 + c;
111	    }
112	}
113    } else if (q && q[1] == ':') {
114	*q = '\0';
115	ui->scheme = "tftp";
116	ui->host = p;
117	ui->path = q+2;
118	ui->type = URL_OLD_TFTP;
119    } else {
120	ui->path = p;
121	ui->type = URL_SUFFIX;
122    }
123}
124
/*
 * Escapes unsafe characters in a URL.
 * This does *not* escape things like query characters!
 *
 * Every input byte that is <= ' ' or > '~' is written as %XX with
 * uppercase hex digits; all other bytes are copied unchanged.
 *
 * At most bufsize bytes are stored in output, including the
 * terminating NUL.  Longer results are truncated, possibly in the
 * middle of a %XX sequence.  If bufsize is zero, output is not touched.
 *
 * Returns the number of characters in the total (untruncated) output,
 * not counting the NUL; a return value >= bufsize means truncation.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q = output;
    size_t n = 0;

    for (p = input; (c = (unsigned char)*p) != '\0'; p++) {
        if (c <= ' ' || c > '~') {
            /* n keeps counting after the buffer fills so the caller
               learns the size that would have been needed */
            if (++n < bufsize)
                *q++ = '%';
            if (++n < bufsize)
                *q++ = uchexchar[c >> 4];
            if (++n < bufsize)
                *q++ = uchexchar[c & 15];
        } else {
            if (++n < bufsize)
                *q++ = (char)c;
        }
    }

    /* With a zero-sized buffer there is no room even for the NUL */
    if (bufsize)
        *q = '\0';
    return n;
}
152
/*
 * Return the value (0-15) of the hexadecimal digit c, accepting both
 * upper and lower case, or -1 if c is not a hexadecimal digit.
 */
static int hexdigit(char c)
{
    int folded;

    if (c >= '0' && c <= '9')
        return c - '0';

    /* In ASCII, setting bit 0x20 maps 'A'..'F' onto 'a'..'f' */
    folded = c | 0x20;
    if (folded >= 'a' && folded <= 'f')
        return folded - 'a' + 10;

    return -1;
}

/*
 * Unescapes a buffer, optionally ending at an *unescaped* terminator
 * (like ; for TFTP).  The unescaping is done in-place: each %XY with
 * two hex digits is replaced by the byte it encodes, and a '%' not
 * followed by two hex digits is kept literally.  The result is always
 * NUL-terminated.  A terminator byte produced by decoding an escape
 * (e.g. %3B for ';') does not end the scan.
 *
 * If a terminator is reached, return a pointer to the first character
 * after the terminator; the unescaped text before it is NUL-terminated
 * in buffer.  If the end of the string is reached first, return NULL.
 * Pass 0 as terminator to unescape the whole string.
 */
char *url_unescape(char *buffer, char terminator)
{
    char *p = buffer;           /* read cursor */
    char *q = buffer;           /* write cursor, never ahead of p */
    unsigned char c;
    int x, y;

    while ((c = (unsigned char)*p) != '\0') {
        if (c == terminator) {
            /*
             * q may equal p here, in which case the NUL overwrites the
             * terminator itself; p + 1 is the remainder in either case.
             */
            *q = '\0';
            return p + 1;
        }
        p++;
        if (c == '%') {
            x = hexdigit(p[0]);
            if (x >= 0) {
                /* p[1] is only read when p[0] is a hex digit, hence not NUL */
                y = hexdigit(p[1]);
                if (y >= 0) {
                    *q++ = (char)((x << 4) + y);
                    p += 2;
                    continue;
                }
            }
        }
        *q++ = (char)c;
    }
    *q = '\0';
    return NULL;
}
199
200#ifdef URL_TEST
201
202int main(int argc, char *argv[])
203{
204    int i;
205    struct url_info url;
206
207    for (i = 1; i < argc; i++) {
208	parse_url(&url, argv[i]);
209	printf("scheme:  %s\n"
210	       "user:    %s\n"
211	       "passwd:  %s\n"
212	       "host:    %s\n"
213	       "port:    %d\n"
214	       "path:    %s\n"
215	       "type:    %d\n",
216	       url.scheme, url.user, url.passwd, url.host, url.port,
217	       url.path, url.type);
218    }
219
220    return 0;
221}
222
223#endif
224