1/* ----------------------------------------------------------------------- * 2 * 3 * Copyright 2011 Intel Corporation; author: H. Peter Anvin 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, 8 * Boston MA 02110-1301, USA; either version 2 of the License, or 9 * (at your option) any later version; incorporated herein by reference. 10 * 11 * ----------------------------------------------------------------------- */ 12 13#include <inttypes.h> 14#include <string.h> 15#include <stdlib.h> 16#include <ctype.h> 17#include <dprintf.h> 18#include "pxe.h" 19 20enum http_readdir_state { 21 st_start, /* 0 Initial state */ 22 st_open, /* 1 "<" */ 23 st_a, /* 2 "<a" */ 24 st_attribute, /* 3 "<a " */ 25 st_h, /* 4 "<a h" */ 26 st_hr, /* 5 */ 27 st_hre, /* 6 */ 28 st_href, /* 7 */ 29 st_hrefeq, /* 8 */ 30 st_hrefqu, /* 9 */ 31 st_badtag, /* 10 */ 32 st_badtagqu, /* 11 */ 33 st_badattr, /* 12 */ 34 st_badattrqu, /* 13 */ 35}; 36 37struct machine { 38 char xchar; 39 uint8_t st_xchar; 40 uint8_t st_left; /* < */ 41 uint8_t st_right; /* > */ 42 uint8_t st_space; /* white */ 43 uint8_t st_other; /* anything else */ 44}; 45 46static const struct machine statemachine[] = { 47 /* xchar st_xchar st_left st_right st_space st_other */ 48 { 0, 0, st_open, st_start, st_start, st_start }, 49 { 'a', st_a, st_badtag, st_start, st_open, st_badtag }, 50 { 0, 0, st_open, st_open, st_attribute, st_badtag }, 51 { 'h', st_h, st_open, st_start, st_attribute, st_badattr }, 52 { 'r', st_hr, st_open, st_start, st_attribute, st_badattr }, 53 { 'e', st_hre, st_open, st_start, st_attribute, st_badattr }, 54 { 'f', st_href, st_open, st_start, st_attribute, st_badattr }, 55 { '=', st_hrefeq, st_open, st_start, st_attribute, st_badattr }, 56 { '\"', st_hrefqu, st_open, st_start, st_attribute, st_hrefeq }, 57 { '\"', st_attribute, st_hrefqu, st_hrefqu, st_hrefqu, st_hrefqu }, 58 { '\"', st_badtagqu, st_open, st_start, st_badtag, st_badtag }, 59 { '\"', st_badtag, st_badtagqu, st_badtagqu, st_badtagqu, st_badtagqu }, 60 { '\"', st_badattrqu, st_open, st_start, st_attribute, st_badattr }, 61 { '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu }, 62}; 63 64struct html_entity { 65 uint16_t ucs; 66 const char entity[9]; 67}; 68 69static const struct html_entity entities[] = { 70 { 34, "quot" }, 71 { 38, "amp" }, 72 { 60, "lt" }, 73 { 62, "gt" }, 74#ifdef HTTP_ALL_ENTITIES 75 { 160, "nbsp" }, 76 { 161, "iexcl" }, 77 { 162, "cent" }, 78 { 163, "pound" }, 79 { 164, "curren" }, 80 { 165, "yen" }, 81 { 166, "brvbar" }, 82 { 167, "sect" }, 83 { 168, "uml" }, 84 { 169, "copy" }, 85 { 170, "ordf" }, 86 { 171, "laquo" }, 87 { 172, "not" }, 88 { 173, "shy" }, 89 { 174, "reg" }, 90 { 175, "macr" }, 91 { 176, "deg" }, 92 { 177, "plusmn" }, 93 { 178, "sup2" }, 94 { 179, "sup3" }, 95 { 180, "acute" }, 96 { 181, "micro" }, 97 { 182, "para" }, 98 { 183, "middot" }, 99 { 184, "cedil" }, 100 { 185, "sup1" }, 101 { 186, "ordm" }, 102 { 187, "raquo" }, 103 { 188, "frac14" }, 104 { 189, "frac12" }, 105 { 190, "frac34" }, 106 { 191, "iquest" }, 107 { 192, "Agrave" }, 108 { 193, "Aacute" }, 109 { 194, "Acirc" }, 110 { 195, "Atilde" }, 111 { 196, "Auml" }, 112 { 197, "Aring" }, 113 { 198, "AElig" }, 114 { 199, "Ccedil" }, 115 { 200, "Egrave" }, 116 { 201, "Eacute" }, 117 { 202, "Ecirc" }, 118 { 203, "Euml" }, 119 { 204, "Igrave" }, 120 { 205, "Iacute" }, 121 { 206, "Icirc" }, 122 { 207, "Iuml" }, 123 { 208, "ETH" }, 124 { 209, "Ntilde" }, 125 { 210, "Ograve" }, 126 { 211, "Oacute" }, 127 { 212, "Ocirc" }, 128 { 213, "Otilde" }, 129 { 214, "Ouml" }, 130 { 215, "times" }, 131 { 216, "Oslash" }, 132 { 217, "Ugrave" }, 133 { 218, "Uacute" }, 134 { 219, "Ucirc" }, 135 { 220, "Uuml" }, 136 { 221, "Yacute" }, 137 { 222, "THORN" }, 138 { 223, "szlig" }, 139 { 224, "agrave" }, 140 { 225, "aacute" }, 141 { 226, "acirc" }, 142 { 227, "atilde" }, 143 { 228, "auml" }, 144 { 229, "aring" }, 145 { 230, "aelig" }, 146 { 231, "ccedil" }, 147 { 232, "egrave" }, 148 { 233, "eacute" }, 149 { 234, "ecirc" }, 150 { 235, "euml" }, 151 { 236, "igrave" }, 152 { 237, "iacute" }, 153 { 238, "icirc" }, 154 { 239, "iuml" }, 155 { 240, "eth" }, 156 { 241, "ntilde" }, 157 { 242, "ograve" }, 158 { 243, "oacute" }, 159 { 244, "ocirc" }, 160 { 245, "otilde" }, 161 { 246, "ouml" }, 162 { 247, "divide" }, 163 { 248, "oslash" }, 164 { 249, "ugrave" }, 165 { 250, "uacute" }, 166 { 251, "ucirc" }, 167 { 252, "uuml" }, 168 { 253, "yacute" }, 169 { 254, "thorn" }, 170 { 255, "yuml" }, 171 { 338, "OElig" }, 172 { 339, "oelig" }, 173 { 352, "Scaron" }, 174 { 353, "scaron" }, 175 { 376, "Yuml" }, 176 { 402, "fnof" }, 177 { 710, "circ" }, 178 { 732, "tilde" }, 179 { 913, "Alpha" }, 180 { 914, "Beta" }, 181 { 915, "Gamma" }, 182 { 916, "Delta" }, 183 { 917, "Epsilon" }, 184 { 918, "Zeta" }, 185 { 919, "Eta" }, 186 { 920, "Theta" }, 187 { 921, "Iota" }, 188 { 922, "Kappa" }, 189 { 923, "Lambda" }, 190 { 924, "Mu" }, 191 { 925, "Nu" }, 192 { 926, "Xi" }, 193 { 927, "Omicron" }, 194 { 928, "Pi" }, 195 { 929, "Rho" }, 196 { 931, "Sigma" }, 197 { 932, "Tau" }, 198 { 933, "Upsilon" }, 199 { 934, "Phi" }, 200 { 935, "Chi" }, 201 { 936, "Psi" }, 202 { 937, "Omega" }, 203 { 945, "alpha" }, 204 { 946, "beta" }, 205 { 947, "gamma" }, 206 { 948, "delta" }, 207 { 949, "epsilon" }, 208 { 950, "zeta" }, 209 { 951, "eta" }, 210 { 952, "theta" }, 211 { 953, "iota" }, 212 { 954, "kappa" }, 213 { 955, "lambda" }, 214 { 956, "mu" }, 215 { 957, "nu" }, 216 { 958, "xi" }, 217 { 959, "omicron" }, 218 { 960, "pi" }, 219 { 961, "rho" }, 220 { 962, "sigmaf" }, 221 { 963, "sigma" }, 222 { 964, "tau" }, 223 { 965, "upsilon" }, 224 { 966, "phi" }, 225 { 967, "chi" }, 226 { 968, "psi" }, 227 { 969, "omega" }, 228 { 977, "thetasym" }, 229 { 978, "upsih" }, 230 { 982, "piv" }, 231 { 8194, "ensp" }, 232 { 8195, "emsp" }, 233 { 8201, "thinsp" }, 234 { 8204, "zwnj" }, 235 { 8205, "zwj" }, 236 { 8206, "lrm" }, 237 { 8207, "rlm" }, 238 { 8211, "ndash" }, 239 { 8212, "mdash" }, 240 { 8216, "lsquo" }, 241 { 8217, "rsquo" }, 242 { 8218, "sbquo" }, 243 { 8220, "ldquo" }, 244 { 8221, "rdquo" }, 245 { 8222, "bdquo" }, 246 { 8224, "dagger" }, 247 { 8225, "Dagger" }, 248 { 8226, "bull" }, 249 { 8230, "hellip" }, 250 { 8240, "permil" }, 251 { 8242, "prime" }, 252 { 8243, "Prime" }, 253 { 8249, "lsaquo" }, 254 { 8250, "rsaquo" }, 255 { 8254, "oline" }, 256 { 8260, "frasl" }, 257 { 8364, "euro" }, 258 { 8465, "image" }, 259 { 8472, "weierp" }, 260 { 8476, "real" }, 261 { 8482, "trade" }, 262 { 8501, "alefsym" }, 263 { 8592, "larr" }, 264 { 8593, "uarr" }, 265 { 8594, "rarr" }, 266 { 8595, "darr" }, 267 { 8596, "harr" }, 268 { 8629, "crarr" }, 269 { 8656, "lArr" }, 270 { 8657, "uArr" }, 271 { 8658, "rArr" }, 272 { 8659, "dArr" }, 273 { 8660, "hArr" }, 274 { 8704, "forall" }, 275 { 8706, "part" }, 276 { 8707, "exist" }, 277 { 8709, "empty" }, 278 { 8711, "nabla" }, 279 { 8712, "isin" }, 280 { 8713, "notin" }, 281 { 8715, "ni" }, 282 { 8719, "prod" }, 283 { 8721, "sum" }, 284 { 8722, "minus" }, 285 { 8727, "lowast" }, 286 { 8730, "radic" }, 287 { 8733, "prop" }, 288 { 8734, "infin" }, 289 { 8736, "ang" }, 290 { 8743, "and" }, 291 { 8744, "or" }, 292 { 8745, "cap" }, 293 { 8746, "cup" }, 294 { 8747, "int" }, 295 { 8756, "there4" }, 296 { 8764, "sim" }, 297 { 8773, "cong" }, 298 { 8776, "asymp" }, 299 { 8800, "ne" }, 300 { 8801, "equiv" }, 301 { 8804, "le" }, 302 { 8805, "ge" }, 303 { 8834, "sub" }, 304 { 8835, "sup" }, 305 { 8836, "nsub" }, 306 { 8838, "sube" }, 307 { 8839, "supe" }, 308 { 8853, "oplus" }, 309 { 8855, "otimes" }, 310 { 8869, "perp" }, 311 { 8901, "sdot" }, 312 { 8968, "lceil" }, 313 { 8969, "rceil" }, 314 { 8970, "lfloor" }, 315 { 8971, "rfloor" }, 316 { 9001, "lang" }, 317 { 9002, "rang" }, 318 { 9674, "loz" }, 319 { 9824, "spades" }, 320 { 9827, "clubs" }, 321 { 9829, "hearts" }, 322 { 9830, "diams" }, 323#endif /* HTTP_ALL_ENTITIES */ 324 { 0, "" } 325}; 326 327struct entity_state { 328 char entity_buf[16]; 329 char *ep; 330}; 331 332static char *emit(char *p, int c, struct entity_state *st) 333{ 334 const struct html_entity *ent; 335 unsigned int ucs; 336 337 if (!st->ep) { 338 if (c == '&') { 339 /* Entity open */ 340 st->ep = st->entity_buf; 341 } else { 342 *p++ = c; 343 } 344 } else { 345 if (c == ';') { 346 st->ep = NULL; 347 *p = '\0'; 348 if (st->entity_buf[0] == '#') { 349 if ((st->entity_buf[1] | 0x20)== 'x') { 350 ucs = strtoul(st->entity_buf + 2, NULL, 16); 351 } else { 352 ucs = strtoul(st->entity_buf + 1, NULL, 10); 353 } 354 } else { 355 for (ent = entities; ent->ucs; ent++) { 356 if (!strcmp(st->entity_buf, ent->entity)) 357 break; 358 } 359 ucs = ent->ucs; 360 } 361 if (ucs < 32 || ucs >= 0x10ffff) 362 return p; /* Bogus */ 363 if (ucs >= 0x10000) { 364 *p++ = 0xf0 + (ucs >> 18); 365 *p++ = 0x80 + ((ucs >> 12) & 0x3f); 366 *p++ = 0x80 + ((ucs >> 6) & 0x3f); 367 *p++ = 0x80 + (ucs & 0x3f); 368 } else if (ucs >= 0x800) { 369 *p++ = 0xe0 + (ucs >> 12); 370 *p++ = 0x80 + ((ucs >> 6) & 0x3f); 371 *p++ = 0x80 + (ucs & 0x3f); 372 } else if (ucs >= 0x80) { 373 *p++ = 0xc0 + (ucs >> 6); 374 *p++ = 0x80 + (ucs & 0x3f); 375 } else { 376 *p++ = ucs; 377 } 378 } else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) { 379 *st->ep++ = c; 380 } 381 } 382 return p; 383} 384 385static const char *http_get_filename(struct inode *inode, char *buf) 386{ 387 int c, lc; 388 char *p; 389 const struct machine *sm; 390 struct entity_state es; 391 enum http_readdir_state state = st_start; 392 enum http_readdir_state pstate = st_start; 393 394 memset(&es, 0, sizeof es); 395 396 p = buf; 397 for (;;) { 398 c = pxe_getc(inode); 399 if (c == -1) 400 return NULL; 401 402 lc = tolower(c); 403 404 sm = &statemachine[state]; 405 406 if (lc == sm->xchar) 407 state = sm->st_xchar; 408 else if (c == '<') 409 state = sm->st_left; 410 else if (c == '>') 411 state = sm->st_right; 412 else if (isspace(c)) 413 state = sm->st_space; 414 else 415 state = sm->st_other; 416 417 if (state == st_hrefeq || state == st_hrefqu) { 418 if (state != pstate) 419 p = buf; 420 else if (p < buf + FILENAME_MAX) 421 p = emit(p, c, &es); 422 pstate = state; 423 } else { 424 if (pstate != st_start) 425 pstate = st_start; 426 if (p != buf && state == st_start) { 427 *p = '\0'; 428 return buf; 429 } 430 } 431 } 432} 433 434int http_readdir(struct inode *inode, struct dirent *dirent) 435{ 436 char buf[FILENAME_MAX + 6]; 437 const char *fn, *sp; 438 439 for (;;) { 440 fn = http_get_filename(inode, buf); 441 442 if (!fn) 443 return -1; /* End of directory */ 444 445 /* Ignore entries with http special characters */ 446 if (strchr(fn, '#')) 447 continue; 448 if (strchr(fn, '?')) 449 continue; 450 451 /* A slash if present has to be the last character, and not the first */ 452 sp = strchr(fn, '/'); 453 if (sp) { 454 if (sp == fn || sp[1]) 455 continue; 456 } else { 457 sp = strchr(fn, '\0'); 458 } 459 460 if (sp > fn + NAME_MAX) 461 continue; 462 463 dirent->d_ino = 0; /* Not applicable */ 464 dirent->d_off = 0; /* Not applicable */ 465 dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1; 466 dirent->d_type = *sp == '/' ? DT_DIR : DT_REG; 467 memcpy(dirent->d_name, fn, sp-fn); 468 dirent->d_name[sp-fn] = '\0'; 469 return 0; 470 } 471} 472