1/*
2 * Copyright 2001-2004 Brandon Long
3 * All Rights Reserved.
4 *
5 * ClearSilver Templating System
6 *
7 * This code is made available under the terms of the ClearSilver License.
8 * http://www.clearsilver.net/license.hdf
9 *
10 */
11
12/* rfc2388 defines multipart/form-data which is primarily used for
13 * HTTP file upload
14 */
15
16#include "cs_config.h"
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <unistd.h>
21#include <sys/stat.h>
22#include <limits.h>
23#include <ctype.h>
24#include <string.h>
25#include "util/neo_misc.h"
26#include "util/neo_err.h"
27#include "util/neo_str.h"
28#include "cgi.h"
29#include "cgiwrap.h"
30
31static NEOERR * _header_value (char *hdr, char **val)
32{
33  char *p, *q;
34  int l;
35
36  *val = NULL;
37
38  p = hdr;
39  while (*p && isspace(*p)) p++;
40  q = p;
41  while (*q && !isspace(*q) && *q != ';') q++;
42  if (!*p || p == q) return STATUS_OK;
43
44  l = q - p;
45  *val = (char *) malloc (l+1);
46  if (*val == NULL)
47    return nerr_raise (NERR_NOMEM, "Unable to allocate space for val");
48  memcpy (*val, p, l);
49  (*val)[l] = '\0';
50
51  return STATUS_OK;
52}
53
54static NEOERR * _header_attr (char *hdr, char *attr, char **val)
55{
56  char *p, *k, *v;
57  int found = 0;
58  int l, al;
59  char *r;
60
61  *val = NULL;
62  l = strlen(attr);
63
64  /* skip value */
65  p = hdr;
66  while (*p && *p != ';') p++;
67  if (!*p) return STATUS_OK;
68
69  p++;
70  while(*p && !found)
71  {
72    while (*p && isspace(*p)) p++;
73    if (!*p) return STATUS_OK;
74    /* attr name */
75    k = p;
76    while (*p && !isspace(*p) && *p != ';' && *p != '=') p++;
77    if (!*p) return STATUS_OK;
78    if (l == (p-k) && !strncasecmp(attr, k, l))
79      found = 1;
80
81    while (*p && isspace(*p)) p++;
82    if (*p != ';' && *p != '=') return STATUS_OK;
83    if (*p == ';')
84    {
85      if (found)
86      {
87	*val = strdup ("");
88	if (*val == NULL)
89	  return nerr_raise (NERR_NOMEM, "Unable to allocate value");
90	return STATUS_OK;
91      }
92    }
93    else
94    {
95      p++;
96      if (*p == '"')
97      {
98	v = ++p;
99	while (*p && *p != '"') p++;
100	al = p-v;
101	if (*p) p++;
102      }
103      else
104      {
105	v = p;
106	while (*p && !isspace(*p) && *p != ';') p++;
107	al = p-v;
108      }
109      if (found)
110      {
111	r = (char *) malloc (al+1);
112	if (r == NULL)
113	  return nerr_raise (NERR_NOMEM, "Unable to allocate value");
114	memcpy (r, v, al);
115	r[al] = '\0';
116	*val = r;
117	return STATUS_OK;
118      }
119    }
120    if (*p) p++;
121  }
122  return STATUS_OK;
123}
124
125static NEOERR * _read_line (CGI *cgi, char **s, int *l, int *done)
126{
127  int ofs = 0;
128  char *p;
129  int to_read;
130
131  if (cgi->buf == NULL)
132  {
133    cgi->buflen = 4096;
134    cgi->buf = (char *) malloc (sizeof(char) * cgi->buflen);
135    if (cgi->buf == NULL)
136      return nerr_raise (NERR_NOMEM, "Unable to allocate cgi buf");
137  }
138  if (cgi->unget)
139  {
140    cgi->unget = FALSE;
141    *s = cgi->last_start;
142    *l = cgi->last_length;
143    return STATUS_OK;
144  }
145  if (cgi->found_nl)
146  {
147    p = memchr (cgi->buf + cgi->nl, '\n', cgi->readlen - cgi->nl);
148    if (p) {
149      cgi->last_start = *s = cgi->buf + cgi->nl;
150      cgi->last_length = *l = p - (cgi->buf + cgi->nl) + 1;
151      cgi->found_nl = TRUE;
152      cgi->nl = p - cgi->buf + 1;
153      return STATUS_OK;
154    }
155    ofs = cgi->readlen - cgi->nl;
156    memmove(cgi->buf, cgi->buf + cgi->nl, ofs);
157  }
158  // Read either as much buffer space as we have left, or up to
159  // the amount of data remaining according to Content-Length
160  // If there is no Content-Length, just use the buffer space, but recognize
161  // that it might not work on some servers or cgiwrap implementations.
162  // Some servers will close their end of the stdin pipe, so cgiwrap_read
163  // will return if we ask for too much.  Techically, not including
164  // Content-Length is against the HTTP spec, so we should consider failing
165  // earlier if we don't have a length.
166  to_read = cgi->buflen - ofs;
167  if (cgi->data_expected && (to_read > cgi->data_expected - cgi->data_read))
168  {
169    to_read = cgi->data_expected - cgi->data_read;
170  }
171  cgiwrap_read (cgi->buf + ofs, to_read, &(cgi->readlen));
172  if (cgi->readlen < 0)
173  {
174    return nerr_raise_errno (NERR_IO, "POST Read Error");
175  }
176  if (cgi->readlen == 0)
177  {
178    *done = 1;
179    return STATUS_OK;
180  }
181  cgi->data_read += cgi->readlen;
182  if (cgi->upload_cb)
183  {
184    if (cgi->upload_cb (cgi, cgi->data_read, cgi->data_expected))
185      return nerr_raise (CGIUploadCancelled, "Upload Cancelled");
186  }
187  cgi->readlen += ofs;
188  p = memchr (cgi->buf, '\n', cgi->readlen);
189  if (!p)
190  {
191    cgi->found_nl = FALSE;
192    cgi->last_start = *s = cgi->buf;
193    cgi->last_length = *l = cgi->readlen;
194    return STATUS_OK;
195  }
196  cgi->last_start = *s = cgi->buf;
197  cgi->last_length = *l = p - cgi->buf + 1;
198  cgi->found_nl = TRUE;
199  cgi->nl = *l;
200  return STATUS_OK;
201}
202
203static NEOERR * _read_header_line (CGI *cgi, STRING *line, int *done)
204{
205  NEOERR *err;
206  char *s, *p;
207  int l;
208
209  err = _read_line (cgi, &s, &l, done);
210  if (err) return nerr_pass (err);
211  if (*done || (l == 0)) return STATUS_OK;
212  if (isspace (s[0])) return STATUS_OK;
213  while (l && isspace(s[l-1])) l--;
214  err = string_appendn (line, s, l);
215  if (err) return nerr_pass (err);
216
217  while (1)
218  {
219    err = _read_line (cgi, &s, &l, done);
220    if (err) break;
221    if (l == 0) break;
222    if (*done) break;
223    if (!(s[0] == ' ' || s[0] == '\t'))
224    {
225      cgi->unget = TRUE;
226      break;
227    }
228    while (l && isspace(s[l-1])) l--;
229    p = s;
230    while (*p && isspace(*p) && (p-s < l)) p++;
231    err = string_append_char (line, ' ');
232    if (err) break;
233    err = string_appendn (line, p, l - (p-s));
234    if (err) break;
235    if (line->len > 50*1024*1024)
236    {
237      string_clear(line);
238      return nerr_raise(NERR_ASSERT, "read_header_line exceeded 50MB");
239    }
240  }
241  return nerr_pass (err);
242}
243
244static BOOL _is_boundary (char *boundary, char *s, int l, int *done)
245{
246  static char *old_boundary = NULL;
247  static int bl;
248
249  /* cache the boundary strlen... more pointless optimization by blong */
250  if (old_boundary != boundary)
251  {
252    old_boundary = boundary;
253    bl = strlen(boundary);
254  }
255
256  if (s[l-1] != '\n')
257    return FALSE;
258  l--;
259  if (s[l-1] == '\r')
260    l--;
261
262  if (bl+2 == l && s[0] == '-' && s[1] == '-' && !strncmp (s+2, boundary, bl))
263    return TRUE;
264  if (bl+4 == l && s[0] == '-' && s[1] == '-' &&
265      !strncmp (s+2, boundary, bl) &&
266      s[l-1] == '-' && s[l-2] == '-')
267  {
268    *done = 1;
269    return TRUE;
270  }
271  return FALSE;
272}
273
274static NEOERR * _find_boundary (CGI *cgi, char *boundary, int *done)
275{
276  NEOERR *err;
277  char *s;
278  int l;
279
280  *done = 0;
281  while (1)
282  {
283    err = _read_line (cgi, &s, &l, done);
284    if (err) return nerr_pass (err);
285    if ((l == 0) || (*done)) {
286      *done = 1;
287      return STATUS_OK;
288    }
289    if (_is_boundary(boundary, s, l, done))
290      return STATUS_OK;
291  }
292  return STATUS_OK;
293}
294
295NEOERR *open_upload(CGI *cgi, int unlink_files, FILE **fpw)
296{
297  NEOERR *err = STATUS_OK;
298  FILE *fp;
299  char path[_POSIX_PATH_MAX];
300  int fd;
301
302  *fpw = NULL;
303
304  snprintf (path, sizeof(path), "%s/cgi_upload.XXXXXX",
305      hdf_get_value(cgi->hdf, "Config.Upload.TmpDir", "/var/tmp"));
306
307  fd = mkstemp(path);
308  if (fd == -1)
309  {
310    return nerr_raise_errno (NERR_SYSTEM, "Unable to open temp file %s",
311	path);
312  }
313
314  fp = fdopen (fd, "w+");
315  if (fp == NULL)
316  {
317    close(fd);
318    return nerr_raise_errno (NERR_SYSTEM, "Unable to fdopen file %s", path);
319  }
320  if (unlink_files) unlink(path);
321  if (cgi->files == NULL)
322  {
323    err = uListInit (&(cgi->files), 10, 0);
324    if (err)
325    {
326      fclose(fp);
327      return nerr_pass(err);
328    }
329  }
330  err = uListAppend (cgi->files, fp);
331  if (err)
332  {
333    fclose (fp);
334    return nerr_pass(err);
335  }
336  if (!unlink_files) {
337    if (cgi->filenames == NULL)
338    {
339      err = uListInit (&(cgi->filenames), 10, 0);
340      if (err)
341      {
342	fclose(fp);
343	return nerr_pass(err);
344      }
345    }
346    err = uListAppend (cgi->filenames, strdup(path));
347    if (err)
348    {
349      fclose (fp);
350      return nerr_pass(err);
351    }
352  }
353  *fpw = fp;
354  return STATUS_OK;
355}
356
357static NEOERR * _read_part (CGI *cgi, char *boundary, int *done)
358{
359  NEOERR *err = STATUS_OK;
360  STRING str;
361  HDF *child, *obj = NULL;
362  FILE *fp = NULL;
363  char buf[256];
364  char *p;
365  char *name = NULL, *filename = NULL;
366  char *type = NULL, *tmp = NULL;
367  char *last = NULL;
368  int unlink_files = hdf_get_int_value(cgi->hdf, "Config.Upload.Unlink", 1);
369
370  string_init (&str);
371
372  while (1)
373  {
374    err = _read_header_line (cgi, &str, done);
375    if (err) break;
376    if (*done) break;
377    if (str.buf == NULL || str.buf[0] == '\0') break;
378    p = strchr (str.buf, ':');
379    if (p)
380    {
381      *p = '\0';
382      if (!strcasecmp(str.buf, "content-disposition"))
383      {
384	err = _header_attr (p+1, "name", &name);
385	if (err) break;
386	err = _header_attr (p+1, "filename", &filename);
387	if (err) break;
388      }
389      else if (!strcasecmp(str.buf, "content-type"))
390      {
391	err = _header_value (p+1, &type);
392	if (err) break;
393      }
394      else if (!strcasecmp(str.buf, "content-encoding"))
395      {
396	err = _header_value (p+1, &tmp);
397	if (err) break;
398	if (tmp && strcmp(tmp, "7bit") && strcmp(tmp, "8bit") &&
399	    strcmp(tmp, "binary"))
400	{
401	  free(tmp);
402	  err = nerr_raise (NERR_ASSERT, "form-data encoding is not supported");
403	  break;
404	}
405	free(tmp);
406      }
407    }
408    string_set(&str, "");
409  }
410  if (err)
411  {
412    string_clear(&str);
413    if (name) free(name);
414    if (filename) free(filename);
415    if (type) free(type);
416    return nerr_pass (err);
417  }
418
419  do
420  {
421    if (filename)
422    {
423      err = open_upload(cgi, unlink_files, &fp);
424      if (err) break;
425    }
426
427    string_set(&str, "");
428    while (!(*done))
429    {
430      char *s;
431      int l, w;
432
433      err = _read_line (cgi, &s, &l, done);
434      if (err) break;
435      if (*done || (l == 0)) break;
436      if (_is_boundary(boundary, s, l, done)) break;
437      if (filename)
438      {
439	if (last) fwrite (last, sizeof(char), strlen(last), fp);
440	if (l > 1 && s[l-1] == '\n' && s[l-2] == '\r')
441	{
442	  last = "\r\n";
443	  l-=2;
444	}
445	else if (l > 0 && s[l-1] == '\n')
446	{
447	  last = "\n";
448	  l--;
449	}
450	else last = NULL;
451	w = fwrite (s, sizeof(char), l, fp);
452	if (w != l)
453	{
454	  err = nerr_raise_errno (NERR_IO,
455	      "Short write on file %s upload %d < %d", filename, w, l);
456	  break;
457	}
458      }
459      else
460      {
461	err = string_appendn(&str, s, l);
462	if (err) break;
463      }
464    }
465    if (err) break;
466  } while (0);
467
468  /* Set up the cgi data */
469  if (!err)
470  {
471    do {
472      /* FIXME: Hmm, if we've seen the same name here before, what should we do?
473       */
474      if (filename)
475      {
476	fseek(fp, 0, SEEK_SET);
477	snprintf (buf, sizeof(buf), "Query.%s", name);
478	err = hdf_set_value (cgi->hdf, buf, filename);
479	if (!err && type)
480	{
481	  snprintf (buf, sizeof(buf), "Query.%s.Type", name);
482	  err = hdf_set_value (cgi->hdf, buf, type);
483	}
484	if (!err)
485	{
486	  snprintf (buf, sizeof(buf), "Query.%s.FileHandle", name);
487	  err = hdf_set_int_value (cgi->hdf, buf, uListLength(cgi->files));
488	}
489	if (!err && !unlink_files)
490	{
491	  char *path;
492	  snprintf (buf, sizeof(buf), "Query.%s.FileName", name);
493	  err = uListGet(cgi->filenames, uListLength(cgi->filenames)-1,
494	      (void *)&path);
495	  if (!err) err = hdf_set_value (cgi->hdf, buf, path);
496	}
497      }
498      else
499      {
500	snprintf (buf, sizeof(buf), "Query.%s", name);
501	while (str.len && isspace(str.buf[str.len-1]))
502	{
503	  str.buf[str.len-1] = '\0';
504	  str.len--;
505	}
506	if (!(cgi->ignore_empty_form_vars && str.len == 0))
507	{
508	  /* If we've seen it before... we force it into a list */
509	  obj = hdf_get_obj (cgi->hdf, buf);
510	  if (obj != NULL)
511	  {
512	    int i = 0;
513	    char buf2[10];
514	    char *t;
515	    child = hdf_obj_child (obj);
516	    if (child == NULL)
517	    {
518	      t = hdf_obj_value (obj);
519	      err = hdf_set_value (obj, "0", t);
520	      if (err != STATUS_OK) break;
521	      i = 1;
522	    }
523	    else
524	    {
525	      while (child != NULL)
526	      {
527		i++;
528		child = hdf_obj_next (child);
529		if (err != STATUS_OK) break;
530	      }
531	      if (err != STATUS_OK) break;
532	    }
533	    snprintf (buf2, sizeof(buf2), "%d", i);
534	    err = hdf_set_value (obj, buf2, str.buf);
535	    if (err != STATUS_OK) break;
536	  }
537	  err = hdf_set_value (cgi->hdf, buf, str.buf);
538	}
539      }
540    } while (0);
541  }
542
543  string_clear(&str);
544  if (name) free(name);
545  if (filename) free(filename);
546  if (type) free(type);
547
548  return nerr_pass (err);
549}
550
551NEOERR * parse_rfc2388 (CGI *cgi)
552{
553  NEOERR *err;
554  char *ct_hdr;
555  char *boundary = NULL;
556  int l;
557  int done = 0;
558
559  l = hdf_get_int_value (cgi->hdf, "CGI.ContentLength", -1);
560  ct_hdr = hdf_get_value (cgi->hdf, "CGI.ContentType", NULL);
561  if (ct_hdr == NULL)
562    return nerr_raise (NERR_ASSERT, "No content type header?");
563
564  cgi->data_expected = l;
565  cgi->data_read = 0;
566  if (cgi->upload_cb)
567  {
568    if (cgi->upload_cb (cgi, cgi->data_read, cgi->data_expected))
569      return nerr_raise (CGIUploadCancelled, "Upload Cancelled");
570  }
571
572  err = _header_attr (ct_hdr, "boundary", &boundary);
573  if (err) return nerr_pass (err);
574  err = _find_boundary(cgi, boundary, &done);
575  while (!err && !done)
576  {
577    err = _read_part (cgi, boundary, &done);
578  }
579
580  if (boundary) free(boundary);
581  return nerr_pass(err);
582}
583
584/* this is here because it gets populated in this file */
585FILE *cgi_filehandle (CGI *cgi, const char *form_name)
586{
587  NEOERR *err;
588  FILE *fp;
589  char buf[256];
590  int n;
591
592  if ((form_name == NULL) || (form_name[0] == '\0'))
593  {
594    /* if NULL, then its the PUT data we're looking for... */
595    n = hdf_get_int_value (cgi->hdf, "PUT.FileHandle", -1);
596  }
597  else
598  {
599    snprintf (buf, sizeof(buf), "Query.%s.FileHandle", form_name);
600    n = hdf_get_int_value (cgi->hdf, buf, -1);
601  }
602  if (n == -1) return NULL;
603  err = uListGet(cgi->files, n-1, (void *)&fp);
604  if (err)
605  {
606    nerr_ignore(&err);
607    return NULL;
608  }
609  return fp;
610}
611