1/*
2 * Copyright 2001-2004 Brandon Long
3 * All Rights Reserved.
4 *
5 * ClearSilver Templating System
6 *
7 * This code is made available under the terms of the ClearSilver License.
8 * http://www.clearsilver.net/license.hdf
9 *
10 */
11
12#ifndef __CGI_H_
13#define __CGI_H_ 1
14
15#include <stdarg.h>
16#include "util/neo_err.h"
17#include "util/neo_hdf.h"
18#include "cs/cs.h"
19
20__BEGIN_DECLS
21
22extern NERR_TYPE CGIFinished;
23extern NERR_TYPE CGIUploadCancelled;
24extern NERR_TYPE CGIParseNotHandled;
25
26/* HACK: Set this value if you want to treat empty CGI Query variables as
27 * non-existent.
28 */
29extern int IgnoreEmptyFormVars;
30
/* Handle type for the CGI state declared in struct _cgi. */
31typedef struct _cgi CGI;
32
/* Callback type stored in CGI.upload_cb; it receives the CGI plus the
 * nread and expected counts.  NOTE(review): presumably amount read so far
 * versus total expected, with the return value able to cancel the upload
 * (compare CGIUploadCancelled) -- confirm against the implementation. */
33typedef int (*UPLOAD_CB)(CGI *, int nread, int expected);
/* Callback type registered through cgi_register_parse_cb().  It receives
 * the CGI, a method and content-type string, and the rock supplied at
 * registration.  A callback that does not want to handle the request
 * raises CGIParseNotHandled. */
34typedef NEOERR* (*CGI_PARSE_CB)(CGI *, char *method, char *ctype, void *rock);
35
/* One entry in the list of parse callbacks referenced by
 * CGI.parse_callbacks.  The method, ctype, rock and parse_cb field names
 * match the arguments of cgi_register_parse_cb(). */
36struct _cgi_parse_cb
37{
38  char *method;      /* HTTP method this entry applies to */
39  int any_method;    /* presumably set when method was "*" -- TODO confirm */
40  char *ctype;       /* HTTP Content-Type this entry applies to */
41  int any_ctype;     /* presumably set when ctype was "*" -- TODO confirm */
42  void *rock;        /* opaque caller data passed back to parse_cb */
43  CGI_PARSE_CB parse_cb;  /* the handler itself */
44  struct _cgi_parse_cb *next;  /* link to the following entry */
45};
46
/* State for one CGI request; this is the CGI type taken by the cgi_*
 * functions declared in this header. */
47struct _cgi
48{
49  /* Only public parts of this structure */
50  void *data;  /* you can store your own information here */
51  HDF *hdf;    /* the HDF dataset associated with this CGI */
52
  /* NOTE(review): presumably the per-CGI counterpart of the global
   * IgnoreEmptyFormVars -- confirm against the implementation. */
53  BOOL ignore_empty_form_vars;
54
  /* Optional callback of type UPLOAD_CB (see the typedef above). */
55  UPLOAD_CB upload_cb;
56
  /* NOTE(review): presumably the expected and so-far-read sizes of the
   * request body -- confirm units and when they are updated. */
57  int data_expected;
58  int data_read;
  /* List of entries added with cgi_register_parse_cb and walked by
   * cgi_parse. */
59  struct _cgi_parse_cb *parse_callbacks;
60
61  /* For line oriented reading of form-data input.  Used during cgi_init
62   * only */
63  char *buf;
64  int buflen;
65  int readlen;
66  BOOL found_nl;
67  BOOL unget;
68  char *last_start;
69  int last_length;
70  int nl;
71
72  /* this is a list of filepointers pointing at files that were uploaded */
73  /* Use cgi_filehandle to access these */
74  ULIST *files;
75
76  /* By default, cgi_parse unlinks uploaded files as it opens them. */
77  /* If Config.Upload.Unlink is set to 0, the files are not unlinked */
78  /* and their names are stored in this list. */
79  /* Use Query.*.FileName to access these */
80  ULIST *filenames;
81
82  /* keep track of the time between cgi_init and cgi_render */
83  double time_start;
84  double time_end;
85};
86
87
88/*
89 * Function: cgi_init - Initialize ClearSilver CGI environment
90 * Description: cgi_init initializes the ClearSilver CGI environment,
91 *              including creating the HDF data set.  It will then import
92 *              the standard CGI environment variables into that dataset,
93 *              will parse the QUERY_STRING into the data set, and parse
94 *              the HTTP_COOKIE into the data set.  Note that if the
95 *              var xdisplay is in the form data, cgi_init will attempt
96 *              to validate the value and launch the configured debugger
97 *              on the CGI program.  These variables have to be
98 *              specified in the hdf_file pointed to by hdf_file.  The
99 *              default settings do not allow debugger launching for
100 *              security reasons.
101 * Input: cgi - a pointer to a CGI pointer
102 *        hdf_file - the path to an HDF data set file that will also be
103 *                   loaded into the dataset.  This will likely have to
104 *                   be a full path, as the HDF search paths are not
105 *                   yet set up.  (NOTE: the prototype takes HDF *hdf.)
106 * Output: cgi - an allocated CGI struct, including
107 * Return: NERR_PARSE - parse error in CGI input
108 *         NERR_NOMEM - unable to allocate memory
109 *         NERR_NOT_FOUND - hdf_file doesn't exist
110 */
111NEOERR *cgi_init (CGI **cgi, HDF *hdf);
112
113/*
114 * Function: cgi_parse - Parse incoming CGI data
115 * Description: We split cgi_init into two sections, one that parses
116 * 		just the basics, and the second is cgi_parse.  cgi_parse
117 * 		is responsible for parsing the entity body of the HTTP
118 * 		request.  This payload is typically only sent (expected)
119 * 		on POST/PUT requests, but generally this is called on
120 * 		all incoming requests.  This function walks the list of
121 * 		registered parse callbacks (see cgi_register_parse_cb),
122 * 		and if none of those matches or handles the request, it
123 * 		falls back to the builtin handlers:
124 * 		  POST w/ application/x-www-form-urlencoded
125 * 		  POST w/ multipart/form-data
126 * 		  PUT w/ any content type
127 * 		In general, if there is no Content-Length, then
128 * 		cgi_parse ignores the payload and doesn't raise an
129 * 		error.
130 * Input: cgi - a pointer to a CGI struct
131 * Output: Either data populated into files and cgi->hdf, or whatever
132 *         other side effects of your own registered callbacks.
133 * Return: NERR_PARSE - parse error in CGI input
134 *         NERR_NOMEM - unable to allocate memory
135 *         NERR_NOT_FOUND - hdf_file doesn't exist
136 *         NERR_IO - error reading HDF file or reading CGI stdin, or
137 *                   writing data on multipart/form-data file submission
138 *         Anything else you raise.
139 */
140NEOERR *cgi_parse (CGI *cgi);
141
142/*
143 * Function: cgi_register_parse_cb - Register a parse callback
144 * Description: The ClearSilver CGI Kit has built-in functionality to handle
145 *              the following methods:
146 *              GET -> doesn't have any data except query string, which
147 *                is processed for all methods
148 *              POST w/ application/x-www-form-urlencoded
149 *              POST w/ multipart/form-data
150 *                processed as RFC2388 data into files and HDF (see
151 *                cgi_filehandle())
152 *              PUT (any type)
153 *                The entire data chunk is stored as a file, with meta
154 *                data in HDF (similar to single files in RFC2388).
155 *                The data is accessible via cgi_filehandle with NULL
156 *                for name.
157 *              To handle other methods/content types, you have to
158 *              register your own parse function.  This isn't necessary
159 *              if you aren't expecting any data, and technically HTTP
160 *              only allows data on PUT/POST requests (and presumably
161 *              user defined methods).  In particular, if you want to
162 *              implement XML-RPC or SOAP, you'll have to register a
163 *              callback here to grab the XML data chunk.  Usually
164 *              you'll want to register POST w/ application/xml or POST
165 *              w/ text/xml (you either need to register both or
166 *              register POST w/ * and check the ctype yourself,
167 *              remember to nerr_raise(CGIParseNotHandled) if you aren't
168 *              handling the POST).
169 *              In general, your callback should:
170 *                Find out how much data is available:
171 *                 l = hdf_get_value (cgi->hdf, "CGI.ContentLength", NULL);
172 *                 len = atoi(l);
173 *                And read/handle all of the data using cgiwrap_read.
174 *                See the builtin handlers for how this is done.  Note
175 *                that cgiwrap_read is not guaranteed to return all of
176 *                the data you request (just like fread(3)) since it
177 *                might be reading from a socket.  Sorry.
178 *                You should be careful when reading the data to watch
179 *                for short reads (ie, end of file) and cases where the
180 *                client sends you data ad infinitum.
181 * Input: cgi - a CGI struct
182 *        method - the HTTP method you want to handle, or * for all
183 *        ctype - the HTTP Content-Type you want to handle, or * for all
184 *        rock - opaque data that we'll pass to your call back
185 * Output: None
186 * Return: CGIParseNotHandled if your callback doesn't want to handle
187 *         this.  This causes cgi_parse to continue walking the list of
188 *         callbacks.
189 *
190 */
191NEOERR *cgi_register_parse_cb(CGI *cgi, const char *method, const char *ctype,
192                              void *rock, CGI_PARSE_CB parse_cb);
193
194/*
195 * Function: cgi_destroy - deallocate the data associated with a CGI
196 * Description: cgi_destroy will destroy all the data associated with a
197 *              CGI, which mostly means the associated HDF and removal
198 *              of any files that were uploaded via multipart/form-data.
199 *              (Note that even in the event of a crash, these files
200 *              will be deleted, as they were unlinked on creation and
201 *              only exist because of the open file pointer)
202 * Input: cgi - a pointer to a pointer to a CGI struct
203 * Output: cgi - NULL on output
204 * Return: None
205 */
206void cgi_destroy (CGI **cgi);
207
208/*
209 * Function: cgi_cs_init - initialize CS parser with the CGI defaults
210 * Description: cgi_cs_init initializes a CS parser with the CGI HDF
211 *              context, and registers the standard CGI filters
212 * Input: cgi - a pointer to a CGI struct allocated with cgi_init
213 *        cs - a pointer to a CS struct pointer
214 * Output: cs - the allocated/initialized CS struct
215 * Return: NERR_NOMEM - no memory was available to render the template
216 */
217NEOERR *cgi_cs_init(CGI *cgi, CSPARSE **cs);
218
219/*
220 * Function: cgi_display - render and display the CGI output to the user
221 * Description: cgi_display will render the CS template pointed to by
222 *              cs_file using the CGI's HDF data set, and send the
223 *              output to the user.  Note that the output is actually
224 *              rendered into memory first.
225 * Input: cgi - a pointer to a CGI struct allocated with cgi_init
226 *        cs_file - a ClearSilver template file
227 * Output: None
228 * Return: NERR_IO - an IO error occurred during output
229 *         NERR_NOMEM - no memory was available to render the template
230 */
231NEOERR *cgi_display (CGI *cgi, const char *cs_file);
232
233/*
234 * Function: cgi_output - display the CGI output to the user
235 * Description: Normally, this is called by cgi_display, but some
236 *              people wanted it external so they could call it
237 *              directly.
238 * Input: cgi - a pointer to a CGI struct allocated with cgi_init
239 *        output - the data to send to output from the CGI
240 * Output: None
241 * Return: NERR_IO - an IO error occurred during output
242 *         NERR_NOMEM - no memory was available to render the template
243 */
244NEOERR *cgi_output (CGI *cgi, STRING *output);
245
246/*
247 * Function: cgi_filehandle - return a file pointer to an uploaded file
248 * Description: cgi_filehandle will return the stdio FILE pointer
249 *              associated with a file that was uploaded using
250 *              multipart/form-data.  The FILE pointer is positioned at
251 *              the start of the file when first available.
252 * Input: cgi - a pointer to a CGI struct allocated with cgi_init
253 *        form_name - the form name that the file was uploaded as
254 *                    (not the filename) (if NULL, we're asking for the
255 *                    file handle for the PUT upload)
256 * Output: None
257 * Return: A stdio FILE pointer, or NULL if an error occurs (usually
258 *         indicates that the form_name wasn't found, but might indicate
259 *         a problem with the HDF dataset)
260 */
261FILE *cgi_filehandle (CGI *cgi, const char *form_name);
262
263/*
264 * Function: cgi_neo_error - display a NEOERR call backtrace
265 * Description: cgi_neo_error will output a 500 error containing the
266 *              NEOERR call backtrace.  This function is likely to be
267 *              removed from future versions in favor of some sort of
268 *              user error mechanism.
269 * Input: cgi - a pointer to a CGI struct
270 *        err - a NEOERR (see util/neo_err.h for details)
271 * Output: None
272 * Return: None
273 */
274void cgi_neo_error (CGI *cgi, NEOERR *err);
275
276/*
277 * Function: cgi_error - display an error string to the user
278 * Description: cgi_error will output a 500 error containing the
279 *              specified error message.  This function is likely to be
280 *              removed from future versions in favor of a user error
281 *              mechanism.
282 * Input: cgi - a pointer to a CGI struct
283 *        fmt - printf style format string and arguments
284 * Output: None
285 * Return: None
286 */
287void cgi_error (CGI *cgi, const char *fmt, ...)
288                ATTRIBUTE_PRINTF(2,3);
289
290/*
291 * Function: cgi_debug_init - initialize standalone debugging
292 * Description: cgi_debug_init initializes a CGI program for standalone
293 *              debugging.  By running a ClearSilver CGI program with a
294 *              filename on the command line as the first argument, the
295 *              CGI program will load that file of the form K=V as a set
296 *              of HTTP/CGI environment variables.  This allows you to
297 *              run the program under a debugger in a reproducible
298 *              environment.
299 * Input: argc/argv - the arguments from main
300 * Output: None
301 * Return: None
302 */
303void cgi_debug_init (int argc, char **argv);
304
305/*
306 * Function: cgi_url_escape - url escape a string
307 * Description: cgi_url_escape will do URL escaping on the passed in
308 *              string, and return a newly allocated string that is escaped.
309 *              Characters which are escaped include control characters,
310 *              %, ?, +, space, =, &, /, and "
311 * Input: buf - a 0 terminated string
312 * Output: esc - a newly allocated string
313 * Return: NERR_NOMEM - no memory available to allocate the escaped string
314 */
315NEOERR *cgi_url_escape (const char *buf, char **esc);
316
317/*
318 * Function: cgi_url_escape_more - url escape a string
319 * Description: cgi_url_escape_more will do URL escaping on the passed in
320 *              string, and return a newly allocated string that is escaped.
321 *              Characters which are escaped include control characters,
322 *              %, ?, +, space, =, &, /, and " and any characters in
323 *              other
324 * Input: buf - a 0 terminated string
325 *        other - a 0 terminated string of characters to escape
326 * Output: esc - a newly allocated string
327 * Return: NERR_NOMEM - no memory available to allocate the escaped string
328 */
329NEOERR *cgi_url_escape_more (const char *buf, char **esc, const char *other);
330
331/*
332 * Function: cgi_url_validate - validate that url is of an allowed format
333 * Description: cgi_url_validate will check that a URL starts with
334 *              one of the accepted safe schemes.
335 *              If not, it returns "#" as a safe substitute.
336 *              Currently accepted schemes are http, https, ftp and mailto.
337 *              It then html escapes the entire URL so that it is safe to
338 *              insert in an href attribute.
339 * Input: buf - a 0 terminated string
340 * Output: esc - a newly allocated string
341 * Return: NERR_NOMEM - no memory available to allocate the escaped string
342 */
343NEOERR *cgi_url_validate (const char *buf, char **esc);
344
345/*
346 * Function: cgi_url_unescape - unescape an url encoded string
347 * Description: cgi_url_unescape will do URL unescaping on the passed in
348 *              string.  This function modifies the string in place.
349 *              This function will decode any %XX character, and will
350 *              decode + as space
351 * Input: buf - a 0 terminated string
352 * Return: pointer to same buf
353 */
354char *cgi_url_unescape (char *buf);
355
356/*
357 * Function: cgi_redirect - send an HTTP 302 redirect response
358 * Description: cgi_redirect will redirect the user to another page on
359 *              your site.  This version takes only the path portion of
360 *              the URL.  As with all printf style commands, you should
361 *              not call this with arbitrary input that may contain %
362 *              characters, if you are forwarding something directly,
363 *              use a format like cgi_redirect (cgi, "%s", buf)
364 * Input: cgi - cgi struct
365 *        fmt - printf style format with args
366 * Output: None
367 * Return: None
368 */
369void cgi_redirect (CGI *cgi, const char *fmt, ...)
370                   ATTRIBUTE_PRINTF(2,3);
371
372/*
373 * Function: cgi_redirect_uri - send an HTTP 302 redirect response
374 * Description: cgi_redirect_uri will redirect the user to another page on
375 *              your site.  This version takes the full URL, including
376 *              protocol/domain/port/path.
377 *              As with all printf style commands, you should
378 *              not call this with arbitrary input that may contain %
379 *              characters, if you are forwarding something directly,
380 *              use a format like cgi_redirect_uri (cgi, "%s", buf)
381 * Input: cgi - cgi struct
382 *        fmt - printf style format with args
383 * Output: None
384 * Return: None
385 */
386void cgi_redirect_uri (CGI *cgi, const char *fmt, ...)
387                       ATTRIBUTE_PRINTF(2,3);
388
389/*
390 * Function: cgi_vredirect - send an HTTP 302 redirect response
391 * Description: cgi_vredirect is mostly used internally, but can be used
392 *              if you need a varargs version of the function.
393 * Input: cgi - cgi struct
394 *        uri - whether the URL is full (1) or path only (0)
395 *        fmt - printf format string
396 *        ap - stdarg va_list
397 * Output: None
398 * Return: None
399 */
400void cgi_vredirect (CGI *cgi, int uri, const char *fmt, va_list ap);
401
402
403/*
404 * Function: cgi_cookie_authority - determine the cookie authority for a
405 *            domain
406 * Description: cgi_cookie_authority will walk the CookieAuthority
407 *              portion of the CGI HDF data set, and return the matching
408 *              domain if it exists.  The purpose of this is so that you
409 *              can set domain specific cookies.  For instance, you might
410 *              have
411 *                CookieAuthority.0 = neotonic.com
412 *              In which case, any webserver using a hostname ending in
413 *              neotonic.com will generate a cookie authority of
414 *              neotonic.com.
415 * Input: cgi - a CGI struct
416 *        host - optional host to match against.  If NULL, the function
417 *               will use the HTTP.Host HDF variable.
418 * Output: None
419 * Return: The authority domain, or NULL if none found.
420 */
421char *cgi_cookie_authority (CGI *cgi, const char *host);
422
423/*
424 * Function: cgi_cookie_set - Set a browser Cookie
425 * Description: cgi_cookie_set will issue a Set-Cookie header that
426 *              should cause a browser to return a cookie when required.
427 *              Note this function does no escaping of anything, you
428 *              have to take care of that first.
429 * Input: cgi - a CGI struct
430 *        name - the name of the cookie
431 *        value - the value to set the cookie to.
432 *        path - optional path for which the cookie is valid.  Default
433 *               is /
434 *        domain - optional domain for which the cookie is valid.  You
435 *                 can use cgi_cookie_authority to determine this
436 *                 domain.  Default is none, which is interpreted by
437 *                 the browser as the sending domain only.
438 *        time_str - expiration time string in the following format
439 *                   Wdy, DD-Mon-YYYY HH:MM:SS GMT.  Only used if
440 *                   persistent.  Default is one year from time of call.
441 *        persistent - cookie will be stored by the browser between sessions
442 *        secure - cookie will only be sent over secure connections
443 * Output: None
444 * Return: NERR_IO
445 */
446NEOERR *cgi_cookie_set (CGI *cgi, const char *name, const char *value,
447                        const char *path, const char *domain,
448                        const char *time_str, int persistent, int secure);
449
450/*
451 * Function: cgi_cookie_clear - clear browser cookie
452 * Description: cgi_cookie_clear will send back a Set-Cookie string that
453 *              will attempt to stop a browser from continuing to send
454 *              back a cookie.  Note that the cookie has to match in
455 *              name, domain, and path, and the luck of the Irish has to
456 *              be with you for this to work all the time, but at the least
457 *              it will make the browser send back a cookie with no
458 *              value, which the ClearSilver cookie parsing code will
459 *              ignore.
460 * Input: cgi - a CGI struct
461 *        name - the cookie name to clear
462 *        domain - the domain to clear, NULL for none
463 *        path - the cookie's path
464 * Output: None
465 * Return: NERR_IO
466 */
467NEOERR *cgi_cookie_clear (CGI *cgi, const char *name, const char *domain,
468                          const char *path);
469
470/* not documented *yet* */
471NEOERR *cgi_text_html_strfunc(const char *str, char **ret);
472NEOERR *cgi_html_strip_strfunc(const char *str, char **ret);
473NEOERR *cgi_html_escape_strfunc(const char *str, char **ret);
474NEOERR *cgi_js_escape (const char *buf, char **esc);
475void cgi_html_ws_strip(STRING *str, int level);
476NEOERR *cgi_register_strfuncs(CSPARSE *cs);
477
478/* internal use only */
479NEOERR * parse_rfc2388 (CGI *cgi);
480NEOERR * open_upload(CGI *cgi, int unlink_files, FILE **fpw);
481
482__END_DECLS
483
484#endif /* __CGI_H_ */
485