1/* -*- mode: C; c-file-style: "gnu" -*- */ 2/* dbus-shell.c Shell command line utility functions. 3 * 4 * Copyright (C) 2002, 2003 Red Hat, Inc. 5 * Copyright (C) 2003 CodeFactory AB 6 * 7 * Licensed under the Academic Free License version 2.1 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License as published by 11 * the Free Software Foundation; either version 2 of the License, or 12 * (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 * 23 */ 24 25#include <string.h> 26#include "dbus-internals.h" 27#include "dbus-list.h" 28#include "dbus-memory.h" 29#include "dbus-protocol.h" 30#include "dbus-shell.h" 31#include "dbus-string.h" 32 33/* Single quotes preserve the literal string exactly. escape 34 * sequences are not allowed; not even \' - if you want a ' 35 * in the quoted text, you have to do something like 'foo'\''bar' 36 * 37 * Double quotes allow $ ` " \ and newline to be escaped with backslash. 38 * Otherwise double quotes preserve things literally. 
39 */ 40 41static dbus_bool_t 42unquote_string_inplace (char* str, char** end) 43{ 44 char* dest; 45 char* s; 46 char quote_char; 47 48 dest = s = str; 49 50 quote_char = *s; 51 52 if (!(*s == '"' || *s == '\'')) 53 { 54 *end = str; 55 return FALSE; 56 } 57 58 /* Skip the initial quote mark */ 59 ++s; 60 61 if (quote_char == '"') 62 { 63 while (*s) 64 { 65 _dbus_assert(s > dest); /* loop invariant */ 66 67 switch (*s) 68 { 69 case '"': 70 /* End of the string, return now */ 71 *dest = '\0'; 72 ++s; 73 *end = s; 74 return TRUE; 75 76 case '\\': 77 /* Possible escaped quote or \ */ 78 ++s; 79 switch (*s) 80 { 81 case '"': 82 case '\\': 83 case '`': 84 case '$': 85 case '\n': 86 *dest = *s; 87 ++s; 88 ++dest; 89 break; 90 91 default: 92 /* not an escaped char */ 93 *dest = '\\'; 94 ++dest; 95 /* ++s already done. */ 96 break; 97 } 98 break; 99 100 default: 101 *dest = *s; 102 ++dest; 103 ++s; 104 break; 105 } 106 107 _dbus_assert(s > dest); /* loop invariant */ 108 } 109 } 110 else 111 { 112 while (*s) 113 { 114 _dbus_assert(s > dest); /* loop invariant */ 115 116 if (*s == '\'') 117 { 118 /* End of the string, return now */ 119 *dest = '\0'; 120 ++s; 121 *end = s; 122 return TRUE; 123 } 124 else 125 { 126 *dest = *s; 127 ++dest; 128 ++s; 129 } 130 131 _dbus_assert(s > dest); /* loop invariant */ 132 } 133 } 134 135 /* If we reach here this means the close quote was never encountered */ 136 137 *dest = '\0'; 138 139 *end = s; 140 return FALSE; 141} 142 143/** 144 * Quotes a string so that the shell (/bin/sh) will interpret the 145 * quoted string to mean @unquoted_string. If you pass a filename to 146 * the shell, for example, you should first quote it with this 147 * function. The return value must be freed with dbus_free(). The 148 * quoting style used is undefined (single or double quotes may be 149 * used). 
150 * 151 * @unquoted_string: a literal string 152 **/ 153char* 154_dbus_shell_quote (const char *unquoted_string) 155{ 156 /* We always use single quotes, because the algorithm is cheesier. 157 * We could use double if we felt like it, that might be more 158 * human-readable. 159 */ 160 161 const char *p; 162 char *ret; 163 DBusString dest; 164 165 _dbus_string_init (&dest); 166 167 p = unquoted_string; 168 169 /* could speed this up a lot by appending chunks of text at a 170 * time. 171 */ 172 while (*p) 173 { 174 /* Replace literal ' with a close ', a \', and a open ' */ 175 if (*p == '\'') 176 { 177 if (!_dbus_string_append (&dest, "'\\''")) 178 { 179 _dbus_string_free (&dest); 180 return NULL; 181 } 182 } 183 else 184 { 185 if (!_dbus_string_append_byte (&dest, *p)) 186 { 187 _dbus_string_free (&dest); 188 return NULL; 189 } 190 } 191 192 ++p; 193 } 194 195 /* close the quote */ 196 if (_dbus_string_append_byte (&dest, '\'')) 197 { 198 ret = _dbus_strdup (_dbus_string_get_data (&dest)); 199 _dbus_string_free (&dest); 200 201 return ret; 202 } 203 204 _dbus_string_free (&dest); 205 206 return NULL; 207} 208 209/** 210 * Unquotes a string as the shell (/bin/sh) would. Only handles 211 * quotes; if a string contains file globs, arithmetic operators, 212 * variables, backticks, redirections, or other special-to-the-shell 213 * features, the result will be different from the result a real shell 214 * would produce (the variables, backticks, etc. will be passed 215 * through literally instead of being expanded). This function is 216 * guaranteed to succeed if applied to the result of 217 * _dbus_shell_quote(). If it fails, it returns %NULL. 218 * The @quoted_string need not actually contain quoted or 219 * escaped text; _dbus_shell_unquote() simply goes through the string and 220 * unquotes/unescapes anything that the shell would. Both single and 221 * double quotes are handled, as are escapes including escaped 222 * newlines. 
The return value must be freed with dbus_free(). 223 * 224 * Shell quoting rules are a bit strange. Single quotes preserve the 225 * literal string exactly. escape sequences are not allowed; not even 226 * \' - if you want a ' in the quoted text, you have to do something 227 * like 'foo'\''bar'. Double quotes allow $, `, ", \, and newline to 228 * be escaped with backslash. Otherwise double quotes preserve things 229 * literally. 230 * 231 * @quoted_string: shell-quoted string 232 **/ 233char* 234_dbus_shell_unquote (const char *quoted_string) 235{ 236 char *unquoted; 237 char *end; 238 char *start; 239 char *ret; 240 DBusString retval; 241 242 unquoted = _dbus_strdup (quoted_string); 243 if (unquoted == NULL) 244 return NULL; 245 246 start = unquoted; 247 end = unquoted; 248 if (!_dbus_string_init (&retval)) 249 { 250 dbus_free (unquoted); 251 return NULL; 252 } 253 254 /* The loop allows cases such as 255 * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo' 256 */ 257 while (*start) 258 { 259 /* Append all non-quoted chars, honoring backslash escape 260 */ 261 262 while (*start && !(*start == '"' || *start == '\'')) 263 { 264 if (*start == '\\') 265 { 266 /* all characters can get escaped by backslash, 267 * except newline, which is removed if it follows 268 * a backslash outside of quotes 269 */ 270 271 ++start; 272 if (*start) 273 { 274 if (*start != '\n') 275 { 276 if (!_dbus_string_append_byte (&retval, *start)) 277 goto error; 278 } 279 ++start; 280 } 281 } 282 else 283 { 284 if (!_dbus_string_append_byte (&retval, *start)) 285 goto error; 286 ++start; 287 } 288 } 289 290 if (*start) 291 { 292 if (!unquote_string_inplace (start, &end)) 293 goto error; 294 else 295 { 296 if (!_dbus_string_append (&retval, start)) 297 goto error; 298 start = end; 299 } 300 } 301 } 302 303 ret = _dbus_strdup (_dbus_string_get_data (&retval)); 304 if (!ret) 305 goto error; 306 307 dbus_free (unquoted); 308 _dbus_string_free (&retval); 309 310 return ret; 311 312 error: 313 
dbus_free (unquoted); 314 _dbus_string_free (&retval); 315 return NULL; 316} 317 318/* _dbus_shell_parse_argv() does a semi-arbitrary weird subset of the way 319 * the shell parses a command line. We don't do variable expansion, 320 * don't understand that operators are tokens, don't do tilde expansion, 321 * don't do command substitution, no arithmetic expansion, IFS gets ignored, 322 * don't do filename globs, don't remove redirection stuff, etc. 323 * 324 * READ THE UNIX98 SPEC on "Shell Command Language" before changing 325 * the behavior of this code. 326 * 327 * Steps to parsing the argv string: 328 * 329 * - tokenize the string (but since we ignore operators, 330 * our tokenization may diverge from what the shell would do) 331 * note that tokenization ignores the internals of a quoted 332 * word and it always splits on spaces, not on IFS even 333 * if we used IFS. We also ignore "end of input indicator" 334 * (I guess this is control-D?) 335 * 336 * Tokenization steps, from UNIX98 with operator stuff removed, 337 * are: 338 * 339 * 1) "If the current character is backslash, single-quote or 340 * double-quote (\, ' or ") and it is not quoted, it will affect 341 * quoting for subsequent characters up to the end of the quoted 342 * text. The rules for quoting are as described in Quoting 343 * . During token recognition no substitutions will be actually 344 * performed, and the result token will contain exactly the 345 * characters that appear in the input (except for newline 346 * character joining), unmodified, including any embedded or 347 * enclosing quotes or substitution operators, between the quote 348 * mark and the end of the quoted text. The token will not be 349 * delimited by the end of the quoted field." 350 * 351 * 2) "If the current character is an unquoted newline character, 352 * the current token will be delimited." 
*
 *   3) "If the current character is an unquoted blank character, any
 *       token containing the previous character is delimited and the
 *       current character will be discarded."
 *
 *   4) "If the previous character was part of a word, the current
 *       character will be appended to that word."
 *
 *   5) "If the current character is a "#", it and all subsequent
 *       characters up to, but excluding, the next newline character
 *       will be discarded as a comment. The newline character that
 *       ends the line is not considered part of the comment. The
 *       "#" starts a comment only when it is at the beginning of a
 *       token. Since the search for the end-of-comment does not
 *       consider an escaped newline character specially, a comment
 *       cannot be continued to the next line."
 *
 *   6) "The current character will be used as the start of a new word."
 *
 *
 * - for each token (word), perform portions of word expansion, namely
 *   field splitting (using default whitespace IFS) and quote
 *   removal. Field splitting may increase the number of words.
 *   Quote removal does not increase the number of words.
 *
 *   "If the complete expansion appropriate for a word results in an
 *   empty field, that empty field will be deleted from the list of
 *   fields that form the completely expanded command, unless the
 *   original word contained single-quote or double-quote characters."
382 * - UNIX98 spec 383 * 384 * 385 */ 386 387static dbus_bool_t 388delimit_token (DBusString *token, 389 DBusList **retval, 390 DBusError *error) 391{ 392 char *str; 393 394 str = _dbus_strdup (_dbus_string_get_data (token)); 395 if (!str) 396 { 397 _DBUS_SET_OOM (error); 398 return FALSE; 399 } 400 401 if (!_dbus_list_append (retval, str)) 402 { 403 dbus_free (str); 404 _DBUS_SET_OOM (error); 405 return FALSE; 406 } 407 408 return TRUE; 409} 410 411static DBusList* 412tokenize_command_line (const char *command_line, DBusError *error) 413{ 414 char current_quote; 415 const char *p; 416 DBusString current_token; 417 DBusList *retval = NULL; 418 dbus_bool_t quoted;; 419 420 current_quote = '\0'; 421 quoted = FALSE; 422 p = command_line; 423 424 if (!_dbus_string_init (¤t_token)) 425 { 426 _DBUS_SET_OOM (error); 427 return NULL; 428 } 429 430 while (*p) 431 { 432 if (current_quote == '\\') 433 { 434 if (*p == '\n') 435 { 436 /* we append nothing; backslash-newline become nothing */ 437 } 438 else 439 { 440 if (!_dbus_string_append_byte (¤t_token, '\\') || 441 !_dbus_string_append_byte (¤t_token, *p)) 442 { 443 _DBUS_SET_OOM (error); 444 goto error; 445 } 446 } 447 448 current_quote = '\0'; 449 } 450 else if (current_quote == '#') 451 { 452 /* Discard up to and including next newline */ 453 while (*p && *p != '\n') 454 ++p; 455 456 current_quote = '\0'; 457 458 if (*p == '\0') 459 break; 460 } 461 else if (current_quote) 462 { 463 if (*p == current_quote && 464 /* check that it isn't an escaped double quote */ 465 !(current_quote == '"' && quoted)) 466 { 467 /* close the quote */ 468 current_quote = '\0'; 469 } 470 471 /* Everything inside quotes, and the close quote, 472 * gets appended literally. 
473 */ 474 475 if (!_dbus_string_append_byte (¤t_token, *p)) 476 { 477 _DBUS_SET_OOM (error); 478 goto error; 479 } 480 } 481 else 482 { 483 switch (*p) 484 { 485 case '\n': 486 if (!delimit_token (¤t_token, &retval, error)) 487 goto error; 488 489 _dbus_string_free (¤t_token); 490 491 if (!_dbus_string_init (¤t_token)) 492 { 493 _DBUS_SET_OOM (error); 494 goto init_error; 495 } 496 497 break; 498 499 case ' ': 500 case '\t': 501 /* If the current token contains the previous char, delimit 502 * the current token. A nonzero length 503 * token should always contain the previous char. 504 */ 505 if (_dbus_string_get_length (¤t_token) > 0) 506 { 507 if (!delimit_token (¤t_token, &retval, error)) 508 goto error; 509 510 _dbus_string_free (¤t_token); 511 512 if (!_dbus_string_init (¤t_token)) 513 { 514 _DBUS_SET_OOM (error); 515 goto init_error; 516 } 517 518 } 519 520 /* discard all unquoted blanks (don't add them to a token) */ 521 break; 522 523 524 /* single/double quotes are appended to the token, 525 * escapes are maybe appended next time through the loop, 526 * comment chars are never appended. 527 */ 528 529 case '\'': 530 case '"': 531 if (!_dbus_string_append_byte (¤t_token, *p)) 532 { 533 _DBUS_SET_OOM (error); 534 goto error; 535 } 536 537 /* FALL THRU */ 538 539 case '#': 540 case '\\': 541 current_quote = *p; 542 break; 543 544 default: 545 /* Combines rules 4) and 6) - if we have a token, append to it, 546 * otherwise create a new token. 547 */ 548 if (!_dbus_string_append_byte (¤t_token, *p)) 549 { 550 _DBUS_SET_OOM (error); 551 goto error; 552 } 553 break; 554 } 555 } 556 557 /* We need to count consecutive backslashes mod 2, 558 * to detect escaped doublequotes. 
559 */ 560 if (*p != '\\') 561 quoted = FALSE; 562 else 563 quoted = !quoted; 564 565 ++p; 566 } 567 568 if (!delimit_token (¤t_token, &retval, error)) 569 goto error; 570 571 if (current_quote) 572 { 573 dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "Unclosed quotes in command line"); 574 goto error; 575 } 576 577 if (retval == NULL) 578 { 579 dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "No tokens found in command line"); 580 goto error; 581 } 582 583 _dbus_string_free (¤t_token); 584 585 return retval; 586 587 error: 588 _dbus_string_free (¤t_token); 589 590 init_error: 591 if (retval) 592 { 593 _dbus_list_foreach (&retval, (DBusForeachFunction) dbus_free, NULL); 594 _dbus_list_clear (&retval); 595 } 596 597 return NULL; 598} 599 600/** 601 * _dbus_shell_parse_argv: 602 * 603 * Parses a command line into an argument vector, in much the same way 604 * the shell would, but without many of the expansions the shell would 605 * perform (variable expansion, globs, operators, filename expansion, 606 * etc. are not supported). The results are defined to be the same as 607 * those you would get from a UNIX98 /bin/sh, as long as the input 608 * contains none of the unsupported shell expansions. If the input 609 * does contain such expansions, they are passed through 610 * literally. Free the returned vector with dbus_free_string_array(). 
611 * 612 * @command_line: command line to parse 613 * @argcp: return location for number of args 614 * @argvp: return location for array of args 615 * @error: error information 616 **/ 617dbus_bool_t 618_dbus_shell_parse_argv (const char *command_line, 619 int *argcp, 620 char ***argvp, 621 DBusError *error) 622{ 623 /* Code based on poptParseArgvString() from libpopt */ 624 int argc = 0; 625 char **argv = NULL; 626 DBusList *tokens = NULL; 627 int i; 628 DBusList *tmp_list; 629 630 if (!command_line) 631 { 632 _dbus_verbose ("Command line is NULL\n"); 633 return FALSE; 634 } 635 636 tokens = tokenize_command_line (command_line, error); 637 if (tokens == NULL) 638 { 639 _dbus_verbose ("No tokens for command line '%s'\n", command_line); 640 return FALSE; 641 } 642 643 /* Because we can't have introduced any new blank space into the 644 * tokens (we didn't do any new expansions), we don't need to 645 * perform field splitting. If we were going to honor IFS or do any 646 * expansions, we would have to do field splitting on each word 647 * here. Also, if we were going to do any expansion we would need to 648 * remove any zero-length words that didn't contain quotes 649 * originally; but since there's no expansion we know all words have 650 * nonzero length, unless they contain quotes. 651 * 652 * So, we simply remove quotes, and don't do any field splitting or 653 * empty word removal, since we know there was no way to introduce 654 * such things. 
655 */ 656 657 argc = _dbus_list_get_length (&tokens); 658 argv = dbus_new (char *, argc + 1); 659 if (!argv) 660 { 661 _DBUS_SET_OOM (error); 662 goto error; 663 } 664 665 i = 0; 666 tmp_list = tokens; 667 while (tmp_list) 668 { 669 argv[i] = _dbus_shell_unquote (tmp_list->data); 670 671 if (!argv[i]) 672 { 673 int j; 674 for (j = 0; j < i; j++) 675 dbus_free(argv[j]); 676 677 dbus_free (argv); 678 _DBUS_SET_OOM (error); 679 goto error; 680 } 681 682 tmp_list = _dbus_list_get_next_link (&tokens, tmp_list); 683 ++i; 684 } 685 argv[argc] = NULL; 686 687 _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL); 688 _dbus_list_clear (&tokens); 689 690 if (argcp) 691 *argcp = argc; 692 693 if (argvp) 694 *argvp = argv; 695 else 696 dbus_free_string_array (argv); 697 698 return TRUE; 699 700 error: 701 _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL); 702 _dbus_list_clear (&tokens); 703 704 return FALSE; 705 706} 707