1/* quotearg.c - quote arguments for output 2 3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free 4 Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 20/* Written by Paul Eggert <eggert@twinsun.com> */ 21 22#ifdef HAVE_CONFIG_H 23# include <config.h> 24#endif 25 26#include "quotearg.h" 27 28#include "xalloc.h" 29 30#include <ctype.h> 31#include <errno.h> 32#include <limits.h> 33#include <stdbool.h> 34#include <stdlib.h> 35#include <string.h> 36 37#include "gettext.h" 38#define _(msgid) gettext (msgid) 39#define N_(msgid) msgid 40 41#if HAVE_WCHAR_H 42 43/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */ 44# include <stdio.h> 45# include <time.h> 46 47# include <wchar.h> 48#endif 49 50#if !HAVE_MBRTOWC 51/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the 52 other macros are defined only for documentation and to satisfy C 53 syntax. */ 54# undef MB_CUR_MAX 55# define MB_CUR_MAX 1 56# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) 57# define iswprint(wc) isprint ((unsigned char) (wc)) 58# undef HAVE_MBSINIT 59#endif 60 61#if !defined mbsinit && !HAVE_MBSINIT 62# define mbsinit(ps) 1 63#endif 64 65#ifndef iswprint 66# if HAVE_WCTYPE_H 67# include <wctype.h> 68# endif 69# if !defined iswprint && !HAVE_ISWPRINT 70# define iswprint(wc) 1 71# endif 72#endif 73 74#ifndef SIZE_MAX 75# define SIZE_MAX ((size_t) -1) 76#endif 77 78#define INT_BITS (sizeof (int) * CHAR_BIT) 79 80struct quoting_options 81{ 82 /* Basic quoting style. */ 83 enum quoting_style style; 84 85 /* Quote the characters indicated by this bit vector even if the 86 quoting style would not normally require them to be quoted. */ 87 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; 88}; 89 90/* Names of quoting styles. */ 91char const *const quoting_style_args[] = 92{ 93 "literal", 94 "shell", 95 "shell-always", 96 "c", 97 "escape", 98 "locale", 99 "clocale", 100 0 101}; 102 103/* Correspondences to quoting style names. */ 104enum quoting_style const quoting_style_vals[] = 105{ 106 literal_quoting_style, 107 shell_quoting_style, 108 shell_always_quoting_style, 109 c_quoting_style, 110 escape_quoting_style, 111 locale_quoting_style, 112 clocale_quoting_style 113}; 114 115/* The default quoting options. */ 116static struct quoting_options default_quoting_options; 117 118/* Allocate a new set of quoting options, with contents initially identical 119 to O if O is not null, or to the default if O is null. 120 It is the caller's responsibility to free the result. */ 121struct quoting_options * 122clone_quoting_options (struct quoting_options *o) 123{ 124 int e = errno; 125 struct quoting_options *p = xmalloc (sizeof *p); 126 *p = *(o ? o : &default_quoting_options); 127 errno = e; 128 return p; 129} 130 131/* Get the value of O's quoting style. If O is null, use the default. */ 132enum quoting_style 133get_quoting_style (struct quoting_options *o) 134{ 135 return (o ? o : &default_quoting_options)->style; 136} 137 138/* In O (or in the default if O is null), 139 set the value of the quoting style to S. */ 140void 141set_quoting_style (struct quoting_options *o, enum quoting_style s) 142{ 143 (o ? o : &default_quoting_options)->style = s; 144} 145 146/* In O (or in the default if O is null), 147 set the value of the quoting options for character C to I. 148 Return the old value. Currently, the only values defined for I are 149 0 (the default) and 1 (which means to quote the character even if 150 it would not otherwise be quoted). */ 151int 152set_char_quoting (struct quoting_options *o, char c, int i) 153{ 154 unsigned char uc = c; 155 unsigned int *p = 156 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; 157 int shift = uc % INT_BITS; 158 int r = (*p >> shift) & 1; 159 *p ^= ((i & 1) ^ r) << shift; 160 return r; 161} 162 163/* MSGID approximates a quotation mark. Return its translation if it 164 has one; otherwise, return either it or "\"", depending on S. */ 165static char const * 166gettext_quote (char const *msgid, enum quoting_style s) 167{ 168 char const *translation = _(msgid); 169 if (translation == msgid && s == clocale_quoting_style) 170 translation = "\""; 171 return translation; 172} 173 174/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 175 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 176 non-quoting-style part of O to control quoting. 177 Terminate the output with a null character, and return the written 178 size of the output, not counting the terminating null. 179 If BUFFERSIZE is too small to store the output string, return the 180 value that would have been returned had BUFFERSIZE been large enough. 181 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. 182 183 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 184 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 185 style specified by O, and O may not be null. */ 186 187static size_t 188quotearg_buffer_restyled (char *buffer, size_t buffersize, 189 char const *arg, size_t argsize, 190 enum quoting_style quoting_style, 191 struct quoting_options const *o) 192{ 193 size_t i; 194 size_t len = 0; 195 char const *quote_string = 0; 196 size_t quote_string_len = 0; 197 bool backslash_escapes = false; 198 bool unibyte_locale = MB_CUR_MAX == 1; 199 200#define STORE(c) \ 201 do \ 202 { \ 203 if (len < buffersize) \ 204 buffer[len] = (c); \ 205 len++; \ 206 } \ 207 while (0) 208 209 switch (quoting_style) 210 { 211 case c_quoting_style: 212 STORE ('"'); 213 backslash_escapes = true; 214 quote_string = "\""; 215 quote_string_len = 1; 216 break; 217 218 case escape_quoting_style: 219 backslash_escapes = true; 220 break; 221 222 case locale_quoting_style: 223 case clocale_quoting_style: 224 { 225 /* TRANSLATORS: 226 Get translations for open and closing quotation marks. 227 228 The message catalog should translate "`" to a left 229 quotation mark suitable for the locale, and similarly for 230 "'". If the catalog has no translation, 231 locale_quoting_style quotes `like this', and 232 clocale_quoting_style quotes "like this". 233 234 For example, an American English Unicode locale should 235 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 236 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 237 MARK). A British English Unicode locale should instead 238 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 239 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. 240 241 If you don't know what to put here, please see 242 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> 243 and use glyphs suitable for your language. */ 244 245 char const *left = gettext_quote (N_("`"), quoting_style); 246 char const *right = gettext_quote (N_("'"), quoting_style); 247 for (quote_string = left; *quote_string; quote_string++) 248 STORE (*quote_string); 249 backslash_escapes = true; 250 quote_string = right; 251 quote_string_len = strlen (quote_string); 252 } 253 break; 254 255 case shell_always_quoting_style: 256 STORE ('\''); 257 quote_string = "'"; 258 quote_string_len = 1; 259 break; 260 261 default: 262 break; 263 } 264 265 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) 266 { 267 unsigned char c; 268 unsigned char esc; 269 270 if (backslash_escapes 271 && quote_string_len 272 && i + quote_string_len <= argsize 273 && memcmp (arg + i, quote_string, quote_string_len) == 0) 274 STORE ('\\'); 275 276 c = arg[i]; 277 switch (c) 278 { 279 case '\0': 280 if (backslash_escapes) 281 { 282 STORE ('\\'); 283 STORE ('0'); 284 STORE ('0'); 285 c = '0'; 286 } 287 break; 288 289 case '?': 290 switch (quoting_style) 291 { 292 case shell_quoting_style: 293 goto use_shell_always_quoting_style; 294 295 case c_quoting_style: 296 if (i + 2 < argsize && arg[i + 1] == '?') 297 switch (arg[i + 2]) 298 { 299 case '!': case '\'': 300 case '(': case ')': case '-': case '/': 301 case '<': case '=': case '>': 302 /* Escape the second '?' in what would otherwise be 303 a trigraph. */ 304 c = arg[i + 2]; 305 i += 2; 306 STORE ('?'); 307 STORE ('\\'); 308 STORE ('?'); 309 break; 310 311 default: 312 break; 313 } 314 break; 315 316 default: 317 break; 318 } 319 break; 320 321 case '\a': esc = 'a'; goto c_escape; 322 case '\b': esc = 'b'; goto c_escape; 323 case '\f': esc = 'f'; goto c_escape; 324 case '\n': esc = 'n'; goto c_and_shell_escape; 325 case '\r': esc = 'r'; goto c_and_shell_escape; 326 case '\t': esc = 't'; goto c_and_shell_escape; 327 case '\v': esc = 'v'; goto c_escape; 328 case '\\': esc = c; goto c_and_shell_escape; 329 330 c_and_shell_escape: 331 if (quoting_style == shell_quoting_style) 332 goto use_shell_always_quoting_style; 333 c_escape: 334 if (backslash_escapes) 335 { 336 c = esc; 337 goto store_escape; 338 } 339 break; 340 341 case '{': case '}': /* sometimes special if isolated */ 342 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) 343 break; 344 /* Fall through. */ 345 case '#': case '~': 346 if (i != 0) 347 break; 348 /* Fall through. */ 349 case ' ': 350 case '!': /* special in bash */ 351 case '"': case '$': case '&': 352 case '(': case ')': case '*': case ';': 353 case '<': 354 case '=': /* sometimes special in 0th or (with "set -k") later args */ 355 case '>': case '[': 356 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 357 case '`': case '|': 358 /* A shell special character. In theory, '$' and '`' could 359 be the first bytes of multibyte characters, which means 360 we should check them with mbrtowc, but in practice this 361 doesn't happen so it's not worth worrying about. */ 362 if (quoting_style == shell_quoting_style) 363 goto use_shell_always_quoting_style; 364 break; 365 366 case '\'': 367 switch (quoting_style) 368 { 369 case shell_quoting_style: 370 goto use_shell_always_quoting_style; 371 372 case shell_always_quoting_style: 373 STORE ('\''); 374 STORE ('\\'); 375 STORE ('\''); 376 break; 377 378 default: 379 break; 380 } 381 break; 382 383 case '%': case '+': case ',': case '-': case '.': case '/': 384 case '0': case '1': case '2': case '3': case '4': case '5': 385 case '6': case '7': case '8': case '9': case ':': 386 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 387 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 388 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 389 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 390 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 391 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 392 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 393 case 'o': case 'p': case 'q': case 'r': case 's': case 't': 394 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 395 /* These characters don't cause problems, no matter what the 396 quoting style is. They cannot start multibyte sequences. */ 397 break; 398 399 default: 400 /* If we have a multibyte sequence, copy it until we reach 401 its end, find an error, or come back to the initial shift 402 state. For C-like styles, if the sequence has 403 unprintable characters, escape the whole sequence, since 404 we can't easily escape single characters within it. */ 405 { 406 /* Length of multibyte sequence found so far. */ 407 size_t m; 408 409 bool printable; 410 411 if (unibyte_locale) 412 { 413 m = 1; 414 printable = isprint (c) != 0; 415 } 416 else 417 { 418 mbstate_t mbstate; 419 memset (&mbstate, 0, sizeof mbstate); 420 421 m = 0; 422 printable = true; 423 if (argsize == SIZE_MAX) 424 argsize = strlen (arg); 425 426 do 427 { 428 wchar_t w; 429 size_t bytes = mbrtowc (&w, &arg[i + m], 430 argsize - (i + m), &mbstate); 431 if (bytes == 0) 432 break; 433 else if (bytes == (size_t) -1) 434 { 435 printable = false; 436 break; 437 } 438 else if (bytes == (size_t) -2) 439 { 440 printable = false; 441 while (i + m < argsize && arg[i + m]) 442 m++; 443 break; 444 } 445 else 446 { 447 /* Work around a bug with older shells that "see" a '\' 448 that is really the 2nd byte of a multibyte character. 449 In practice the problem is limited to ASCII 450 chars >= '@' that are shell special chars. */ 451 if ('[' == 0x5b && quoting_style == shell_quoting_style) 452 { 453 size_t j; 454 for (j = 1; j < bytes; j++) 455 switch (arg[i + m + j]) 456 { 457 case '[': case '\\': case '^': 458 case '`': case '|': 459 goto use_shell_always_quoting_style; 460 461 default: 462 break; 463 } 464 } 465 466 if (! iswprint (w)) 467 printable = false; 468 m += bytes; 469 } 470 } 471 while (! mbsinit (&mbstate)); 472 } 473 474 if (1 < m || (backslash_escapes && ! printable)) 475 { 476 /* Output a multibyte sequence, or an escaped 477 unprintable unibyte character. */ 478 size_t ilim = i + m; 479 480 for (;;) 481 { 482 if (backslash_escapes && ! printable) 483 { 484 STORE ('\\'); 485 STORE ('0' + (c >> 6)); 486 STORE ('0' + ((c >> 3) & 7)); 487 c = '0' + (c & 7); 488 } 489 if (ilim <= i + 1) 490 break; 491 STORE (c); 492 c = arg[++i]; 493 } 494 495 goto store_c; 496 } 497 } 498 } 499 500 if (! (backslash_escapes 501 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) 502 goto store_c; 503 504 store_escape: 505 STORE ('\\'); 506 507 store_c: 508 STORE (c); 509 } 510 511 if (i == 0 && quoting_style == shell_quoting_style) 512 goto use_shell_always_quoting_style; 513 514 if (quote_string) 515 for (; *quote_string; quote_string++) 516 STORE (*quote_string); 517 518 if (len < buffersize) 519 buffer[len] = '\0'; 520 return len; 521 522 use_shell_always_quoting_style: 523 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 524 shell_always_quoting_style, o); 525} 526 527/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 528 argument ARG (of size ARGSIZE), using O to control quoting. 529 If O is null, use the default. 530 Terminate the output with a null character, and return the written 531 size of the output, not counting the terminating null. 532 If BUFFERSIZE is too small to store the output string, return the 533 value that would have been returned had BUFFERSIZE been large enough. 534 If ARGSIZE is SIZE_MAX, use the string length of the argument for 535 ARGSIZE. */ 536size_t 537quotearg_buffer (char *buffer, size_t buffersize, 538 char const *arg, size_t argsize, 539 struct quoting_options const *o) 540{ 541 struct quoting_options const *p = o ? o : &default_quoting_options; 542 int e = errno; 543 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 544 p->style, p); 545 errno = e; 546 return r; 547} 548 549/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly 550 allocated storage containing the quoted string. */ 551char * 552quotearg_alloc (char const *arg, size_t argsize, 553 struct quoting_options const *o) 554{ 555 int e = errno; 556 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; 557 char *buf = xmalloc (bufsize); 558 quotearg_buffer (buf, bufsize, arg, argsize, o); 559 errno = e; 560 return buf; 561} 562 563/* Use storage slot N to return a quoted version of argument ARG. 564 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a 565 null-terminated string. 566 OPTIONS specifies the quoting options. 567 The returned value points to static storage that can be 568 reused by the next call to this function with the same value of N. 569 N must be nonnegative. N is deliberately declared with type "int" 570 to allow for future extensions (using negative values). */ 571static char * 572quotearg_n_options (int n, char const *arg, size_t argsize, 573 struct quoting_options const *options) 574{ 575 int e = errno; 576 577 /* Preallocate a slot 0 buffer, so that the caller can always quote 578 one small component of a "memory exhausted" message in slot 0. */ 579 static char slot0[256]; 580 static unsigned int nslots = 1; 581 unsigned int n0 = n; 582 struct slotvec 583 { 584 size_t size; 585 char *val; 586 }; 587 static struct slotvec slotvec0 = {sizeof slot0, slot0}; 588 static struct slotvec *slotvec = &slotvec0; 589 590 if (n < 0) 591 abort (); 592 593 if (nslots <= n0) 594 { 595 /* FIXME: technically, the type of n1 should be `unsigned int', 596 but that evokes an unsuppressible warning from gcc-4.0.1 and 597 older. If gcc ever provides an option to suppress that warning, 598 revert to the original type, so that the test in xalloc_oversized 599 is once again performed only at compile time. */ 600 size_t n1 = n0 + 1; 601 602 if (xalloc_oversized (n1, sizeof *slotvec)) 603 xalloc_die (); 604 605 if (slotvec == &slotvec0) 606 { 607 slotvec = xmalloc (sizeof *slotvec); 608 *slotvec = slotvec0; 609 } 610 slotvec = xrealloc (slotvec, n1 * sizeof *slotvec); 611 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec); 612 nslots = n1; 613 } 614 615 { 616 size_t size = slotvec[n].size; 617 char *val = slotvec[n].val; 618 size_t qsize = quotearg_buffer (val, size, arg, argsize, options); 619 620 if (size <= qsize) 621 { 622 slotvec[n].size = size = qsize + 1; 623 if (val != slot0) 624 free (val); 625 slotvec[n].val = val = xmalloc (size); 626 quotearg_buffer (val, size, arg, argsize, options); 627 } 628 629 errno = e; 630 return val; 631 } 632} 633 634char * 635quotearg_n (int n, char const *arg) 636{ 637 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); 638} 639 640char * 641quotearg (char const *arg) 642{ 643 return quotearg_n (0, arg); 644} 645 646/* Return quoting options for STYLE, with no extra quoting. */ 647static struct quoting_options 648quoting_options_from_style (enum quoting_style style) 649{ 650 struct quoting_options o; 651 o.style = style; 652 memset (o.quote_these_too, 0, sizeof o.quote_these_too); 653 return o; 654} 655 656char * 657quotearg_n_style (int n, enum quoting_style s, char const *arg) 658{ 659 struct quoting_options const o = quoting_options_from_style (s); 660 return quotearg_n_options (n, arg, SIZE_MAX, &o); 661} 662 663char * 664quotearg_n_style_mem (int n, enum quoting_style s, 665 char const *arg, size_t argsize) 666{ 667 struct quoting_options const o = quoting_options_from_style (s); 668 return quotearg_n_options (n, arg, argsize, &o); 669} 670 671char * 672quotearg_style (enum quoting_style s, char const *arg) 673{ 674 return quotearg_n_style (0, s, arg); 675} 676 677char * 678quotearg_char (char const *arg, char ch) 679{ 680 struct quoting_options options; 681 options = default_quoting_options; 682 set_char_quoting (&options, ch, 1); 683 return quotearg_n_options (0, arg, SIZE_MAX, &options); 684} 685 686char * 687quotearg_colon (char const *arg) 688{ 689 return quotearg_char (arg, ':'); 690} 691