/*  GNU SED, a batch stream editor.
    Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2009
    Free Software Foundation, Inc.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */

#undef EXPERIMENTAL_DASH_N_OPTIMIZATION	/*don't use -- is very buggy*/
/* Smallest allocation ever given to a struct line buffer. */
#define INITIAL_BUFFER_SIZE	50
/* Size of the stack buffer used when copying an `r' file to the output. */
#define FREAD_BUFFER_SIZE	8192

#include "sed.h"

#include <stddef.h>
#include <stdio.h>
#include <ctype.h>

#include <errno.h>
#ifndef errno
extern int errno;
#endif

#ifndef BOOTSTRAP
#include <selinux/selinux.h>
#include <selinux/context.h>
#endif

#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif

#ifndef BOOTSTRAP
#include "acl.h"
#endif

#ifdef __GNUC__
# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
   /* silence warning about unused parameter even for "gcc -W -Wunused" */
#  define UNUSED	__attribute__((unused))
# endif
#endif
#ifndef UNUSED
# define UNUSED
#endif

#ifdef HAVE_STRINGS_H
# include <strings.h>
#else
# include <string.h>
#endif /*HAVE_STRINGS_H*/
#ifdef HAVE_MEMORY_H
# include <memory.h>
#endif

/* Fall back to the BSD names when the ANSI ones are unavailable. */
#ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
#endif

#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifndef EXIT_SUCCESS
# define EXIT_SUCCESS 0
#endif

#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif

#include <sys/stat.h>
#include "stat-macros.h"


/* Sed operates a line at a time.  The bytes between `text' and `active'
   are an already-consumed prefix; they are not counted in `alloc'
   (resize_line() reallocates `inactive + alloc' bytes). */
struct line {
  char *text;		/* Pointer to line allocated by malloc. */
  char *active;		/* Pointer to non-consumed part of text. */
  size_t length;	/* Length of text (or active, if used). */
  size_t alloc;		/* Allocated space for active. */
  bool chomped;		/* Was a trailing newline dropped? */
#ifdef HAVE_MBRTOWC
  mbstate_t mbstate;	/* Multibyte conversion state; must stay the last
			   member, see SIZEOF_LINE. */
#endif
};

/* Number of leading bytes of a struct line that line_exchange() swaps
   when the multibyte state must be left where it is: everything up to,
   but not including, the trailing `mbstate' member. */
#ifdef HAVE_MBRTOWC
#define SIZEOF_LINE	offsetof (struct line, mbstate)
#else
#define SIZEOF_LINE	(sizeof (struct line))
#endif

/* A queue of text to write out at the end of a cycle
   (filled by the "a", "r" and "R" commands.) */
struct append_queue {
  const char *fname;		/* If non-NULL, a file whose contents
				   dump_append_queue() copies to the output. */
  char *text;			/* If non-NULL, literal text to write. */
  size_t textlen;		/* Number of bytes of `text' to write. */
  struct append_queue *next;	/* Next entry, or NULL at the tail. */
  bool free;			/* If true, release_append_queue() frees
				   `text'. */
};

/* State information for the input stream. */
struct input {
  /* The list of yet-to-be-opened files.  It is invalid for file_list
     to be NULL.  When *file_list is NULL we are currently processing
     the last file.  */

  char **file_list;

  /* Count of files we failed to open. */
  countT bad_count;

  /* Current input line number (over all files).  */
  countT line_number;

  /* True if we'll reset line numbers and addresses before
     starting to process the next (possibly the first) file.  */
  bool reset_at_next_file;

  /* Function to read one line.  If FP is NULL, read_fn better not
     be one which uses fp; in particular, read_always_fail() is
     recommended. */
  bool (*read_fn) P_((struct input *));	/* read one line */

  /* Name of the temporary output file used for in-place editing; set
     by open_next_file() and freed by closedown(). */
  char *out_file_name;

  /* Name of the file being edited in place (after symlink resolution
     when follow_symlinks is set). */
  const char *in_file_name;

  /* Owner and mode to be set just before closing the file.  */
  struct stat st;

  /* if NULL, none of the following are valid */
  FILE *fp;

  bool no_buffering;
};


/* Have we done any replacements lately?
This is used by the `t' command. */ 152static bool replaced = false; 153 154/* The current output file (stdout if -i is not being used. */ 155static struct output output_file; 156 157/* The `current' input line. */ 158static struct line line; 159 160/* An input line used to accumulate the result of the s and e commands. */ 161static struct line s_accum; 162 163/* An input line that's been stored by later use by the program */ 164static struct line hold; 165 166/* The buffered input look-ahead. The only field that should be 167 used outside of read_mem_line() or line_init() is buffer.length. */ 168static struct line buffer; 169 170static struct append_queue *append_head = NULL; 171static struct append_queue *append_tail = NULL; 172 173 174#ifdef BOOTSTRAP 175/* We can't be sure that the system we're boostrapping on has 176 memchr(), and ../lib/memchr.c requires configuration knowledge 177 about how many bits are in a `long'. This implementation 178 is far from ideal, but it should get us up-and-limping well 179 enough to run the configure script, which is all that matters. 180*/ 181# ifdef memchr 182# undef memchr 183# endif 184# define memchr bootstrap_memchr 185 186static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n)); 187static VOID * 188bootstrap_memchr(s, c, n) 189 const VOID *s; 190 int c; 191 size_t n; 192{ 193 char *p; 194 195 for (p=(char *)s; n-- > 0; ++p) 196 if (*p == c) 197 return p; 198 return CAST(VOID *)0; 199} 200#endif /*BOOTSTRAP*/ 201 202/* increase a struct line's length, making some attempt at 203 keeping realloc() calls under control by padding for future growth. */ 204static void resize_line P_((struct line *, size_t)); 205static void 206resize_line(lb, len) 207 struct line *lb; 208 size_t len; 209{ 210 int inactive; 211 inactive = lb->active - lb->text; 212 213 /* If the inactive part has got to more than two thirds of the buffer, 214 * remove it. 
*/ 215 if (inactive > lb->alloc * 2) 216 { 217 MEMMOVE(lb->text, lb->active, lb->length); 218 lb->alloc += lb->active - lb->text; 219 lb->active = lb->text; 220 inactive = 0; 221 222 if (lb->alloc > len) 223 return; 224 } 225 226 lb->alloc *= 2; 227 if (lb->alloc < len) 228 lb->alloc = len; 229 if (lb->alloc < INITIAL_BUFFER_SIZE) 230 lb->alloc = INITIAL_BUFFER_SIZE; 231 232 lb->text = REALLOC(lb->text, inactive + lb->alloc, char); 233 lb->active = lb->text + inactive; 234} 235 236/* Append `length' bytes from `string' to the line `to'. */ 237static void str_append P_((struct line *, const char *, size_t)); 238static void 239str_append(to, string, length) 240 struct line *to; 241 const char *string; 242 size_t length; 243{ 244 size_t new_length = to->length + length; 245 246 if (to->alloc < new_length) 247 resize_line(to, new_length); 248 MEMCPY(to->active + to->length, string, length); 249 to->length = new_length; 250 251#ifdef HAVE_MBRTOWC 252 if (mb_cur_max > 1 && !is_utf8) 253 while (length) 254 { 255 size_t n = MBRLEN (string, length, &to->mbstate); 256 257 /* An invalid sequence is treated like a singlebyte character. 
*/ 258 if (n == (size_t) -1) 259 { 260 memset (&to->mbstate, 0, sizeof (to->mbstate)); 261 n = 1; 262 } 263 264 if (n > 0) 265 { 266 string += n; 267 length -= n; 268 } 269 else 270 break; 271 } 272#endif 273} 274 275static void str_append_modified P_((struct line *, const char *, size_t, 276 enum replacement_types)); 277static void 278str_append_modified(to, string, length, type) 279 struct line *to; 280 const char *string; 281 size_t length; 282 enum replacement_types type; 283{ 284#ifdef HAVE_MBRTOWC 285 mbstate_t from_stat; 286 287 if (type == REPL_ASIS) 288 { 289 str_append(to, string, length); 290 return; 291 } 292 293 if (to->alloc - to->length < length * mb_cur_max) 294 resize_line(to, to->length + length * mb_cur_max); 295 296 MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t)); 297 while (length) 298 { 299 wchar_t wc; 300 int n = MBRTOWC (&wc, string, length, &from_stat); 301 302 /* An invalid sequence is treated like a singlebyte character. */ 303 if (n == -1) 304 { 305 memset (&to->mbstate, 0, sizeof (from_stat)); 306 n = 1; 307 } 308 309 if (n > 0) 310 string += n, length -= n; 311 else 312 { 313 /* Incomplete sequence, copy it manually. */ 314 str_append(to, string, length); 315 return; 316 } 317 318 /* Convert the first character specially... */ 319 if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST)) 320 { 321 if (type & REPL_UPPERCASE_FIRST) 322 wc = towupper(wc); 323 else 324 wc = towlower(wc); 325 326 type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST); 327 if (type == REPL_ASIS) 328 { 329 n = WCRTOMB (to->active + to->length, wc, &to->mbstate); 330 to->length += n; 331 str_append(to, string, length); 332 return; 333 } 334 } 335 336 else if (type & REPL_UPPERCASE) 337 wc = towupper(wc); 338 else 339 wc = towlower(wc); 340 341 /* Copy the new wide character to the end of the string. 
*/ 342 n = WCRTOMB (to->active + to->length, wc, &to->mbstate); 343 to->length += n; 344 if (n == -1) 345 { 346 fprintf (stderr, "Case conversion produced an invalid character!"); 347 abort (); 348 } 349 } 350#else 351 size_t old_length = to->length; 352 char *start, *end; 353 354 str_append(to, string, length); 355 start = to->active + old_length; 356 end = start + length; 357 358 /* Now do the required modifications. First \[lu]... */ 359 if (type & REPL_UPPERCASE_FIRST) 360 { 361 *start = toupper(*start); 362 start++; 363 type &= ~REPL_UPPERCASE_FIRST; 364 } 365 else if (type & REPL_LOWERCASE_FIRST) 366 { 367 *start = tolower(*start); 368 start++; 369 type &= ~REPL_LOWERCASE_FIRST; 370 } 371 372 if (type == REPL_ASIS) 373 return; 374 375 /* ...and then \[LU] */ 376 if (type == REPL_UPPERCASE) 377 for (; start != end; start++) 378 *start = toupper(*start); 379 else 380 for (; start != end; start++) 381 *start = tolower(*start); 382#endif 383} 384 385/* Initialize a "struct line" buffer. Copy multibyte state from `state' 386 if not null. */ 387static void line_init P_((struct line *, struct line *, size_t initial_size)); 388static void 389line_init(buf, state, initial_size) 390 struct line *buf; 391 struct line *state; 392 size_t initial_size; 393{ 394 buf->text = MALLOC(initial_size, char); 395 buf->active = buf->text; 396 buf->alloc = initial_size; 397 buf->length = 0; 398 buf->chomped = true; 399 400#ifdef HAVE_MBRTOWC 401 if (state) 402 memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); 403 else 404 memset (&buf->mbstate, 0, sizeof (buf->mbstate)); 405#endif 406} 407 408/* Reset a "struct line" buffer to length zero. Copy multibyte state from 409 `state' if not null. 
*/ 410static void line_reset P_((struct line *, struct line *)); 411static void 412line_reset(buf, state) 413 struct line *buf, *state; 414{ 415 if (buf->alloc == 0) 416 line_init(buf, state, INITIAL_BUFFER_SIZE); 417 else 418 { 419 buf->length = 0; 420#ifdef HAVE_MBRTOWC 421 if (state) 422 memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); 423 else 424 memset (&buf->mbstate, 0, sizeof (buf->mbstate)); 425#endif 426 } 427} 428 429/* Copy the contents of the line `from' into the line `to'. 430 This destroys the old contents of `to'. 431 Copy the multibyte state if `state' is true. */ 432static void line_copy P_((struct line *from, struct line *to, int state)); 433static void 434line_copy(from, to, state) 435 struct line *from; 436 struct line *to; 437 int state; 438{ 439 /* Remove the inactive portion in the destination buffer. */ 440 to->alloc += to->active - to->text; 441 442 if (to->alloc < from->length) 443 { 444 to->alloc *= 2; 445 if (to->alloc < from->length) 446 to->alloc = from->length; 447 if (to->alloc < INITIAL_BUFFER_SIZE) 448 to->alloc = INITIAL_BUFFER_SIZE; 449 /* Use FREE()+MALLOC() instead of REALLOC() to 450 avoid unnecessary copying of old text. */ 451 FREE(to->text); 452 to->text = MALLOC(to->alloc, char); 453 } 454 455 to->active = to->text; 456 to->length = from->length; 457 to->chomped = from->chomped; 458 MEMCPY(to->active, from->active, from->length); 459 460#ifdef HAVE_MBRTOWC 461 if (state) 462 MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate)); 463#endif 464} 465 466/* Append the contents of the line `from' to the line `to'. 467 Copy the multibyte state if `state' is true. 
*/ 468static void line_append P_((struct line *from, struct line *to, int state)); 469static void 470line_append(from, to, state) 471 struct line *from; 472 struct line *to; 473 int state; 474{ 475 str_append(to, "\n", 1); 476 str_append(to, from->active, from->length); 477 to->chomped = from->chomped; 478 479#ifdef HAVE_MBRTOWC 480 if (state) 481 MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate)); 482#endif 483} 484 485/* Exchange two "struct line" buffers. 486 Copy the multibyte state if `state' is true. */ 487static void line_exchange P_((struct line *a, struct line *b, int state)); 488static void 489line_exchange(a, b, state) 490 struct line *a; 491 struct line *b; 492 int state; 493{ 494 struct line t; 495 496 if (state) 497 { 498 MEMCPY(&t, a, sizeof (struct line)); 499 MEMCPY( a, b, sizeof (struct line)); 500 MEMCPY( b, &t, sizeof (struct line)); 501 } 502 else 503 { 504 MEMCPY(&t, a, SIZEOF_LINE); 505 MEMCPY( a, b, SIZEOF_LINE); 506 MEMCPY( b, &t, SIZEOF_LINE); 507 } 508} 509 510 511/* dummy function to simplify read_pattern_space() */ 512static bool read_always_fail P_((struct input *)); 513static bool 514read_always_fail(input) 515 struct input *input UNUSED; 516{ 517 return false; 518} 519 520static bool read_file_line P_((struct input *)); 521static bool 522read_file_line(input) 523 struct input *input; 524{ 525 static char *b; 526 static size_t blen; 527 528 long result = ck_getline (&b, &blen, input->fp); 529 if (result <= 0) 530 return false; 531 532 /* Remove the trailing new-line that is left by getline. 
*/ 533 if (b[result - 1] == '\n') 534 --result; 535 else 536 line.chomped = false; 537 538 str_append(&line, b, result); 539 return true; 540} 541 542 543static inline void output_missing_newline P_((struct output *)); 544static inline void 545output_missing_newline(outf) 546 struct output *outf; 547{ 548 if (outf->missing_newline) 549 { 550 ck_fwrite("\n", 1, 1, outf->fp); 551 outf->missing_newline = false; 552 } 553} 554 555static inline void flush_output P_((FILE *)); 556static inline void 557flush_output(fp) 558 FILE *fp; 559{ 560 if (fp != stdout || unbuffered_output) 561 ck_fflush(fp); 562} 563 564static void output_line P_((const char *, size_t, int, struct output *)); 565static void 566output_line(text, length, nl, outf) 567 const char *text; 568 size_t length; 569 int nl; 570 struct output *outf; 571{ 572 if (!text) 573 return; 574 575 output_missing_newline(outf); 576 if (length) 577 ck_fwrite(text, 1, length, outf->fp); 578 if (nl) 579 ck_fwrite("\n", 1, 1, outf->fp); 580 else 581 outf->missing_newline = true; 582 583 flush_output(outf->fp); 584} 585 586static struct append_queue *next_append_slot P_((void)); 587static struct append_queue * 588next_append_slot() 589{ 590 struct append_queue *n = MALLOC(1, struct append_queue); 591 592 n->fname = NULL; 593 n->text = NULL; 594 n->textlen = 0; 595 n->next = NULL; 596 n->free = false; 597 598 if (append_tail) 599 append_tail->next = n; 600 else 601 append_head = n; 602 return append_tail = n; 603} 604 605static void release_append_queue P_((void)); 606static void 607release_append_queue() 608{ 609 struct append_queue *p, *q; 610 611 for (p=append_head; p; p=q) 612 { 613 if (p->free) 614 FREE(p->text); 615 616 q = p->next; 617 FREE(p); 618 } 619 append_head = append_tail = NULL; 620} 621 622static void dump_append_queue P_((void)); 623static void 624dump_append_queue() 625{ 626 struct append_queue *p; 627 628 output_missing_newline(&output_file); 629 for (p=append_head; p; p=p->next) 630 { 631 if (p->text) 
632 ck_fwrite(p->text, 1, p->textlen, output_file.fp); 633 634 if (p->fname) 635 { 636 char buf[FREAD_BUFFER_SIZE]; 637 size_t cnt; 638 FILE *fp; 639 640 /* "If _fname_ does not exist or cannot be read, it shall 641 be treated as if it were an empty file, causing no error 642 condition." IEEE Std 1003.2-1992 643 So, don't fail. */ 644 fp = ck_fopen(p->fname, read_mode, false); 645 if (fp) 646 { 647 while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0) 648 ck_fwrite(buf, 1, cnt, output_file.fp); 649 ck_fclose(fp); 650 } 651 } 652 } 653 654 flush_output(output_file.fp); 655 release_append_queue(); 656} 657 658 659/* Compute the name of the backup file for in-place editing */ 660static char *get_backup_file_name P_((const char *)); 661static char * 662get_backup_file_name(name) 663 const char *name; 664{ 665 char *old_asterisk, *asterisk, *backup, *p; 666 int name_length = strlen(name), backup_length = strlen(in_place_extension); 667 668 /* Compute the length of the backup file */ 669 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; 670 (asterisk = strchr(old_asterisk, '*')); 671 old_asterisk = asterisk + 1) 672 backup_length += name_length - 1; 673 674 p = backup = xmalloc(backup_length + 1); 675 676 /* Each iteration gobbles up to an asterisk */ 677 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; 678 (asterisk = strchr(old_asterisk, '*')); 679 old_asterisk = asterisk + 1) 680 { 681 MEMCPY (p, old_asterisk, asterisk - old_asterisk); 682 p += asterisk - old_asterisk; 683 strcpy (p, name); 684 p += name_length; 685 } 686 687 /* Tack on what's after the last asterisk */ 688 strcpy (p, old_asterisk); 689 return backup; 690} 691 692/* Initialize a struct input for the named file. 
*/ 693static void open_next_file P_((const char *name, struct input *)); 694static void 695open_next_file(name, input) 696 const char *name; 697 struct input *input; 698{ 699 buffer.length = 0; 700 701 if (name[0] == '-' && name[1] == '\0' && !in_place_extension) 702 { 703 clearerr(stdin); /* clear any stale EOF indication */ 704 input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false); 705 } 706 else if ( ! (input->fp = ck_fopen(name, read_mode, false)) ) 707 { 708 const char *ptr = strerror(errno); 709 fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr); 710 input->read_fn = read_always_fail; /* a redundancy */ 711 ++input->bad_count; 712 return; 713 } 714 715 input->read_fn = read_file_line; 716 717 if (in_place_extension) 718 { 719 int input_fd; 720 char *tmpdir, *p; 721#ifndef BOOTSTRAP 722 security_context_t old_fscreatecon; 723 int reset_fscreatecon = 0; 724 memset (&old_fscreatecon, 0, sizeof (old_fscreatecon)); 725#endif 726 727 if (follow_symlinks) 728 input->in_file_name = follow_symlink (name); 729 else 730 input->in_file_name = name; 731 732 /* get the base name */ 733 tmpdir = ck_strdup(input->in_file_name); 734 if ((p = strrchr(tmpdir, '/'))) 735 *p = 0; 736 else 737 strcpy(tmpdir, "."); 738 739 if (isatty (fileno (input->fp))) 740 panic(_("couldn't edit %s: is a terminal"), input->in_file_name); 741 742 input_fd = fileno (input->fp); 743 fstat (input_fd, &input->st); 744 if (!S_ISREG (input->st.st_mode)) 745 panic(_("couldn't edit %s: not a regular file"), input->in_file_name); 746 747#ifndef BOOTSTRAP 748 if (is_selinux_enabled ()) 749 { 750 security_context_t con; 751 if (getfilecon (input->in_file_name, &con) != -1) 752 { 753 /* Save and restore the old context for the sake of w and W 754 commands. 
*/ 755 reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0; 756 if (setfscreatecon (con) < 0) 757 fprintf (stderr, _("%s: warning: failed to set default file creation context to %s: %s"), 758 myname, con, strerror (errno)); 759 freecon (con); 760 } 761 else 762 { 763 if (errno != ENOSYS) 764 fprintf (stderr, _("%s: warning: failed to get security context of %s: %s"), 765 myname, input->in_file_name, strerror (errno)); 766 } 767 } 768#endif 769 770 output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed"); 771 output_file.missing_newline = false; 772 free (tmpdir); 773 774#ifndef BOOTSTRAP 775 if (reset_fscreatecon) 776 { 777 setfscreatecon (old_fscreatecon); 778 freecon (old_fscreatecon); 779 } 780#endif 781 782 if (!output_file.fp) 783 panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno)); 784 } 785 else 786 output_file.fp = stdout; 787} 788 789 790/* Clean up an input stream that we are done with. */ 791static void closedown P_((struct input *)); 792static void 793closedown(input) 794 struct input *input; 795{ 796 input->read_fn = read_always_fail; 797 if (!input->fp) 798 return; 799 800 if (in_place_extension && output_file.fp != NULL) 801 { 802 const char *target_name; 803 int input_fd, output_fd; 804 805 target_name = input->in_file_name; 806 input_fd = fileno (input->fp); 807 output_fd = fileno (output_file.fp); 808 copy_acl (input->in_file_name, input_fd, 809 input->out_file_name, output_fd, 810 input->st.st_mode); 811#ifdef HAVE_FCHOWN 812 if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1) 813 fchown (output_fd, -1, input->st.st_gid); 814#endif 815 816 ck_fclose (input->fp); 817 ck_fclose (output_file.fp); 818 if (strcmp(in_place_extension, "*") != 0) 819 { 820 char *backup_file_name = get_backup_file_name(target_name); 821 ck_rename (target_name, backup_file_name, input->out_file_name); 822 free (backup_file_name); 823 } 824 825 ck_rename (input->out_file_name, target_name, 
input->out_file_name); 826 free (input->out_file_name); 827 } 828 else 829 ck_fclose (input->fp); 830 831 input->fp = NULL; 832} 833 834/* Reset range commands so that they are marked as non-matching */ 835static void reset_addresses P_((struct vector *)); 836static void 837reset_addresses(vec) 838 struct vector *vec; 839{ 840 struct sed_cmd *cur_cmd; 841 int n; 842 843 for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++) 844 if (cur_cmd->a1 845 && cur_cmd->a1->addr_type == ADDR_IS_NUM 846 && cur_cmd->a1->addr_number == 0) 847 cur_cmd->range_state = RANGE_ACTIVE; 848 else 849 cur_cmd->range_state = RANGE_INACTIVE; 850} 851 852/* Read in the next line of input, and store it in the pattern space. 853 Return zero if there is nothing left to input. */ 854static bool read_pattern_space P_((struct input *, struct vector *, int)); 855static bool 856read_pattern_space(input, the_program, append) 857 struct input *input; 858 struct vector *the_program; 859 int append; 860{ 861 if (append_head) /* redundant test to optimize for common case */ 862 dump_append_queue(); 863 replaced = false; 864 if (!append) 865 line.length = 0; 866 line.chomped = true; /* default, until proved otherwise */ 867 868 while ( ! (*input->read_fn)(input) ) 869 { 870 closedown(input); 871 872 if (!*input->file_list) 873 return false; 874 875 if (input->reset_at_next_file) 876 { 877 input->line_number = 0; 878 hold.length = 0; 879 reset_addresses (the_program); 880 rewind_read_files (); 881 882 /* If doing in-place editing, we will never append the 883 new-line to this file; but if the output goes to stdout, 884 we might still have to output the missing new-line. 
*/ 885 if (in_place_extension) 886 output_file.missing_newline = false; 887 888 input->reset_at_next_file = separate_files; 889 } 890 891 open_next_file (*input->file_list++, input); 892 } 893 894 ++input->line_number; 895 return true; 896} 897 898 899static bool last_file_with_data_p P_((struct input *)); 900static bool 901last_file_with_data_p(input) 902 struct input *input; 903{ 904 for (;;) 905 { 906 int ch; 907 908 closedown(input); 909 if (!*input->file_list) 910 return true; 911 open_next_file(*input->file_list++, input); 912 if (input->fp) 913 { 914 if ((ch = getc(input->fp)) != EOF) 915 { 916 ungetc(ch, input->fp); 917 return false; 918 } 919 } 920 } 921} 922 923/* Determine if we match the `$' address. */ 924static bool test_eof P_((struct input *)); 925static bool 926test_eof(input) 927 struct input *input; 928{ 929 int ch; 930 931 if (buffer.length) 932 return false; 933 if (!input->fp) 934 return separate_files || last_file_with_data_p(input); 935 if (feof(input->fp)) 936 return separate_files || last_file_with_data_p(input); 937 if ((ch = getc(input->fp)) == EOF) 938 return separate_files || last_file_with_data_p(input); 939 ungetc(ch, input->fp); 940 return false; 941} 942 943/* Return non-zero if the current line matches the address 944 pointed to by `addr'. 
*/ 945static bool match_an_address_p P_((struct addr *, struct input *)); 946static bool 947match_an_address_p(addr, input) 948 struct addr *addr; 949 struct input *input; 950{ 951 switch (addr->addr_type) 952 { 953 case ADDR_IS_NULL: 954 return true; 955 956 case ADDR_IS_REGEX: 957 return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0); 958 959 case ADDR_IS_NUM_MOD: 960 return (input->line_number >= addr->addr_number 961 && ((input->line_number - addr->addr_number) % addr->addr_step) == 0); 962 963 case ADDR_IS_STEP: 964 case ADDR_IS_STEP_MOD: 965 /* reminder: these are only meaningful for a2 addresses */ 966 /* a2->addr_number needs to be recomputed each time a1 address 967 matches for the step and step_mod types */ 968 return (addr->addr_number <= input->line_number); 969 970 case ADDR_IS_LAST: 971 return test_eof(input); 972 973 /* ADDR_IS_NUM is handled in match_address_p. */ 974 case ADDR_IS_NUM: 975 default: 976 panic("INTERNAL ERROR: bad address type"); 977 } 978 /*NOTREACHED*/ 979 return false; 980} 981 982/* return non-zero if current address is valid for cmd */ 983static bool match_address_p P_((struct sed_cmd *, struct input *)); 984static bool 985match_address_p(cmd, input) 986 struct sed_cmd *cmd; 987 struct input *input; 988{ 989 if (!cmd->a1) 990 return true; 991 992 if (cmd->range_state != RANGE_ACTIVE) 993 { 994 /* Find if we are going to activate a range. Handle ADDR_IS_NUM 995 specially: it represent an "absolute" state, it should not 996 be computed like regexes. */ 997 if (cmd->a1->addr_type == ADDR_IS_NUM) 998 { 999 if (!cmd->a2) 1000 return (input->line_number == cmd->a1->addr_number); 1001 1002 if (cmd->range_state == RANGE_CLOSED 1003 || input->line_number < cmd->a1->addr_number) 1004 return false; 1005 } 1006 else 1007 { 1008 if (!cmd->a2) 1009 return match_an_address_p(cmd->a1, input); 1010 1011 if (!match_an_address_p(cmd->a1, input)) 1012 return false; 1013 } 1014 1015 /* Ok, start a new range. 
*/ 1016 cmd->range_state = RANGE_ACTIVE; 1017 switch (cmd->a2->addr_type) 1018 { 1019 case ADDR_IS_REGEX: 1020 /* Always include at least two lines. */ 1021 return true; 1022 case ADDR_IS_NUM: 1023 /* Same handling as below, but always include at least one line. */ 1024 if (input->line_number >= cmd->a2->addr_number) 1025 cmd->range_state = RANGE_CLOSED; 1026 return true; 1027 case ADDR_IS_STEP: 1028 cmd->a2->addr_number = input->line_number + cmd->a2->addr_step; 1029 return true; 1030 case ADDR_IS_STEP_MOD: 1031 cmd->a2->addr_number = input->line_number + cmd->a2->addr_step 1032 - (input->line_number%cmd->a2->addr_step); 1033 return true; 1034 default: 1035 break; 1036 } 1037 } 1038 1039 /* cmd->range_state == RANGE_ACTIVE. Check if the range is 1040 ending; also handle ADDR_IS_NUM specially in this case. */ 1041 1042 if (cmd->a2->addr_type == ADDR_IS_NUM) 1043 { 1044 /* If the second address is a line number, and if we got past 1045 that line, fail to match (it can happen when you jump 1046 over such addresses with `b' and `t'. Use RANGE_CLOSED 1047 so that the range is not re-enabled anymore. */ 1048 if (input->line_number >= cmd->a2->addr_number) 1049 cmd->range_state = RANGE_CLOSED; 1050 1051 return (input->line_number <= cmd->a2->addr_number); 1052 } 1053 1054 /* Other addresses are treated as usual. */ 1055 if (match_an_address_p(cmd->a2, input)) 1056 cmd->range_state = RANGE_CLOSED; 1057 1058 return true; 1059} 1060 1061 1062static void do_list P_((int line_len)); 1063static void 1064do_list(line_len) 1065 int line_len; 1066{ 1067 unsigned char *p = CAST(unsigned char *)line.active; 1068 countT len = line.length; 1069 countT width = 0; 1070 char obuf[180]; /* just in case we encounter a 512-bit char (;-) */ 1071 char *o; 1072 size_t olen; 1073 FILE *fp = output_file.fp; 1074 1075 output_missing_newline(&output_file); 1076 for (; len--; ++p) { 1077 o = obuf; 1078 1079 /* Some locales define 8-bit characters as printable. 
This makes the 1080 testsuite fail at 8to7.sed because the `l' command in fact will not 1081 convert the 8-bit characters. */ 1082#if defined isascii || defined HAVE_ISASCII 1083 if (isascii(*p) && ISPRINT(*p)) { 1084#else 1085 if (ISPRINT(*p)) { 1086#endif 1087 *o++ = *p; 1088 if (*p == '\\') 1089 *o++ = '\\'; 1090 } else { 1091 *o++ = '\\'; 1092 switch (*p) { 1093#if defined __STDC__ && __STDC__-0 1094 case '\a': *o++ = 'a'; break; 1095#else /* Not STDC; we'll just assume ASCII */ 1096 case 007: *o++ = 'a'; break; 1097#endif 1098 case '\b': *o++ = 'b'; break; 1099 case '\f': *o++ = 'f'; break; 1100 case '\n': *o++ = 'n'; break; 1101 case '\r': *o++ = 'r'; break; 1102 case '\t': *o++ = 't'; break; 1103 case '\v': *o++ = 'v'; break; 1104 default: 1105 sprintf(o, "%03o", *p); 1106 o += strlen(o); 1107 break; 1108 } 1109 } 1110 olen = o - obuf; 1111 if (width+olen >= line_len && line_len > 0) { 1112 ck_fwrite("\\\n", 1, 2, fp); 1113 width = 0; 1114 } 1115 ck_fwrite(obuf, 1, olen, fp); 1116 width += olen; 1117 } 1118 ck_fwrite("$\n", 1, 2, fp); 1119 flush_output (fp); 1120} 1121 1122 1123static enum replacement_types append_replacement P_((struct line *, struct replacement *, 1124 struct re_registers *, 1125 enum replacement_types)); 1126static enum replacement_types 1127append_replacement (buf, p, regs, repl_mod) 1128 struct line *buf; 1129 struct replacement *p; 1130 struct re_registers *regs; 1131 enum replacement_types repl_mod; 1132{ 1133 for (; p; p=p->next) 1134 { 1135 int i = p->subst_id; 1136 enum replacement_types curr_type; 1137 1138 /* Apply a \[lu] modifier that was given earlier, but which we 1139 have not had yet the occasion to apply. But don't do it 1140 if this replacement has a modifier of its own. */ 1141 curr_type = (p->repl_type & REPL_MODIFIERS) 1142 ? 
p->repl_type 1143 : p->repl_type | repl_mod; 1144 1145 repl_mod = 0; 1146 if (p->prefix_length) 1147 { 1148 str_append_modified(buf, p->prefix, p->prefix_length, 1149 curr_type); 1150 curr_type &= ~REPL_MODIFIERS; 1151 } 1152 1153 if (0 <= i) 1154 { 1155 if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS) 1156 /* Save this modifier, we shall apply it later. 1157 e.g. in s/()([a-z])/\u\1\2/ 1158 the \u modifier is applied to \2, not \1 */ 1159 repl_mod = curr_type & REPL_MODIFIERS; 1160 1161 else if (regs->end[i] != regs->start[i]) 1162 str_append_modified(buf, line.active + regs->start[i], 1163 CAST(size_t)(regs->end[i] - regs->start[i]), 1164 curr_type); 1165 } 1166 } 1167 1168 return repl_mod; 1169} 1170 1171static void do_subst P_((struct subst *)); 1172static void 1173do_subst(sub) 1174 struct subst *sub; 1175{ 1176 size_t start = 0; /* where to start scan for (next) match in LINE */ 1177 size_t last_end = 0; /* where did the last successful match end in LINE */ 1178 countT count = 0; /* number of matches found */ 1179 bool again = true; 1180 1181 static struct re_registers regs; 1182 1183 line_reset(&s_accum, &line); 1184 1185 /* The first part of the loop optimizes s/xxx// when xxx is at the 1186 start, and s/xxx$// */ 1187 if (!match_regex(sub->regx, line.active, line.length, start, 1188 ®s, sub->max_id + 1)) 1189 return; 1190 1191 if (!sub->replacement && sub->numb <= 1) 1192 { 1193 if (regs.start[0] == 0 && !sub->global) 1194 { 1195 /* We found a match, set the `replaced' flag. */ 1196 replaced = true; 1197 1198 line.active += regs.end[0]; 1199 line.length -= regs.end[0]; 1200 line.alloc -= regs.end[0]; 1201 goto post_subst; 1202 } 1203 else if (regs.end[0] == line.length) 1204 { 1205 /* We found a match, set the `replaced' flag. 
*/ 1206 replaced = true; 1207 1208 line.length = regs.start[0]; 1209 goto post_subst; 1210 } 1211 } 1212 1213 do 1214 { 1215 enum replacement_types repl_mod = 0; 1216 1217 size_t offset = regs.start[0]; 1218 size_t matched = regs.end[0] - regs.start[0]; 1219 1220 /* Copy stuff to the left of this match into the output string. */ 1221 if (start < offset) 1222 str_append(&s_accum, line.active + start, offset - start); 1223 1224 /* If we're counting up to the Nth match, are we there yet? 1225 And even if we are there, there is another case we have to 1226 skip: are we matching an empty string immediately following 1227 another match? 1228 1229 This latter case avoids that baaaac, when passed through 1230 s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is 1231 unacceptable because it is not consistently applied (for 1232 example, `baaaa' gives `xbx', not `xbxx'). */ 1233 if ((matched > 0 || count == 0 || offset > last_end) 1234 && ++count >= sub->numb) 1235 { 1236 /* We found a match, set the `replaced' flag. */ 1237 replaced = true; 1238 1239 /* Now expand the replacement string into the output string. */ 1240 repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod); 1241 again = sub->global; 1242 } 1243 else 1244 { 1245 /* The match was not replaced. Copy the text until its 1246 end; if it was vacuous, skip over one character and 1247 add that character to the output. */ 1248 if (matched == 0) 1249 { 1250 if (start < line.length) 1251 matched = 1; 1252 else 1253 break; 1254 } 1255 1256 str_append(&s_accum, line.active + offset, matched); 1257 } 1258 1259 /* Start after the match. last_end is the real end of the matched 1260 substring, excluding characters that were skipped in case the RE 1261 matched the empty string. 
*/ 1262 start = offset + matched; 1263 last_end = regs.end[0]; 1264 } 1265 while (again 1266 && start <= line.length 1267 && match_regex(sub->regx, line.active, line.length, start, 1268 ®s, sub->max_id + 1)); 1269 1270 /* Copy stuff to the right of the last match into the output string. */ 1271 if (start < line.length) 1272 str_append(&s_accum, line.active + start, line.length-start); 1273 s_accum.chomped = line.chomped; 1274 1275 /* Exchange line and s_accum. This can be much cheaper 1276 than copying s_accum.active into line.text (for huge lines). */ 1277 line_exchange(&line, &s_accum, false); 1278 1279 /* Finish up. */ 1280 if (count < sub->numb) 1281 return; 1282 1283 post_subst: 1284 if (sub->print & 1) 1285 output_line(line.active, line.length, line.chomped, &output_file); 1286 1287 if (sub->eval) 1288 { 1289#ifdef HAVE_POPEN 1290 FILE *pipe_fp; 1291 line_reset(&s_accum, NULL); 1292 1293 str_append (&line, "", 1); 1294 pipe_fp = popen(line.active, "r"); 1295 1296 if (pipe_fp != NULL) 1297 { 1298 while (!feof (pipe_fp)) 1299 { 1300 char buf[4096]; 1301 int n = fread (buf, sizeof(char), 4096, pipe_fp); 1302 if (n > 0) 1303 str_append(&s_accum, buf, n); 1304 } 1305 1306 pclose (pipe_fp); 1307 1308 /* Exchange line and s_accum. This can be much cheaper than copying 1309 s_accum.active into line.text (for huge lines). See comment above 1310 for 'g' as to while the third argument is incorrect anyway. */ 1311 line_exchange(&line, &s_accum, true); 1312 if (line.length && 1313 line.active[line.length - 1] == '\n') 1314 line.length--; 1315 } 1316 else 1317 panic(_("error in subprocess")); 1318#else 1319 panic(_("option `e' not supported")); 1320#endif 1321 } 1322 1323 if (sub->print & 2) 1324 output_line(line.active, line.length, line.chomped, &output_file); 1325 if (sub->outf) 1326 output_line(line.active, line.length, line.chomped, sub->outf); 1327} 1328 1329#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION 1330/* Used to attempt a simple-minded optimization. 
*/ 1331 1332static countT branches; 1333 1334static countT count_branches P_((struct vector *)); 1335static countT 1336count_branches(program) 1337 struct vector *program; 1338{ 1339 struct sed_cmd *cur_cmd = program->v; 1340 countT isn_cnt = program->v_length; 1341 countT cnt = 0; 1342 1343 while (isn_cnt-- > 0) 1344 { 1345 switch (cur_cmd->cmd) 1346 { 1347 case 'b': 1348 case 't': 1349 case 'T': 1350 case '{': 1351 ++cnt; 1352 } 1353 } 1354 return cnt; 1355} 1356 1357static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *)); 1358static struct sed_cmd * 1359shrink_program(vec, cur_cmd) 1360 struct vector *vec; 1361 struct sed_cmd *cur_cmd; 1362{ 1363 struct sed_cmd *v = vec->v; 1364 struct sed_cmd *last_cmd = v + vec->v_length; 1365 struct sed_cmd *p; 1366 countT cmd_cnt; 1367 1368 for (p=v; p < cur_cmd; ++p) 1369 if (p->cmd != '#') 1370 MEMCPY(v++, p, sizeof *v); 1371 cmd_cnt = v - vec->v; 1372 1373 for (; p < last_cmd; ++p) 1374 if (p->cmd != '#') 1375 MEMCPY(v++, p, sizeof *v); 1376 vec->v_length = v - vec->v; 1377 1378 return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0; 1379} 1380#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ 1381 1382/* Execute the program `vec' on the current input line. 1383 Return exit status if caller should quit, -1 otherwise. 
*/ 1384static int execute_program P_((struct vector *, struct input *)); 1385static int 1386execute_program(vec, input) 1387 struct vector *vec; 1388 struct input *input; 1389{ 1390 struct sed_cmd *cur_cmd; 1391 struct sed_cmd *end_cmd; 1392 1393 cur_cmd = vec->v; 1394 end_cmd = vec->v + vec->v_length; 1395 while (cur_cmd < end_cmd) 1396 { 1397 if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang) 1398 { 1399 switch (cur_cmd->cmd) 1400 { 1401 case 'a': 1402 { 1403 struct append_queue *aq = next_append_slot(); 1404 aq->text = cur_cmd->x.cmd_txt.text; 1405 aq->textlen = cur_cmd->x.cmd_txt.text_length; 1406 } 1407 break; 1408 1409 case '{': 1410 case 'b': 1411 cur_cmd = vec->v + cur_cmd->x.jump_index; 1412 continue; 1413 1414 case '}': 1415 case '#': 1416 case ':': 1417 /* Executing labels and block-ends are easy. */ 1418 break; 1419 1420 case 'c': 1421 if (cur_cmd->range_state != RANGE_ACTIVE) 1422 output_line(cur_cmd->x.cmd_txt.text, 1423 cur_cmd->x.cmd_txt.text_length - 1, true, 1424 &output_file); 1425 /* POSIX.2 is silent about c starting a new cycle, 1426 but it seems to be expected (and make sense). 
*/ 1427 /* Fall Through */ 1428 case 'd': 1429 return -1; 1430 1431 case 'D': 1432 { 1433 char *p = memchr(line.active, '\n', line.length); 1434 if (!p) 1435 return -1; 1436 1437 ++p; 1438 line.alloc -= p - line.active; 1439 line.length -= p - line.active; 1440 line.active += p - line.active; 1441 1442 /* reset to start next cycle without reading a new line: */ 1443 cur_cmd = vec->v; 1444 continue; 1445 } 1446 1447 case 'e': { 1448#ifdef HAVE_POPEN 1449 FILE *pipe_fp; 1450 int cmd_length = cur_cmd->x.cmd_txt.text_length; 1451 line_reset(&s_accum, NULL); 1452 1453 if (!cmd_length) 1454 { 1455 str_append (&line, "", 1); 1456 pipe_fp = popen(line.active, "r"); 1457 } 1458 else 1459 { 1460 cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0; 1461 pipe_fp = popen(cur_cmd->x.cmd_txt.text, "r"); 1462 output_missing_newline(&output_file); 1463 } 1464 1465 if (pipe_fp != NULL) 1466 { 1467 char buf[4096]; 1468 int n; 1469 while (!feof (pipe_fp)) 1470 if ((n = fread (buf, sizeof(char), 4096, pipe_fp)) > 0) 1471 { 1472 if (!cmd_length) 1473 str_append(&s_accum, buf, n); 1474 else 1475 ck_fwrite(buf, 1, n, output_file.fp); 1476 } 1477 1478 pclose (pipe_fp); 1479 if (!cmd_length) 1480 { 1481 /* Store into pattern space for plain `e' commands */ 1482 if (s_accum.length && 1483 s_accum.active[s_accum.length - 1] == '\n') 1484 s_accum.length--; 1485 1486 /* Exchange line and s_accum. This can be much 1487 cheaper than copying s_accum.active into line.text 1488 (for huge lines). See comment above for 'g' as 1489 to while the third argument is incorrect anyway. */ 1490 line_exchange(&line, &s_accum, true); 1491 } 1492 else 1493 flush_output(output_file.fp); 1494 1495 } 1496 else 1497 panic(_("error in subprocess")); 1498#else 1499 panic(_("`e' command not supported")); 1500#endif 1501 break; 1502 } 1503 1504 case 'g': 1505 /* We do not have a really good choice for the third parameter. 
1506 The problem is that hold space and the input file might as 1507 well have different states; copying it from hold space means 1508 that subsequent input might be read incorrectly, while 1509 keeping it as in pattern space means that commands operating 1510 on the moved buffer might consider a wrong character set. 1511 We keep it true because it's what sed <= 4.1.5 did. */ 1512 line_copy(&hold, &line, true); 1513 break; 1514 1515 case 'G': 1516 /* We do not have a really good choice for the third parameter. 1517 The problem is that hold space and pattern space might as 1518 well have different states. So, true is as wrong as false. 1519 We keep it true because it's what sed <= 4.1.5 did, but 1520 we could consider having line_ap. */ 1521 line_append(&hold, &line, true); 1522 break; 1523 1524 case 'h': 1525 /* Here, it is ok to have true. */ 1526 line_copy(&line, &hold, true); 1527 break; 1528 1529 case 'H': 1530 /* See comment above for 'G' regarding the third parameter. */ 1531 line_append(&line, &hold, true); 1532 break; 1533 1534 case 'i': 1535 output_line(cur_cmd->x.cmd_txt.text, 1536 cur_cmd->x.cmd_txt.text_length - 1, 1537 true, &output_file); 1538 break; 1539 1540 case 'l': 1541 do_list(cur_cmd->x.int_arg == -1 1542 ? lcmd_out_line_len 1543 : cur_cmd->x.int_arg); 1544 break; 1545 1546 case 'L': 1547 output_missing_newline(&output_file); 1548 fmt(line.active, line.active + line.length, 1549 cur_cmd->x.int_arg == -1 1550 ? 
lcmd_out_line_len 1551 : cur_cmd->x.int_arg, 1552 output_file.fp); 1553 flush_output(output_file.fp); 1554 break; 1555 1556 case 'n': 1557 if (!no_default_output) 1558 output_line(line.active, line.length, line.chomped, &output_file); 1559 if (test_eof(input) || !read_pattern_space(input, vec, false)) 1560 return -1; 1561 break; 1562 1563 case 'N': 1564 str_append(&line, "\n", 1); 1565 1566 if (test_eof(input) || !read_pattern_space(input, vec, true)) 1567 { 1568 line.length--; 1569 if (posixicity == POSIXLY_EXTENDED && !no_default_output) 1570 output_line(line.active, line.length, line.chomped, 1571 &output_file); 1572 return -1; 1573 } 1574 break; 1575 1576 case 'p': 1577 output_line(line.active, line.length, line.chomped, &output_file); 1578 break; 1579 1580 case 'P': 1581 { 1582 char *p = memchr(line.active, '\n', line.length); 1583 output_line(line.active, p ? p - line.active : line.length, 1584 p ? true : line.chomped, &output_file); 1585 } 1586 break; 1587 1588 case 'q': 1589 if (!no_default_output) 1590 output_line(line.active, line.length, line.chomped, &output_file); 1591 dump_append_queue(); 1592 1593 case 'Q': 1594 return cur_cmd->x.int_arg == -1 ? 
0 : cur_cmd->x.int_arg; 1595 1596 case 'r': 1597 if (cur_cmd->x.fname) 1598 { 1599 struct append_queue *aq = next_append_slot(); 1600 aq->fname = cur_cmd->x.fname; 1601 } 1602 break; 1603 1604 case 'R': 1605 if (cur_cmd->x.fp && !feof (cur_cmd->x.fp)) 1606 { 1607 struct append_queue *aq; 1608 size_t buflen; 1609 char *text = NULL; 1610 int result; 1611 1612 result = ck_getline (&text, &buflen, cur_cmd->x.fp); 1613 if (result != EOF) 1614 { 1615 aq = next_append_slot(); 1616 aq->free = true; 1617 aq->text = text; 1618 aq->textlen = result; 1619 } 1620 } 1621 break; 1622 1623 case 's': 1624 do_subst(cur_cmd->x.cmd_subst); 1625 break; 1626 1627 case 't': 1628 if (replaced) 1629 { 1630 replaced = false; 1631 cur_cmd = vec->v + cur_cmd->x.jump_index; 1632 continue; 1633 } 1634 break; 1635 1636 case 'T': 1637 if (!replaced) 1638 { 1639 cur_cmd = vec->v + cur_cmd->x.jump_index; 1640 continue; 1641 } 1642 else 1643 replaced = false; 1644 break; 1645 1646 case 'w': 1647 if (cur_cmd->x.fp) 1648 output_line(line.active, line.length, 1649 line.chomped, cur_cmd->x.outf); 1650 break; 1651 1652 case 'W': 1653 if (cur_cmd->x.fp) 1654 { 1655 char *p = memchr(line.active, '\n', line.length); 1656 output_line(line.active, p ? p - line.active : line.length, 1657 p ? true : line.chomped, cur_cmd->x.outf); 1658 } 1659 break; 1660 1661 case 'x': 1662 /* See comment above for 'g' regarding the third parameter. */ 1663 line_exchange(&line, &hold, false); 1664 break; 1665 1666 case 'y': 1667 { 1668#ifdef HAVE_MBRTOWC 1669 if (mb_cur_max > 1) 1670 { 1671 int idx, prev_idx; /* index in the input line. */ 1672 char **trans; 1673 mbstate_t mbstate; 1674 memset(&mbstate, 0, sizeof(mbstate_t)); 1675 for (idx = 0; idx < line.length;) 1676 { 1677 int mbclen, i; 1678 mbclen = MBRLEN (line.active + idx, line.length - idx, 1679 &mbstate); 1680 /* An invalid sequence, or a truncated multibyte 1681 character. We treat it as a singlebyte character. 
1682 */ 1683 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 1684 || mbclen == 0) 1685 mbclen = 1; 1686 1687 trans = cur_cmd->x.translatemb; 1688 /* `i' indicate i-th translate pair. */ 1689 for (i = 0; trans[2*i] != NULL; i++) 1690 { 1691 if (strncmp(line.active + idx, trans[2*i], mbclen) == 0) 1692 { 1693 bool move_remain_buffer = false; 1694 int trans_len = strlen(trans[2*i+1]); 1695 1696 if (mbclen < trans_len) 1697 { 1698 int new_len; 1699 new_len = line.length + 1 + trans_len - mbclen; 1700 /* We must extend the line buffer. */ 1701 if (line.alloc < new_len) 1702 { 1703 /* And we must resize the buffer. */ 1704 resize_line(&line, new_len); 1705 } 1706 move_remain_buffer = true; 1707 } 1708 else if (mbclen > trans_len) 1709 { 1710 /* We must truncate the line buffer. */ 1711 move_remain_buffer = true; 1712 } 1713 prev_idx = idx; 1714 if (move_remain_buffer) 1715 { 1716 int move_len, move_offset; 1717 char *move_from, *move_to; 1718 /* Move the remaining with \0. */ 1719 move_from = line.active + idx + mbclen; 1720 move_to = line.active + idx + trans_len; 1721 move_len = line.length + 1 - idx - mbclen; 1722 move_offset = trans_len - mbclen; 1723 memmove(move_to, move_from, move_len); 1724 line.length += move_offset; 1725 idx += move_offset; 1726 } 1727 strncpy(line.active + prev_idx, trans[2*i+1], 1728 trans_len); 1729 break; 1730 } 1731 } 1732 idx += mbclen; 1733 } 1734 } 1735 else 1736#endif /* HAVE_MBRTOWC */ 1737 { 1738 unsigned char *p, *e; 1739 p = CAST(unsigned char *)line.active; 1740 for (e=p+line.length; p<e; ++p) 1741 *p = cur_cmd->x.translate[*p]; 1742 } 1743 } 1744 break; 1745 1746 case 'z': 1747 line.length = 0; 1748 break; 1749 1750 case '=': 1751 output_missing_newline(&output_file); 1752 fprintf(output_file.fp, "%lu\n", 1753 CAST(unsigned long)input->line_number); 1754 flush_output(output_file.fp); 1755 break; 1756 1757 default: 1758 panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd); 1759 } 1760 } 1761 1762#ifdef 
EXPERIMENTAL_DASH_N_OPTIMIZATION 1763 /* If our top-level program consists solely of commands with 1764 ADDR_IS_NUM addresses then once we past the last mentioned 1765 line we should be able to quit if no_default_output is true, 1766 or otherwise quickly copy input to output. Now whether this 1767 optimization is a win or not depends on how cheaply we can 1768 implement this for the cases where it doesn't help, as 1769 compared against how much time is saved. One semantic 1770 difference (which I think is an improvement) is that *this* 1771 version will terminate after printing line two in the script 1772 "yes | sed -n 2p". 1773 1774 Don't use this when in-place editing is active, because line 1775 numbers restart each time then. */ 1776 else if (!separate_files) 1777 { 1778 if (cur_cmd->a1->addr_type == ADDR_IS_NUM 1779 && (cur_cmd->a2 1780 ? cur_cmd->range_state == RANGE_CLOSED 1781 : cur_cmd->a1->addr_number < input->line_number)) 1782 { 1783 /* Skip this address next time */ 1784 cur_cmd->addr_bang = !cur_cmd->addr_bang; 1785 cur_cmd->a1->addr_type = ADDR_IS_NULL; 1786 if (cur_cmd->a2) 1787 cur_cmd->a2->addr_type = ADDR_IS_NULL; 1788 1789 /* can we make an optimization? 
*/ 1790 if (cur_cmd->addr_bang) 1791 { 1792 if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't' 1793 || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}') 1794 branches--; 1795 1796 cur_cmd->cmd = '#'; /* replace with no-op */ 1797 if (branches == 0) 1798 cur_cmd = shrink_program(vec, cur_cmd); 1799 if (!cur_cmd && no_default_output) 1800 return 0; 1801 end_cmd = vec->v + vec->v_length; 1802 if (!cur_cmd) 1803 cur_cmd = end_cmd; 1804 continue; 1805 } 1806 } 1807 } 1808#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ 1809 1810 /* this is buried down here so that a "continue" statement can skip it */ 1811 ++cur_cmd; 1812 } 1813 1814 if (!no_default_output) 1815 output_line(line.active, line.length, line.chomped, &output_file); 1816 return -1; 1817} 1818 1819 1820 1821/* Apply the compiled script to all the named files. */ 1822int 1823process_files(the_program, argv) 1824 struct vector *the_program; 1825 char **argv; 1826{ 1827 static char dash[] = "-"; 1828 static char *stdin_argv[2] = { dash, NULL }; 1829 struct input input; 1830 int status; 1831 1832 line_init(&line, NULL, INITIAL_BUFFER_SIZE); 1833 line_init(&hold, NULL, 0); 1834 line_init(&buffer, NULL, 0); 1835 1836#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION 1837 branches = count_branches(the_program); 1838#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ 1839 input.reset_at_next_file = true; 1840 if (argv && *argv) 1841 input.file_list = argv; 1842 else if (in_place_extension) 1843 panic(_("no input files")); 1844 else 1845 input.file_list = stdin_argv; 1846 1847 input.bad_count = 0; 1848 input.line_number = 0; 1849 input.read_fn = read_always_fail; 1850 input.fp = NULL; 1851 1852 status = EXIT_SUCCESS; 1853 while (read_pattern_space(&input, the_program, false)) 1854 { 1855 status = execute_program(the_program, &input); 1856 if (status == -1) 1857 status = EXIT_SUCCESS; 1858 else 1859 break; 1860 } 1861 closedown(&input); 1862 1863#ifdef DEBUG_LEAKS 1864 /* We're about to exit, so these free()s are redundant. 
1865 But if we're running under a memory-leak detecting 1866 implementation of malloc(), we want to explicitly 1867 deallocate in order to avoid extraneous noise from 1868 the allocator. */ 1869 release_append_queue(); 1870 FREE(buffer.text); 1871 FREE(hold.text); 1872 FREE(line.text); 1873 FREE(s_accum.text); 1874#endif /*DEBUG_LEAKS*/ 1875 1876 if (input.bad_count) 1877 status = 2; 1878 1879 return status; 1880} 1881