1/*  GNU SED, a batch stream editor.
2    Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2009
3    Free Software Foundation, Inc.
4
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19#undef EXPERIMENTAL_DASH_N_OPTIMIZATION	/*don't use -- is very buggy*/
20#define INITIAL_BUFFER_SIZE	50
21#define FREAD_BUFFER_SIZE	8192
22
23#include "sed.h"
24
25#include <stddef.h>
26#include <stdio.h>
27#include <ctype.h>
28
29#include <errno.h>
30#ifndef errno
31extern int errno;
32#endif
33
34#ifndef BOOTSTRAP
35#include <selinux/selinux.h>
36#include <selinux/context.h>
37#endif
38
39#ifdef HAVE_UNISTD_H
40# include <unistd.h>
41#endif
42
43#ifndef BOOTSTRAP
44#include "acl.h"
45#endif
46
47#ifdef __GNUC__
48# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
49   /* silence warning about unused parameter even for "gcc -W -Wunused" */
50#  define UNUSED	__attribute__((unused))
51# endif
52#endif
53#ifndef UNUSED
54# define UNUSED
55#endif
56
57#ifdef HAVE_STRINGS_H
58# include <strings.h>
59#else
60# include <string.h>
61#endif /*HAVE_STRINGS_H*/
62#ifdef HAVE_MEMORY_H
63# include <memory.h>
64#endif
65
66#ifndef HAVE_STRCHR
67# define strchr index
68# define strrchr rindex
69#endif
70
71#ifdef HAVE_STDLIB_H
72# include <stdlib.h>
73#endif
74#ifndef EXIT_SUCCESS
75# define EXIT_SUCCESS 0
76#endif
77
78#ifdef HAVE_SYS_TYPES_H
79# include <sys/types.h>
80#endif
81
82#include <sys/stat.h>
83#include "stat-macros.h"
84
85
/* Sed operates a line at a time.  A `struct line' is a growable text
   buffer.  `text' is the start of the allocation and `active' points at
   the part that is still in use; resize_line() and line_copy() account
   for the gap between the two when they grow or reuse the buffer.  */
struct line {
  char *text;		/* Pointer to line allocated by malloc. */
  char *active;		/* Pointer to non-consumed part of text. */
  size_t length;	/* Length of text (or active, if used). */
  size_t alloc;		/* Allocated space for active. */
  bool chomped;		/* Was a trailing newline dropped? */
#ifdef HAVE_MBRTOWC
  mbstate_t mbstate;	/* Multibyte conversion state at the end of the text. */
#endif
};

/* Number of leading bytes of a `struct line' that exclude the multibyte
   state (mbstate is the last member).  line_exchange() copies this many
   bytes when the multibyte state must not be swapped.  */
#ifdef HAVE_MBRTOWC
#define SIZEOF_LINE	offsetof (struct line, mbstate)
#else
#define SIZEOF_LINE	(sizeof (struct line))
#endif
103
/* A queue of text to write out at the end of a cycle
   (filled by the "a", "r" and "R" commands.)  Each node may carry
   literal text, a file name, or both; dump_append_queue() writes the
   text first and then the contents of the named file.  */
struct append_queue {
  const char *fname;		/* File whose contents are copied to the output, or NULL. */
  char *text;			/* Literal text to write, or NULL. */
  size_t textlen;		/* Number of bytes of `text' to write. */
  struct append_queue *next;	/* Next queued entry, or NULL at the tail. */
  bool free;			/* If true, release_append_queue() frees `text'. */
};
113
/* State information for the input stream. */
struct input {
  /* The list of yet-to-be-opened files.  It is invalid for file_list
     to be NULL.  When *file_list is NULL we are currently processing
     the last file.  */

  char **file_list;

  /* Count of files we failed to open. */
  countT bad_count;

  /* Current input line number (over all files).  */
  countT line_number;

  /* True if we'll reset line numbers and addresses before
     starting to process the next (possibly the first) file.  */
  bool reset_at_next_file;

  /* Function to read one line.  If FP is NULL, read_fn better not
     be one which uses fp; in particular, read_always_fail() is
     recommended. */
  bool (*read_fn) P_((struct input *));	/* read one line */

  /* Name of the temporary output file used for in-place editing; filled
     in by ck_mkstemp() in open_next_file() and freed in closedown().  */
  char *out_file_name;

  /* Name of the file being edited in place (after optional symlink
     resolution); set in open_next_file() only when in-place editing.  */
  const char *in_file_name;

  /* Owner and mode to be set just before closing the file.  */
  struct stat st;

  /* if NULL, none of the following are valid */
  FILE *fp;

  /* NOTE(review): not referenced in the visible part of this file;
     presumably requests unbuffered reads -- confirm against its users.  */
  bool no_buffering;
};
149
150
/* Have we done any replacements lately?  This is used by the `t' command. */
static bool replaced = false;

/* The current output file (stdout if -i is not being used). */
static struct output output_file;

/* The `current' input line. */
static struct line line;

/* An input line used to accumulate the result of the s and e commands. */
static struct line s_accum;

/* An input line that's been stored for later use by the program */
static struct line hold;

/* The buffered input look-ahead.  The only field that should be
   used outside of read_mem_line() or line_init() is buffer.length. */
static struct line buffer;

/* Head and tail of the queue of pending appended output; both are NULL
   when the queue is empty (see next_append_slot() and
   release_append_queue()).  */
static struct append_queue *append_head = NULL;
static struct append_queue *append_tail = NULL;
172
173
174#ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
   memchr(), and ../lib/memchr.c requires configuration knowledge
   about how many bits are in a `long'.  This implementation
   is far from ideal, but it should get us up-and-limping well
   enough to run the configure script, which is all that matters.
*/
181# ifdef memchr
182#  undef memchr
183# endif
184# define memchr bootstrap_memchr
185
186static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
187static VOID *
188bootstrap_memchr(s, c, n)
189  const VOID *s;
190  int c;
191  size_t n;
192{
193  char *p;
194
195  for (p=(char *)s; n-- > 0; ++p)
196    if (*p == c)
197      return p;
198  return CAST(VOID *)0;
199}
200#endif /*BOOTSTRAP*/
201
202/* increase a struct line's length, making some attempt at
203   keeping realloc() calls under control by padding for future growth.  */
204static void resize_line P_((struct line *, size_t));
205static void
206resize_line(lb, len)
207  struct line *lb;
208  size_t len;
209{
210  int inactive;
211  inactive = lb->active - lb->text;
212
213  /* If the inactive part has got to more than two thirds of the buffer,
214   * remove it. */
215  if (inactive > lb->alloc * 2)
216    {
217      MEMMOVE(lb->text, lb->active, lb->length);
218      lb->alloc += lb->active - lb->text;
219      lb->active = lb->text;
220      inactive = 0;
221
222      if (lb->alloc > len)
223	return;
224    }
225
226  lb->alloc *= 2;
227  if (lb->alloc < len)
228    lb->alloc = len;
229  if (lb->alloc < INITIAL_BUFFER_SIZE)
230    lb->alloc = INITIAL_BUFFER_SIZE;
231
232  lb->text = REALLOC(lb->text, inactive + lb->alloc, char);
233  lb->active = lb->text + inactive;
234}
235
236/* Append `length' bytes from `string' to the line `to'. */
237static void str_append P_((struct line *, const char *, size_t));
238static void
239str_append(to, string, length)
240  struct line *to;
241  const char *string;
242  size_t length;
243{
244  size_t new_length = to->length + length;
245
246  if (to->alloc < new_length)
247    resize_line(to, new_length);
248  MEMCPY(to->active + to->length, string, length);
249  to->length = new_length;
250
251#ifdef HAVE_MBRTOWC
252  if (mb_cur_max > 1 && !is_utf8)
253    while (length)
254      {
255        size_t n = MBRLEN (string, length, &to->mbstate);
256
257        /* An invalid sequence is treated like a singlebyte character. */
258        if (n == (size_t) -1)
259	  {
260	    memset (&to->mbstate, 0, sizeof (to->mbstate));
261	    n = 1;
262	  }
263
264        if (n > 0)
265	  {
266	    string += n;
267	    length -= n;
268	  }
269        else
270	  break;
271      }
272#endif
273}
274
275static void str_append_modified P_((struct line *, const char *, size_t,
276				    enum replacement_types));
277static void
278str_append_modified(to, string, length, type)
279  struct line *to;
280  const char *string;
281  size_t length;
282  enum replacement_types type;
283{
284#ifdef HAVE_MBRTOWC
285  mbstate_t from_stat;
286
287  if (type == REPL_ASIS)
288    {
289      str_append(to, string, length);
290      return;
291    }
292
293  if (to->alloc - to->length < length * mb_cur_max)
294    resize_line(to, to->length + length * mb_cur_max);
295
296  MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
297  while (length)
298    {
299      wchar_t wc;
300      int n = MBRTOWC (&wc, string, length, &from_stat);
301
302      /* An invalid sequence is treated like a singlebyte character. */
303      if (n == -1)
304        {
305          memset (&to->mbstate, 0, sizeof (from_stat));
306          n = 1;
307        }
308
309      if (n > 0)
310        string += n, length -= n;
311      else
312	{
313	  /* Incomplete sequence, copy it manually.  */
314	  str_append(to, string, length);
315	  return;
316	}
317
318      /* Convert the first character specially... */
319      if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
320	{
321          if (type & REPL_UPPERCASE_FIRST)
322            wc = towupper(wc);
323          else
324            wc = towlower(wc);
325
326          type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
327	  if (type == REPL_ASIS)
328	    {
329	      n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
330	      to->length += n;
331	      str_append(to, string, length);
332	      return;
333	    }
334        }
335
336      else if (type & REPL_UPPERCASE)
337        wc = towupper(wc);
338      else
339        wc = towlower(wc);
340
341      /* Copy the new wide character to the end of the string. */
342      n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
343      to->length += n;
344      if (n == -1)
345	{
346	  fprintf (stderr, "Case conversion produced an invalid character!");
347	  abort ();
348	}
349    }
350#else
351  size_t old_length = to->length;
352  char *start, *end;
353
354  str_append(to, string, length);
355  start = to->active + old_length;
356  end = start + length;
357
358  /* Now do the required modifications.  First \[lu]... */
359  if (type & REPL_UPPERCASE_FIRST)
360    {
361      *start = toupper(*start);
362      start++;
363      type &= ~REPL_UPPERCASE_FIRST;
364    }
365  else if (type & REPL_LOWERCASE_FIRST)
366    {
367      *start = tolower(*start);
368      start++;
369      type &= ~REPL_LOWERCASE_FIRST;
370    }
371
372  if (type == REPL_ASIS)
373    return;
374
375  /* ...and then \[LU] */
376  if (type == REPL_UPPERCASE)
377    for (; start != end; start++)
378      *start = toupper(*start);
379  else
380    for (; start != end; start++)
381      *start = tolower(*start);
382#endif
383}
384
385/* Initialize a "struct line" buffer.  Copy multibyte state from `state'
386   if not null.  */
387static void line_init P_((struct line *, struct line *, size_t initial_size));
388static void
389line_init(buf, state, initial_size)
390  struct line *buf;
391  struct line *state;
392  size_t initial_size;
393{
394  buf->text = MALLOC(initial_size, char);
395  buf->active = buf->text;
396  buf->alloc = initial_size;
397  buf->length = 0;
398  buf->chomped = true;
399
400#ifdef HAVE_MBRTOWC
401  if (state)
402    memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
403  else
404    memset (&buf->mbstate, 0, sizeof (buf->mbstate));
405#endif
406}
407
408/* Reset a "struct line" buffer to length zero.  Copy multibyte state from
409   `state' if not null.  */
410static void line_reset P_((struct line *, struct line *));
411static void
412line_reset(buf, state)
413  struct line *buf, *state;
414{
415  if (buf->alloc == 0)
416    line_init(buf, state, INITIAL_BUFFER_SIZE);
417  else
418    {
419      buf->length = 0;
420#ifdef HAVE_MBRTOWC
421      if (state)
422        memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
423      else
424        memset (&buf->mbstate, 0, sizeof (buf->mbstate));
425#endif
426    }
427}
428
429/* Copy the contents of the line `from' into the line `to'.
430   This destroys the old contents of `to'.
431   Copy the multibyte state if `state' is true. */
432static void line_copy P_((struct line *from, struct line *to, int state));
433static void
434line_copy(from, to, state)
435  struct line *from;
436  struct line *to;
437  int state;
438{
439  /* Remove the inactive portion in the destination buffer. */
440  to->alloc += to->active - to->text;
441
442  if (to->alloc < from->length)
443    {
444      to->alloc *= 2;
445      if (to->alloc < from->length)
446	to->alloc = from->length;
447      if (to->alloc < INITIAL_BUFFER_SIZE)
448	to->alloc = INITIAL_BUFFER_SIZE;
449      /* Use FREE()+MALLOC() instead of REALLOC() to
450	 avoid unnecessary copying of old text. */
451      FREE(to->text);
452      to->text = MALLOC(to->alloc, char);
453    }
454
455  to->active = to->text;
456  to->length = from->length;
457  to->chomped = from->chomped;
458  MEMCPY(to->active, from->active, from->length);
459
460#ifdef HAVE_MBRTOWC
461  if (state)
462    MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
463#endif
464}
465
466/* Append the contents of the line `from' to the line `to'.
467   Copy the multibyte state if `state' is true. */
468static void line_append P_((struct line *from, struct line *to, int state));
469static void
470line_append(from, to, state)
471  struct line *from;
472  struct line *to;
473  int state;
474{
475  str_append(to, "\n", 1);
476  str_append(to, from->active, from->length);
477  to->chomped = from->chomped;
478
479#ifdef HAVE_MBRTOWC
480  if (state)
481    MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
482#endif
483}
484
485/* Exchange two "struct line" buffers.
486   Copy the multibyte state if `state' is true. */
487static void line_exchange P_((struct line *a, struct line *b, int state));
488static void
489line_exchange(a, b, state)
490  struct line *a;
491  struct line *b;
492  int state;
493{
494  struct line t;
495
496  if (state)
497    {
498      MEMCPY(&t,  a, sizeof (struct line));
499      MEMCPY( a,  b, sizeof (struct line));
500      MEMCPY( b, &t, sizeof (struct line));
501    }
502  else
503    {
504      MEMCPY(&t,  a, SIZEOF_LINE);
505      MEMCPY( a,  b, SIZEOF_LINE);
506      MEMCPY( b, &t, SIZEOF_LINE);
507    }
508}
509
510
511/* dummy function to simplify read_pattern_space() */
512static bool read_always_fail P_((struct input *));
513static bool
514read_always_fail(input)
515  struct input *input UNUSED;
516{
517  return false;
518}
519
520static bool read_file_line P_((struct input *));
521static bool
522read_file_line(input)
523  struct input *input;
524{
525  static char *b;
526  static size_t blen;
527
528  long result = ck_getline (&b, &blen, input->fp);
529  if (result <= 0)
530    return false;
531
532  /* Remove the trailing new-line that is left by getline. */
533  if (b[result - 1] == '\n')
534    --result;
535  else
536    line.chomped = false;
537
538  str_append(&line, b, result);
539  return true;
540}
541
542
543static inline void output_missing_newline P_((struct output *));
544static inline void
545output_missing_newline(outf)
546  struct output *outf;
547{
548  if (outf->missing_newline)
549    {
550      ck_fwrite("\n", 1, 1, outf->fp);
551      outf->missing_newline = false;
552    }
553}
554
555static inline void flush_output P_((FILE *));
556static inline void
557flush_output(fp)
558  FILE *fp;
559{
560  if (fp != stdout || unbuffered_output)
561    ck_fflush(fp);
562}
563
564static void output_line P_((const char *, size_t, int, struct output *));
565static void
566output_line(text, length, nl, outf)
567  const char *text;
568  size_t length;
569  int nl;
570  struct output *outf;
571{
572  if (!text)
573    return;
574
575  output_missing_newline(outf);
576  if (length)
577    ck_fwrite(text, 1, length, outf->fp);
578  if (nl)
579    ck_fwrite("\n", 1, 1, outf->fp);
580  else
581    outf->missing_newline = true;
582
583  flush_output(outf->fp);
584}
585
586static struct append_queue *next_append_slot P_((void));
587static struct append_queue *
588next_append_slot()
589{
590  struct append_queue *n = MALLOC(1, struct append_queue);
591
592  n->fname = NULL;
593  n->text = NULL;
594  n->textlen = 0;
595  n->next = NULL;
596  n->free = false;
597
598  if (append_tail)
599      append_tail->next = n;
600  else
601      append_head = n;
602  return append_tail = n;
603}
604
605static void release_append_queue P_((void));
606static void
607release_append_queue()
608{
609  struct append_queue *p, *q;
610
611  for (p=append_head; p; p=q)
612    {
613      if (p->free)
614        FREE(p->text);
615
616      q = p->next;
617      FREE(p);
618    }
619  append_head = append_tail = NULL;
620}
621
622static void dump_append_queue P_((void));
623static void
624dump_append_queue()
625{
626  struct append_queue *p;
627
628  output_missing_newline(&output_file);
629  for (p=append_head; p; p=p->next)
630    {
631      if (p->text)
632        ck_fwrite(p->text, 1, p->textlen, output_file.fp);
633
634      if (p->fname)
635	{
636	  char buf[FREAD_BUFFER_SIZE];
637	  size_t cnt;
638	  FILE *fp;
639
640	  /* "If _fname_ does not exist or cannot be read, it shall
641	     be treated as if it were an empty file, causing no error
642	     condition."  IEEE Std 1003.2-1992
643	     So, don't fail. */
644	  fp = ck_fopen(p->fname, read_mode, false);
645	  if (fp)
646	    {
647	      while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
648		ck_fwrite(buf, 1, cnt, output_file.fp);
649	      ck_fclose(fp);
650	    }
651	}
652    }
653
654  flush_output(output_file.fp);
655  release_append_queue();
656}
657
658
659/* Compute the name of the backup file for in-place editing */
660static char *get_backup_file_name P_((const char *));
661static char *
662get_backup_file_name(name)
663  const char *name;
664{
665  char *old_asterisk, *asterisk, *backup, *p;
666  int name_length = strlen(name), backup_length = strlen(in_place_extension);
667
668  /* Compute the length of the backup file */
669  for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
670       (asterisk = strchr(old_asterisk, '*'));
671       old_asterisk = asterisk + 1)
672    backup_length += name_length - 1;
673
674  p = backup = xmalloc(backup_length + 1);
675
676  /* Each iteration gobbles up to an asterisk */
677  for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
678       (asterisk = strchr(old_asterisk, '*'));
679       old_asterisk = asterisk + 1)
680    {
681      MEMCPY (p, old_asterisk, asterisk - old_asterisk);
682      p += asterisk - old_asterisk;
683      strcpy (p, name);
684      p += name_length;
685    }
686
687  /* Tack on what's after the last asterisk */
688  strcpy (p, old_asterisk);
689  return backup;
690}
691
/* Initialize a struct input for the named file.

   Opens `name' for reading ("-" means stdin unless editing in place),
   installs read_file_line() as the reader and selects the output
   stream: stdout normally, or a freshly created temporary file next to
   the input when in_place_extension is set.  If the file cannot be
   opened, a message is printed, input->bad_count is incremented and the
   reader is set to read_always_fail().  */
static void open_next_file P_((const char *name, struct input *));
static void
open_next_file(name, input)
  const char *name;
  struct input *input;
{
  /* Discard any look-ahead left over from the previous file.  */
  buffer.length = 0;

  if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
    {
      clearerr(stdin);	/* clear any stale EOF indication */
      input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false);
    }
  else if ( ! (input->fp = ck_fopen(name, read_mode, false)) )
    {
      /* Fetch the message right away, before anything can change errno.  */
      const char *ptr = strerror(errno);
      fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
      input->read_fn = read_always_fail; /* a redundancy */
      ++input->bad_count;
      return;
    }

  input->read_fn = read_file_line;

  if (in_place_extension)
    {
      int input_fd;
      char *tmpdir, *p;
#ifndef BOOTSTRAP
      security_context_t old_fscreatecon;
      int reset_fscreatecon = 0;
      memset (&old_fscreatecon, 0, sizeof (old_fscreatecon));
#endif

      if (follow_symlinks)
	input->in_file_name = follow_symlink (name);
      else
        input->in_file_name = name;

      /* get the base name */
      /* (what is kept is the directory part of the input file, or "."
	 when the name has no slash; the temporary file is created
	 there) */
      tmpdir = ck_strdup(input->in_file_name);
      if ((p = strrchr(tmpdir, '/')))
	*p = 0;
      else
	strcpy(tmpdir, ".");

      if (isatty (fileno (input->fp)))
        panic(_("couldn't edit %s: is a terminal"), input->in_file_name);

      /* Remember the attributes of the input; closedown() uses the
	 saved mode, owner and group for the replacement file.  */
      input_fd = fileno (input->fp);
      fstat (input_fd, &input->st);
      if (!S_ISREG (input->st.st_mode))
        panic(_("couldn't edit %s: not a regular file"), input->in_file_name);

#ifndef BOOTSTRAP
      if (is_selinux_enabled ())
	{
          security_context_t con;
	  if (getfilecon (input->in_file_name, &con) != -1)
	    {
	      /* Save and restore the old context for the sake of w and W
		 commands.  */
	      reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0;
	      if (setfscreatecon (con) < 0)
		fprintf (stderr, _("%s: warning: failed to set default file creation context to %s: %s"),
			 myname, con, strerror (errno));
	      freecon (con);
	    }
	  else
	    {
	      /* ENOSYS is not reported.  */
	      if (errno != ENOSYS)
		fprintf (stderr, _("%s: warning: failed to get security context of %s: %s"),
			 myname, input->in_file_name, strerror (errno));
	    }
	}
#endif

      /* Create the temporary output file in the input file's directory;
	 its name is stored in input->out_file_name.  */
      output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
      output_file.missing_newline = false;
      free (tmpdir);

#ifndef BOOTSTRAP
      /* Put back the file creation context saved above.  */
      if (reset_fscreatecon)
	{
	  setfscreatecon (old_fscreatecon);
	  freecon (old_fscreatecon);
	}
#endif

      if (!output_file.fp)
        panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
    }
  else
    output_file.fp = stdout;
}
788
789
/* Clean up an input stream that we are done with.

   The reader is always switched to read_always_fail().  If a stream is
   open it is closed; when editing in place, the temporary output file
   is additionally given the attributes of the input, closed, and
   renamed over the input file (after the input has been renamed to its
   backup name, unless the extension is just "*").  */
static void closedown P_((struct input *));
static void
closedown(input)
  struct input *input;
{
  input->read_fn = read_always_fail;
  if (!input->fp)
    return;

  if (in_place_extension && output_file.fp != NULL)
    {
      const char *target_name;
      int input_fd, output_fd;

      target_name = input->in_file_name;
      input_fd = fileno (input->fp);
      output_fd = fileno (output_file.fp);
      /* NOTE(review): copy_acl() presumably transfers the ACL and
	 permission bits of the input to the temporary file -- confirm
	 against acl.h.  */
      copy_acl (input->in_file_name, input_fd,
		input->out_file_name, output_fd,
		input->st.st_mode);
#ifdef HAVE_FCHOWN
      /* Try to give the new file the owner and group of the input; if
	 that fails, retry changing the group only (a uid of -1 leaves
	 the owner alone).  A second failure is ignored.  */
      if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1)
        fchown (output_fd, -1, input->st.st_gid);
#endif

      /* Both streams are closed before any renaming takes place.  */
      ck_fclose (input->fp);
      ck_fclose (output_file.fp);
      if (strcmp(in_place_extension, "*") != 0)
        {
          /* Keep the original under its backup name.
	     NOTE(review): the third argument of ck_rename() is presumably
	     the file to remove if the rename fails -- confirm.  */
          char *backup_file_name = get_backup_file_name(target_name);
	  ck_rename (target_name, backup_file_name, input->out_file_name);
          free (backup_file_name);
	}

      /* Move the edited temporary file into place.  */
      ck_rename (input->out_file_name, target_name, input->out_file_name);
      free (input->out_file_name);
    }
  else
    ck_fclose (input->fp);

  input->fp = NULL;
}
833
834/* Reset range commands so that they are marked as non-matching */
835static void reset_addresses P_((struct vector *));
836static void
837reset_addresses(vec)
838     struct vector *vec;
839{
840  struct sed_cmd *cur_cmd;
841  int n;
842
843  for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++)
844    if (cur_cmd->a1
845	&& cur_cmd->a1->addr_type == ADDR_IS_NUM
846	&& cur_cmd->a1->addr_number == 0)
847      cur_cmd->range_state = RANGE_ACTIVE;
848    else
849      cur_cmd->range_state = RANGE_INACTIVE;
850}
851
852/* Read in the next line of input, and store it in the pattern space.
853   Return zero if there is nothing left to input. */
854static bool read_pattern_space P_((struct input *, struct vector *, int));
855static bool
856read_pattern_space(input, the_program, append)
857  struct input *input;
858  struct vector *the_program;
859  int append;
860{
861  if (append_head) /* redundant test to optimize for common case */
862    dump_append_queue();
863  replaced = false;
864  if (!append)
865    line.length = 0;
866  line.chomped = true;  /* default, until proved otherwise */
867
868  while ( ! (*input->read_fn)(input) )
869    {
870      closedown(input);
871
872      if (!*input->file_list)
873	return false;
874
875      if (input->reset_at_next_file)
876	{
877	  input->line_number = 0;
878	  hold.length = 0;
879	  reset_addresses (the_program);
880	  rewind_read_files ();
881
882	  /* If doing in-place editing, we will never append the
883	     new-line to this file; but if the output goes to stdout,
884	     we might still have to output the missing new-line.  */
885	  if (in_place_extension)
886	    output_file.missing_newline = false;
887
888	  input->reset_at_next_file = separate_files;
889	}
890
891      open_next_file (*input->file_list++, input);
892    }
893
894  ++input->line_number;
895  return true;
896}
897
898
899static bool last_file_with_data_p P_((struct input *));
900static bool
901last_file_with_data_p(input)
902  struct input *input;
903{
904  for (;;)
905    {
906      int ch;
907
908      closedown(input);
909      if (!*input->file_list)
910	return true;
911      open_next_file(*input->file_list++, input);
912      if (input->fp)
913	{
914	  if ((ch = getc(input->fp)) != EOF)
915	    {
916	      ungetc(ch, input->fp);
917	      return false;
918	    }
919	}
920    }
921}
922
923/* Determine if we match the `$' address. */
924static bool test_eof P_((struct input *));
925static bool
926test_eof(input)
927  struct input *input;
928{
929  int ch;
930
931  if (buffer.length)
932    return false;
933  if (!input->fp)
934    return separate_files || last_file_with_data_p(input);
935  if (feof(input->fp))
936    return separate_files || last_file_with_data_p(input);
937  if ((ch = getc(input->fp)) == EOF)
938    return separate_files || last_file_with_data_p(input);
939  ungetc(ch, input->fp);
940  return false;
941}
942
943/* Return non-zero if the current line matches the address
944   pointed to by `addr'. */
945static bool match_an_address_p P_((struct addr *, struct input *));
946static bool
947match_an_address_p(addr, input)
948  struct addr *addr;
949  struct input *input;
950{
951  switch (addr->addr_type)
952    {
953    case ADDR_IS_NULL:
954      return true;
955
956    case ADDR_IS_REGEX:
957      return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0);
958
959    case ADDR_IS_NUM_MOD:
960      return (input->line_number >= addr->addr_number
961	      && ((input->line_number - addr->addr_number) % addr->addr_step) == 0);
962
963    case ADDR_IS_STEP:
964    case ADDR_IS_STEP_MOD:
965      /* reminder: these are only meaningful for a2 addresses */
966      /* a2->addr_number needs to be recomputed each time a1 address
967         matches for the step and step_mod types */
968      return (addr->addr_number <= input->line_number);
969
970    case ADDR_IS_LAST:
971      return test_eof(input);
972
973      /* ADDR_IS_NUM is handled in match_address_p.  */
974    case ADDR_IS_NUM:
975    default:
976      panic("INTERNAL ERROR: bad address type");
977    }
978  /*NOTREACHED*/
979  return false;
980}
981
/* Return non-zero if the current line is selected by the address (or
   address range) of `cmd'.

   A command without addresses matches every line, and a command with a
   single address matches whenever that address does.  For a two-address
   command, cmd->range_state records whether a range is currently open
   (RANGE_ACTIVE) or, for a numeric first address, whether it has been
   closed for good (RANGE_CLOSED); this function updates that state.  */
static bool match_address_p P_((struct sed_cmd *, struct input *));
static bool
match_address_p(cmd, input)
  struct sed_cmd *cmd;
  struct input *input;
{
  if (!cmd->a1)
    return true;

  if (cmd->range_state != RANGE_ACTIVE)
    {
      /* Find if we are going to activate a range.  Handle ADDR_IS_NUM
	 specially: it represent an "absolute" state, it should not
	 be computed like regexes.  */
      if (cmd->a1->addr_type == ADDR_IS_NUM)
	{
	  if (!cmd->a2)
	    return (input->line_number == cmd->a1->addr_number);

	  /* A numeric range does not start before its first line and is
	     never reopened once it has been closed.  */
	  if (cmd->range_state == RANGE_CLOSED
	      || input->line_number < cmd->a1->addr_number)
	    return false;
	}
      else
	{
          if (!cmd->a2)
	    return match_an_address_p(cmd->a1, input);

	  if (!match_an_address_p(cmd->a1, input))
            return false;
	}

      /* Ok, start a new range.  */
      cmd->range_state = RANGE_ACTIVE;
      switch (cmd->a2->addr_type)
	{
	case ADDR_IS_REGEX:
	  /* Always include at least two lines.  */
	  return true;
	case ADDR_IS_NUM:
	  /* Same handling as below, but always include at least one line.  */
          if (input->line_number >= cmd->a2->addr_number)
	    cmd->range_state = RANGE_CLOSED;
          return true;
	case ADDR_IS_STEP:
	  /* addr,+N: the range ends N lines after the line that opened it.  */
	  cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
	  return true;
	case ADDR_IS_STEP_MOD:
	  /* addr,~N: the range ends at the next multiple of N after the
	     current line number.  */
	  cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
				 - (input->line_number%cmd->a2->addr_step);
	  return true;
	default:
	  /* Other end addresses are tested on this very line, below.  */
	  break;
        }
    }

  /* cmd->range_state == RANGE_ACTIVE.  Check if the range is
     ending; also handle ADDR_IS_NUM specially in this case.  */

  if (cmd->a2->addr_type == ADDR_IS_NUM)
    {
      /* If the second address is a line number, and if we got past
         that line, fail to match (it can happen when you jump
	 over such addresses with `b' and `t').  Use RANGE_CLOSED
         so that the range is not re-enabled anymore.  */
      if (input->line_number >= cmd->a2->addr_number)
	cmd->range_state = RANGE_CLOSED;

      return (input->line_number <= cmd->a2->addr_number);
   }

  /* Other addresses are treated as usual.  */
  /* The line that matches the end address still belongs to the range.  */
  if (match_an_address_p(cmd->a2, input))
    cmd->range_state = RANGE_CLOSED;

  return true;
}
1060
1061
1062static void do_list P_((int line_len));
1063static void
1064do_list(line_len)
1065     int line_len;
1066{
1067  unsigned char *p = CAST(unsigned char *)line.active;
1068  countT len = line.length;
1069  countT width = 0;
1070  char obuf[180];	/* just in case we encounter a 512-bit char (;-) */
1071  char *o;
1072  size_t olen;
1073  FILE *fp = output_file.fp;
1074
1075  output_missing_newline(&output_file);
1076  for (; len--; ++p) {
1077      o = obuf;
1078
1079      /* Some locales define 8-bit characters as printable.  This makes the
1080	 testsuite fail at 8to7.sed because the `l' command in fact will not
1081	 convert the 8-bit characters. */
1082#if defined isascii || defined HAVE_ISASCII
1083      if (isascii(*p) && ISPRINT(*p)) {
1084#else
1085      if (ISPRINT(*p)) {
1086#endif
1087	  *o++ = *p;
1088	  if (*p == '\\')
1089	    *o++ = '\\';
1090      } else {
1091	  *o++ = '\\';
1092	  switch (*p) {
1093#if defined __STDC__ && __STDC__-0
1094	    case '\a': *o++ = 'a'; break;
1095#else /* Not STDC; we'll just assume ASCII */
1096	    case 007:  *o++ = 'a'; break;
1097#endif
1098	    case '\b': *o++ = 'b'; break;
1099	    case '\f': *o++ = 'f'; break;
1100	    case '\n': *o++ = 'n'; break;
1101	    case '\r': *o++ = 'r'; break;
1102	    case '\t': *o++ = 't'; break;
1103	    case '\v': *o++ = 'v'; break;
1104	    default:
1105	      sprintf(o, "%03o", *p);
1106	      o += strlen(o);
1107	      break;
1108	    }
1109      }
1110      olen = o - obuf;
1111      if (width+olen >= line_len && line_len > 0) {
1112	  ck_fwrite("\\\n", 1, 2, fp);
1113	  width = 0;
1114      }
1115      ck_fwrite(obuf, 1, olen, fp);
1116      width += olen;
1117  }
1118  ck_fwrite("$\n", 1, 2, fp);
1119  flush_output (fp);
1120}
1121
1122
1123static enum replacement_types append_replacement P_((struct line *, struct replacement *,
1124						     struct re_registers *,
1125						     enum replacement_types));
1126static enum replacement_types
1127append_replacement (buf, p, regs, repl_mod)
1128  struct line *buf;
1129  struct replacement *p;
1130  struct re_registers *regs;
1131  enum replacement_types repl_mod;
1132{
1133  for (; p; p=p->next)
1134    {
1135      int i = p->subst_id;
1136      enum replacement_types curr_type;
1137
1138      /* Apply a \[lu] modifier that was given earlier, but which we
1139         have not had yet the occasion to apply.  But don't do it
1140         if this replacement has a modifier of its own. */
1141      curr_type = (p->repl_type & REPL_MODIFIERS)
1142        ? p->repl_type
1143        : p->repl_type | repl_mod;
1144
1145      repl_mod = 0;
1146      if (p->prefix_length)
1147        {
1148          str_append_modified(buf, p->prefix, p->prefix_length,
1149    			      curr_type);
1150          curr_type &= ~REPL_MODIFIERS;
1151        }
1152
1153      if (0 <= i)
1154	{
1155          if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS)
1156            /* Save this modifier, we shall apply it later.
1157	       e.g. in s/()([a-z])/\u\1\2/
1158	       the \u modifier is applied to \2, not \1 */
1159	    repl_mod = curr_type & REPL_MODIFIERS;
1160
1161	  else if (regs->end[i] != regs->start[i])
1162	    str_append_modified(buf, line.active + regs->start[i],
1163			        CAST(size_t)(regs->end[i] - regs->start[i]),
1164			        curr_type);
1165	}
1166    }
1167
1168  return repl_mod;
1169}
1170
1171static void do_subst P_((struct subst *));
1172static void
1173do_subst(sub)
1174  struct subst *sub;
1175{
1176  size_t start = 0;	/* where to start scan for (next) match in LINE */
1177  size_t last_end = 0;  /* where did the last successful match end in LINE */
1178  countT count = 0;	/* number of matches found */
1179  bool again = true;
1180
1181  static struct re_registers regs;
1182
1183  line_reset(&s_accum, &line);
1184
1185  /* The first part of the loop optimizes s/xxx// when xxx is at the
1186     start, and s/xxx$// */
1187  if (!match_regex(sub->regx, line.active, line.length, start,
1188		   &regs, sub->max_id + 1))
1189    return;
1190
1191  if (!sub->replacement && sub->numb <= 1)
1192    {
1193      if (regs.start[0] == 0 && !sub->global)
1194        {
1195	  /* We found a match, set the `replaced' flag. */
1196	  replaced = true;
1197
1198	  line.active += regs.end[0];
1199	  line.length -= regs.end[0];
1200	  line.alloc -= regs.end[0];
1201	  goto post_subst;
1202        }
1203      else if (regs.end[0] == line.length)
1204        {
1205	  /* We found a match, set the `replaced' flag. */
1206	  replaced = true;
1207
1208	  line.length = regs.start[0];
1209	  goto post_subst;
1210        }
1211    }
1212
1213  do
1214    {
1215      enum replacement_types repl_mod = 0;
1216
1217      size_t offset = regs.start[0];
1218      size_t matched = regs.end[0] - regs.start[0];
1219
1220      /* Copy stuff to the left of this match into the output string. */
1221      if (start < offset)
1222	str_append(&s_accum, line.active + start, offset - start);
1223
1224      /* If we're counting up to the Nth match, are we there yet?
1225         And even if we are there, there is another case we have to
1226	 skip: are we matching an empty string immediately following
1227         another match?
1228
1229         This latter case avoids that baaaac, when passed through
1230         s,a*,x,g, gives `xbxxcx' instead of xbxcx.  This behavior is
1231         unacceptable because it is not consistently applied (for
1232         example, `baaaa' gives `xbx', not `xbxx'). */
1233      if ((matched > 0 || count == 0 || offset > last_end)
1234	  && ++count >= sub->numb)
1235        {
1236          /* We found a match, set the `replaced' flag. */
1237          replaced = true;
1238
1239          /* Now expand the replacement string into the output string. */
1240          repl_mod = append_replacement (&s_accum, sub->replacement, &regs, repl_mod);
1241	  again = sub->global;
1242        }
1243      else
1244	{
1245          /* The match was not replaced.  Copy the text until its
1246             end; if it was vacuous, skip over one character and
1247	     add that character to the output.  */
1248	  if (matched == 0)
1249	    {
1250	      if (start < line.length)
1251	        matched = 1;
1252	      else
1253	        break;
1254	    }
1255
1256	  str_append(&s_accum, line.active + offset, matched);
1257        }
1258
1259      /* Start after the match.  last_end is the real end of the matched
1260	 substring, excluding characters that were skipped in case the RE
1261	 matched the empty string.  */
1262      start = offset + matched;
1263      last_end = regs.end[0];
1264    }
1265  while (again
1266	 && start <= line.length
1267	 && match_regex(sub->regx, line.active, line.length, start,
1268			&regs, sub->max_id + 1));
1269
1270  /* Copy stuff to the right of the last match into the output string. */
1271  if (start < line.length)
1272    str_append(&s_accum, line.active + start, line.length-start);
1273  s_accum.chomped = line.chomped;
1274
1275  /* Exchange line and s_accum.  This can be much cheaper
1276     than copying s_accum.active into line.text (for huge lines). */
1277  line_exchange(&line, &s_accum, false);
1278
1279  /* Finish up. */
1280  if (count < sub->numb)
1281    return;
1282
1283 post_subst:
1284  if (sub->print & 1)
1285    output_line(line.active, line.length, line.chomped, &output_file);
1286
1287  if (sub->eval)
1288    {
1289#ifdef HAVE_POPEN
1290      FILE *pipe_fp;
1291      line_reset(&s_accum, NULL);
1292
1293      str_append (&line, "", 1);
1294      pipe_fp = popen(line.active, "r");
1295
1296      if (pipe_fp != NULL)
1297	{
1298	  while (!feof (pipe_fp))
1299	    {
1300	      char buf[4096];
1301	      int n = fread (buf, sizeof(char), 4096, pipe_fp);
1302	      if (n > 0)
1303		str_append(&s_accum, buf, n);
1304	    }
1305
1306	  pclose (pipe_fp);
1307
1308	  /* Exchange line and s_accum.  This can be much cheaper than copying
1309	     s_accum.active into line.text (for huge lines).  See comment above
1310	     for 'g' as to while the third argument is incorrect anyway.  */
1311	  line_exchange(&line, &s_accum, true);
1312	  if (line.length &&
1313	      line.active[line.length - 1] == '\n')
1314	    line.length--;
1315	}
1316      else
1317	panic(_("error in subprocess"));
1318#else
1319      panic(_("option `e' not supported"));
1320#endif
1321    }
1322
1323  if (sub->print & 2)
1324    output_line(line.active, line.length, line.chomped, &output_file);
1325  if (sub->outf)
1326    output_line(line.active, line.length, line.chomped, sub->outf);
1327}
1328
1329#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
/* Used to attempt a simple-minded optimization. */

/* Running count of branch-type commands in the program.  It is initialised
   from count_branches() in process_files() and decremented in
   execute_program() when such a command is turned into a `#' no-op;
   shrink_program() is only called once it has dropped to zero.  */
static countT branches;
1333
1334static countT count_branches P_((struct vector *));
1335static countT
1336count_branches(program)
1337  struct vector *program;
1338{
1339  struct sed_cmd *cur_cmd = program->v;
1340  countT isn_cnt = program->v_length;
1341  countT cnt = 0;
1342
1343  while (isn_cnt-- > 0)
1344    {
1345      switch (cur_cmd->cmd)
1346	{
1347	case 'b':
1348	case 't':
1349	case 'T':
1350	case '{':
1351	  ++cnt;
1352	}
1353    }
1354  return cnt;
1355}
1356
1357static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
1358static struct sed_cmd *
1359shrink_program(vec, cur_cmd)
1360  struct vector *vec;
1361  struct sed_cmd *cur_cmd;
1362{
1363  struct sed_cmd *v = vec->v;
1364  struct sed_cmd *last_cmd = v + vec->v_length;
1365  struct sed_cmd *p;
1366  countT cmd_cnt;
1367
1368  for (p=v; p < cur_cmd; ++p)
1369    if (p->cmd != '#')
1370      MEMCPY(v++, p, sizeof *v);
1371  cmd_cnt = v - vec->v;
1372
1373  for (; p < last_cmd; ++p)
1374    if (p->cmd != '#')
1375      MEMCPY(v++, p, sizeof *v);
1376  vec->v_length = v - vec->v;
1377
1378  return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0;
1379}
1380#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1381
1382/* Execute the program `vec' on the current input line.
1383   Return exit status if caller should quit, -1 otherwise. */
1384static int execute_program P_((struct vector *, struct input *));
1385static int
1386execute_program(vec, input)
1387  struct vector *vec;
1388  struct input *input;
1389{
1390  struct sed_cmd *cur_cmd;
1391  struct sed_cmd *end_cmd;
1392
1393  cur_cmd = vec->v;
1394  end_cmd = vec->v + vec->v_length;
1395  while (cur_cmd < end_cmd)
1396    {
1397      if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
1398	{
1399	  switch (cur_cmd->cmd)
1400	    {
1401	    case 'a':
1402	      {
1403		struct append_queue *aq = next_append_slot();
1404		aq->text = cur_cmd->x.cmd_txt.text;
1405		aq->textlen = cur_cmd->x.cmd_txt.text_length;
1406	      }
1407	      break;
1408
1409	    case '{':
1410	    case 'b':
1411	      cur_cmd = vec->v + cur_cmd->x.jump_index;
1412	      continue;
1413
1414	    case '}':
1415	    case '#':
1416	    case ':':
1417	      /* Executing labels and block-ends are easy. */
1418	      break;
1419
1420	    case 'c':
1421	      if (cur_cmd->range_state != RANGE_ACTIVE)
1422		output_line(cur_cmd->x.cmd_txt.text,
1423			    cur_cmd->x.cmd_txt.text_length - 1, true,
1424			    &output_file);
1425	      /* POSIX.2 is silent about c starting a new cycle,
1426		 but it seems to be expected (and make sense). */
1427	      /* Fall Through */
1428	    case 'd':
1429	      return -1;
1430
1431	    case 'D':
1432	      {
1433		char *p = memchr(line.active, '\n', line.length);
1434		if (!p)
1435		  return -1;
1436
1437		++p;
1438		line.alloc -= p - line.active;
1439		line.length -= p - line.active;
1440		line.active += p - line.active;
1441
1442		/* reset to start next cycle without reading a new line: */
1443		cur_cmd = vec->v;
1444		continue;
1445	      }
1446
1447	    case 'e': {
1448#ifdef HAVE_POPEN
1449	      FILE *pipe_fp;
1450	      int cmd_length = cur_cmd->x.cmd_txt.text_length;
1451	      line_reset(&s_accum, NULL);
1452
1453	      if (!cmd_length)
1454		{
1455		  str_append (&line, "", 1);
1456		  pipe_fp = popen(line.active, "r");
1457		}
1458	      else
1459		{
1460		  cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
1461		  pipe_fp = popen(cur_cmd->x.cmd_txt.text, "r");
1462                  output_missing_newline(&output_file);
1463		}
1464
1465	      if (pipe_fp != NULL)
1466		{
1467		  char buf[4096];
1468		  int n;
1469		  while (!feof (pipe_fp))
1470		    if ((n = fread (buf, sizeof(char), 4096, pipe_fp)) > 0)
1471		      {
1472			if (!cmd_length)
1473			  str_append(&s_accum, buf, n);
1474			else
1475			  ck_fwrite(buf, 1, n, output_file.fp);
1476		      }
1477
1478		  pclose (pipe_fp);
1479		  if (!cmd_length)
1480		    {
1481		      /* Store into pattern space for plain `e' commands */
1482		      if (s_accum.length &&
1483			  s_accum.active[s_accum.length - 1] == '\n')
1484			s_accum.length--;
1485
1486		      /* Exchange line and s_accum.  This can be much
1487			 cheaper than copying s_accum.active into line.text
1488			 (for huge lines).  See comment above for 'g' as
1489			 to while the third argument is incorrect anyway.  */
1490		      line_exchange(&line, &s_accum, true);
1491		    }
1492                  else
1493                    flush_output(output_file.fp);
1494
1495		}
1496	      else
1497		panic(_("error in subprocess"));
1498#else
1499	      panic(_("`e' command not supported"));
1500#endif
1501	      break;
1502	    }
1503
1504	    case 'g':
1505	      /* We do not have a really good choice for the third parameter.
1506		 The problem is that hold space and the input file might as
1507		 well have different states; copying it from hold space means
1508		 that subsequent input might be read incorrectly, while
1509		 keeping it as in pattern space means that commands operating
1510		 on the moved buffer might consider a wrong character set.
1511		 We keep it true because it's what sed <= 4.1.5 did.  */
1512	      line_copy(&hold, &line, true);
1513	      break;
1514
1515	    case 'G':
1516	      /* We do not have a really good choice for the third parameter.
1517		 The problem is that hold space and pattern space might as
1518		 well have different states.  So, true is as wrong as false.
1519		 We keep it true because it's what sed <= 4.1.5 did, but
1520		 we could consider having line_ap.  */
1521	      line_append(&hold, &line, true);
1522	      break;
1523
1524	    case 'h':
1525	      /* Here, it is ok to have true.  */
1526	      line_copy(&line, &hold, true);
1527	      break;
1528
1529	    case 'H':
1530	      /* See comment above for 'G' regarding the third parameter.  */
1531	      line_append(&line, &hold, true);
1532	      break;
1533
1534	    case 'i':
1535	      output_line(cur_cmd->x.cmd_txt.text,
1536			  cur_cmd->x.cmd_txt.text_length - 1,
1537			  true, &output_file);
1538	      break;
1539
1540	    case 'l':
1541	      do_list(cur_cmd->x.int_arg == -1
1542		      ? lcmd_out_line_len
1543		      : cur_cmd->x.int_arg);
1544	      break;
1545
1546	    case 'L':
1547              output_missing_newline(&output_file);
1548	      fmt(line.active, line.active + line.length,
1549		  cur_cmd->x.int_arg == -1
1550		  ? lcmd_out_line_len
1551		  : cur_cmd->x.int_arg,
1552		  output_file.fp);
1553              flush_output(output_file.fp);
1554	      break;
1555
1556	    case 'n':
1557	      if (!no_default_output)
1558		output_line(line.active, line.length, line.chomped, &output_file);
1559	      if (test_eof(input) || !read_pattern_space(input, vec, false))
1560		return -1;
1561	      break;
1562
1563	    case 'N':
1564	      str_append(&line, "\n", 1);
1565
1566              if (test_eof(input) || !read_pattern_space(input, vec, true))
1567                {
1568                  line.length--;
1569                  if (posixicity == POSIXLY_EXTENDED && !no_default_output)
1570                     output_line(line.active, line.length, line.chomped,
1571                                 &output_file);
1572                  return -1;
1573                }
1574	      break;
1575
1576	    case 'p':
1577	      output_line(line.active, line.length, line.chomped, &output_file);
1578	      break;
1579
1580	    case 'P':
1581	      {
1582		char *p = memchr(line.active, '\n', line.length);
1583		output_line(line.active, p ? p - line.active : line.length,
1584			    p ? true : line.chomped, &output_file);
1585	      }
1586	      break;
1587
1588            case 'q':
1589              if (!no_default_output)
1590                output_line(line.active, line.length, line.chomped, &output_file);
1591	      dump_append_queue();
1592
1593	    case 'Q':
1594	      return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
1595
1596	    case 'r':
1597	      if (cur_cmd->x.fname)
1598		{
1599		  struct append_queue *aq = next_append_slot();
1600		  aq->fname = cur_cmd->x.fname;
1601		}
1602	      break;
1603
1604	    case 'R':
1605	      if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
1606		{
1607		  struct append_queue *aq;
1608		  size_t buflen;
1609		  char *text = NULL;
1610		  int result;
1611
1612		  result = ck_getline (&text, &buflen, cur_cmd->x.fp);
1613		  if (result != EOF)
1614		    {
1615		      aq = next_append_slot();
1616		      aq->free = true;
1617		      aq->text = text;
1618		      aq->textlen = result;
1619		    }
1620		}
1621	      break;
1622
1623	    case 's':
1624	      do_subst(cur_cmd->x.cmd_subst);
1625	      break;
1626
1627	    case 't':
1628	      if (replaced)
1629		{
1630		  replaced = false;
1631		  cur_cmd = vec->v + cur_cmd->x.jump_index;
1632		  continue;
1633		}
1634	      break;
1635
1636	    case 'T':
1637	      if (!replaced)
1638		{
1639		  cur_cmd = vec->v + cur_cmd->x.jump_index;
1640		  continue;
1641		}
1642	      else
1643		replaced = false;
1644	      break;
1645
1646	    case 'w':
1647	      if (cur_cmd->x.fp)
1648		output_line(line.active, line.length,
1649			    line.chomped, cur_cmd->x.outf);
1650	      break;
1651
1652	    case 'W':
1653	      if (cur_cmd->x.fp)
1654	        {
1655		  char *p = memchr(line.active, '\n', line.length);
1656		  output_line(line.active, p ? p - line.active : line.length,
1657			      p ? true : line.chomped, cur_cmd->x.outf);
1658	        }
1659	      break;
1660
1661	    case 'x':
1662	      /* See comment above for 'g' regarding the third parameter.  */
1663	      line_exchange(&line, &hold, false);
1664	      break;
1665
1666	    case 'y':
1667	      {
1668#ifdef HAVE_MBRTOWC
1669               if (mb_cur_max > 1)
1670                 {
1671                   int idx, prev_idx; /* index in the input line.  */
1672                   char **trans;
1673                   mbstate_t mbstate;
1674                   memset(&mbstate, 0, sizeof(mbstate_t));
1675                   for (idx = 0; idx < line.length;)
1676                     {
1677                       int mbclen, i;
1678                       mbclen = MBRLEN (line.active + idx, line.length - idx,
1679                                          &mbstate);
1680                       /* An invalid sequence, or a truncated multibyte
1681                          character.  We treat it as a singlebyte character.
1682                       */
1683                       if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1684                           || mbclen == 0)
1685                         mbclen = 1;
1686
1687                       trans = cur_cmd->x.translatemb;
1688                       /* `i' indicate i-th translate pair.  */
1689                       for (i = 0; trans[2*i] != NULL; i++)
1690                         {
1691                           if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
1692                             {
1693                               bool move_remain_buffer = false;
1694                               int trans_len = strlen(trans[2*i+1]);
1695
1696                               if (mbclen < trans_len)
1697                                 {
1698                                   int new_len;
1699                                   new_len = line.length + 1 + trans_len - mbclen;
1700                                   /* We must extend the line buffer.  */
1701                                   if (line.alloc < new_len)
1702                                     {
1703                                       /* And we must resize the buffer.  */
1704                                       resize_line(&line, new_len);
1705                                     }
1706                                   move_remain_buffer = true;
1707                                 }
1708                               else if (mbclen > trans_len)
1709                                 {
1710                                   /* We must truncate the line buffer.  */
1711                                   move_remain_buffer = true;
1712                                 }
1713                               prev_idx = idx;
1714                               if (move_remain_buffer)
1715                                 {
1716                                   int move_len, move_offset;
1717                                   char *move_from, *move_to;
1718                                   /* Move the remaining with \0.  */
1719                                   move_from = line.active + idx + mbclen;
1720                                   move_to = line.active + idx + trans_len;
1721                                   move_len = line.length + 1 - idx - mbclen;
1722                                   move_offset = trans_len - mbclen;
1723                                   memmove(move_to, move_from, move_len);
1724                                   line.length += move_offset;
1725                                   idx += move_offset;
1726                                 }
1727                               strncpy(line.active + prev_idx, trans[2*i+1],
1728                                       trans_len);
1729                               break;
1730                             }
1731                         }
1732                       idx += mbclen;
1733                     }
1734                 }
1735               else
1736#endif /* HAVE_MBRTOWC */
1737                 {
1738                   unsigned char *p, *e;
1739                   p = CAST(unsigned char *)line.active;
1740                   for (e=p+line.length; p<e; ++p)
1741                     *p = cur_cmd->x.translate[*p];
1742                 }
1743	      }
1744	      break;
1745
1746	    case 'z':
1747	      line.length = 0;
1748	      break;
1749
1750	    case '=':
1751              output_missing_newline(&output_file);
1752              fprintf(output_file.fp, "%lu\n",
1753                      CAST(unsigned long)input->line_number);
1754              flush_output(output_file.fp);
1755	      break;
1756
1757	    default:
1758	      panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
1759	    }
1760	}
1761
1762#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1763      /* If our top-level program consists solely of commands with
1764         ADDR_IS_NUM addresses then once we past the last mentioned
1765         line we should be able to quit if no_default_output is true,
1766         or otherwise quickly copy input to output.  Now whether this
1767         optimization is a win or not depends on how cheaply we can
1768         implement this for the cases where it doesn't help, as
1769         compared against how much time is saved.  One semantic
1770         difference (which I think is an improvement) is that *this*
1771         version will terminate after printing line two in the script
1772         "yes | sed -n 2p".
1773
1774         Don't use this when in-place editing is active, because line
1775         numbers restart each time then. */
1776      else if (!separate_files)
1777	{
1778	  if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1779	      && (cur_cmd->a2
1780		  ? cur_cmd->range_state == RANGE_CLOSED
1781		  : cur_cmd->a1->addr_number < input->line_number))
1782	    {
1783	      /* Skip this address next time */
1784	      cur_cmd->addr_bang = !cur_cmd->addr_bang;
1785	      cur_cmd->a1->addr_type = ADDR_IS_NULL;
1786	      if (cur_cmd->a2)
1787		cur_cmd->a2->addr_type = ADDR_IS_NULL;
1788
1789	      /* can we make an optimization? */
1790	      if (cur_cmd->addr_bang)
1791		{
1792		  if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
1793		      || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
1794		    branches--;
1795
1796		  cur_cmd->cmd = '#';	/* replace with no-op */
1797	          if (branches == 0)
1798		    cur_cmd = shrink_program(vec, cur_cmd);
1799		  if (!cur_cmd && no_default_output)
1800		    return 0;
1801		  end_cmd = vec->v + vec->v_length;
1802		  if (!cur_cmd)
1803		    cur_cmd = end_cmd;
1804		  continue;
1805		}
1806	    }
1807	}
1808#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1809
1810      /* this is buried down here so that a "continue" statement can skip it */
1811      ++cur_cmd;
1812    }
1813
1814    if (!no_default_output)
1815      output_line(line.active, line.length, line.chomped, &output_file);
1816    return -1;
1817}
1818
1819
1820
1821/* Apply the compiled script to all the named files. */
1822int
1823process_files(the_program, argv)
1824  struct vector *the_program;
1825  char **argv;
1826{
1827  static char dash[] = "-";
1828  static char *stdin_argv[2] = { dash, NULL };
1829  struct input input;
1830  int status;
1831
1832  line_init(&line, NULL, INITIAL_BUFFER_SIZE);
1833  line_init(&hold, NULL, 0);
1834  line_init(&buffer, NULL, 0);
1835
1836#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1837  branches = count_branches(the_program);
1838#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1839  input.reset_at_next_file = true;
1840  if (argv && *argv)
1841    input.file_list = argv;
1842  else if (in_place_extension)
1843    panic(_("no input files"));
1844  else
1845    input.file_list = stdin_argv;
1846
1847  input.bad_count = 0;
1848  input.line_number = 0;
1849  input.read_fn = read_always_fail;
1850  input.fp = NULL;
1851
1852  status = EXIT_SUCCESS;
1853  while (read_pattern_space(&input, the_program, false))
1854    {
1855      status = execute_program(the_program, &input);
1856      if (status == -1)
1857	status = EXIT_SUCCESS;
1858      else
1859	break;
1860    }
1861  closedown(&input);
1862
1863#ifdef DEBUG_LEAKS
1864  /* We're about to exit, so these free()s are redundant.
1865     But if we're running under a memory-leak detecting
1866     implementation of malloc(), we want to explicitly
1867     deallocate in order to avoid extraneous noise from
1868     the allocator. */
1869  release_append_queue();
1870  FREE(buffer.text);
1871  FREE(hold.text);
1872  FREE(line.text);
1873  FREE(s_accum.text);
1874#endif /*DEBUG_LEAKS*/
1875
1876  if (input.bad_count)
1877    status = 2;
1878
1879  return status;
1880}
1881