1/*************************************************
2*               pcregrep program                 *
3*************************************************/
4
5/* This is a grep program that uses the PCRE regular expression library to do
6its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7recurse into directories, and in z/OS it can handle PDS files.
8
9Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10additional header is required. That header is not included in the main PCRE
11distribution because other apparatus is needed to compile pcregrep for z/OS.
12The header can be found in the special z/OS distribution, which is available
13from www.zaconsultants.net or from www.cbttape.org.
14
15           Copyright (c) 1997-2014 University of Cambridge
16
17-----------------------------------------------------------------------------
18Redistribution and use in source and binary forms, with or without
19modification, are permitted provided that the following conditions are met:
20
21    * Redistributions of source code must retain the above copyright notice,
22      this list of conditions and the following disclaimer.
23
24    * Redistributions in binary form must reproduce the above copyright
25      notice, this list of conditions and the following disclaimer in the
26      documentation and/or other materials provided with the distribution.
27
28    * Neither the name of the University of Cambridge nor the names of its
29      contributors may be used to endorse or promote products derived from
30      this software without specific prior written permission.
31
32THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42POSSIBILITY OF SUCH DAMAGE.
43-----------------------------------------------------------------------------
44*/
45
46#ifdef HAVE_CONFIG_H
47#include "config.h"
48#endif
49
50#include <ctype.h>
51#include <locale.h>
52#include <stdio.h>
53#include <string.h>
54#include <stdlib.h>
55#include <errno.h>
56
57#include <sys/types.h>
58#include <sys/stat.h>
59
60#ifdef HAVE_UNISTD_H
61#include <unistd.h>
62#endif
63
64#ifdef SUPPORT_LIBZ
65#include <zlib.h>
66#endif
67
68#ifdef SUPPORT_LIBBZ2
69#include <bzlib.h>
70#endif
71
72#include "pcre.h"
73
/* A home-grown boolean type, with names for its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0
78
#define OFFSET_SIZE 99

/* The limit on the length of a pattern is BUFSIZ or 8192, whichever is the
greater. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* A pattern buffer has 10 bytes more than the longest pattern, so that a
prefix and a suffix can be added to a pattern of maximum length. */

#define PATBUFSIZE (MAXPATLEN + 10)
88
/* Settings for the "filenames" variable, which governs file name output. The
order of the values must not be changed: every value greater than FN_DEFAULT
is taken to mean that a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* The different styles of file reading */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* What can be requested by the -d option (dee_*) and the -D option (DEE_*) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special processing options */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* The types of line ending */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};

/* The ways in which binary files can be treated */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
117
/* Newer versions of gcc, when FORTIFY_SOURCE is set (as it is by default in
some environments), warn if the value of fwrite() is ignored, and a (void)
cast does not silence the warning. This macro therefore wraps the call in an
"if" with an empty body, so that the value counts as used. Curiously, the same
warning does not seem to be given for fprintf().

Because the expansion is an "if" statement, the macro should be used only as
a complete statement of its own. */

#define FWRITE(a, b, c, d) if (fwrite(a,b,c,d)) {}
125
126
127
128/*************************************************
129*               Global variables                 *
130*************************************************/
131
132/* Jeffrey Friedl has some debugging requirements that are not part of the
133regular code. */
134
135#ifdef JFRIEDL_DEBUG
136static int S_arg = -1;
137static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139static const char *jfriedl_prefix = "";
140static const char *jfriedl_postfix = "";
141#endif
142
143static int  endlinetype;
144
145static char *colour_string = (char *)"1;31";
146static char *colour_option = NULL;
147static char *dee_option = NULL;
148static char *DEE_option = NULL;
149static char *locale = NULL;
150static char *main_buffer = NULL;
151static char *newline = NULL;
152static char *om_separator = (char *)"";
153static char *stdin_name = (char *)"(standard input)";
154
155static const unsigned char *pcretables = NULL;
156
157static int after_context = 0;
158static int before_context = 0;
159static int binary_files = BIN_BINARY;
160static int both_context = 0;
161static int bufthird = PCREGREP_BUFSIZE;
162static int bufsize = 3*PCREGREP_BUFSIZE;
163
164#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165static int dee_action = dee_SKIP;
166#else
167static int dee_action = dee_READ;
168#endif
169
170static int DEE_action = DEE_READ;
171static int error_count = 0;
172static int filenames = FN_DEFAULT;
173static int pcre_options = 0;
174static int process_options = 0;
175
176#ifdef SUPPORT_PCREGREP_JIT
177static int study_options = PCRE_STUDY_JIT_COMPILE;
178#else
179static int study_options = 0;
180#endif
181
182static unsigned long int match_limit = 0;
183static unsigned long int match_limit_recursion = 0;
184
185static BOOL count_only = FALSE;
186static BOOL do_colour = FALSE;
187static BOOL file_offsets = FALSE;
188static BOOL hyphenpending = FALSE;
189static BOOL invert = FALSE;
190static BOOL line_buffered = FALSE;
191static BOOL line_offsets = FALSE;
192static BOOL multiline = FALSE;
193static BOOL number = FALSE;
194static BOOL omit_zero_count = FALSE;
195static BOOL resource_error = FALSE;
196static BOOL quiet = FALSE;
197static BOOL show_only_matching = FALSE;
198static BOOL silent = FALSE;
199static BOOL utf8 = FALSE;
200
/* One item in the list of capturing group numbers for --only-matching */

struct omstr {
  struct omstr *next;
  int groupnum;
};

typedef struct omstr omstr;

/* A number chain is described by two variables, and this structure holds
pointers to the pair. */

struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
};

typedef struct omdatastr omdatastr;

/* The chain for --only-matching, followed by its description */

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219
/* One item in a list of file names; such lists are used for -f, --file-list,
--include-from, and --exclude-from. */

struct fnstr {
  struct fnstr *next;
  char *name;
};

typedef struct fnstr fnstr;

/* A file name chain is described by two variables, and this structure holds
pointers to the pair. */

struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
};

typedef struct fndatastr fndatastr;

/* Each chain is followed by its description. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };

static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;
static fndatastr include_from_data = { &include_from, &include_from_last };

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fndatastr file_lists_data = { &file_lists, &file_lists_last };

static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248
249/* Structure for pattern and its compiled form; used for matching patterns and
250also for include/exclude patterns. */
251
252typedef struct patstr {
253  struct patstr *next;
254  char *string;
255  pcre *compiled;
256  pcre_extra *hint;
257} patstr;
258
259static patstr *patterns = NULL;
260static patstr *patterns_last = NULL;
261static patstr *include_patterns = NULL;
262static patstr *include_patterns_last = NULL;
263static patstr *exclude_patterns = NULL;
264static patstr *exclude_patterns_last = NULL;
265static patstr *include_dir_patterns = NULL;
266static patstr *include_dir_patterns_last = NULL;
267static patstr *exclude_dir_patterns = NULL;
268static patstr *exclude_dir_patterns_last = NULL;
269
270/* Structure holding the two variables that describe a pattern chain. A pointer
271to such structures is used for each appropriate option. */
272
273typedef struct patdatastr {
274  patstr **anchor;
275  patstr **lastptr;
276} patdatastr;
277
278static patdatastr match_patdata = { &patterns, &patterns_last };
279static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283
284static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                 &include_dir_patterns, &exclude_dir_patterns };
286
287static const char *incexname[4] = { "--include", "--exclude",
288                                    "--include-dir", "--exclude-dir" };
289
/* The types of option, as held in the "type" field of an option_item */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_OP_NUMBERS,
  OP_PATLIST,
  OP_FILELIST,
  OP_BINFILES
};

/* The description of one option; the options are listed in optionlist[]. */

struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* the single-letter form, or a negative code */
  void *dataptr;           /* the option's data pointer, or NULL */
  const char *long_name;   /* the long form, as shown by --help */
  const char *help_text;   /* the explanation shown by --help */
};

typedef struct option_item option_item;
302
/* An option that has no single-letter form is given a negative value in its
place, by which it can be identified. The comments show the long names that
optionlist[] associates with each value. */

#define N_COLOUR       (-1)    /* --color, --colour */
#define N_EXCLUDE      (-2)    /* --exclude */
#define N_EXCLUDE_DIR  (-3)    /* --exclude-dir */
#define N_HELP         (-4)    /* --help */
#define N_INCLUDE      (-5)    /* --include */
#define N_INCLUDE_DIR  (-6)    /* --include-dir */
#define N_LABEL        (-7)    /* --label */
#define N_LOCALE       (-8)    /* --locale */
#define N_NULL         (-9)    /* the empty long name: terminate options */
#define N_LOFFSETS     (-10)   /* --line-offsets */
#define N_FOFFSETS     (-11)   /* --file-offsets */
#define N_LBUFFER      (-12)   /* --line-buffered */
#define N_M_LIMIT      (-13)   /* --match-limit */
#define N_M_LIMIT_REC  (-14)   /* --recursion-limit */
#define N_BUFSIZE      (-15)   /* --buffer-size */
#define N_NOJIT        (-16)   /* --no-jit */
#define N_FILE_LIST    (-17)   /* --file-list */
#define N_BINARY_FILES (-18)   /* --binary-files */
#define N_EXCLUDE_FROM (-19)   /* --exclude-from */
#define N_INCLUDE_FROM (-20)   /* --include-from */
#define N_OM_SEPARATOR (-21)   /* --om-separator */
327
328static option_item optionlist[] = {
329  { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330  { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331  { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332  { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333  { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334  { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335  { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336  { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337  { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338  { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339  { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340  { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341  { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342  { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343  { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344  { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345  { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346  { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347  { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348  { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349  { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350  { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351#ifdef SUPPORT_PCREGREP_JIT
352  { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353#else
354  { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355#endif
356  { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357  { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358  { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359  { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360  { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361  { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362  { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363  { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364  { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365  { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366  { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367  { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368  { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369  { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370  { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371  { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372  { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373  { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374  { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375  { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376  { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377
378  /* These two were accidentally implemented with underscores instead of
379  hyphens in the option names. As this was not discovered for several releases,
380  the incorrect versions are left in the table for compatibility. However, the
381  --help function misses out any option that has an underscore in its name. */
382
383  { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384  { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385
386#ifdef JFRIEDL_DEBUG
387  { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388#endif
389  { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
390  { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
391  { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
392  { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
393  { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
394  { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
395  { OP_NODATA,    0,        NULL,               NULL,            NULL }
396};
397
/* Strings that are put before and after patterns to implement the -w, -x, and
-F options, which set the 1, 2, and 4 bits in process_options, respectively.
Each table has one entry for every combination of those bits. As -w with -x
has the same effect as -x on its own, the entries for the two cases are the
same. The longest prefix+suffix pair is 10 characters, which is the amount
that PATBUFSIZE allows over MAXPATLEN; if anything longer is added, that
allowance must be adjusted. */

static const char *prefix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
410
/* Tables for UTF-8 processing. They are needed only when the newline setting
is "any". */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f,
  0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,     /* 32 entries of 1 */
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,     /* 16 entries of 2 */
  2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,     /*  8 entries of 3 */
  4,4,4,4,5,5,5,5      /*  4 entries of 4, then 4 of 5 */
};
420
421
422
423/*************************************************
424*         Exit from the program                  *
425*************************************************/
426
427/* If there has been a resource error, give a suitable message.
428
429Argument:  the return code
430Returns:   does not return
431*/
432
433static void
434pcregrep_exit(int rc)
435{
436if (resource_error)
437  {
438  fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439    "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440    PCRE_ERROR_JIT_STACKLIMIT);
441  fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442  }
443exit(rc);
444}
445
446
447/*************************************************
448*          Add item to chain of patterns         *
449*************************************************/
450
451/* Used to add an item onto a chain, or just return an unconnected item if the
452"after" argument is NULL.
453
454Arguments:
455  s          pattern string to add
456  after      if not NULL points to item to insert after
457
458Returns:     new pattern block or NULL on error
459*/
460
461static patstr *
462add_pattern(char *s, patstr *after)
463{
464patstr *p = (patstr *)malloc(sizeof(patstr));
465if (p == NULL)
466  {
467  fprintf(stderr, "pcregrep: malloc failed\n");
468  pcregrep_exit(2);
469  }
470if (strlen(s) > MAXPATLEN)
471  {
472  fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473    MAXPATLEN);
474  free(p);
475  return NULL;
476  }
477p->next = NULL;
478p->string = s;
479p->compiled = NULL;
480p->hint = NULL;
481
482if (after != NULL)
483  {
484  p->next = after->next;
485  after->next = p;
486  }
487return p;
488}
489
490
491/*************************************************
492*           Free chain of patterns               *
493*************************************************/
494
495/* Used for several chains of patterns.
496
497Argument: pointer to start of chain
498Returns:  nothing
499*/
500
501static void
502free_pattern_chain(patstr *pc)
503{
504while (pc != NULL)
505  {
506  patstr *p = pc;
507  pc = p->next;
508  if (p->hint != NULL) pcre_free_study(p->hint);
509  if (p->compiled != NULL) pcre_free(p->compiled);
510  free(p);
511  }
512}
513
514
515/*************************************************
516*           Free chain of file names             *
517*************************************************/
518
519/*
520Argument: pointer to start of chain
521Returns:  nothing
522*/
523
524static void
525free_file_chain(fnstr *fn)
526{
527while (fn != NULL)
528  {
529  fnstr *f = fn;
530  fn = f->next;
531  free(f);
532  }
533}
534
535
536/*************************************************
537*            OS-specific functions               *
538*************************************************/
539
540/* These functions are defined so that they can be made system specific.
541At present there are versions for Unix-style environments, Windows, native
542z/OS, and "no support". */
543
544
545/************* Directory scanning Unix-style and z/OS ***********/
546
547#if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548#include <sys/types.h>
549#include <sys/stat.h>
550#include <dirent.h>
551
552#if defined NATIVE_ZOS
553/************* Directory and PDS/E scanning for z/OS ***********/
554/************* z/OS looks mostly like Unix with USS ************/
555/* However, z/OS needs the #include statements in this header */
556#include "pcrzosfs.h"
557/* That header is not included in the main PCRE distribution because
558   other apparatus is needed to compile pcregrep for z/OS. The header
559   can be found in the special z/OS distribution, which is available
560   from www.zaconsultants.net or from www.cbttape.org. */
561#endif
562
/* The separator in file paths, and the type of an open directory, which in
this environment is the DIR of <dirent.h>. */

#define FILESEP '/'
typedef DIR directory_type;
565
/* Test whether a path names a directory. If stat() fails, the answer is "no",
in the expectation that opening the path as a file will also fail. */

static int
isdirectory(char *filename)
{
struct stat info;
return stat(filename, &info) >= 0 && (info.st_mode & S_IFMT) == S_IFDIR;
}
574
575static directory_type *
576opendirectory(char *filename)
577{
578return opendir(filename);
579}
580
581static char *
582readdirectory(directory_type *dir)
583{
584for (;;)
585  {
586  struct dirent *dent = readdir(dir);
587  if (dent == NULL) return NULL;
588  if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589    return dent->d_name;
590  }
591/* Control never reaches here */
592}
593
594static void
595closedirectory(directory_type *dir)
596{
597closedir(dir);
598}
599
600
601/************* Test for regular file, Unix-style **********/
602
/* Test whether a path names a regular file. If stat() fails, the answer is
"yes", in the expectation that opening the path as a file will fail. */

static int
isregfile(char *filename)
{
struct stat info;
return stat(filename, &info) < 0 || (info.st_mode & S_IFMT) == S_IFREG;
}
611
612
613#if defined NATIVE_ZOS
614/************* Test for a terminal in z/OS **********/
615/* isatty() does not work in a TSO environment, so always give FALSE.*/
616
617static BOOL
618is_stdout_tty(void)
619{
620return FALSE;
621}
622
623static BOOL
624is_file_tty(FILE *f)
625{
626return FALSE;
627}
628
629
630/************* Test for a terminal, Unix-style **********/
631
632#else
633static BOOL
634is_stdout_tty(void)
635{
636return isatty(fileno(stdout));
637}
638
639static BOOL
640is_file_tty(FILE *f)
641{
642return isatty(fileno(f));
643}
644#endif
645
646/* End of Unix-style or native z/OS environment functions. */
647
648
649/************* Directory scanning in Windows ***********/
650
651/* I (Philip Hazel) have no means of testing this code. It was contributed by
652Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653when it did not exist. David Byron added a patch that moved the #include of
654<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656undefined when it is indeed undefined. */
657
658#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659
660#ifndef STRICT
661# define STRICT
662#endif
663#ifndef WIN32_LEAN_AND_MEAN
664# define WIN32_LEAN_AND_MEAN
665#endif
666
667#include <windows.h>
668
669#ifndef INVALID_FILE_ATTRIBUTES
670#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671#endif
672
673typedef struct directory_type
674{
675HANDLE handle;
676BOOL first;
677WIN32_FIND_DATA data;
678} directory_type;
679
680#define FILESEP '/'
681
682int
683isdirectory(char *filename)
684{
685DWORD attr = GetFileAttributes(filename);
686if (attr == INVALID_FILE_ATTRIBUTES)
687  return 0;
688return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689}
690
691directory_type *
692opendirectory(char *filename)
693{
694size_t len;
695char *pattern;
696directory_type *dir;
697DWORD err;
698len = strlen(filename);
699pattern = (char *)malloc(len + 3);
700dir = (directory_type *)malloc(sizeof(*dir));
701if ((pattern == NULL) || (dir == NULL))
702  {
703  fprintf(stderr, "pcregrep: malloc failed\n");
704  pcregrep_exit(2);
705  }
706memcpy(pattern, filename, len);
707memcpy(&(pattern[len]), "\\*", 3);
708dir->handle = FindFirstFile(pattern, &(dir->data));
709if (dir->handle != INVALID_HANDLE_VALUE)
710  {
711  free(pattern);
712  dir->first = TRUE;
713  return dir;
714  }
715err = GetLastError();
716free(pattern);
717free(dir);
718errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
719return NULL;
720}
721
722char *
723readdirectory(directory_type *dir)
724{
725for (;;)
726  {
727  if (!dir->first)
728    {
729    if (!FindNextFile(dir->handle, &(dir->data)))
730      return NULL;
731    }
732  else
733    {
734    dir->first = FALSE;
735    }
736  if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737    return dir->data.cFileName;
738  }
739#ifndef _MSC_VER
740return NULL;   /* Keep compiler happy; never executed */
741#endif
742}
743
744void
745closedirectory(directory_type *dir)
746{
747FindClose(dir->handle);
748free(dir);
749}
750
751
752/************* Test for regular file in Windows **********/
753
754/* I don't know how to do this, or if it can be done; assume all paths are
755regular if they are not directories. */
756
int
isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
761
762
763/************* Test for a terminal in Windows **********/
764
765/* I don't know how to do this; assume never */
766
767static BOOL
768is_stdout_tty(void)
769{
770return FALSE;
771}
772
773static BOOL
774is_file_tty(FILE *f)
775{
776return FALSE;
777}
778
779/* End of Windows functions */
780
781
782/************* Directory scanning when we can't do it ***********/
783
784/* The type is void, and apart from isdirectory(), the functions do nothing. */
785
786#else
787
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* No directory can be opened. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
795
796
797/************* Test for regular file when we can't do it **********/
798
799/* Assume all files are regular. */
800
int
isregfile(char *filename)
{
(void)filename;
return 1;
}
802
803
804/************* Test for a terminal when we can't do it **********/
805
806static BOOL
807is_stdout_tty(void)
808{
809return FALSE;
810}
811
812static BOOL
813is_file_tty(FILE *f)
814{
815return FALSE;
816}
817
818#endif  /* End of system-specific functions */
819
820
821
822#ifndef HAVE_STRERROR
823/*************************************************
824*     Provide strerror() for non-ANSI libraries  *
825*************************************************/
826
827/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
828in their libraries, but can provide the same facility by this simple
829alternative function. */
830
/* The library's table of error messages, and the number of entries in it */

extern char *sys_errlist[];
extern int   sys_nerr;

/* The message is taken from the table when the number is within it; for any
other number a fixed text is given. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
840#endif /* HAVE_STRERROR */
841
842
843
844/*************************************************
845*                Usage function                  *
846*************************************************/
847
848static int
849usage(int rc)
850{
851option_item *op;
852fprintf(stderr, "Usage: pcregrep [-");
853for (op = optionlist; op->one_char != 0; op++)
854  {
855  if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856  }
857fprintf(stderr, "] [long options] [pattern] [files]\n");
858fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859  "options.\n");
860return rc;
861}
862
863
864
865/*************************************************
866*                Help function                   *
867*************************************************/
868
869static void
870help(void)
871{
872option_item *op;
873
874printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875printf("Search for PATTERN in each FILE or standard input.\n");
876printf("PATTERN must be present if neither -e nor -f is used.\n");
877printf("\"-\" can be used as a file name to mean STDIN.\n");
878
879#ifdef SUPPORT_LIBZ
880printf("Files whose names end in .gz are read using zlib.\n");
881#endif
882
883#ifdef SUPPORT_LIBBZ2
884printf("Files whose names end in .bz2 are read using bzlib2.\n");
885#endif
886
887#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888printf("Other files and the standard input are read as plain files.\n\n");
889#else
890printf("All files are read as plain files, without any interpretation.\n\n");
891#endif
892
893printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894printf("Options:\n");
895
896for (op = optionlist; op->one_char != 0; op++)
897  {
898  int n;
899  char s[4];
900
901  /* Two options were accidentally implemented and documented with underscores
902  instead of hyphens in their names, something that was not noticed for quite a
903  few releases. When fixing this, I left the underscored versions in the list
904  in case people were using them. However, we don't want to display them in the
905  help data. There are no other options that contain underscores, and we do not
906  expect ever to implement such options. Therefore, just omit any option that
907  contains an underscore. */
908
909  if (strchr(op->long_name, '_') != NULL) continue;
910
911  if (op->one_char > 0 && (op->long_name)[0] == 0)
912    n = 31 - printf("  -%c", op->one_char);
913  else
914    {
915    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916      else strcpy(s, "   ");
917    n = 31 - printf("  %s --%s", s, op->long_name);
918    }
919
920  if (n < 1) n = 1;
921  printf("%.*s%s\n", n, "                           ", op->help_text);
922  }
923
924printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926printf("When reading patterns or file names from a file, trailing white\n");
927printf("space is removed and blank lines are ignored.\n");
928printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929
930printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932}
933
934
935
936/*************************************************
937*            Test exclude/includes               *
938*************************************************/
939
940/* If any exclude pattern matches, the path is excluded. Otherwise, unless
941there are no includes, the path must match an include pattern.
942
943Arguments:
944  path      the path to be matched
945  ip        the chain of include patterns
946  ep        the chain of exclude patterns
947
948Returns:    TRUE if the path is not excluded
949*/
950
951static BOOL
952test_incexc(char *path, patstr *ip, patstr *ep)
953{
954int plen = strlen(path);
955
956for (; ep != NULL; ep = ep->next)
957  {
958  if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959    return FALSE;
960  }
961
962if (ip == NULL) return TRUE;
963
964for (; ip != NULL; ip = ip->next)
965  {
966  if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967    return TRUE;
968  }
969
970return FALSE;
971}
972
973
974
975/*************************************************
976*         Decode integer argument value          *
977*************************************************/
978
979/* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981just keep it simple.
982
983Arguments:
984  option_data   the option data string
985  op            the option item (for error messages)
986  longop        TRUE if option given in long form
987
988Returns:        a long integer
989*/
990
991static long int
992decode_number(char *option_data, option_item *op, BOOL longop)
993{
994unsigned long int n = 0;
995char *endptr = option_data;
996while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997while (isdigit((unsigned char)(*endptr)))
998  n = n * 10 + (int)(*endptr++ - '0');
999if (toupper(*endptr) == 'K')
1000  {
1001  n *= 1024;
1002  endptr++;
1003  }
1004else if (toupper(*endptr) == 'M')
1005  {
1006  n *= 1024*1024;
1007  endptr++;
1008  }
1009
1010if (*endptr != 0)   /* Error */
1011  {
1012  if (longop)
1013    {
1014    char *equals = strchr(op->long_name, '=');
1015    int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016      (int)(equals - op->long_name);
1017    fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018      option_data, nlen, op->long_name);
1019    }
1020  else
1021    fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022      option_data, op->one_char);
1023  pcregrep_exit(usage(2));
1024  }
1025
1026return n;
1027}
1028
1029
1030
1031/*************************************************
1032*       Add item to a chain of numbers           *
1033*************************************************/
1034
1035/* Used to add an item onto a chain, or just return an unconnected item if the
1036"after" argument is NULL.
1037
1038Arguments:
1039  n          the number to add
1040  after      if not NULL points to item to insert after
1041
1042Returns:     new number block
1043*/
1044
1045static omstr *
1046add_number(int n, omstr *after)
1047{
1048omstr *om = (omstr *)malloc(sizeof(omstr));
1049
1050if (om == NULL)
1051  {
1052  fprintf(stderr, "pcregrep: malloc failed\n");
1053  pcregrep_exit(2);
1054  }
1055om->next = NULL;
1056om->groupnum = n;
1057
1058if (after != NULL)
1059  {
1060  om->next = after->next;
1061  after->next = om;
1062  }
1063return om;
1064}
1065
1066
1067
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file. The data is not zero-terminated. The space
check is made before each character is fetched, so if "length" is zero or
negative nothing is read from the file and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* Check for room first: fetching a character that cannot be stored would
either lose it or overrun the buffer. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1099
1100
1101
1102/*************************************************
1103*             Find end of line                   *
1104*************************************************/
1105
1106/* The length of the endline sequence that is found is set via lenptr. This may
1107be zero at the very end of the file if there is no line-ending sequence there.
1108
1109Arguments:
1110  p         current position in line
1111  endptr    end of available data
1112  lenptr    where to put the length of the eol sequence
1113
1114Returns:    pointer after the last byte of the line,
1115            including the newline byte(s)
1116*/
1117
1118static char *
1119end_of_line(char *p, char *endptr, int *lenptr)
1120{
1121switch(endlinetype)
1122  {
1123  default:      /* Just in case */
1124  case EL_LF:
1125  while (p < endptr && *p != '\n') p++;
1126  if (p < endptr)
1127    {
1128    *lenptr = 1;
1129    return p + 1;
1130    }
1131  *lenptr = 0;
1132  return endptr;
1133
1134  case EL_CR:
1135  while (p < endptr && *p != '\r') p++;
1136  if (p < endptr)
1137    {
1138    *lenptr = 1;
1139    return p + 1;
1140    }
1141  *lenptr = 0;
1142  return endptr;
1143
1144  case EL_CRLF:
1145  for (;;)
1146    {
1147    while (p < endptr && *p != '\r') p++;
1148    if (++p >= endptr)
1149      {
1150      *lenptr = 0;
1151      return endptr;
1152      }
1153    if (*p == '\n')
1154      {
1155      *lenptr = 2;
1156      return p + 1;
1157      }
1158    }
1159  break;
1160
1161  case EL_ANYCRLF:
1162  while (p < endptr)
1163    {
1164    int extra = 0;
1165    register int c = *((unsigned char *)p);
1166
1167    if (utf8 && c >= 0xc0)
1168      {
1169      int gcii, gcss;
1170      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1171      gcss = 6*extra;
1172      c = (c & utf8_table3[extra]) << gcss;
1173      for (gcii = 1; gcii <= extra; gcii++)
1174        {
1175        gcss -= 6;
1176        c |= (p[gcii] & 0x3f) << gcss;
1177        }
1178      }
1179
1180    p += 1 + extra;
1181
1182    switch (c)
1183      {
1184      case '\n':
1185      *lenptr = 1;
1186      return p;
1187
1188      case '\r':
1189      if (p < endptr && *p == '\n')
1190        {
1191        *lenptr = 2;
1192        p++;
1193        }
1194      else *lenptr = 1;
1195      return p;
1196
1197      default:
1198      break;
1199      }
1200    }   /* End of loop for ANYCRLF case */
1201
1202  *lenptr = 0;  /* Must have hit the end */
1203  return endptr;
1204
1205  case EL_ANY:
1206  while (p < endptr)
1207    {
1208    int extra = 0;
1209    register int c = *((unsigned char *)p);
1210
1211    if (utf8 && c >= 0xc0)
1212      {
1213      int gcii, gcss;
1214      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1215      gcss = 6*extra;
1216      c = (c & utf8_table3[extra]) << gcss;
1217      for (gcii = 1; gcii <= extra; gcii++)
1218        {
1219        gcss -= 6;
1220        c |= (p[gcii] & 0x3f) << gcss;
1221        }
1222      }
1223
1224    p += 1 + extra;
1225
1226    switch (c)
1227      {
1228      case '\n':    /* LF */
1229      case '\v':    /* VT */
1230      case '\f':    /* FF */
1231      *lenptr = 1;
1232      return p;
1233
1234      case '\r':    /* CR */
1235      if (p < endptr && *p == '\n')
1236        {
1237        *lenptr = 2;
1238        p++;
1239        }
1240      else *lenptr = 1;
1241      return p;
1242
1243#ifndef EBCDIC
1244      case 0x85:    /* Unicode NEL */
1245      *lenptr = utf8? 2 : 1;
1246      return p;
1247
1248      case 0x2028:  /* Unicode LS */
1249      case 0x2029:  /* Unicode PS */
1250      *lenptr = 3;
1251      return p;
1252#endif  /* Not EBCDIC */
1253
1254      default:
1255      break;
1256      }
1257    }   /* End of loop for ANY case */
1258
1259  *lenptr = 0;  /* Must have hit the end */
1260  return endptr;
1261  }     /* End of overall switch */
1262}
1263
1264
1265
1266/*************************************************
1267*         Find start of previous line            *
1268*************************************************/
1269
1270/* This is called when looking back for before lines to print.
1271
1272Arguments:
1273  p         start of the subsequent line
1274  startptr  start of available data
1275
1276Returns:    pointer to the start of the previous line
1277*/
1278
1279static char *
1280previous_line(char *p, char *startptr)
1281{
1282switch(endlinetype)
1283  {
1284  default:      /* Just in case */
1285  case EL_LF:
1286  p--;
1287  while (p > startptr && p[-1] != '\n') p--;
1288  return p;
1289
1290  case EL_CR:
1291  p--;
1292  while (p > startptr && p[-1] != '\n') p--;
1293  return p;
1294
1295  case EL_CRLF:
1296  for (;;)
1297    {
1298    p -= 2;
1299    while (p > startptr && p[-1] != '\n') p--;
1300    if (p <= startptr + 1 || p[-2] == '\r') return p;
1301    }
1302  /* Control can never get here */
1303
1304  case EL_ANY:
1305  case EL_ANYCRLF:
1306  if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307  if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308
1309  while (p > startptr)
1310    {
1311    register unsigned int c;
1312    char *pp = p - 1;
1313
1314    if (utf8)
1315      {
1316      int extra = 0;
1317      while ((*pp & 0xc0) == 0x80) pp--;
1318      c = *((unsigned char *)pp);
1319      if (c >= 0xc0)
1320        {
1321        int gcii, gcss;
1322        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1323        gcss = 6*extra;
1324        c = (c & utf8_table3[extra]) << gcss;
1325        for (gcii = 1; gcii <= extra; gcii++)
1326          {
1327          gcss -= 6;
1328          c |= (pp[gcii] & 0x3f) << gcss;
1329          }
1330        }
1331      }
1332    else c = *((unsigned char *)pp);
1333
1334    if (endlinetype == EL_ANYCRLF) switch (c)
1335      {
1336      case '\n':    /* LF */
1337      case '\r':    /* CR */
1338      return p;
1339
1340      default:
1341      break;
1342      }
1343
1344    else switch (c)
1345      {
1346      case '\n':    /* LF */
1347      case '\v':    /* VT */
1348      case '\f':    /* FF */
1349      case '\r':    /* CR */
1350#ifndef EBCDIE
1351      case 0x85:    /* Unicode NEL */
1352      case 0x2028:  /* Unicode LS */
1353      case 0x2029:  /* Unicode PS */
1354#endif  /* Not EBCDIC */
1355      return p;
1356
1357      default:
1358      break;
1359      }
1360
1361    p = pp;  /* Back one character */
1362    }        /* End of loop for ANY case */
1363
1364  return startptr;  /* Hit start of data */
1365  }     /* End of overall switch */
1366}
1367
1368
1369
1370
1371
1372/*************************************************
1373*       Print the previous "after" lines         *
1374*************************************************/
1375
1376/* This is called if we are about to lose said lines because of buffer filling,
1377and at the end of the file. The data in the line is written using fwrite() so
1378that a binary zero does not terminate it.
1379
1380Arguments:
1381  lastmatchnumber   the number of the last matching line, plus one
1382  lastmatchrestart  where we restarted after the last match
1383  endptr            end of available data
1384  printname         filename for printing
1385
1386Returns:            nothing
1387*/
1388
1389static void
1390do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391  char *printname)
1392{
1393if (after_context > 0 && lastmatchnumber > 0)
1394  {
1395  int count = 0;
1396  while (lastmatchrestart < endptr && count++ < after_context)
1397    {
1398    int ellength;
1399    char *pp = lastmatchrestart;
1400    if (printname != NULL) fprintf(stdout, "%s-", printname);
1401    if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402    pp = end_of_line(pp, endptr, &ellength);
1403    FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404    lastmatchrestart = pp;
1405    }
1406  hyphenpending = TRUE;
1407  }
1408}
1409
1410
1411
1412/*************************************************
1413*   Apply patterns to subject till one matches   *
1414*************************************************/
1415
1416/* This function is called to run through all patterns, looking for a match. It
1417is used multiple times for the same subject when colouring is enabled, in order
1418to find all possible matches.
1419
1420Arguments:
1421  matchptr     the start of the subject
1422  length       the length of the subject to match
1423  options      options for pcre_exec
1424  startoffset  where to start matching
1425  offsets      the offets vector to fill in
1426  mrc          address of where to put the result of pcre_exec()
1427
1428Returns:      TRUE if there was a match
1429              FALSE if there was no match
1430              invert if there was a non-fatal error
1431*/
1432
1433static BOOL
1434match_patterns(char *matchptr, size_t length, unsigned int options,
1435  int startoffset, int *offsets, int *mrc)
1436{
1437int i;
1438size_t slen = length;
1439patstr *p = patterns;
1440const char *msg = "this text:\n\n";
1441
1442if (slen > 200)
1443  {
1444  slen = 200;
1445  msg = "text that starts:\n\n";
1446  }
1447for (i = 1; p != NULL; p = p->next, i++)
1448  {
1449  *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450    startoffset, options, offsets, OFFSET_SIZE);
1451  if (*mrc >= 0) return TRUE;
1452  if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453  fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454  if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455  fprintf(stderr, "%s", msg);
1456  FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1457  fprintf(stderr, "\n\n");
1458  if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459      *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460    resource_error = TRUE;
1461  if (error_count++ > 20)
1462    {
1463    fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464    pcregrep_exit(2);
1465    }
1466  return invert;    /* No more matching; don't show the line again */
1467  }
1468
1469return FALSE;  /* No match, no errors */
1470}
1471
1472
1473
1474/*************************************************
1475*            Grep an individual file             *
1476*************************************************/
1477
1478/* This is called from grep_or_recurse() below. It uses a buffer that is three
1479times the value of bufthird. The matching point is never allowed to stray into
1480the top third of the buffer, thus keeping more of the file available for
1481context printing or for multiline scanning. For large files, the pointer will
1482be in the middle third most of the time, so the bottom third is available for
1483"before" context printing.
1484
1485Arguments:
1486  handle       the fopened FILE stream for a normal file
1487               the gzFile pointer when reading is via libz
1488               the BZFILE pointer when reading is via libbz2
1489  frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490  filename     the file name or NULL (for errors)
1491  printname    the file name if it is to be printed for each match
1492               or NULL if the file name is not to be printed
1493               it cannot be NULL if filenames[_nomatch]_only is set
1494
1495Returns:       0 if there was at least one match
1496               1 otherwise (no matches)
1497               2 if an overlong line is encountered
1498               3 if there is a read error on a .bz2 file
1499*/
1500
1501static int
1502pcregrep(void *handle, int frtype, char *filename, char *printname)
1503{
1504int rc = 1;
1505int linenumber = 1;
1506int lastmatchnumber = 0;
1507int count = 0;
1508int filepos = 0;
1509int offsets[OFFSET_SIZE];
1510char *lastmatchrestart = NULL;
1511char *ptr = main_buffer;
1512char *endptr;
1513size_t bufflength;
1514BOOL binary = FALSE;
1515BOOL endhyphenpending = FALSE;
1516BOOL input_line_buffered = line_buffered;
1517FILE *in = NULL;                    /* Ensure initialized */
1518
1519#ifdef SUPPORT_LIBZ
1520gzFile ingz = NULL;
1521#endif
1522
1523#ifdef SUPPORT_LIBBZ2
1524BZFILE *inbz2 = NULL;
1525#endif
1526
1527
1528/* Do the first read into the start of the buffer and set up the pointer to end
1529of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531fail. */
1532
1533(void)frtype;
1534
1535#ifdef SUPPORT_LIBZ
1536if (frtype == FR_LIBZ)
1537  {
1538  ingz = (gzFile)handle;
1539  bufflength = gzread (ingz, main_buffer, bufsize);
1540  }
1541else
1542#endif
1543
1544#ifdef SUPPORT_LIBBZ2
1545if (frtype == FR_LIBBZ2)
1546  {
1547  inbz2 = (BZFILE *)handle;
1548  bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549  if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1550  }                                    /* without the cast it is unsigned. */
1551else
1552#endif
1553
1554  {
1555  in = (FILE *)handle;
1556  if (is_file_tty(in)) input_line_buffered = TRUE;
1557  bufflength = input_line_buffered?
1558    read_one_line(main_buffer, bufsize, in) :
1559    fread(main_buffer, 1, bufsize, in);
1560  }
1561
1562endptr = main_buffer + bufflength;
1563
1564/* Unless binary-files=text, see if we have a binary file. This uses the same
1565rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566file. */
1567
1568if (binary_files != BIN_TEXT)
1569  {
1570  binary =
1571    memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572  if (binary && binary_files == BIN_NOMATCH) return 1;
1573  }
1574
1575/* Loop while the current pointer is not at the end of the file. For large
1576files, endptr will be at the end of the buffer when we are in the middle of the
1577file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578way, the buffer is shifted left and re-filled. */
1579
1580while (ptr < endptr)
1581  {
1582  int endlinelength;
1583  int mrc = 0;
1584  int startoffset = 0;
1585  unsigned int options = 0;
1586  BOOL match;
1587  char *matchptr = ptr;
1588  char *t = ptr;
1589  size_t length, linelength;
1590
1591  /* At this point, ptr is at the start of a line. We need to find the length
1592  of the subject string to pass to pcre_exec(). In multiline mode, it is the
1593  length remainder of the data in the buffer. Otherwise, it is the length of
1594  the next line, excluding the terminating newline. After matching, we always
1595  advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1596  option is used for compiling, so that any match is constrained to be in the
1597  first line. */
1598
1599  t = end_of_line(t, endptr, &endlinelength);
1600  linelength = t - ptr - endlinelength;
1601  length = multiline? (size_t)(endptr - ptr) : linelength;
1602
1603  /* Check to see if the line we are looking at extends right to the very end
1604  of the buffer without a line terminator. This means the line is too long to
1605  handle. */
1606
1607  if (endlinelength == 0 && t == main_buffer + bufsize)
1608    {
1609    fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1610                    "pcregrep: check the --buffer-size option\n",
1611                    linenumber,
1612                    (filename == NULL)? "" : " of file ",
1613                    (filename == NULL)? "" : filename);
1614    return 2;
1615    }
1616
1617  /* Extra processing for Jeffrey Friedl's debugging. */
1618
1619#ifdef JFRIEDL_DEBUG
1620  if (jfriedl_XT || jfriedl_XR)
1621  {
1622#     include <sys/time.h>
1623#     include <time.h>
1624      struct timeval start_time, end_time;
1625      struct timezone dummy;
1626      int i;
1627
1628      if (jfriedl_XT)
1629      {
1630          unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1631          const char *orig = ptr;
1632          ptr = malloc(newlen + 1);
1633          if (!ptr) {
1634                  printf("out of memory");
1635                  pcregrep_exit(2);
1636          }
1637          endptr = ptr;
1638          strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1639          for (i = 0; i < jfriedl_XT; i++) {
1640                  strncpy(endptr, orig,  length);
1641                  endptr += length;
1642          }
1643          strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1644          length = newlen;
1645      }
1646
1647      if (gettimeofday(&start_time, &dummy) != 0)
1648              perror("bad gettimeofday");
1649
1650
1651      for (i = 0; i < jfriedl_XR; i++)
1652          match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1653              PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1654
1655      if (gettimeofday(&end_time, &dummy) != 0)
1656              perror("bad gettimeofday");
1657
1658      double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1659                      -
1660                      (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1661
1662      printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1663      return 0;
1664  }
1665#endif
1666
1667  /* We come back here after a match when show_only_matching is set, in order
1668  to find any further matches in the same line. This applies to
1669  --only-matching, --file-offsets, and --line-offsets. */
1670
1671  ONLY_MATCHING_RESTART:
1672
1673  /* Run through all the patterns until one matches or there is an error other
1674  than NOMATCH. This code is in a subroutine so that it can be re-used for
1675  finding subsequent matches when colouring matched lines. After finding one
1676  match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1677  this line. */
1678
1679  match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1680  options = PCRE_NOTEMPTY;
1681
1682  /* If it's a match or a not-match (as required), do what's wanted. */
1683
1684  if (match != invert)
1685    {
1686    BOOL hyphenprinted = FALSE;
1687
1688    /* We've failed if we want a file that doesn't have any matches. */
1689
1690    if (filenames == FN_NOMATCH_ONLY) return 1;
1691
1692    /* Just count if just counting is wanted. */
1693
1694    if (count_only) count++;
1695
1696    /* When handling a binary file and binary-files==binary, the "binary"
1697    variable will be set true (it's false in all other cases). In this
1698    situation we just want to output the file name. No need to scan further. */
1699
1700    else if (binary)
1701      {
1702      fprintf(stdout, "Binary file %s matches\n", filename);
1703      return 0;
1704      }
1705
1706    /* If all we want is a file name, there is no need to scan any more lines
1707    in the file. */
1708
1709    else if (filenames == FN_MATCH_ONLY)
1710      {
1711      fprintf(stdout, "%s\n", printname);
1712      return 0;
1713      }
1714
1715    /* Likewise, if all we want is a yes/no answer. */
1716
1717    else if (quiet) return 0;
1718
1719    /* The --only-matching option prints just the substring that matched,
1720    and/or one or more captured portions of it, as long as these strings are
1721    not empty. The --file-offsets and --line-offsets options output offsets for
1722    the matching substring (all three set show_only_matching). None of these
1723    mutually exclusive options prints any context. Afterwards, adjust the start
1724    and then jump back to look for further matches in the same line. If we are
1725    in invert mode, however, nothing is printed and we do not restart - this
1726    could still be useful because the return code is set. */
1727
1728    else if (show_only_matching)
1729      {
1730      if (!invert)
1731        {
1732        if (printname != NULL) fprintf(stdout, "%s:", printname);
1733        if (number) fprintf(stdout, "%d:", linenumber);
1734
1735        /* Handle --line-offsets */
1736
1737        if (line_offsets)
1738          fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1739            offsets[1] - offsets[0]);
1740
1741        /* Handle --file-offsets */
1742
1743        else if (file_offsets)
1744          fprintf(stdout, "%d,%d\n",
1745            (int)(filepos + matchptr + offsets[0] - ptr),
1746            offsets[1] - offsets[0]);
1747
1748        /* Handle --only-matching, which may occur many times */
1749
1750        else
1751          {
1752          BOOL printed = FALSE;
1753          omstr *om;
1754
1755          for (om = only_matching; om != NULL; om = om->next)
1756            {
1757            int n = om->groupnum;
1758            if (n < mrc)
1759              {
1760              int plen = offsets[2*n + 1] - offsets[2*n];
1761              if (plen > 0)
1762                {
1763                if (printed) fprintf(stdout, "%s", om_separator);
1764                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1765                FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1766                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1767                printed = TRUE;
1768                }
1769              }
1770            }
1771
1772          if (printed || printname != NULL || number) fprintf(stdout, "\n");
1773          }
1774
1775        /* Prepare to repeat to find the next match */
1776
1777        match = FALSE;
1778        if (line_buffered) fflush(stdout);
1779        rc = 0;                      /* Had some success */
1780        startoffset = offsets[1];    /* Restart after the match */
1781        goto ONLY_MATCHING_RESTART;
1782        }
1783      }
1784
1785    /* This is the default case when none of the above options is set. We print
1786    the matching lines(s), possibly preceded and/or followed by other lines of
1787    context. */
1788
1789    else
1790      {
1791      /* See if there is a requirement to print some "after" lines from a
1792      previous match. We never print any overlaps. */
1793
1794      if (after_context > 0 && lastmatchnumber > 0)
1795        {
1796        int ellength;
1797        int linecount = 0;
1798        char *p = lastmatchrestart;
1799
1800        while (p < ptr && linecount < after_context)
1801          {
1802          p = end_of_line(p, ptr, &ellength);
1803          linecount++;
1804          }
1805
1806        /* It is important to advance lastmatchrestart during this printing so
1807        that it interacts correctly with any "before" printing below. Print
1808        each line's data using fwrite() in case there are binary zeroes. */
1809
1810        while (lastmatchrestart < p)
1811          {
1812          char *pp = lastmatchrestart;
1813          if (printname != NULL) fprintf(stdout, "%s-", printname);
1814          if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1815          pp = end_of_line(pp, endptr, &ellength);
1816          FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1817          lastmatchrestart = pp;
1818          }
1819        if (lastmatchrestart != ptr) hyphenpending = TRUE;
1820        }
1821
1822      /* If there were non-contiguous lines printed above, insert hyphens. */
1823
1824      if (hyphenpending)
1825        {
1826        fprintf(stdout, "--\n");
1827        hyphenpending = FALSE;
1828        hyphenprinted = TRUE;
1829        }
1830
1831      /* See if there is a requirement to print some "before" lines for this
1832      match. Again, don't print overlaps. */
1833
1834      if (before_context > 0)
1835        {
1836        int linecount = 0;
1837        char *p = ptr;
1838
1839        while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1840               linecount < before_context)
1841          {
1842          linecount++;
1843          p = previous_line(p, main_buffer);
1844          }
1845
1846        if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1847          fprintf(stdout, "--\n");
1848
1849        while (p < ptr)
1850          {
1851          int ellength;
1852          char *pp = p;
1853          if (printname != NULL) fprintf(stdout, "%s-", printname);
1854          if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1855          pp = end_of_line(pp, endptr, &ellength);
1856          FWRITE(p, 1, pp - p, stdout);
1857          p = pp;
1858          }
1859        }
1860
1861      /* Now print the matching line(s); ensure we set hyphenpending at the end
1862      of the file if any context lines are being output. */
1863
1864      if (after_context > 0 || before_context > 0)
1865        endhyphenpending = TRUE;
1866
1867      if (printname != NULL) fprintf(stdout, "%s:", printname);
1868      if (number) fprintf(stdout, "%d:", linenumber);
1869
1870      /* In multiline mode, we want to print to the end of the line in which
1871      the end of the matched string is found, so we adjust linelength and the
1872      line number appropriately, but only when there actually was a match
1873      (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1874      the match will always be before the first newline sequence. */
1875
1876      if (multiline & !invert)
1877        {
1878        char *endmatch = ptr + offsets[1];
1879        t = ptr;
1880        while (t <= endmatch)
1881          {
1882          t = end_of_line(t, endptr, &endlinelength);
1883          if (t < endmatch) linenumber++; else break;
1884          }
1885        linelength = t - ptr - endlinelength;
1886        }
1887
1888      /*** NOTE: Use only fwrite() to output the data line, so that binary
1889      zeroes are treated as just another data character. */
1890
1891      /* This extra option, for Jeffrey Friedl's debugging requirements,
1892      replaces the matched string, or a specific captured string if it exists,
1893      with X. When this happens, colouring is ignored. */
1894
1895#ifdef JFRIEDL_DEBUG
1896      if (S_arg >= 0 && S_arg < mrc)
1897        {
1898        int first = S_arg * 2;
1899        int last  = first + 1;
1900        FWRITE(ptr, 1, offsets[first], stdout);
1901        fprintf(stdout, "X");
1902        FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1903        }
1904      else
1905#endif
1906
1907      /* We have to split the line(s) up if colouring, and search for further
1908      matches, but not of course if the line is a non-match. */
1909
1910      if (do_colour && !invert)
1911        {
1912        int plength;
1913        FWRITE(ptr, 1, offsets[0], stdout);
1914        fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1915        FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1916        fprintf(stdout, "%c[00m", 0x1b);
1917        for (;;)
1918          {
1919          startoffset = offsets[1];
1920          if (startoffset >= (int)linelength + endlinelength ||
1921              !match_patterns(matchptr, length, options, startoffset, offsets,
1922                &mrc))
1923            break;
1924          FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1925          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1926          FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1927          fprintf(stdout, "%c[00m", 0x1b);
1928          }
1929
1930        /* In multiline mode, we may have already printed the complete line
1931        and its line-ending characters (if they matched the pattern), so there
1932        may be no more to print. */
1933
1934        plength = (int)((linelength + endlinelength) - startoffset);
1935        if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1936        }
1937
1938      /* Not colouring; no need to search for further matches */
1939
1940      else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1941      }
1942
1943    /* End of doing what has to be done for a match. If --line-buffered was
1944    given, flush the output. */
1945
1946    if (line_buffered) fflush(stdout);
1947    rc = 0;    /* Had some success */
1948
1949    /* Remember where the last match happened for after_context. We remember
1950    where we are about to restart, and that line's number. */
1951
1952    lastmatchrestart = ptr + linelength + endlinelength;
1953    lastmatchnumber = linenumber + 1;
1954    }
1955
1956  /* For a match in multiline inverted mode (which of course did not cause
1957  anything to be printed), we have to move on to the end of the match before
1958  proceeding. */
1959
1960  if (multiline && invert && match)
1961    {
1962    int ellength;
1963    char *endmatch = ptr + offsets[1];
1964    t = ptr;
1965    while (t < endmatch)
1966      {
1967      t = end_of_line(t, endptr, &ellength);
1968      if (t <= endmatch) linenumber++; else break;
1969      }
1970    endmatch = end_of_line(endmatch, endptr, &ellength);
1971    linelength = endmatch - ptr - ellength;
1972    }
1973
1974  /* Advance to after the newline and increment the line number. The file
1975  offset to the current line is maintained in filepos. */
1976
1977  ptr += linelength + endlinelength;
1978  filepos += (int)(linelength + endlinelength);
1979  linenumber++;
1980
1981  /* If input is line buffered, and the buffer is not yet full, read another
1982  line and add it into the buffer. */
1983
1984  if (input_line_buffered && bufflength < (size_t)bufsize)
1985    {
1986    int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1987    bufflength += add;
1988    endptr += add;
1989    }
1990
1991  /* If we haven't yet reached the end of the file (the buffer is full), and
1992  the current point is in the top 1/3 of the buffer, slide the buffer down by
1993  1/3 and refill it. Before we do this, if some unprinted "after" lines are
1994  about to be lost, print them. */
1995
1996  if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1997    {
1998    if (after_context > 0 &&
1999        lastmatchnumber > 0 &&
2000        lastmatchrestart < main_buffer + bufthird)
2001      {
2002      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2003      lastmatchnumber = 0;
2004      }
2005
2006    /* Now do the shuffle */
2007
2008    memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2009    ptr -= bufthird;
2010
2011#ifdef SUPPORT_LIBZ
2012    if (frtype == FR_LIBZ)
2013      bufflength = 2*bufthird +
2014        gzread (ingz, main_buffer + 2*bufthird, bufthird);
2015    else
2016#endif
2017
2018#ifdef SUPPORT_LIBBZ2
2019    if (frtype == FR_LIBBZ2)
2020      bufflength = 2*bufthird +
2021        BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2022    else
2023#endif
2024
2025    bufflength = 2*bufthird +
2026      (input_line_buffered?
2027       read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2028       fread(main_buffer + 2*bufthird, 1, bufthird, in));
2029    endptr = main_buffer + bufflength;
2030
2031    /* Adjust any last match point */
2032
2033    if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2034    }
2035  }     /* Loop through the whole file */
2036
2037/* End of file; print final "after" lines if wanted; do_after_lines sets
2038hyphenpending if it prints something. */
2039
2040if (!show_only_matching && !count_only)
2041  {
2042  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2043  hyphenpending |= endhyphenpending;
2044  }
2045
2046/* Print the file name if we are looking for those without matches and there
2047were none. If we found a match, we won't have got this far. */
2048
2049if (filenames == FN_NOMATCH_ONLY)
2050  {
2051  fprintf(stdout, "%s\n", printname);
2052  return 0;
2053  }
2054
2055/* Print the match count if wanted */
2056
2057if (count_only)
2058  {
2059  if (count > 0 || !omit_zero_count)
2060    {
2061    if (printname != NULL && filenames != FN_NONE)
2062      fprintf(stdout, "%s:", printname);
2063    fprintf(stdout, "%d\n", count);
2064    }
2065  }
2066
2067return rc;
2068}
2069
2070
2071
2072/*************************************************
2073*     Grep a file or recurse into a directory    *
2074*************************************************/
2075
2076/* Given a path name, if it's a directory, scan all the files if we are
2077recursing; if it's a file, grep it.
2078
2079Arguments:
2080  pathname          the path to investigate
2081  dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2082  only_one_at_top   TRUE if the path is the only one at toplevel
2083
2084Returns:  -1 the file/directory was skipped
2085           0 if there was at least one match
2086           1 if there were no matches
2087           2 there was some kind of error
2088
2089However, file opening failures are suppressed if "silent" is set.
2090*/
2091
2092static int
2093grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2094{
2095int rc = 1;
2096int frtype;
2097void *handle;
2098char *lastcomp;
2099FILE *in = NULL;           /* Ensure initialized */
2100
2101#ifdef SUPPORT_LIBZ
2102gzFile ingz = NULL;
2103#endif
2104
2105#ifdef SUPPORT_LIBBZ2
2106BZFILE *inbz2 = NULL;
2107#endif
2108
2109#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2110int pathlen;
2111#endif
2112
2113#if defined NATIVE_ZOS
2114int zos_type;
2115FILE *zos_test_file;
2116#endif
2117
2118/* If the file name is "-" we scan stdin */
2119
2120if (strcmp(pathname, "-") == 0)
2121  {
2122  return pcregrep(stdin, FR_PLAIN, stdin_name,
2123    (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2124      stdin_name : NULL);
2125  }
2126
2127/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2128directories, whereas --include and --exclude apply to everything else. The test
2129is against the final component of the path. */
2130
2131lastcomp = strrchr(pathname, FILESEP);
2132lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2133
2134/* If the file is a directory, skip if not recursing or if explicitly excluded.
2135Otherwise, scan the directory and recurse for each path within it. The scanning
2136code is localized so it can be made system-specific. */
2137
2138
2139/* For z/OS, determine the file type. */
2140
2141#if defined NATIVE_ZOS
2142zos_test_file =  fopen(pathname,"rb");
2143
2144if (zos_test_file == NULL)
2145   {
2146   if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2147     pathname, strerror(errno));
2148   return -1;
2149   }
2150zos_type = identifyzosfiletype (zos_test_file);
2151fclose (zos_test_file);
2152
2153/* Handle a PDS in separate code */
2154
2155if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2156   {
2157   return travelonpdsdir (pathname, only_one_at_top);
2158   }
2159
2160/* Deal with regular files in the normal way below. These types are:
2161   zos_type == __ZOS_PDS_MEMBER
2162   zos_type == __ZOS_PS
2163   zos_type == __ZOS_VSAM_KSDS
2164   zos_type == __ZOS_VSAM_ESDS
2165   zos_type == __ZOS_VSAM_RRDS
2166*/
2167
2168/* Handle a z/OS directory using common code. */
2169
2170else if (zos_type == __ZOS_HFS)
2171 {
2172#endif  /* NATIVE_ZOS */
2173
2174
2175/* Handle directories: common code for all OS */
2176
2177if (isdirectory(pathname))
2178  {
2179  if (dee_action == dee_SKIP ||
2180      !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2181    return -1;
2182
2183  if (dee_action == dee_RECURSE)
2184    {
2185    char buffer[1024];
2186    char *nextfile;
2187    directory_type *dir = opendirectory(pathname);
2188
2189    if (dir == NULL)
2190      {
2191      if (!silent)
2192        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2193          strerror(errno));
2194      return 2;
2195      }
2196
2197    while ((nextfile = readdirectory(dir)) != NULL)
2198      {
2199      int frc;
2200      sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2201      frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2202      if (frc > 1) rc = frc;
2203       else if (frc == 0 && rc == 1) rc = 0;
2204      }
2205
2206    closedirectory(dir);
2207    return rc;
2208    }
2209  }
2210
2211#if defined NATIVE_ZOS
2212 }
2213#endif
2214
2215/* If the file is not a directory, check for a regular file, and if it is not,
2216skip it if that's been requested. Otherwise, check for an explicit inclusion or
2217exclusion. */
2218
2219else if (
2220#if defined NATIVE_ZOS
2221        (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2222#else  /* all other OS */
2223        (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2224#endif
2225        !test_incexc(lastcomp, include_patterns, exclude_patterns))
2226  return -1;  /* File skipped */
2227
2228/* Control reaches here if we have a regular file, or if we have a directory
2229and recursion or skipping was not requested, or if we have anything else and
2230skipping was not requested. The scan proceeds. If this is the first and only
2231argument at top level, we don't show the file name, unless we are only showing
2232the file name, or the filename was forced (-H). */
2233
2234#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2235pathlen = (int)(strlen(pathname));
2236#endif
2237
2238/* Open using zlib if it is supported and the file name ends with .gz. */
2239
2240#ifdef SUPPORT_LIBZ
2241if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2242  {
2243  ingz = gzopen(pathname, "rb");
2244  if (ingz == NULL)
2245    {
2246    if (!silent)
2247      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2248        strerror(errno));
2249    return 2;
2250    }
2251  handle = (void *)ingz;
2252  frtype = FR_LIBZ;
2253  }
2254else
2255#endif
2256
2257/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2258
2259#ifdef SUPPORT_LIBBZ2
2260if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2261  {
2262  inbz2 = BZ2_bzopen(pathname, "rb");
2263  handle = (void *)inbz2;
2264  frtype = FR_LIBBZ2;
2265  }
2266else
2267#endif
2268
2269/* Otherwise use plain fopen(). The label is so that we can come back here if
2270an attempt to read a .bz2 file indicates that it really is a plain file. */
2271
2272#ifdef SUPPORT_LIBBZ2
2273PLAIN_FILE:
2274#endif
2275  {
2276  in = fopen(pathname, "rb");
2277  handle = (void *)in;
2278  frtype = FR_PLAIN;
2279  }
2280
2281/* All the opening methods return errno when they fail. */
2282
2283if (handle == NULL)
2284  {
2285  if (!silent)
2286    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2287      strerror(errno));
2288  return 2;
2289  }
2290
2291/* Now grep the file */
2292
2293rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2294  (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2295
2296/* Close in an appropriate manner. */
2297
2298#ifdef SUPPORT_LIBZ
2299if (frtype == FR_LIBZ)
2300  gzclose(ingz);
2301else
2302#endif
2303
2304/* If it is a .bz2 file and the result is 3, it means that the first attempt to
2305read failed. If the error indicates that the file isn't in fact bzipped, try
2306again as a normal file. */
2307
2308#ifdef SUPPORT_LIBBZ2
2309if (frtype == FR_LIBBZ2)
2310  {
2311  if (rc == 3)
2312    {
2313    int errnum;
2314    const char *err = BZ2_bzerror(inbz2, &errnum);
2315    if (errnum == BZ_DATA_ERROR_MAGIC)
2316      {
2317      BZ2_bzclose(inbz2);
2318      goto PLAIN_FILE;
2319      }
2320    else if (!silent)
2321      fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2322        pathname, err);
2323    rc = 2;    /* The normal "something went wrong" code */
2324    }
2325  BZ2_bzclose(inbz2);
2326  }
2327else
2328#endif
2329
2330/* Normal file close */
2331
2332fclose(in);
2333
2334/* Pass back the yield from pcregrep(). */
2335
2336return rc;
2337}
2338
2339
2340
2341/*************************************************
2342*    Handle a single-letter, no data option      *
2343*************************************************/
2344
2345static int
2346handle_option(int letter, int options)
2347{
2348switch(letter)
2349  {
2350  case N_FOFFSETS: file_offsets = TRUE; break;
2351  case N_HELP: help(); pcregrep_exit(0);
2352  case N_LBUFFER: line_buffered = TRUE; break;
2353  case N_LOFFSETS: line_offsets = number = TRUE; break;
2354  case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2355  case 'a': binary_files = BIN_TEXT; break;
2356  case 'c': count_only = TRUE; break;
2357  case 'F': process_options |= PO_FIXED_STRINGS; break;
2358  case 'H': filenames = FN_FORCE; break;
2359  case 'I': binary_files = BIN_NOMATCH; break;
2360  case 'h': filenames = FN_NONE; break;
2361  case 'i': options |= PCRE_CASELESS; break;
2362  case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2363  case 'L': filenames = FN_NOMATCH_ONLY; break;
2364  case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2365  case 'n': number = TRUE; break;
2366
2367  case 'o':
2368  only_matching_last = add_number(0, only_matching_last);
2369  if (only_matching == NULL) only_matching = only_matching_last;
2370  break;
2371
2372  case 'q': quiet = TRUE; break;
2373  case 'r': dee_action = dee_RECURSE; break;
2374  case 's': silent = TRUE; break;
2375  case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2376  case 'v': invert = TRUE; break;
2377  case 'w': process_options |= PO_WORD_MATCH; break;
2378  case 'x': process_options |= PO_LINE_MATCH; break;
2379
2380  case 'V':
2381  fprintf(stdout, "pcregrep version %s\n", pcre_version());
2382  pcregrep_exit(0);
2383  break;
2384
2385  default:
2386  fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2387  pcregrep_exit(usage(2));
2388  }
2389
2390return options;
2391}
2392
2393
2394
2395
2396/*************************************************
2397*          Construct printed ordinal             *
2398*************************************************/
2399
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as do negative numbers.

Argument:   the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra five cover a minus sign, the two-letter ending, and the
terminating zero with a byte to spare. */

static char buffer[3 * sizeof(int) + 5];
const char *ending = "th";
int lasttwo = n % 100;

/* The teens are irregular: 11th, 12th, 13th, not 11st, 12nd, 13rd. For a
negative number both remainders are negative, so "th" is kept. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
2418
2419
2420
2421/*************************************************
2422*          Compile a single pattern              *
2423*************************************************/
2424
2425/* Do nothing if the pattern has already been compiled. This is the case for
2426include/exclude patterns read from a file.
2427
2428When the -F option has been used, each "pattern" may be a list of strings,
2429separated by line breaks. They will be matched literally. We split such a
2430string and compile the first substring, inserting an additional block into the
2431pattern chain.
2432
2433Arguments:
2434  p              points to the pattern block
2435  options        the PCRE options
2436  popts          the processing options
2437  fromfile       TRUE if the pattern was read from a file
2438  fromtext       file name or identifying text (e.g. "include")
2439  count          0 if this is the only command line pattern, or
2440                 number of the command line pattern, or
2441                 linenumber for a pattern from a file
2442
2443Returns:         TRUE on success, FALSE after an error
2444*/
2445
2446static BOOL
2447compile_pattern(patstr *p, int options, int popts, int fromfile,
2448  const char *fromtext, int count)
2449{
2450char buffer[PATBUFSIZE];
2451const char *error;
2452char *ps = p->string;
2453int patlen = strlen(ps);
2454int errptr;
2455
2456if (p->compiled != NULL) return TRUE;
2457
2458if ((popts & PO_FIXED_STRINGS) != 0)
2459  {
2460  int ellength;
2461  char *eop = ps + patlen;
2462  char *pe = end_of_line(ps, eop, &ellength);
2463
2464  if (ellength != 0)
2465    {
2466    if (add_pattern(pe, p) == NULL) return FALSE;
2467    patlen = (int)(pe - ps - ellength);
2468    }
2469  }
2470
2471sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2472p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2473if (p->compiled != NULL) return TRUE;
2474
2475/* Handle compile errors */
2476
2477errptr -= (int)strlen(prefix[popts]);
2478if (errptr > patlen) errptr = patlen;
2479
2480if (fromfile)
2481  {
2482  fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2483    "at offset %d: %s\n", count, fromtext, errptr, error);
2484  }
2485else
2486  {
2487  if (count == 0)
2488    fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2489      fromtext, errptr, error);
2490  else
2491    fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2492      ordin(count), fromtext, errptr, error);
2493  }
2494
2495return FALSE;
2496}
2497
2498
2499
2500/*************************************************
2501*     Read and compile a file of patterns        *
2502*************************************************/
2503
2504/* This is used for --filelist, --include-from, and --exclude-from.
2505
2506Arguments:
2507  name         the name of the file; "-" is stdin
2508  patptr       pointer to the pattern chain anchor
2509  patlastptr   pointer to the last pattern pointer
2510  popts        the process options to pass to pattern_compile()
2511
2512Returns:       TRUE if all went well
2513*/
2514
2515static BOOL
2516read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2517{
2518int linenumber = 0;
2519FILE *f;
2520char *filename;
2521char buffer[PATBUFSIZE];
2522
2523if (strcmp(name, "-") == 0)
2524  {
2525  f = stdin;
2526  filename = stdin_name;
2527  }
2528else
2529  {
2530  f = fopen(name, "r");
2531  if (f == NULL)
2532    {
2533    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2534    return FALSE;
2535    }
2536  filename = name;
2537  }
2538
2539while (fgets(buffer, PATBUFSIZE, f) != NULL)
2540  {
2541  char *s = buffer + (int)strlen(buffer);
2542  while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2543  *s = 0;
2544  linenumber++;
2545  if (buffer[0] == 0) continue;   /* Skip blank lines */
2546
2547  /* Note: this call to add_pattern() puts a pointer to the local variable
2548  "buffer" into the pattern chain. However, that pointer is used only when
2549  compiling the pattern, which happens immediately below, so we flatten it
2550  afterwards, as a precaution against any later code trying to use it. */
2551
2552  *patlastptr = add_pattern(buffer, *patlastptr);
2553  if (*patlastptr == NULL)
2554    {
2555    if (f != stdin) fclose(f);
2556    return FALSE;
2557    }
2558  if (*patptr == NULL) *patptr = *patlastptr;
2559
2560  /* This loop is needed because compiling a "pattern" when -F is set may add
2561  on additional literal patterns if the original contains a newline. In the
2562  common case, it never will, because fgets() stops at a newline. However,
2563  the -N option can be used to give pcregrep a different newline setting. */
2564
2565  for(;;)
2566    {
2567    if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2568        linenumber))
2569      {
2570      if (f != stdin) fclose(f);
2571      return FALSE;
2572      }
2573    (*patlastptr)->string = NULL;            /* Insurance */
2574    if ((*patlastptr)->next == NULL) break;
2575    *patlastptr = (*patlastptr)->next;
2576    }
2577  }
2578
2579if (f != stdin) fclose(f);
2580return TRUE;
2581}
2582
2583
2584
2585/*************************************************
2586*                Main program                    *
2587*************************************************/
2588
2589/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2590
2591int
2592main(int argc, char **argv)
2593{
2594int i, j;
2595int rc = 1;
2596BOOL only_one_at_top;
2597patstr *cp;
2598fnstr *fn;
2599const char *locale_from = "--locale";
2600const char *error;
2601
2602#ifdef SUPPORT_PCREGREP_JIT
2603pcre_jit_stack *jit_stack = NULL;
2604#endif
2605
2606/* Set the default line ending value from the default in the PCRE library;
2607"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2608Note that the return values from pcre_config(), though derived from the ASCII
2609codes, are the same in EBCDIC environments, so we must use the actual values
rather than escapes such as '\r'. */
2611
2612(void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2613switch(i)
2614  {
2615  default:               newline = (char *)"lf"; break;
2616  case 13:               newline = (char *)"cr"; break;
2617  case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2618  case -1:               newline = (char *)"any"; break;
2619  case -2:               newline = (char *)"anycrlf"; break;
2620  }
2621
2622/* Process the options */
2623
2624for (i = 1; i < argc; i++)
2625  {
2626  option_item *op = NULL;
2627  char *option_data = (char *)"";    /* default to keep compiler happy */
2628  BOOL longop;
2629  BOOL longopwasequals = FALSE;
2630
2631  if (argv[i][0] != '-') break;
2632
2633  /* If we hit an argument that is just "-", it may be a reference to STDIN,
2634  but only if we have previously had -e or -f to define the patterns. */
2635
2636  if (argv[i][1] == 0)
2637    {
2638    if (pattern_files != NULL || patterns != NULL) break;
2639      else pcregrep_exit(usage(2));
2640    }
2641
2642  /* Handle a long name option, or -- to terminate the options */
2643
2644  if (argv[i][1] == '-')
2645    {
2646    char *arg = argv[i] + 2;
2647    char *argequals = strchr(arg, '=');
2648
2649    if (*arg == 0)    /* -- terminates options */
2650      {
2651      i++;
2652      break;                /* out of the options-handling loop */
2653      }
2654
2655    longop = TRUE;
2656
2657    /* Some long options have data that follows after =, for example file=name.
2658    Some options have variations in the long name spelling: specifically, we
2659    allow "regexp" because GNU grep allows it, though I personally go along
2660    with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2661    These options are entered in the table as "regex(p)". Options can be in
2662    both these categories. */
2663
2664    for (op = optionlist; op->one_char != 0; op++)
2665      {
2666      char *opbra = strchr(op->long_name, '(');
2667      char *equals = strchr(op->long_name, '=');
2668
2669      /* Handle options with only one spelling of the name */
2670
2671      if (opbra == NULL)     /* Does not contain '(' */
2672        {
2673        if (equals == NULL)  /* Not thing=data case */
2674          {
2675          if (strcmp(arg, op->long_name) == 0) break;
2676          }
2677        else                 /* Special case xxx=data */
2678          {
2679          int oplen = (int)(equals - op->long_name);
2680          int arglen = (argequals == NULL)?
2681            (int)strlen(arg) : (int)(argequals - arg);
2682          if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2683            {
2684            option_data = arg + arglen;
2685            if (*option_data == '=')
2686              {
2687              option_data++;
2688              longopwasequals = TRUE;
2689              }
2690            break;
2691            }
2692          }
2693        }
2694
2695      /* Handle options with an alternate spelling of the name */
2696
2697      else
2698        {
2699        char buff1[24];
2700        char buff2[24];
2701
2702        int baselen = (int)(opbra - op->long_name);
2703        int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2704        int arglen = (argequals == NULL || equals == NULL)?
2705          (int)strlen(arg) : (int)(argequals - arg);
2706
2707        sprintf(buff1, "%.*s", baselen, op->long_name);
2708        sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2709
2710        if (strncmp(arg, buff1, arglen) == 0 ||
2711           strncmp(arg, buff2, arglen) == 0)
2712          {
2713          if (equals != NULL && argequals != NULL)
2714            {
2715            option_data = argequals;
2716            if (*option_data == '=')
2717              {
2718              option_data++;
2719              longopwasequals = TRUE;
2720              }
2721            }
2722          break;
2723          }
2724        }
2725      }
2726
2727    if (op->one_char == 0)
2728      {
2729      fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2730      pcregrep_exit(usage(2));
2731      }
2732    }
2733
2734  /* Jeffrey Friedl's debugging harness uses these additional options which
2735  are not in the right form for putting in the option table because they use
2736  only one hyphen, yet are more than one character long. By putting them
2737  separately here, they will not get displayed as part of the help() output,
2738  but I don't think Jeffrey will care about that. */
2739
2740#ifdef JFRIEDL_DEBUG
2741  else if (strcmp(argv[i], "-pre") == 0) {
2742          jfriedl_prefix = argv[++i];
2743          continue;
2744  } else if (strcmp(argv[i], "-post") == 0) {
2745          jfriedl_postfix = argv[++i];
2746          continue;
2747  } else if (strcmp(argv[i], "-XT") == 0) {
2748          sscanf(argv[++i], "%d", &jfriedl_XT);
2749          continue;
2750  } else if (strcmp(argv[i], "-XR") == 0) {
2751          sscanf(argv[++i], "%d", &jfriedl_XR);
2752          continue;
2753  }
2754#endif
2755
2756
2757  /* One-char options; many that have no data may be in a single argument; we
2758  continue till we hit the last one or one that needs data. */
2759
2760  else
2761    {
2762    char *s = argv[i] + 1;
2763    longop = FALSE;
2764
2765    while (*s != 0)
2766      {
2767      for (op = optionlist; op->one_char != 0; op++)
2768        {
2769        if (*s == op->one_char) break;
2770        }
2771      if (op->one_char == 0)
2772        {
2773        fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2774          *s, argv[i]);
2775        pcregrep_exit(usage(2));
2776        }
2777
2778      option_data = s+1;
2779
2780      /* Break out if this is the last character in the string; it's handled
2781      below like a single multi-char option. */
2782
2783      if (*option_data == 0) break;
2784
2785      /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2786      are used for ones that either have a numerical number or defaults, i.e.
2787      the data is optional. If a digit follows, there is data; if not, carry on
2788      with other single-character options in the same string. */
2789
2790      if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2791        {
2792        if (isdigit((unsigned char)s[1])) break;
2793        }
2794      else   /* Check for an option with data */
2795        {
2796        if (op->type != OP_NODATA) break;
2797        }
2798
2799      /* Handle a single-character option with no data, then loop for the
2800      next character in the string. */
2801
2802      pcre_options = handle_option(*s++, pcre_options);
2803      }
2804    }
2805
2806  /* At this point we should have op pointing to a matched option. If the type
  is OP_NODATA, it means that there is no data, and the option might set
2808  something in the PCRE options. */
2809
2810  if (op->type == OP_NODATA)
2811    {
2812    pcre_options = handle_option(op->one_char, pcre_options);
2813    continue;
2814    }
2815
2816  /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2817  either has a value or defaults to something. It cannot have data in a
2818  separate item. At the moment, the only such options are "colo(u)r",
2819  "only-matching", and Jeffrey Friedl's special -S debugging option. */
2820
2821  if (*option_data == 0 &&
2822      (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2823       op->type == OP_OP_NUMBERS))
2824    {
2825    switch (op->one_char)
2826      {
2827      case N_COLOUR:
2828      colour_option = (char *)"auto";
2829      break;
2830
2831      case 'o':
2832      only_matching_last = add_number(0, only_matching_last);
2833      if (only_matching == NULL) only_matching = only_matching_last;
2834      break;
2835
2836#ifdef JFRIEDL_DEBUG
2837      case 'S':
2838      S_arg = 0;
2839      break;
2840#endif
2841      }
2842    continue;
2843    }
2844
2845  /* Otherwise, find the data string for the option. */
2846
2847  if (*option_data == 0)
2848    {
2849    if (i >= argc - 1 || longopwasequals)
2850      {
2851      fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2852      pcregrep_exit(usage(2));
2853      }
2854    option_data = argv[++i];
2855    }
2856
2857  /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2858  added to a chain of numbers. */
2859
2860  if (op->type == OP_OP_NUMBERS)
2861    {
2862    unsigned long int n = decode_number(option_data, op, longop);
2863    omdatastr *omd = (omdatastr *)op->dataptr;
2864    *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2865    if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2866    }
2867
2868  /* If the option type is OP_PATLIST, it's the -e option, or one of the
2869  include/exclude options, which can be called multiple times to create lists
2870  of patterns. */
2871
2872  else if (op->type == OP_PATLIST)
2873    {
2874    patdatastr *pd = (patdatastr *)op->dataptr;
2875    *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2876    if (*(pd->lastptr) == NULL) goto EXIT2;
2877    if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2878    }
2879
2880  /* If the option type is OP_FILELIST, it's one of the options that names a
2881  file. */
2882
2883  else if (op->type == OP_FILELIST)
2884    {
2885    fndatastr *fd = (fndatastr *)op->dataptr;
2886    fn = (fnstr *)malloc(sizeof(fnstr));
2887    if (fn == NULL)
2888      {
2889      fprintf(stderr, "pcregrep: malloc failed\n");
2890      goto EXIT2;
2891      }
2892    fn->next = NULL;
2893    fn->name = option_data;
2894    if (*(fd->anchor) == NULL)
2895      *(fd->anchor) = fn;
2896    else
2897      (*(fd->lastptr))->next = fn;
2898    *(fd->lastptr) = fn;
2899    }
2900
2901  /* Handle OP_BINARY_FILES */
2902
2903  else if (op->type == OP_BINFILES)
2904    {
2905    if (strcmp(option_data, "binary") == 0)
2906      binary_files = BIN_BINARY;
2907    else if (strcmp(option_data, "without-match") == 0)
2908      binary_files = BIN_NOMATCH;
2909    else if (strcmp(option_data, "text") == 0)
2910      binary_files = BIN_TEXT;
2911    else
2912      {
2913      fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2914        option_data);
2915      pcregrep_exit(usage(2));
2916      }
2917    }
2918
2919  /* Otherwise, deal with a single string or numeric data value. */
2920
2921  else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2922           op->type != OP_OP_NUMBER)
2923    {
2924    *((char **)op->dataptr) = option_data;
2925    }
2926  else
2927    {
2928    unsigned long int n = decode_number(option_data, op, longop);
2929    if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2930      else *((int *)op->dataptr) = n;
2931    }
2932  }
2933
2934/* Options have been decoded. If -C was used, its value is used as a default
2935for -A and -B. */
2936
2937if (both_context > 0)
2938  {
2939  if (after_context == 0) after_context = both_context;
2940  if (before_context == 0) before_context = both_context;
2941  }
2942
2943/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2944However, all three set show_only_matching because they display, each in their
2945own way, only the data that has matched. */
2946
2947if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2948    (file_offsets && line_offsets))
2949  {
2950  fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2951    "and/or --line-offsets\n");
2952  pcregrep_exit(usage(2));
2953  }
2954
2955if (only_matching != NULL || file_offsets || line_offsets)
2956  show_only_matching = TRUE;
2957
2958/* If a locale has not been provided as an option, see if the LC_ALL or
2959LC_CTYPE environment variable is set (checked in that order), and if so, use it. */
2960
2961if (locale == NULL)
2962  {
2963  locale = getenv("LC_ALL");
2964  locale_from = "LCC_ALL";
2965  }
2966
2967if (locale == NULL)
2968  {
2969  locale = getenv("LC_CTYPE");
2970  locale_from = "LC_CTYPE";
2971  }
2972
2973/* If a locale is set, use it to generate the tables that PCRE needs. Otherwise,
2974pcretables==NULL, which causes the use of default tables. */
2975
2976if (locale != NULL)
2977  {
2978  if (setlocale(LC_CTYPE, locale) == NULL)
2979    {
2980    fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2981      locale, locale_from);
2982    goto EXIT2;
2983    }
2984  pcretables = pcre_maketables();
2985  }
2986
2987/* Sort out colouring */
2988
2989if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2990  {
2991  if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2992  else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2993  else
2994    {
2995    fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2996      colour_option);
2997    goto EXIT2;
2998    }
2999  if (do_colour)
3000    {
3001    char *cs = getenv("PCREGREP_COLOUR");
3002    if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3003    if (cs != NULL) colour_string = cs;
3004    }
3005  }
3006
3007/* Interpret the newline type; the default settings are Unix-like. */
3008
3009if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3010  {
3011  pcre_options |= PCRE_NEWLINE_CR;
3012  endlinetype = EL_CR;
3013  }
3014else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3015  {
3016  pcre_options |= PCRE_NEWLINE_LF;
3017  endlinetype = EL_LF;
3018  }
3019else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3020  {
3021  pcre_options |= PCRE_NEWLINE_CRLF;
3022  endlinetype = EL_CRLF;
3023  }
3024else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3025  {
3026  pcre_options |= PCRE_NEWLINE_ANY;
3027  endlinetype = EL_ANY;
3028  }
3029else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3030  {
3031  pcre_options |= PCRE_NEWLINE_ANYCRLF;
3032  endlinetype = EL_ANYCRLF;
3033  }
3034else
3035  {
3036  fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3037  goto EXIT2;
3038  }
3039
3040/* Interpret the text values for -d and -D */
3041
3042if (dee_option != NULL)
3043  {
3044  if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3045  else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3046  else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3047  else
3048    {
3049    fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3050    goto EXIT2;
3051    }
3052  }
3053
3054if (DEE_option != NULL)
3055  {
3056  if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3057  else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3058  else
3059    {
3060    fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3061    goto EXIT2;
3062    }
3063  }
3064
3065/* Check the values for Jeffrey Friedl's debugging options. */
3066
3067#ifdef JFRIEDL_DEBUG
3068if (S_arg > 9)
3069  {
3070  fprintf(stderr, "pcregrep: bad value for -S option\n");
3071  return 2;
3072  }
3073if (jfriedl_XT != 0 || jfriedl_XR != 0)
3074  {
3075  if (jfriedl_XT == 0) jfriedl_XT = 1;
3076  if (jfriedl_XR == 0) jfriedl_XR = 1;
3077  }
3078#endif
3079
3080/* Get memory for the main buffer. */
3081
3082bufsize = 3*bufthird;
3083main_buffer = (char *)malloc(bufsize);
3084
3085if (main_buffer == NULL)
3086  {
3087  fprintf(stderr, "pcregrep: malloc failed\n");
3088  goto EXIT2;
3089  }
3090
3091/* If no patterns were provided by -e, and there are no files provided by -f,
3092the first argument is the one and only pattern, and it must exist. */
3093
3094if (patterns == NULL && pattern_files == NULL)
3095  {
3096  if (i >= argc) return usage(2);
3097  patterns = patterns_last = add_pattern(argv[i++], NULL);
3098  if (patterns == NULL) goto EXIT2;
3099  }
3100
3101/* Compile the patterns that were provided on the command line, either by
3102multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3103after all the command-line options are read so that we know which PCRE options
3104to use. When -F is used, compile_pattern() may add another block into the
3105chain, so we must not access the next pointer till after the compile. */
3106
3107for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3108  {
3109  if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3110       (j == 1 && patterns->next == NULL)? 0 : j))
3111    goto EXIT2;
3112  }
3113
3114/* Read and compile the regular expressions that are provided in files. */
3115
3116for (fn = pattern_files; fn != NULL; fn = fn->next)
3117  {
3118  if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3119    goto EXIT2;
3120  }
3121
3122/* Study the regular expressions, as we will be running them many times. If an
3123extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3124returned, even if studying produces no data. */
3125
3126if (match_limit > 0 || match_limit_recursion > 0)
3127  study_options |= PCRE_STUDY_EXTRA_NEEDED;
3128
3129/* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3130
3131#ifdef SUPPORT_PCREGREP_JIT
3132if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3133  jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3134#endif
3135
3136for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3137  {
3138  cp->hint = pcre_study(cp->compiled, study_options, &error);
3139  if (error != NULL)
3140    {
3141    char s[16];
3142    if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3143    fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3144    goto EXIT2;
3145    }
3146#ifdef SUPPORT_PCREGREP_JIT
3147  if (jit_stack != NULL && cp->hint != NULL)
3148    pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3149#endif
3150  }
3151
3152/* If --match-limit or --recursion-limit was set, put the value(s) into the
3153pcre_extra block for each pattern. There will always be an extra block because
3154of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3155
3156for (cp = patterns; cp != NULL; cp = cp->next)
3157  {
3158  if (match_limit > 0)
3159    {
3160    cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3161    cp->hint->match_limit = match_limit;
3162    }
3163
3164  if (match_limit_recursion > 0)
3165    {
3166    cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3167    cp->hint->match_limit_recursion = match_limit_recursion;
3168    }
3169  }
3170
3171/* If there are include or exclude patterns read from the command line, compile
3172them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
31730. */
3174
3175for (j = 0; j < 4; j++)
3176  {
3177  int k;
3178  for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3179    {
3180    if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3181         (k == 1 && cp->next == NULL)? 0 : k))
3182      goto EXIT2;
3183    }
3184  }
3185
3186/* Read and compile include/exclude patterns from files. */
3187
3188for (fn = include_from; fn != NULL; fn = fn->next)
3189  {
3190  if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3191    goto EXIT2;
3192  }
3193
3194for (fn = exclude_from; fn != NULL; fn = fn->next)
3195  {
3196  if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3197    goto EXIT2;
3198  }
3199
3200/* If there are no files that contain lists of files to search, and there are
3201no file arguments, search stdin, and then exit. */
3202
3203if (file_lists == NULL && i >= argc)
3204  {
3205  rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3206    (filenames > FN_DEFAULT)? stdin_name : NULL);
3207  goto EXIT;
3208  }
3209
3210/* If any files that contain lists of files to search have been specified,
3211read them line by line and search the given files. */
3212
3213for (fn = file_lists; fn != NULL; fn = fn->next)
3214  {
3215  char buffer[PATBUFSIZE];
3216  FILE *fl;
3217  if (strcmp(fn->name, "-") == 0) fl = stdin; else
3218    {
3219    fl = fopen(fn->name, "rb");
3220    if (fl == NULL)
3221      {
3222      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3223        strerror(errno));
3224      goto EXIT2;
3225      }
3226    }
3227  while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3228    {
3229    int frc;
3230    char *end = buffer + (int)strlen(buffer);
3231    while (end > buffer && isspace(end[-1])) end--;
3232    *end = 0;
3233    if (*buffer != 0)
3234      {
3235      frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3236      if (frc > 1) rc = frc;
3237        else if (frc == 0 && rc == 1) rc = 0;
3238      }
3239    }
3240  if (fl != stdin) fclose(fl);
3241  }
3242
3243/* After handling file-list, work through remaining arguments. Pass in the fact
3244that there is only one argument at top level - this suppresses the file name if
3245the argument is not a directory and filenames are not otherwise forced. */
3246
3247only_one_at_top = i == argc - 1 && file_lists == NULL;
3248
3249for (; i < argc; i++)
3250  {
3251  int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3252    only_one_at_top);
3253  if (frc > 1) rc = frc;
3254    else if (frc == 0 && rc == 1) rc = 0;
3255  }
3256
3257EXIT:
3258#ifdef SUPPORT_PCREGREP_JIT
3259if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3260#endif
3261
3262free(main_buffer);
3263free((void *)pcretables);
3264
3265free_pattern_chain(patterns);
3266free_pattern_chain(include_patterns);
3267free_pattern_chain(include_dir_patterns);
3268free_pattern_chain(exclude_patterns);
3269free_pattern_chain(exclude_dir_patterns);
3270
3271free_file_chain(exclude_from);
3272free_file_chain(include_from);
3273free_file_chain(pattern_files);
3274free_file_chain(file_lists);
3275
3276while (only_matching != NULL)
3277  {
3278  omstr *this = only_matching;
3279  only_matching = this->next;
3280  free(this);
3281  }
3282
3283pcregrep_exit(rc);
3284
3285EXIT2:
3286rc = 2;
3287goto EXIT;
3288}
3289
3290/* End of pcregrep */
3291