1/************************************************* 2* pcregrep program * 3*************************************************/ 4 5/* This is a grep program that uses the PCRE regular expression library to do 6its pattern matching. On Unix-like, Windows, and native z/OS systems it can 7recurse into directories, and in z/OS it can handle PDS files. 8 9Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an 10additional header is required. That header is not included in the main PCRE 11distribution because other apparatus is needed to compile pcregrep for z/OS. 12The header can be found in the special z/OS distribution, which is available 13from www.zaconsultants.net or from www.cbttape.org. 14 15 Copyright (c) 1997-2014 University of Cambridge 16 17----------------------------------------------------------------------------- 18Redistribution and use in source and binary forms, with or without 19modification, are permitted provided that the following conditions are met: 20 21 * Redistributions of source code must retain the above copyright notice, 22 this list of conditions and the following disclaimer. 23 24 * Redistributions in binary form must reproduce the above copyright 25 notice, this list of conditions and the following disclaimer in the 26 documentation and/or other materials provided with the distribution. 27 28 * Neither the name of the University of Cambridge nor the names of its 29 contributors may be used to endorse or promote products derived from 30 this software without specific prior written permission. 31 32THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 33AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 34IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 36LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 37CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 38SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 39INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 40CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 42POSSIBILITY OF SUCH DAMAGE. 43----------------------------------------------------------------------------- 44*/ 45 46#ifdef HAVE_CONFIG_H 47#include "config.h" 48#endif 49 50#include <ctype.h> 51#include <locale.h> 52#include <stdio.h> 53#include <string.h> 54#include <stdlib.h> 55#include <errno.h> 56 57#include <sys/types.h> 58#include <sys/stat.h> 59 60#ifdef HAVE_UNISTD_H 61#include <unistd.h> 62#endif 63 64#ifdef SUPPORT_LIBZ 65#include <zlib.h> 66#endif 67 68#ifdef SUPPORT_LIBBZ2 69#include <bzlib.h> 70#endif 71 72#include "pcre.h" 73 74#define FALSE 0 75#define TRUE 1 76 77typedef int BOOL; 78 79#define OFFSET_SIZE 99 80 81#if BUFSIZ > 8192 82#define MAXPATLEN BUFSIZ 83#else 84#define MAXPATLEN 8192 85#endif 86 87#define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */ 88 89/* Values for the "filenames" variable, which specifies options for file name 90output. The order is important; it is assumed that a file name is wanted for 91all values greater than FN_DEFAULT. 
*/ 92 93enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; 94 95/* File reading styles */ 96 97enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 }; 98 99/* Actions for the -d and -D options */ 100 101enum { dee_READ, dee_SKIP, dee_RECURSE }; 102enum { DEE_READ, DEE_SKIP }; 103 104/* Actions for special processing options (flag bits) */ 105 106#define PO_WORD_MATCH 0x0001 107#define PO_LINE_MATCH 0x0002 108#define PO_FIXED_STRINGS 0x0004 109 110/* Line ending types */ 111 112enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF }; 113 114/* Binary file options */ 115 116enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT }; 117 118/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some 119environments), a warning is issued if the value of fwrite() is ignored. 120Unfortunately, casting to (void) does not suppress the warning. To get round 121this, we use a macro that compiles a fudge. Oddly, this does not also seem to 122apply to fprintf(). */ 123 124#define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} 125 126 127 128/************************************************* 129* Global variables * 130*************************************************/ 131 132/* Jeffrey Friedl has some debugging requirements that are not part of the 133regular code. 
*/ 134 135#ifdef JFRIEDL_DEBUG 136static int S_arg = -1; 137static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ 138static unsigned int jfriedl_XT = 0; /* replicate text this many times */ 139static const char *jfriedl_prefix = ""; 140static const char *jfriedl_postfix = ""; 141#endif 142 143static int endlinetype; 144 145static char *colour_string = (char *)"1;31"; 146static char *colour_option = NULL; 147static char *dee_option = NULL; 148static char *DEE_option = NULL; 149static char *locale = NULL; 150static char *main_buffer = NULL; 151static char *newline = NULL; 152static char *om_separator = (char *)""; 153static char *stdin_name = (char *)"(standard input)"; 154 155static const unsigned char *pcretables = NULL; 156 157static int after_context = 0; 158static int before_context = 0; 159static int binary_files = BIN_BINARY; 160static int both_context = 0; 161static int bufthird = PCREGREP_BUFSIZE; 162static int bufsize = 3*PCREGREP_BUFSIZE; 163 164#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H 165static int dee_action = dee_SKIP; 166#else 167static int dee_action = dee_READ; 168#endif 169 170static int DEE_action = DEE_READ; 171static int error_count = 0; 172static int filenames = FN_DEFAULT; 173static int pcre_options = 0; 174static int process_options = 0; 175 176#ifdef SUPPORT_PCREGREP_JIT 177static int study_options = PCRE_STUDY_JIT_COMPILE; 178#else 179static int study_options = 0; 180#endif 181 182static unsigned long int match_limit = 0; 183static unsigned long int match_limit_recursion = 0; 184 185static BOOL count_only = FALSE; 186static BOOL do_colour = FALSE; 187static BOOL file_offsets = FALSE; 188static BOOL hyphenpending = FALSE; 189static BOOL invert = FALSE; 190static BOOL line_buffered = FALSE; 191static BOOL line_offsets = FALSE; 192static BOOL multiline = FALSE; 193static BOOL number = FALSE; 194static BOOL omit_zero_count = FALSE; 195static BOOL resource_error = FALSE; 196static BOOL quiet = FALSE; 197static BOOL 
show_only_matching = FALSE; 198static BOOL silent = FALSE; 199static BOOL utf8 = FALSE; 200 201/* Structure for list of --only-matching capturing numbers. */ 202 203typedef struct omstr { 204 struct omstr *next; 205 int groupnum; 206} omstr; 207 208static omstr *only_matching = NULL; 209static omstr *only_matching_last = NULL; 210 211/* Structure for holding the two variables that describe a number chain. */ 212 213typedef struct omdatastr { 214 omstr **anchor; 215 omstr **lastptr; 216} omdatastr; 217 218static omdatastr only_matching_data = { &only_matching, &only_matching_last }; 219 220/* Structure for list of file names (for -f and --{in,ex}clude-from) */ 221 222typedef struct fnstr { 223 struct fnstr *next; 224 char *name; 225} fnstr; 226 227static fnstr *exclude_from = NULL; 228static fnstr *exclude_from_last = NULL; 229static fnstr *include_from = NULL; 230static fnstr *include_from_last = NULL; 231 232static fnstr *file_lists = NULL; 233static fnstr *file_lists_last = NULL; 234static fnstr *pattern_files = NULL; 235static fnstr *pattern_files_last = NULL; 236 237/* Structure for holding the two variables that describe a file name chain. */ 238 239typedef struct fndatastr { 240 fnstr **anchor; 241 fnstr **lastptr; 242} fndatastr; 243 244static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last }; 245static fndatastr include_from_data = { &include_from, &include_from_last }; 246static fndatastr file_lists_data = { &file_lists, &file_lists_last }; 247static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last }; 248 249/* Structure for pattern and its compiled form; used for matching patterns and 250also for include/exclude patterns. 
*/ 251 252typedef struct patstr { 253 struct patstr *next; 254 char *string; 255 pcre *compiled; 256 pcre_extra *hint; 257} patstr; 258 259static patstr *patterns = NULL; 260static patstr *patterns_last = NULL; 261static patstr *include_patterns = NULL; 262static patstr *include_patterns_last = NULL; 263static patstr *exclude_patterns = NULL; 264static patstr *exclude_patterns_last = NULL; 265static patstr *include_dir_patterns = NULL; 266static patstr *include_dir_patterns_last = NULL; 267static patstr *exclude_dir_patterns = NULL; 268static patstr *exclude_dir_patterns_last = NULL; 269 270/* Structure holding the two variables that describe a pattern chain. A pointer 271to such structures is used for each appropriate option. */ 272 273typedef struct patdatastr { 274 patstr **anchor; 275 patstr **lastptr; 276} patdatastr; 277 278static patdatastr match_patdata = { &patterns, &patterns_last }; 279static patdatastr include_patdata = { &include_patterns, &include_patterns_last }; 280static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last }; 281static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last }; 282static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last }; 283 284static patstr **incexlist[4] = { &include_patterns, &exclude_patterns, 285 &include_dir_patterns, &exclude_dir_patterns }; 286 287static const char *incexname[4] = { "--include", "--exclude", 288 "--include-dir", "--exclude-dir" }; 289 290/* Structure for options and list of them */ 291 292enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER, 293 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES }; 294 295typedef struct option_item { 296 int type; 297 int one_char; 298 void *dataptr; 299 const char *long_name; 300 const char *help_text; 301} option_item; 302 303/* Options without a single-letter equivalent get a negative value. This can be 304used to identify them. 
*/ 305 306#define N_COLOUR (-1) 307#define N_EXCLUDE (-2) 308#define N_EXCLUDE_DIR (-3) 309#define N_HELP (-4) 310#define N_INCLUDE (-5) 311#define N_INCLUDE_DIR (-6) 312#define N_LABEL (-7) 313#define N_LOCALE (-8) 314#define N_NULL (-9) 315#define N_LOFFSETS (-10) 316#define N_FOFFSETS (-11) 317#define N_LBUFFER (-12) 318#define N_M_LIMIT (-13) 319#define N_M_LIMIT_REC (-14) 320#define N_BUFSIZE (-15) 321#define N_NOJIT (-16) 322#define N_FILE_LIST (-17) 323#define N_BINARY_FILES (-18) 324#define N_EXCLUDE_FROM (-19) 325#define N_INCLUDE_FROM (-20) 326#define N_OM_SEPARATOR (-21) 327 328static option_item optionlist[] = { 329 { OP_NODATA, N_NULL, NULL, "", "terminate options" }, 330 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, 331 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, 332 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" }, 333 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, 334 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" }, 335 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" }, 336 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, 337 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, 338 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, 339 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, 340 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, 341 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, 342 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" }, 343 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated 
strings" }, 344 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" }, 345 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" }, 346 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, 347 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, 348 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, 349 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" }, 350 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, 351#ifdef SUPPORT_PCREGREP_JIT 352 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, 353#else 354 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" }, 355#endif 356 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, 357 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, 358 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, 359 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, 360 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, 361 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, 362 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" }, 363 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" }, 364 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, 365 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, 366 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, 367 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, 368 { 
OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, 369 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, 370 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, 371 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" }, 372 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" }, 373 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" }, 374 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" }, 375 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" }, 376 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" }, 377 378 /* These two were accidentally implemented with underscores instead of 379 hyphens in the option names. As this was not discovered for several releases, 380 the incorrect versions are left in the table for compatibility. However, the 381 --help function misses out any option that has an underscore in its name. 
*/ 382 383 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" }, 384 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" }, 385 386#ifdef JFRIEDL_DEBUG 387 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, 388#endif 389 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, 390 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, 391 { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, 392 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, 393 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, 394 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, 395 { OP_NODATA, 0, NULL, NULL, NULL } 396}; 397 398/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F 399options. These set the 1, 2, and 4 bits in process_options, respectively. Note 400that the combination of -w and -x has the same effect as -x on its own, so we 401can treat them as the same. Note that the MAXPATLEN macro assumes the longest 402prefix+suffix is 10 characters; if anything longer is added, it must be 403adjusted. */ 404 405static const char *prefix[] = { 406 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" }; 407 408static const char *suffix[] = { 409 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; 410 411/* UTF-8 tables - used only when the newline setting is "any". 
*/ 412 413const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; 414 415const char utf8_table4[] = { 416 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 417 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 418 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 419 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; 420 421 422 423/************************************************* 424* Exit from the program * 425*************************************************/ 426 427/* If there has been a resource error, give a suitable message. 428 429Argument: the return code 430Returns: does not return 431*/ 432 433static void 434pcregrep_exit(int rc) 435{ 436if (resource_error) 437 { 438 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit " 439 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT, 440 PCRE_ERROR_JIT_STACKLIMIT); 441 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n"); 442 } 443exit(rc); 444} 445 446 447/************************************************* 448* Add item to chain of patterns * 449*************************************************/ 450 451/* Used to add an item onto a chain, or just return an unconnected item if the 452"after" argument is NULL. 
453 454Arguments: 455 s pattern string to add 456 after if not NULL points to item to insert after 457 458Returns: new pattern block or NULL on error 459*/ 460 461static patstr * 462add_pattern(char *s, patstr *after) 463{ 464patstr *p = (patstr *)malloc(sizeof(patstr)); 465if (p == NULL) 466 { 467 fprintf(stderr, "pcregrep: malloc failed\n"); 468 pcregrep_exit(2); 469 } 470if (strlen(s) > MAXPATLEN) 471 { 472 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n", 473 MAXPATLEN); 474 free(p); 475 return NULL; 476 } 477p->next = NULL; 478p->string = s; 479p->compiled = NULL; 480p->hint = NULL; 481 482if (after != NULL) 483 { 484 p->next = after->next; 485 after->next = p; 486 } 487return p; 488} 489 490 491/************************************************* 492* Free chain of patterns * 493*************************************************/ 494 495/* Used for several chains of patterns. 496 497Argument: pointer to start of chain 498Returns: nothing 499*/ 500 501static void 502free_pattern_chain(patstr *pc) 503{ 504while (pc != NULL) 505 { 506 patstr *p = pc; 507 pc = p->next; 508 if (p->hint != NULL) pcre_free_study(p->hint); 509 if (p->compiled != NULL) pcre_free(p->compiled); 510 free(p); 511 } 512} 513 514 515/************************************************* 516* Free chain of file names * 517*************************************************/ 518 519/* 520Argument: pointer to start of chain 521Returns: nothing 522*/ 523 524static void 525free_file_chain(fnstr *fn) 526{ 527while (fn != NULL) 528 { 529 fnstr *f = fn; 530 fn = f->next; 531 free(f); 532 } 533} 534 535 536/************************************************* 537* OS-specific functions * 538*************************************************/ 539 540/* These functions are defined so that they can be made system specific. 541At present there are versions for Unix-style environments, Windows, native 542z/OS, and "no support". 
*/ 543 544 545/************* Directory scanning Unix-style and z/OS ***********/ 546 547#if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS 548#include <sys/types.h> 549#include <sys/stat.h> 550#include <dirent.h> 551 552#if defined NATIVE_ZOS 553/************* Directory and PDS/E scanning for z/OS ***********/ 554/************* z/OS looks mostly like Unix with USS ************/ 555/* However, z/OS needs the #include statements in this header */ 556#include "pcrzosfs.h" 557/* That header is not included in the main PCRE distribution because 558 other apparatus is needed to compile pcregrep for z/OS. The header 559 can be found in the special z/OS distribution, which is available 560 from www.zaconsultants.net or from www.cbttape.org. */ 561#endif 562 563typedef DIR directory_type; 564#define FILESEP '/' 565 566static int 567isdirectory(char *filename) 568{ 569struct stat statbuf; 570if (stat(filename, &statbuf) < 0) 571 return 0; /* In the expectation that opening as a file will fail */ 572return (statbuf.st_mode & S_IFMT) == S_IFDIR; 573} 574 575static directory_type * 576opendirectory(char *filename) 577{ 578return opendir(filename); 579} 580 581static char * 582readdirectory(directory_type *dir) 583{ 584for (;;) 585 { 586 struct dirent *dent = readdir(dir); 587 if (dent == NULL) return NULL; 588 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) 589 return dent->d_name; 590 } 591/* Control never reaches here */ 592} 593 594static void 595closedirectory(directory_type *dir) 596{ 597closedir(dir); 598} 599 600 601/************* Test for regular file, Unix-style **********/ 602 603static int 604isregfile(char *filename) 605{ 606struct stat statbuf; 607if (stat(filename, &statbuf) < 0) 608 return 1; /* In the expectation that opening as a file will fail */ 609return (statbuf.st_mode & S_IFMT) == S_IFREG; 610} 611 612 613#if defined NATIVE_ZOS 614/************* Test for a terminal in z/OS 
**********/ 615/* isatty() does not work in a TSO environment, so always give FALSE.*/ 616 617static BOOL 618is_stdout_tty(void) 619{ 620return FALSE; 621} 622 623static BOOL 624is_file_tty(FILE *f) 625{ 626return FALSE; 627} 628 629 630/************* Test for a terminal, Unix-style **********/ 631 632#else 633static BOOL 634is_stdout_tty(void) 635{ 636return isatty(fileno(stdout)); 637} 638 639static BOOL 640is_file_tty(FILE *f) 641{ 642return isatty(fileno(f)); 643} 644#endif 645 646/* End of Unix-style or native z/OS environment functions. */ 647 648 649/************* Directory scanning in Windows ***********/ 650 651/* I (Philip Hazel) have no means of testing this code. It was contributed by 652Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES 653when it did not exist. David Byron added a patch that moved the #include of 654<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. 655The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is 656undefined when it is indeed undefined. 
*/ 657 658#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H 659 660#ifndef STRICT 661# define STRICT 662#endif 663#ifndef WIN32_LEAN_AND_MEAN 664# define WIN32_LEAN_AND_MEAN 665#endif 666 667#include <windows.h> 668 669#ifndef INVALID_FILE_ATTRIBUTES 670#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF 671#endif 672 673typedef struct directory_type 674{ 675HANDLE handle; 676BOOL first; 677WIN32_FIND_DATA data; 678} directory_type; 679 680#define FILESEP '/' 681 682int 683isdirectory(char *filename) 684{ 685DWORD attr = GetFileAttributes(filename); 686if (attr == INVALID_FILE_ATTRIBUTES) 687 return 0; 688return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0; 689} 690 691directory_type * 692opendirectory(char *filename) 693{ 694size_t len; 695char *pattern; 696directory_type *dir; 697DWORD err; 698len = strlen(filename); 699pattern = (char *)malloc(len + 3); 700dir = (directory_type *)malloc(sizeof(*dir)); 701if ((pattern == NULL) || (dir == NULL)) 702 { 703 fprintf(stderr, "pcregrep: malloc failed\n"); 704 pcregrep_exit(2); 705 } 706memcpy(pattern, filename, len); 707memcpy(&(pattern[len]), "\\*", 3); 708dir->handle = FindFirstFile(pattern, &(dir->data)); 709if (dir->handle != INVALID_HANDLE_VALUE) 710 { 711 free(pattern); 712 dir->first = TRUE; 713 return dir; 714 } 715err = GetLastError(); 716free(pattern); 717free(dir); 718errno = (err == ERROR_ACCESS_DENIED) ? 
EACCES : ENOENT; 719return NULL; 720} 721 722char * 723readdirectory(directory_type *dir) 724{ 725for (;;) 726 { 727 if (!dir->first) 728 { 729 if (!FindNextFile(dir->handle, &(dir->data))) 730 return NULL; 731 } 732 else 733 { 734 dir->first = FALSE; 735 } 736 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) 737 return dir->data.cFileName; 738 } 739#ifndef _MSC_VER 740return NULL; /* Keep compiler happy; never executed */ 741#endif 742} 743 744void 745closedirectory(directory_type *dir) 746{ 747FindClose(dir->handle); 748free(dir); 749} 750 751 752/************* Test for regular file in Windows **********/ 753 754/* I don't know how to do this, or if it can be done; assume all paths are 755regular if they are not directories. */ 756 757int isregfile(char *filename) 758{ 759return !isdirectory(filename); 760} 761 762 763/************* Test for a terminal in Windows **********/ 764 765/* I don't know how to do this; assume never */ 766 767static BOOL 768is_stdout_tty(void) 769{ 770return FALSE; 771} 772 773static BOOL 774is_file_tty(FILE *f) 775{ 776return FALSE; 777} 778 779/* End of Windows functions */ 780 781 782/************* Directory scanning when we can't do it ***********/ 783 784/* The type is void, and apart from isdirectory(), the functions do nothing. */ 785 786#else 787 788#define FILESEP 0 789typedef void directory_type; 790 791int isdirectory(char *filename) { return 0; } 792directory_type * opendirectory(char *filename) { return (directory_type*)0;} 793char *readdirectory(directory_type *dir) { return (char*)0;} 794void closedirectory(directory_type *dir) {} 795 796 797/************* Test for regular file when we can't do it **********/ 798 799/* Assume all files are regular. 
*/ 800 801int isregfile(char *filename) { return 1; } 802 803 804/************* Test for a terminal when we can't do it **********/ 805 806static BOOL 807is_stdout_tty(void) 808{ 809return FALSE; 810} 811 812static BOOL 813is_file_tty(FILE *f) 814{ 815return FALSE; 816} 817 818#endif /* End of system-specific functions */ 819 820 821 822#ifndef HAVE_STRERROR 823/************************************************* 824* Provide strerror() for non-ANSI libraries * 825*************************************************/ 826 827/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 828in their libraries, but can provide the same facility by this simple 829alternative function. */ 830 831extern int sys_nerr; 832extern char *sys_errlist[]; 833 834char * 835strerror(int n) 836{ 837if (n < 0 || n >= sys_nerr) return "unknown error number"; 838return sys_errlist[n]; 839} 840#endif /* HAVE_STRERROR */ 841 842 843 844/************************************************* 845* Usage function * 846*************************************************/ 847 848static int 849usage(int rc) 850{ 851option_item *op; 852fprintf(stderr, "Usage: pcregrep [-"); 853for (op = optionlist; op->one_char != 0; op++) 854 { 855 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); 856 } 857fprintf(stderr, "] [long options] [pattern] [files]\n"); 858fprintf(stderr, "Type `pcregrep --help' for more information and the long " 859 "options.\n"); 860return rc; 861} 862 863 864 865/************************************************* 866* Help function * 867*************************************************/ 868 869static void 870help(void) 871{ 872option_item *op; 873 874printf("Usage: pcregrep [OPTION]... 
[PATTERN] [FILE1 FILE2 ...]\n"); 875printf("Search for PATTERN in each FILE or standard input.\n"); 876printf("PATTERN must be present if neither -e nor -f is used.\n"); 877printf("\"-\" can be used as a file name to mean STDIN.\n"); 878 879#ifdef SUPPORT_LIBZ 880printf("Files whose names end in .gz are read using zlib.\n"); 881#endif 882 883#ifdef SUPPORT_LIBBZ2 884printf("Files whose names end in .bz2 are read using bzlib2.\n"); 885#endif 886 887#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 888printf("Other files and the standard input are read as plain files.\n\n"); 889#else 890printf("All files are read as plain files, without any interpretation.\n\n"); 891#endif 892 893printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); 894printf("Options:\n"); 895 896for (op = optionlist; op->one_char != 0; op++) 897 { 898 int n; 899 char s[4]; 900 901 /* Two options were accidentally implemented and documented with underscores 902 instead of hyphens in their names, something that was not noticed for quite a 903 few releases. When fixing this, I left the underscored versions in the list 904 in case people were using them. However, we don't want to display them in the 905 help data. There are no other options that contain underscores, and we do not 906 expect ever to implement such options. Therefore, just omit any option that 907 contains an underscore. */ 908 909 if (strchr(op->long_name, '_') != NULL) continue; 910 911 if (op->one_char > 0 && (op->long_name)[0] == 0) 912 n = 31 - printf(" -%c", op->one_char); 913 else 914 { 915 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); 916 else strcpy(s, " "); 917 n = 31 - printf(" %s --%s", s, op->long_name); 918 } 919 920 if (n < 1) n = 1; 921 printf("%.*s%s\n", n, " ", op->help_text); 922 } 923 924printf("\nNumbers may be followed by K or M, e.g. 
--buffer-size=100K.\n"); 925printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE); 926printf("When reading patterns or file names from a file, trailing white\n"); 927printf("space is removed and blank lines are ignored.\n"); 928printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN); 929 930printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n"); 931printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); 932} 933 934 935 936/************************************************* 937* Test exclude/includes * 938*************************************************/ 939 940/* If any exclude pattern matches, the path is excluded. Otherwise, unless 941there are no includes, the path must match an include pattern. 942 943Arguments: 944 path the path to be matched 945 ip the chain of include patterns 946 ep the chain of exclude patterns 947 948Returns: TRUE if the path is not excluded 949*/ 950 951static BOOL 952test_incexc(char *path, patstr *ip, patstr *ep) 953{ 954int plen = strlen(path); 955 956for (; ep != NULL; ep = ep->next) 957 { 958 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0) 959 return FALSE; 960 } 961 962if (ip == NULL) return TRUE; 963 964for (; ip != NULL; ip = ip->next) 965 { 966 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0) 967 return TRUE; 968 } 969 970return FALSE; 971} 972 973 974 975/************************************************* 976* Decode integer argument value * 977*************************************************/ 978 979/* Integer arguments can be followed by K or M. Avoid the use of strtoul() 980because SunOS4 doesn't have it. This is used only for unpicking arguments, so 981just keep it simple. 
982 983Arguments: 984 option_data the option data string 985 op the option item (for error messages) 986 longop TRUE if option given in long form 987 988Returns: a long integer 989*/ 990 991static long int 992decode_number(char *option_data, option_item *op, BOOL longop) 993{ 994unsigned long int n = 0; 995char *endptr = option_data; 996while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; 997while (isdigit((unsigned char)(*endptr))) 998 n = n * 10 + (int)(*endptr++ - '0'); 999if (toupper(*endptr) == 'K') 1000 { 1001 n *= 1024; 1002 endptr++; 1003 } 1004else if (toupper(*endptr) == 'M') 1005 { 1006 n *= 1024*1024; 1007 endptr++; 1008 } 1009 1010if (*endptr != 0) /* Error */ 1011 { 1012 if (longop) 1013 { 1014 char *equals = strchr(op->long_name, '='); 1015 int nlen = (equals == NULL)? (int)strlen(op->long_name) : 1016 (int)(equals - op->long_name); 1017 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n", 1018 option_data, nlen, op->long_name); 1019 } 1020 else 1021 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", 1022 option_data, op->one_char); 1023 pcregrep_exit(usage(2)); 1024 } 1025 1026return n; 1027} 1028 1029 1030 1031/************************************************* 1032* Add item to a chain of numbers * 1033*************************************************/ 1034 1035/* Used to add an item onto a chain, or just return an unconnected item if the 1036"after" argument is NULL. 
1037 1038Arguments: 1039 n the number to add 1040 after if not NULL points to item to insert after 1041 1042Returns: new number block 1043*/ 1044 1045static omstr * 1046add_number(int n, omstr *after) 1047{ 1048omstr *om = (omstr *)malloc(sizeof(omstr)); 1049 1050if (om == NULL) 1051 { 1052 fprintf(stderr, "pcregrep: malloc failed\n"); 1053 pcregrep_exit(2); 1054 } 1055om->next = NULL; 1056om->groupnum = n; 1057 1058if (after != NULL) 1059 { 1060 om->next = after->next; 1061 after->next = om; 1062 } 1063return om; 1064} 1065 1066 1067 1068/************************************************* 1069* Read one line of input * 1070*************************************************/ 1071 1072/* Normally, input is read using fread() into a large buffer, so many lines may 1073be read at once. However, doing this for tty input means that no output appears 1074until a lot of input has been typed. Instead, tty input is handled line by 1075line. We cannot use fgets() for this, because it does not stop at a binary 1076zero, and therefore there is no way of telling how many characters it has read, 1077because there may be binary zeros embedded in the data. 1078 1079Arguments: 1080 buffer the buffer to read into 1081 length the maximum number of characters to read 1082 f the file 1083 1084Returns: the number of characters read, zero at end of file 1085*/ 1086 1087static unsigned int 1088read_one_line(char *buffer, int length, FILE *f) 1089{ 1090int c; 1091int yield = 0; 1092while ((c = fgetc(f)) != EOF) 1093 { 1094 buffer[yield++] = c; 1095 if (c == '\n' || yield >= length) break; 1096 } 1097return yield; 1098} 1099 1100 1101 1102/************************************************* 1103* Find end of line * 1104*************************************************/ 1105 1106/* The length of the endline sequence that is found is set via lenptr. This may 1107be zero at the very end of the file if there is no line-ending sequence there. 
Which sequences count as a line ending is selected by the global endlinetype;
an unrecognized value is treated as EL_LF.

Arguments:
  p         current position in line
  endptr    end of available data
  lenptr    where to put the length of the eol sequence

Returns:    pointer after the last byte of the line,
            including the newline byte(s)
*/

static char *
end_of_line(char *p, char *endptr, int *lenptr)
{
switch(endlinetype)
  {
  default:      /* Just in case */
  case EL_LF:
  while (p < endptr && *p != '\n') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;  /* Ran out of data without seeing LF */
  return endptr;

  case EL_CR:
  while (p < endptr && *p != '\r') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;  /* Ran out of data without seeing CR */
  return endptr;

  /* Only CR immediately followed by LF ends a line here. A CR followed by
  anything else is skipped and the scan for the next CR resumes from the byte
  after it. */

  case EL_CRLF:
  for (;;)
    {
    while (p < endptr && *p != '\r') p++;
    if (++p >= endptr)
      {
      *lenptr = 0;
      return endptr;
      }
    if (*p == '\n')
      {
      *lenptr = 2;
      return p + 1;
      }
    }
  break;

  /* CR, LF, or CRLF. The data is stepped through a character at a time; when
  utf8 is set a lead byte >= 0xc0 is combined with its following bytes.
  NOTE(review): the following bytes are read via p[gcii] without a check
  against endptr. */

  case EL_ANYCRLF:
  while (p < endptr)
    {
    int extra = 0;
    register int c = *((unsigned char *)p);

    if (utf8 && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;   /* p now points after the character just examined */

    switch (c)
      {
      case '\n':
      *lenptr = 1;
      return p;

      case '\r':
      if (p < endptr && *p == '\n')   /* CRLF counts as one terminator */
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

      default:
      break;
      }
    }   /* End of loop for ANYCRLF case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;

  /* As for EL_ANYCRLF, but VT and FF, and (unless EBCDIC is defined) NEL, LS
  and PS, are recognized as well. */

  case EL_ANY:
  while (p < endptr)
    {
    int extra = 0;
    register int c = *((unsigned char *)p);

    if (utf8 && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;   /* p now points after the character just examined */

    switch (c)
      {
      case '\n':    /* LF */
      case '\v':    /* VT */
      case '\f':    /* FF */
      *lenptr = 1;
      return p;

      case '\r':    /* CR */
      if (p < endptr && *p == '\n')   /* CRLF counts as one terminator */
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

#ifndef EBCDIC
      case 0x85:    /* Unicode NEL */
      *lenptr = utf8? 2 : 1;    /* Reported as two bytes when utf8 is set */
      return p;

      case 0x2028:  /* Unicode LS */
      case 0x2029:  /* Unicode PS */
      *lenptr = 3;
      return p;
#endif  /* Not EBCDIC */

      default:
      break;
      }
    }   /* End of loop for ANY case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;
  }     /* End of overall switch */
}



/*************************************************
*         Find start of previous line            *
*************************************************/

/* This is called when looking back for before lines to print.
1271 1272Arguments: 1273 p start of the subsequent line 1274 startptr start of available data 1275 1276Returns: pointer to the start of the previous line 1277*/ 1278 1279static char * 1280previous_line(char *p, char *startptr) 1281{ 1282switch(endlinetype) 1283 { 1284 default: /* Just in case */ 1285 case EL_LF: 1286 p--; 1287 while (p > startptr && p[-1] != '\n') p--; 1288 return p; 1289 1290 case EL_CR: 1291 p--; 1292 while (p > startptr && p[-1] != '\n') p--; 1293 return p; 1294 1295 case EL_CRLF: 1296 for (;;) 1297 { 1298 p -= 2; 1299 while (p > startptr && p[-1] != '\n') p--; 1300 if (p <= startptr + 1 || p[-2] == '\r') return p; 1301 } 1302 /* Control can never get here */ 1303 1304 case EL_ANY: 1305 case EL_ANYCRLF: 1306 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; 1307 if (utf8) while ((*p & 0xc0) == 0x80) p--; 1308 1309 while (p > startptr) 1310 { 1311 register unsigned int c; 1312 char *pp = p - 1; 1313 1314 if (utf8) 1315 { 1316 int extra = 0; 1317 while ((*pp & 0xc0) == 0x80) pp--; 1318 c = *((unsigned char *)pp); 1319 if (c >= 0xc0) 1320 { 1321 int gcii, gcss; 1322 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 1323 gcss = 6*extra; 1324 c = (c & utf8_table3[extra]) << gcss; 1325 for (gcii = 1; gcii <= extra; gcii++) 1326 { 1327 gcss -= 6; 1328 c |= (pp[gcii] & 0x3f) << gcss; 1329 } 1330 } 1331 } 1332 else c = *((unsigned char *)pp); 1333 1334 if (endlinetype == EL_ANYCRLF) switch (c) 1335 { 1336 case '\n': /* LF */ 1337 case '\r': /* CR */ 1338 return p; 1339 1340 default: 1341 break; 1342 } 1343 1344 else switch (c) 1345 { 1346 case '\n': /* LF */ 1347 case '\v': /* VT */ 1348 case '\f': /* FF */ 1349 case '\r': /* CR */ 1350#ifndef EBCDIE 1351 case 0x85: /* Unicode NEL */ 1352 case 0x2028: /* Unicode LS */ 1353 case 0x2029: /* Unicode PS */ 1354#endif /* Not EBCDIC */ 1355 return p; 1356 1357 default: 1358 break; 1359 } 1360 1361 p = pp; /* Back one character */ 1362 } /* End of loop for ANY case */ 1363 1364 return 
startptr; /* Hit start of data */ 1365 } /* End of overall switch */ 1366} 1367 1368 1369 1370 1371 1372/************************************************* 1373* Print the previous "after" lines * 1374*************************************************/ 1375 1376/* This is called if we are about to lose said lines because of buffer filling, 1377and at the end of the file. The data in the line is written using fwrite() so 1378that a binary zero does not terminate it. 1379 1380Arguments: 1381 lastmatchnumber the number of the last matching line, plus one 1382 lastmatchrestart where we restarted after the last match 1383 endptr end of available data 1384 printname filename for printing 1385 1386Returns: nothing 1387*/ 1388 1389static void 1390do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr, 1391 char *printname) 1392{ 1393if (after_context > 0 && lastmatchnumber > 0) 1394 { 1395 int count = 0; 1396 while (lastmatchrestart < endptr && count++ < after_context) 1397 { 1398 int ellength; 1399 char *pp = lastmatchrestart; 1400 if (printname != NULL) fprintf(stdout, "%s-", printname); 1401 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 1402 pp = end_of_line(pp, endptr, &ellength); 1403 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 1404 lastmatchrestart = pp; 1405 } 1406 hyphenpending = TRUE; 1407 } 1408} 1409 1410 1411 1412/************************************************* 1413* Apply patterns to subject till one matches * 1414*************************************************/ 1415 1416/* This function is called to run through all patterns, looking for a match. It 1417is used multiple times for the same subject when colouring is enabled, in order 1418to find all possible matches. 
1419 1420Arguments: 1421 matchptr the start of the subject 1422 length the length of the subject to match 1423 options options for pcre_exec 1424 startoffset where to start matching 1425 offsets the offets vector to fill in 1426 mrc address of where to put the result of pcre_exec() 1427 1428Returns: TRUE if there was a match 1429 FALSE if there was no match 1430 invert if there was a non-fatal error 1431*/ 1432 1433static BOOL 1434match_patterns(char *matchptr, size_t length, unsigned int options, 1435 int startoffset, int *offsets, int *mrc) 1436{ 1437int i; 1438size_t slen = length; 1439patstr *p = patterns; 1440const char *msg = "this text:\n\n"; 1441 1442if (slen > 200) 1443 { 1444 slen = 200; 1445 msg = "text that starts:\n\n"; 1446 } 1447for (i = 1; p != NULL; p = p->next, i++) 1448 { 1449 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length, 1450 startoffset, options, offsets, OFFSET_SIZE); 1451 if (*mrc >= 0) return TRUE; 1452 if (*mrc == PCRE_ERROR_NOMATCH) continue; 1453 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc); 1454 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i); 1455 fprintf(stderr, "%s", msg); 1456 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ 1457 fprintf(stderr, "\n\n"); 1458 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT || 1459 *mrc == PCRE_ERROR_JIT_STACKLIMIT) 1460 resource_error = TRUE; 1461 if (error_count++ > 20) 1462 { 1463 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n"); 1464 pcregrep_exit(2); 1465 } 1466 return invert; /* No more matching; don't show the line again */ 1467 } 1468 1469return FALSE; /* No match, no errors */ 1470} 1471 1472 1473 1474/************************************************* 1475* Grep an individual file * 1476*************************************************/ 1477 1478/* This is called from grep_or_recurse() below. It uses a buffer that is three 1479times the value of bufthird. 
The matching point is never allowed to stray into 1480the top third of the buffer, thus keeping more of the file available for 1481context printing or for multiline scanning. For large files, the pointer will 1482be in the middle third most of the time, so the bottom third is available for 1483"before" context printing. 1484 1485Arguments: 1486 handle the fopened FILE stream for a normal file 1487 the gzFile pointer when reading is via libz 1488 the BZFILE pointer when reading is via libbz2 1489 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 1490 filename the file name or NULL (for errors) 1491 printname the file name if it is to be printed for each match 1492 or NULL if the file name is not to be printed 1493 it cannot be NULL if filenames[_nomatch]_only is set 1494 1495Returns: 0 if there was at least one match 1496 1 otherwise (no matches) 1497 2 if an overlong line is encountered 1498 3 if there is a read error on a .bz2 file 1499*/ 1500 1501static int 1502pcregrep(void *handle, int frtype, char *filename, char *printname) 1503{ 1504int rc = 1; 1505int linenumber = 1; 1506int lastmatchnumber = 0; 1507int count = 0; 1508int filepos = 0; 1509int offsets[OFFSET_SIZE]; 1510char *lastmatchrestart = NULL; 1511char *ptr = main_buffer; 1512char *endptr; 1513size_t bufflength; 1514BOOL binary = FALSE; 1515BOOL endhyphenpending = FALSE; 1516BOOL input_line_buffered = line_buffered; 1517FILE *in = NULL; /* Ensure initialized */ 1518 1519#ifdef SUPPORT_LIBZ 1520gzFile ingz = NULL; 1521#endif 1522 1523#ifdef SUPPORT_LIBBZ2 1524BZFILE *inbz2 = NULL; 1525#endif 1526 1527 1528/* Do the first read into the start of the buffer and set up the pointer to end 1529of what we have. In the case of libz, a non-zipped .gz file will be read as a 1530plain file. However, if a .bz2 file isn't actually bzipped, the first read will 1531fail. 
*/ 1532 1533(void)frtype; 1534 1535#ifdef SUPPORT_LIBZ 1536if (frtype == FR_LIBZ) 1537 { 1538 ingz = (gzFile)handle; 1539 bufflength = gzread (ingz, main_buffer, bufsize); 1540 } 1541else 1542#endif 1543 1544#ifdef SUPPORT_LIBBZ2 1545if (frtype == FR_LIBBZ2) 1546 { 1547 inbz2 = (BZFILE *)handle; 1548 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize); 1549 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ 1550 } /* without the cast it is unsigned. */ 1551else 1552#endif 1553 1554 { 1555 in = (FILE *)handle; 1556 if (is_file_tty(in)) input_line_buffered = TRUE; 1557 bufflength = input_line_buffered? 1558 read_one_line(main_buffer, bufsize, in) : 1559 fread(main_buffer, 1, bufsize, in); 1560 } 1561 1562endptr = main_buffer + bufflength; 1563 1564/* Unless binary-files=text, see if we have a binary file. This uses the same 1565rule as GNU grep, namely, a search for a binary zero byte near the start of the 1566file. */ 1567 1568if (binary_files != BIN_TEXT) 1569 { 1570 binary = 1571 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL; 1572 if (binary && binary_files == BIN_NOMATCH) return 1; 1573 } 1574 1575/* Loop while the current pointer is not at the end of the file. For large 1576files, endptr will be at the end of the buffer when we are in the middle of the 1577file, but ptr will never get there, because as soon as it gets over 2/3 of the 1578way, the buffer is shifted left and re-filled. */ 1579 1580while (ptr < endptr) 1581 { 1582 int endlinelength; 1583 int mrc = 0; 1584 int startoffset = 0; 1585 unsigned int options = 0; 1586 BOOL match; 1587 char *matchptr = ptr; 1588 char *t = ptr; 1589 size_t length, linelength; 1590 1591 /* At this point, ptr is at the start of a line. We need to find the length 1592 of the subject string to pass to pcre_exec(). In multiline mode, it is the 1593 length remainder of the data in the buffer. Otherwise, it is the length of 1594 the next line, excluding the terminating newline. 
After matching, we always 1595 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE 1596 option is used for compiling, so that any match is constrained to be in the 1597 first line. */ 1598 1599 t = end_of_line(t, endptr, &endlinelength); 1600 linelength = t - ptr - endlinelength; 1601 length = multiline? (size_t)(endptr - ptr) : linelength; 1602 1603 /* Check to see if the line we are looking at extends right to the very end 1604 of the buffer without a line terminator. This means the line is too long to 1605 handle. */ 1606 1607 if (endlinelength == 0 && t == main_buffer + bufsize) 1608 { 1609 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n" 1610 "pcregrep: check the --buffer-size option\n", 1611 linenumber, 1612 (filename == NULL)? "" : " of file ", 1613 (filename == NULL)? "" : filename); 1614 return 2; 1615 } 1616 1617 /* Extra processing for Jeffrey Friedl's debugging. */ 1618 1619#ifdef JFRIEDL_DEBUG 1620 if (jfriedl_XT || jfriedl_XR) 1621 { 1622# include <sys/time.h> 1623# include <time.h> 1624 struct timeval start_time, end_time; 1625 struct timezone dummy; 1626 int i; 1627 1628 if (jfriedl_XT) 1629 { 1630 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); 1631 const char *orig = ptr; 1632 ptr = malloc(newlen + 1); 1633 if (!ptr) { 1634 printf("out of memory"); 1635 pcregrep_exit(2); 1636 } 1637 endptr = ptr; 1638 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); 1639 for (i = 0; i < jfriedl_XT; i++) { 1640 strncpy(endptr, orig, length); 1641 endptr += length; 1642 } 1643 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); 1644 length = newlen; 1645 } 1646 1647 if (gettimeofday(&start_time, &dummy) != 0) 1648 perror("bad gettimeofday"); 1649 1650 1651 for (i = 0; i < jfriedl_XR; i++) 1652 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0, 1653 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); 1654 1655 if 
(gettimeofday(&end_time, &dummy) != 0) 1656 perror("bad gettimeofday"); 1657 1658 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) 1659 - 1660 (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); 1661 1662 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta); 1663 return 0; 1664 } 1665#endif 1666 1667 /* We come back here after a match when show_only_matching is set, in order 1668 to find any further matches in the same line. This applies to 1669 --only-matching, --file-offsets, and --line-offsets. */ 1670 1671 ONLY_MATCHING_RESTART: 1672 1673 /* Run through all the patterns until one matches or there is an error other 1674 than NOMATCH. This code is in a subroutine so that it can be re-used for 1675 finding subsequent matches when colouring matched lines. After finding one 1676 match, set PCRE_NOTEMPTY to disable any further matches of null strings in 1677 this line. */ 1678 1679 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc); 1680 options = PCRE_NOTEMPTY; 1681 1682 /* If it's a match or a not-match (as required), do what's wanted. */ 1683 1684 if (match != invert) 1685 { 1686 BOOL hyphenprinted = FALSE; 1687 1688 /* We've failed if we want a file that doesn't have any matches. */ 1689 1690 if (filenames == FN_NOMATCH_ONLY) return 1; 1691 1692 /* Just count if just counting is wanted. */ 1693 1694 if (count_only) count++; 1695 1696 /* When handling a binary file and binary-files==binary, the "binary" 1697 variable will be set true (it's false in all other cases). In this 1698 situation we just want to output the file name. No need to scan further. */ 1699 1700 else if (binary) 1701 { 1702 fprintf(stdout, "Binary file %s matches\n", filename); 1703 return 0; 1704 } 1705 1706 /* If all we want is a file name, there is no need to scan any more lines 1707 in the file. 
*/ 1708 1709 else if (filenames == FN_MATCH_ONLY) 1710 { 1711 fprintf(stdout, "%s\n", printname); 1712 return 0; 1713 } 1714 1715 /* Likewise, if all we want is a yes/no answer. */ 1716 1717 else if (quiet) return 0; 1718 1719 /* The --only-matching option prints just the substring that matched, 1720 and/or one or more captured portions of it, as long as these strings are 1721 not empty. The --file-offsets and --line-offsets options output offsets for 1722 the matching substring (all three set show_only_matching). None of these 1723 mutually exclusive options prints any context. Afterwards, adjust the start 1724 and then jump back to look for further matches in the same line. If we are 1725 in invert mode, however, nothing is printed and we do not restart - this 1726 could still be useful because the return code is set. */ 1727 1728 else if (show_only_matching) 1729 { 1730 if (!invert) 1731 { 1732 if (printname != NULL) fprintf(stdout, "%s:", printname); 1733 if (number) fprintf(stdout, "%d:", linenumber); 1734 1735 /* Handle --line-offsets */ 1736 1737 if (line_offsets) 1738 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), 1739 offsets[1] - offsets[0]); 1740 1741 /* Handle --file-offsets */ 1742 1743 else if (file_offsets) 1744 fprintf(stdout, "%d,%d\n", 1745 (int)(filepos + matchptr + offsets[0] - ptr), 1746 offsets[1] - offsets[0]); 1747 1748 /* Handle --only-matching, which may occur many times */ 1749 1750 else 1751 { 1752 BOOL printed = FALSE; 1753 omstr *om; 1754 1755 for (om = only_matching; om != NULL; om = om->next) 1756 { 1757 int n = om->groupnum; 1758 if (n < mrc) 1759 { 1760 int plen = offsets[2*n + 1] - offsets[2*n]; 1761 if (plen > 0) 1762 { 1763 if (printed) fprintf(stdout, "%s", om_separator); 1764 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1765 FWRITE(matchptr + offsets[n*2], 1, plen, stdout); 1766 if (do_colour) fprintf(stdout, "%c[00m", 0x1b); 1767 printed = TRUE; 1768 } 1769 } 1770 } 1771 1772 if (printed || 
printname != NULL || number) fprintf(stdout, "\n"); 1773 } 1774 1775 /* Prepare to repeat to find the next match */ 1776 1777 match = FALSE; 1778 if (line_buffered) fflush(stdout); 1779 rc = 0; /* Had some success */ 1780 startoffset = offsets[1]; /* Restart after the match */ 1781 goto ONLY_MATCHING_RESTART; 1782 } 1783 } 1784 1785 /* This is the default case when none of the above options is set. We print 1786 the matching lines(s), possibly preceded and/or followed by other lines of 1787 context. */ 1788 1789 else 1790 { 1791 /* See if there is a requirement to print some "after" lines from a 1792 previous match. We never print any overlaps. */ 1793 1794 if (after_context > 0 && lastmatchnumber > 0) 1795 { 1796 int ellength; 1797 int linecount = 0; 1798 char *p = lastmatchrestart; 1799 1800 while (p < ptr && linecount < after_context) 1801 { 1802 p = end_of_line(p, ptr, &ellength); 1803 linecount++; 1804 } 1805 1806 /* It is important to advance lastmatchrestart during this printing so 1807 that it interacts correctly with any "before" printing below. Print 1808 each line's data using fwrite() in case there are binary zeroes. */ 1809 1810 while (lastmatchrestart < p) 1811 { 1812 char *pp = lastmatchrestart; 1813 if (printname != NULL) fprintf(stdout, "%s-", printname); 1814 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 1815 pp = end_of_line(pp, endptr, &ellength); 1816 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 1817 lastmatchrestart = pp; 1818 } 1819 if (lastmatchrestart != ptr) hyphenpending = TRUE; 1820 } 1821 1822 /* If there were non-contiguous lines printed above, insert hyphens. */ 1823 1824 if (hyphenpending) 1825 { 1826 fprintf(stdout, "--\n"); 1827 hyphenpending = FALSE; 1828 hyphenprinted = TRUE; 1829 } 1830 1831 /* See if there is a requirement to print some "before" lines for this 1832 match. Again, don't print overlaps. 
*/ 1833 1834 if (before_context > 0) 1835 { 1836 int linecount = 0; 1837 char *p = ptr; 1838 1839 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && 1840 linecount < before_context) 1841 { 1842 linecount++; 1843 p = previous_line(p, main_buffer); 1844 } 1845 1846 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) 1847 fprintf(stdout, "--\n"); 1848 1849 while (p < ptr) 1850 { 1851 int ellength; 1852 char *pp = p; 1853 if (printname != NULL) fprintf(stdout, "%s-", printname); 1854 if (number) fprintf(stdout, "%d-", linenumber - linecount--); 1855 pp = end_of_line(pp, endptr, &ellength); 1856 FWRITE(p, 1, pp - p, stdout); 1857 p = pp; 1858 } 1859 } 1860 1861 /* Now print the matching line(s); ensure we set hyphenpending at the end 1862 of the file if any context lines are being output. */ 1863 1864 if (after_context > 0 || before_context > 0) 1865 endhyphenpending = TRUE; 1866 1867 if (printname != NULL) fprintf(stdout, "%s:", printname); 1868 if (number) fprintf(stdout, "%d:", linenumber); 1869 1870 /* In multiline mode, we want to print to the end of the line in which 1871 the end of the matched string is found, so we adjust linelength and the 1872 line number appropriately, but only when there actually was a match 1873 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of 1874 the match will always be before the first newline sequence. */ 1875 1876 if (multiline & !invert) 1877 { 1878 char *endmatch = ptr + offsets[1]; 1879 t = ptr; 1880 while (t <= endmatch) 1881 { 1882 t = end_of_line(t, endptr, &endlinelength); 1883 if (t < endmatch) linenumber++; else break; 1884 } 1885 linelength = t - ptr - endlinelength; 1886 } 1887 1888 /*** NOTE: Use only fwrite() to output the data line, so that binary 1889 zeroes are treated as just another data character. 
*/ 1890 1891 /* This extra option, for Jeffrey Friedl's debugging requirements, 1892 replaces the matched string, or a specific captured string if it exists, 1893 with X. When this happens, colouring is ignored. */ 1894 1895#ifdef JFRIEDL_DEBUG 1896 if (S_arg >= 0 && S_arg < mrc) 1897 { 1898 int first = S_arg * 2; 1899 int last = first + 1; 1900 FWRITE(ptr, 1, offsets[first], stdout); 1901 fprintf(stdout, "X"); 1902 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout); 1903 } 1904 else 1905#endif 1906 1907 /* We have to split the line(s) up if colouring, and search for further 1908 matches, but not of course if the line is a non-match. */ 1909 1910 if (do_colour && !invert) 1911 { 1912 int plength; 1913 FWRITE(ptr, 1, offsets[0], stdout); 1914 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1915 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1916 fprintf(stdout, "%c[00m", 0x1b); 1917 for (;;) 1918 { 1919 startoffset = offsets[1]; 1920 if (startoffset >= (int)linelength + endlinelength || 1921 !match_patterns(matchptr, length, options, startoffset, offsets, 1922 &mrc)) 1923 break; 1924 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout); 1925 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1926 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1927 fprintf(stdout, "%c[00m", 0x1b); 1928 } 1929 1930 /* In multiline mode, we may have already printed the complete line 1931 and its line-ending characters (if they matched the pattern), so there 1932 may be no more to print. */ 1933 1934 plength = (int)((linelength + endlinelength) - startoffset); 1935 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout); 1936 } 1937 1938 /* Not colouring; no need to search for further matches */ 1939 1940 else FWRITE(ptr, 1, linelength + endlinelength, stdout); 1941 } 1942 1943 /* End of doing what has to be done for a match. If --line-buffered was 1944 given, flush the output. 
*/ 1945 1946 if (line_buffered) fflush(stdout); 1947 rc = 0; /* Had some success */ 1948 1949 /* Remember where the last match happened for after_context. We remember 1950 where we are about to restart, and that line's number. */ 1951 1952 lastmatchrestart = ptr + linelength + endlinelength; 1953 lastmatchnumber = linenumber + 1; 1954 } 1955 1956 /* For a match in multiline inverted mode (which of course did not cause 1957 anything to be printed), we have to move on to the end of the match before 1958 proceeding. */ 1959 1960 if (multiline && invert && match) 1961 { 1962 int ellength; 1963 char *endmatch = ptr + offsets[1]; 1964 t = ptr; 1965 while (t < endmatch) 1966 { 1967 t = end_of_line(t, endptr, &ellength); 1968 if (t <= endmatch) linenumber++; else break; 1969 } 1970 endmatch = end_of_line(endmatch, endptr, &ellength); 1971 linelength = endmatch - ptr - ellength; 1972 } 1973 1974 /* Advance to after the newline and increment the line number. The file 1975 offset to the current line is maintained in filepos. */ 1976 1977 ptr += linelength + endlinelength; 1978 filepos += (int)(linelength + endlinelength); 1979 linenumber++; 1980 1981 /* If input is line buffered, and the buffer is not yet full, read another 1982 line and add it into the buffer. */ 1983 1984 if (input_line_buffered && bufflength < (size_t)bufsize) 1985 { 1986 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); 1987 bufflength += add; 1988 endptr += add; 1989 } 1990 1991 /* If we haven't yet reached the end of the file (the buffer is full), and 1992 the current point is in the top 1/3 of the buffer, slide the buffer down by 1993 1/3 and refill it. Before we do this, if some unprinted "after" lines are 1994 about to be lost, print them. 
*/ 1995 1996 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird) 1997 { 1998 if (after_context > 0 && 1999 lastmatchnumber > 0 && 2000 lastmatchrestart < main_buffer + bufthird) 2001 { 2002 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 2003 lastmatchnumber = 0; 2004 } 2005 2006 /* Now do the shuffle */ 2007 2008 memmove(main_buffer, main_buffer + bufthird, 2*bufthird); 2009 ptr -= bufthird; 2010 2011#ifdef SUPPORT_LIBZ 2012 if (frtype == FR_LIBZ) 2013 bufflength = 2*bufthird + 2014 gzread (ingz, main_buffer + 2*bufthird, bufthird); 2015 else 2016#endif 2017 2018#ifdef SUPPORT_LIBBZ2 2019 if (frtype == FR_LIBBZ2) 2020 bufflength = 2*bufthird + 2021 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird); 2022 else 2023#endif 2024 2025 bufflength = 2*bufthird + 2026 (input_line_buffered? 2027 read_one_line(main_buffer + 2*bufthird, bufthird, in) : 2028 fread(main_buffer + 2*bufthird, 1, bufthird, in)); 2029 endptr = main_buffer + bufflength; 2030 2031 /* Adjust any last match point */ 2032 2033 if (lastmatchnumber > 0) lastmatchrestart -= bufthird; 2034 } 2035 } /* Loop through the whole file */ 2036 2037/* End of file; print final "after" lines if wanted; do_after_lines sets 2038hyphenpending if it prints something. */ 2039 2040if (!show_only_matching && !count_only) 2041 { 2042 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 2043 hyphenpending |= endhyphenpending; 2044 } 2045 2046/* Print the file name if we are looking for those without matches and there 2047were none. If we found a match, we won't have got this far. 
*/ 2048 2049if (filenames == FN_NOMATCH_ONLY) 2050 { 2051 fprintf(stdout, "%s\n", printname); 2052 return 0; 2053 } 2054 2055/* Print the match count if wanted */ 2056 2057if (count_only) 2058 { 2059 if (count > 0 || !omit_zero_count) 2060 { 2061 if (printname != NULL && filenames != FN_NONE) 2062 fprintf(stdout, "%s:", printname); 2063 fprintf(stdout, "%d\n", count); 2064 } 2065 } 2066 2067return rc; 2068} 2069 2070 2071 2072/************************************************* 2073* Grep a file or recurse into a directory * 2074*************************************************/ 2075 2076/* Given a path name, if it's a directory, scan all the files if we are 2077recursing; if it's a file, grep it. 2078 2079Arguments: 2080 pathname the path to investigate 2081 dir_recurse TRUE if recursing is wanted (-r or -drecurse) 2082 only_one_at_top TRUE if the path is the only one at toplevel 2083 2084Returns: -1 the file/directory was skipped 2085 0 if there was at least one match 2086 1 if there were no matches 2087 2 there was some kind of error 2088 2089However, file opening failures are suppressed if "silent" is set. 2090*/ 2091 2092static int 2093grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) 2094{ 2095int rc = 1; 2096int frtype; 2097void *handle; 2098char *lastcomp; 2099FILE *in = NULL; /* Ensure initialized */ 2100 2101#ifdef SUPPORT_LIBZ 2102gzFile ingz = NULL; 2103#endif 2104 2105#ifdef SUPPORT_LIBBZ2 2106BZFILE *inbz2 = NULL; 2107#endif 2108 2109#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 2110int pathlen; 2111#endif 2112 2113#if defined NATIVE_ZOS 2114int zos_type; 2115FILE *zos_test_file; 2116#endif 2117 2118/* If the file name is "-" we scan stdin */ 2119 2120if (strcmp(pathname, "-") == 0) 2121 { 2122 return pcregrep(stdin, FR_PLAIN, stdin_name, 2123 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? 
2124 stdin_name : NULL); 2125 } 2126 2127/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to 2128directories, whereas --include and --exclude apply to everything else. The test 2129is against the final component of the path. */ 2130 2131lastcomp = strrchr(pathname, FILESEP); 2132lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1; 2133 2134/* If the file is a directory, skip if not recursing or if explicitly excluded. 2135Otherwise, scan the directory and recurse for each path within it. The scanning 2136code is localized so it can be made system-specific. */ 2137 2138 2139/* For z/OS, determine the file type. */ 2140 2141#if defined NATIVE_ZOS 2142zos_test_file = fopen(pathname,"rb"); 2143 2144if (zos_test_file == NULL) 2145 { 2146 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n", 2147 pathname, strerror(errno)); 2148 return -1; 2149 } 2150zos_type = identifyzosfiletype (zos_test_file); 2151fclose (zos_test_file); 2152 2153/* Handle a PDS in separate code */ 2154 2155if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE) 2156 { 2157 return travelonpdsdir (pathname, only_one_at_top); 2158 } 2159 2160/* Deal with regular files in the normal way below. These types are: 2161 zos_type == __ZOS_PDS_MEMBER 2162 zos_type == __ZOS_PS 2163 zos_type == __ZOS_VSAM_KSDS 2164 zos_type == __ZOS_VSAM_ESDS 2165 zos_type == __ZOS_VSAM_RRDS 2166*/ 2167 2168/* Handle a z/OS directory using common code. 
*/ 2169 2170else if (zos_type == __ZOS_HFS) 2171 { 2172#endif /* NATIVE_ZOS */ 2173 2174 2175/* Handle directories: common code for all OS */ 2176 2177if (isdirectory(pathname)) 2178 { 2179 if (dee_action == dee_SKIP || 2180 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns)) 2181 return -1; 2182 2183 if (dee_action == dee_RECURSE) 2184 { 2185 char buffer[1024]; 2186 char *nextfile; 2187 directory_type *dir = opendirectory(pathname); 2188 2189 if (dir == NULL) 2190 { 2191 if (!silent) 2192 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, 2193 strerror(errno)); 2194 return 2; 2195 } 2196 2197 while ((nextfile = readdirectory(dir)) != NULL) 2198 { 2199 int frc; 2200 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile); 2201 frc = grep_or_recurse(buffer, dir_recurse, FALSE); 2202 if (frc > 1) rc = frc; 2203 else if (frc == 0 && rc == 1) rc = 0; 2204 } 2205 2206 closedirectory(dir); 2207 return rc; 2208 } 2209 } 2210 2211#if defined NATIVE_ZOS 2212 } 2213#endif 2214 2215/* If the file is not a directory, check for a regular file, and if it is not, 2216skip it if that's been requested. Otherwise, check for an explicit inclusion or 2217exclusion. */ 2218 2219else if ( 2220#if defined NATIVE_ZOS 2221 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) || 2222#else /* all other OS */ 2223 (!isregfile(pathname) && DEE_action == DEE_SKIP) || 2224#endif 2225 !test_incexc(lastcomp, include_patterns, exclude_patterns)) 2226 return -1; /* File skipped */ 2227 2228/* Control reaches here if we have a regular file, or if we have a directory 2229and recursion or skipping was not requested, or if we have anything else and 2230skipping was not requested. The scan proceeds. If this is the first and only 2231argument at top level, we don't show the file name, unless we are only showing 2232the file name, or the filename was forced (-H). 
*/ 2233 2234#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 2235pathlen = (int)(strlen(pathname)); 2236#endif 2237 2238/* Open using zlib if it is supported and the file name ends with .gz. */ 2239 2240#ifdef SUPPORT_LIBZ 2241if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) 2242 { 2243 ingz = gzopen(pathname, "rb"); 2244 if (ingz == NULL) 2245 { 2246 if (!silent) 2247 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, 2248 strerror(errno)); 2249 return 2; 2250 } 2251 handle = (void *)ingz; 2252 frtype = FR_LIBZ; 2253 } 2254else 2255#endif 2256 2257/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ 2258 2259#ifdef SUPPORT_LIBBZ2 2260if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) 2261 { 2262 inbz2 = BZ2_bzopen(pathname, "rb"); 2263 handle = (void *)inbz2; 2264 frtype = FR_LIBBZ2; 2265 } 2266else 2267#endif 2268 2269/* Otherwise use plain fopen(). The label is so that we can come back here if 2270an attempt to read a .bz2 file indicates that it really is a plain file. */ 2271 2272#ifdef SUPPORT_LIBBZ2 2273PLAIN_FILE: 2274#endif 2275 { 2276 in = fopen(pathname, "rb"); 2277 handle = (void *)in; 2278 frtype = FR_PLAIN; 2279 } 2280 2281/* All the opening methods return errno when they fail. */ 2282 2283if (handle == NULL) 2284 { 2285 if (!silent) 2286 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, 2287 strerror(errno)); 2288 return 2; 2289 } 2290 2291/* Now grep the file */ 2292 2293rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT || 2294 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); 2295 2296/* Close in an appropriate manner. */ 2297 2298#ifdef SUPPORT_LIBZ 2299if (frtype == FR_LIBZ) 2300 gzclose(ingz); 2301else 2302#endif 2303 2304/* If it is a .bz2 file and the result is 3, it means that the first attempt to 2305read failed. If the error indicates that the file isn't in fact bzipped, try 2306again as a normal file. 
*/ 2307 2308#ifdef SUPPORT_LIBBZ2 2309if (frtype == FR_LIBBZ2) 2310 { 2311 if (rc == 3) 2312 { 2313 int errnum; 2314 const char *err = BZ2_bzerror(inbz2, &errnum); 2315 if (errnum == BZ_DATA_ERROR_MAGIC) 2316 { 2317 BZ2_bzclose(inbz2); 2318 goto PLAIN_FILE; 2319 } 2320 else if (!silent) 2321 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n", 2322 pathname, err); 2323 rc = 2; /* The normal "something went wrong" code */ 2324 } 2325 BZ2_bzclose(inbz2); 2326 } 2327else 2328#endif 2329 2330/* Normal file close */ 2331 2332fclose(in); 2333 2334/* Pass back the yield from pcregrep(). */ 2335 2336return rc; 2337} 2338 2339 2340 2341/************************************************* 2342* Handle a single-letter, no data option * 2343*************************************************/ 2344 2345static int 2346handle_option(int letter, int options) 2347{ 2348switch(letter) 2349 { 2350 case N_FOFFSETS: file_offsets = TRUE; break; 2351 case N_HELP: help(); pcregrep_exit(0); 2352 case N_LBUFFER: line_buffered = TRUE; break; 2353 case N_LOFFSETS: line_offsets = number = TRUE; break; 2354 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break; 2355 case 'a': binary_files = BIN_TEXT; break; 2356 case 'c': count_only = TRUE; break; 2357 case 'F': process_options |= PO_FIXED_STRINGS; break; 2358 case 'H': filenames = FN_FORCE; break; 2359 case 'I': binary_files = BIN_NOMATCH; break; 2360 case 'h': filenames = FN_NONE; break; 2361 case 'i': options |= PCRE_CASELESS; break; 2362 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; 2363 case 'L': filenames = FN_NOMATCH_ONLY; break; 2364 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; 2365 case 'n': number = TRUE; break; 2366 2367 case 'o': 2368 only_matching_last = add_number(0, only_matching_last); 2369 if (only_matching == NULL) only_matching = only_matching_last; 2370 break; 2371 2372 case 'q': quiet = TRUE; break; 2373 case 'r': dee_action = dee_RECURSE; break; 2374 
case 's': silent = TRUE; break; 2375 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break; 2376 case 'v': invert = TRUE; break; 2377 case 'w': process_options |= PO_WORD_MATCH; break; 2378 case 'x': process_options |= PO_LINE_MATCH; break; 2379 2380 case 'V': 2381 fprintf(stdout, "pcregrep version %s\n", pcre_version()); 2382 pcregrep_exit(0); 2383 break; 2384 2385 default: 2386 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); 2387 pcregrep_exit(usage(2)); 2388 } 2389 2390return options; 2391} 2392 2393 2394 2395 2396/************************************************* 2397* Construct printed ordinal * 2398*************************************************/ 2399 2400/* This turns a number into "1st", "3rd", etc. */ 2401 2402static char * 2403ordin(int n) 2404{ 2405static char buffer[8]; 2406char *p = buffer; 2407sprintf(p, "%d", n); 2408while (*p != 0) p++; 2409switch (n%10) 2410 { 2411 case 1: strcpy(p, "st"); break; 2412 case 2: strcpy(p, "nd"); break; 2413 case 3: strcpy(p, "rd"); break; 2414 default: strcpy(p, "th"); break; 2415 } 2416return buffer; 2417} 2418 2419 2420 2421/************************************************* 2422* Compile a single pattern * 2423*************************************************/ 2424 2425/* Do nothing if the pattern has already been compiled. This is the case for 2426include/exclude patterns read from a file. 2427 2428When the -F option has been used, each "pattern" may be a list of strings, 2429separated by line breaks. They will be matched literally. We split such a 2430string and compile the first substring, inserting an additional block into the 2431pattern chain. 2432 2433Arguments: 2434 p points to the pattern block 2435 options the PCRE options 2436 popts the processing options 2437 fromfile TRUE if the pattern was read from a file 2438 fromtext file name or identifying text (e.g. 
"include") 2439 count 0 if this is the only command line pattern, or 2440 number of the command line pattern, or 2441 linenumber for a pattern from a file 2442 2443Returns: TRUE on success, FALSE after an error 2444*/ 2445 2446static BOOL 2447compile_pattern(patstr *p, int options, int popts, int fromfile, 2448 const char *fromtext, int count) 2449{ 2450char buffer[PATBUFSIZE]; 2451const char *error; 2452char *ps = p->string; 2453int patlen = strlen(ps); 2454int errptr; 2455 2456if (p->compiled != NULL) return TRUE; 2457 2458if ((popts & PO_FIXED_STRINGS) != 0) 2459 { 2460 int ellength; 2461 char *eop = ps + patlen; 2462 char *pe = end_of_line(ps, eop, &ellength); 2463 2464 if (ellength != 0) 2465 { 2466 if (add_pattern(pe, p) == NULL) return FALSE; 2467 patlen = (int)(pe - ps - ellength); 2468 } 2469 } 2470 2471sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); 2472p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables); 2473if (p->compiled != NULL) return TRUE; 2474 2475/* Handle compile errors */ 2476 2477errptr -= (int)strlen(prefix[popts]); 2478if (errptr > patlen) errptr = patlen; 2479 2480if (fromfile) 2481 { 2482 fprintf(stderr, "pcregrep: Error in regex in line %d of %s " 2483 "at offset %d: %s\n", count, fromtext, errptr, error); 2484 } 2485else 2486 { 2487 if (count == 0) 2488 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n", 2489 fromtext, errptr, error); 2490 else 2491 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n", 2492 ordin(count), fromtext, errptr, error); 2493 } 2494 2495return FALSE; 2496} 2497 2498 2499 2500/************************************************* 2501* Read and compile a file of patterns * 2502*************************************************/ 2503 2504/* This is used for --filelist, --include-from, and --exclude-from. 
2505 2506Arguments: 2507 name the name of the file; "-" is stdin 2508 patptr pointer to the pattern chain anchor 2509 patlastptr pointer to the last pattern pointer 2510 popts the process options to pass to pattern_compile() 2511 2512Returns: TRUE if all went well 2513*/ 2514 2515static BOOL 2516read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts) 2517{ 2518int linenumber = 0; 2519FILE *f; 2520char *filename; 2521char buffer[PATBUFSIZE]; 2522 2523if (strcmp(name, "-") == 0) 2524 { 2525 f = stdin; 2526 filename = stdin_name; 2527 } 2528else 2529 { 2530 f = fopen(name, "r"); 2531 if (f == NULL) 2532 { 2533 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno)); 2534 return FALSE; 2535 } 2536 filename = name; 2537 } 2538 2539while (fgets(buffer, PATBUFSIZE, f) != NULL) 2540 { 2541 char *s = buffer + (int)strlen(buffer); 2542 while (s > buffer && isspace((unsigned char)(s[-1]))) s--; 2543 *s = 0; 2544 linenumber++; 2545 if (buffer[0] == 0) continue; /* Skip blank lines */ 2546 2547 /* Note: this call to add_pattern() puts a pointer to the local variable 2548 "buffer" into the pattern chain. However, that pointer is used only when 2549 compiling the pattern, which happens immediately below, so we flatten it 2550 afterwards, as a precaution against any later code trying to use it. */ 2551 2552 *patlastptr = add_pattern(buffer, *patlastptr); 2553 if (*patlastptr == NULL) 2554 { 2555 if (f != stdin) fclose(f); 2556 return FALSE; 2557 } 2558 if (*patptr == NULL) *patptr = *patlastptr; 2559 2560 /* This loop is needed because compiling a "pattern" when -F is set may add 2561 on additional literal patterns if the original contains a newline. In the 2562 common case, it never will, because fgets() stops at a newline. However, 2563 the -N option can be used to give pcregrep a different newline setting. 
*/ 2564 2565 for(;;) 2566 { 2567 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename, 2568 linenumber)) 2569 { 2570 if (f != stdin) fclose(f); 2571 return FALSE; 2572 } 2573 (*patlastptr)->string = NULL; /* Insurance */ 2574 if ((*patlastptr)->next == NULL) break; 2575 *patlastptr = (*patlastptr)->next; 2576 } 2577 } 2578 2579if (f != stdin) fclose(f); 2580return TRUE; 2581} 2582 2583 2584 2585/************************************************* 2586* Main program * 2587*************************************************/ 2588 2589/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ 2590 2591int 2592main(int argc, char **argv) 2593{ 2594int i, j; 2595int rc = 1; 2596BOOL only_one_at_top; 2597patstr *cp; 2598fnstr *fn; 2599const char *locale_from = "--locale"; 2600const char *error; 2601 2602#ifdef SUPPORT_PCREGREP_JIT 2603pcre_jit_stack *jit_stack = NULL; 2604#endif 2605 2606/* Set the default line ending value from the default in the PCRE library; 2607"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". 2608Note that the return values from pcre_config(), though derived from the ASCII 2609codes, are the same in EBCDIC environments, so we must use the actual values 2610rather than escapes such as as '\r'. 
*/ 2611 2612(void)pcre_config(PCRE_CONFIG_NEWLINE, &i); 2613switch(i) 2614 { 2615 default: newline = (char *)"lf"; break; 2616 case 13: newline = (char *)"cr"; break; 2617 case (13 << 8) | 10: newline = (char *)"crlf"; break; 2618 case -1: newline = (char *)"any"; break; 2619 case -2: newline = (char *)"anycrlf"; break; 2620 } 2621 2622/* Process the options */ 2623 2624for (i = 1; i < argc; i++) 2625 { 2626 option_item *op = NULL; 2627 char *option_data = (char *)""; /* default to keep compiler happy */ 2628 BOOL longop; 2629 BOOL longopwasequals = FALSE; 2630 2631 if (argv[i][0] != '-') break; 2632 2633 /* If we hit an argument that is just "-", it may be a reference to STDIN, 2634 but only if we have previously had -e or -f to define the patterns. */ 2635 2636 if (argv[i][1] == 0) 2637 { 2638 if (pattern_files != NULL || patterns != NULL) break; 2639 else pcregrep_exit(usage(2)); 2640 } 2641 2642 /* Handle a long name option, or -- to terminate the options */ 2643 2644 if (argv[i][1] == '-') 2645 { 2646 char *arg = argv[i] + 2; 2647 char *argequals = strchr(arg, '='); 2648 2649 if (*arg == 0) /* -- terminates options */ 2650 { 2651 i++; 2652 break; /* out of the options-handling loop */ 2653 } 2654 2655 longop = TRUE; 2656 2657 /* Some long options have data that follows after =, for example file=name. 2658 Some options have variations in the long name spelling: specifically, we 2659 allow "regexp" because GNU grep allows it, though I personally go along 2660 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". 2661 These options are entered in the table as "regex(p)". Options can be in 2662 both these categories. 
*/ 2663 2664 for (op = optionlist; op->one_char != 0; op++) 2665 { 2666 char *opbra = strchr(op->long_name, '('); 2667 char *equals = strchr(op->long_name, '='); 2668 2669 /* Handle options with only one spelling of the name */ 2670 2671 if (opbra == NULL) /* Does not contain '(' */ 2672 { 2673 if (equals == NULL) /* Not thing=data case */ 2674 { 2675 if (strcmp(arg, op->long_name) == 0) break; 2676 } 2677 else /* Special case xxx=data */ 2678 { 2679 int oplen = (int)(equals - op->long_name); 2680 int arglen = (argequals == NULL)? 2681 (int)strlen(arg) : (int)(argequals - arg); 2682 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) 2683 { 2684 option_data = arg + arglen; 2685 if (*option_data == '=') 2686 { 2687 option_data++; 2688 longopwasequals = TRUE; 2689 } 2690 break; 2691 } 2692 } 2693 } 2694 2695 /* Handle options with an alternate spelling of the name */ 2696 2697 else 2698 { 2699 char buff1[24]; 2700 char buff2[24]; 2701 2702 int baselen = (int)(opbra - op->long_name); 2703 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1); 2704 int arglen = (argequals == NULL || equals == NULL)? 
2705 (int)strlen(arg) : (int)(argequals - arg); 2706 2707 sprintf(buff1, "%.*s", baselen, op->long_name); 2708 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1); 2709 2710 if (strncmp(arg, buff1, arglen) == 0 || 2711 strncmp(arg, buff2, arglen) == 0) 2712 { 2713 if (equals != NULL && argequals != NULL) 2714 { 2715 option_data = argequals; 2716 if (*option_data == '=') 2717 { 2718 option_data++; 2719 longopwasequals = TRUE; 2720 } 2721 } 2722 break; 2723 } 2724 } 2725 } 2726 2727 if (op->one_char == 0) 2728 { 2729 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); 2730 pcregrep_exit(usage(2)); 2731 } 2732 } 2733 2734 /* Jeffrey Friedl's debugging harness uses these additional options which 2735 are not in the right form for putting in the option table because they use 2736 only one hyphen, yet are more than one character long. By putting them 2737 separately here, they will not get displayed as part of the help() output, 2738 but I don't think Jeffrey will care about that. */ 2739 2740#ifdef JFRIEDL_DEBUG 2741 else if (strcmp(argv[i], "-pre") == 0) { 2742 jfriedl_prefix = argv[++i]; 2743 continue; 2744 } else if (strcmp(argv[i], "-post") == 0) { 2745 jfriedl_postfix = argv[++i]; 2746 continue; 2747 } else if (strcmp(argv[i], "-XT") == 0) { 2748 sscanf(argv[++i], "%d", &jfriedl_XT); 2749 continue; 2750 } else if (strcmp(argv[i], "-XR") == 0) { 2751 sscanf(argv[++i], "%d", &jfriedl_XR); 2752 continue; 2753 } 2754#endif 2755 2756 2757 /* One-char options; many that have no data may be in a single argument; we 2758 continue till we hit the last one or one that needs data. 
*/ 2759 2760 else 2761 { 2762 char *s = argv[i] + 1; 2763 longop = FALSE; 2764 2765 while (*s != 0) 2766 { 2767 for (op = optionlist; op->one_char != 0; op++) 2768 { 2769 if (*s == op->one_char) break; 2770 } 2771 if (op->one_char == 0) 2772 { 2773 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n", 2774 *s, argv[i]); 2775 pcregrep_exit(usage(2)); 2776 } 2777 2778 option_data = s+1; 2779 2780 /* Break out if this is the last character in the string; it's handled 2781 below like a single multi-char option. */ 2782 2783 if (*option_data == 0) break; 2784 2785 /* Check for a single-character option that has data: OP_OP_NUMBER(S) 2786 are used for ones that either have a numerical number or defaults, i.e. 2787 the data is optional. If a digit follows, there is data; if not, carry on 2788 with other single-character options in the same string. */ 2789 2790 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS) 2791 { 2792 if (isdigit((unsigned char)s[1])) break; 2793 } 2794 else /* Check for an option with data */ 2795 { 2796 if (op->type != OP_NODATA) break; 2797 } 2798 2799 /* Handle a single-character option with no data, then loop for the 2800 next character in the string. */ 2801 2802 pcre_options = handle_option(*s++, pcre_options); 2803 } 2804 } 2805 2806 /* At this point we should have op pointing to a matched option. If the type 2807 is NO_DATA, it means that there is no data, and the option might set 2808 something in the PCRE options. */ 2809 2810 if (op->type == OP_NODATA) 2811 { 2812 pcre_options = handle_option(op->one_char, pcre_options); 2813 continue; 2814 } 2815 2816 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that 2817 either has a value or defaults to something. It cannot have data in a 2818 separate item. At the moment, the only such options are "colo(u)r", 2819 "only-matching", and Jeffrey Friedl's special -S debugging option. 
*/ 2820 2821 if (*option_data == 0 && 2822 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER || 2823 op->type == OP_OP_NUMBERS)) 2824 { 2825 switch (op->one_char) 2826 { 2827 case N_COLOUR: 2828 colour_option = (char *)"auto"; 2829 break; 2830 2831 case 'o': 2832 only_matching_last = add_number(0, only_matching_last); 2833 if (only_matching == NULL) only_matching = only_matching_last; 2834 break; 2835 2836#ifdef JFRIEDL_DEBUG 2837 case 'S': 2838 S_arg = 0; 2839 break; 2840#endif 2841 } 2842 continue; 2843 } 2844 2845 /* Otherwise, find the data string for the option. */ 2846 2847 if (*option_data == 0) 2848 { 2849 if (i >= argc - 1 || longopwasequals) 2850 { 2851 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); 2852 pcregrep_exit(usage(2)); 2853 } 2854 option_data = argv[++i]; 2855 } 2856 2857 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be 2858 added to a chain of numbers. */ 2859 2860 if (op->type == OP_OP_NUMBERS) 2861 { 2862 unsigned long int n = decode_number(option_data, op, longop); 2863 omdatastr *omd = (omdatastr *)op->dataptr; 2864 *(omd->lastptr) = add_number((int)n, *(omd->lastptr)); 2865 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr); 2866 } 2867 2868 /* If the option type is OP_PATLIST, it's the -e option, or one of the 2869 include/exclude options, which can be called multiple times to create lists 2870 of patterns. */ 2871 2872 else if (op->type == OP_PATLIST) 2873 { 2874 patdatastr *pd = (patdatastr *)op->dataptr; 2875 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr)); 2876 if (*(pd->lastptr) == NULL) goto EXIT2; 2877 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); 2878 } 2879 2880 /* If the option type is OP_FILELIST, it's one of the options that names a 2881 file. 
*/ 2882 2883 else if (op->type == OP_FILELIST) 2884 { 2885 fndatastr *fd = (fndatastr *)op->dataptr; 2886 fn = (fnstr *)malloc(sizeof(fnstr)); 2887 if (fn == NULL) 2888 { 2889 fprintf(stderr, "pcregrep: malloc failed\n"); 2890 goto EXIT2; 2891 } 2892 fn->next = NULL; 2893 fn->name = option_data; 2894 if (*(fd->anchor) == NULL) 2895 *(fd->anchor) = fn; 2896 else 2897 (*(fd->lastptr))->next = fn; 2898 *(fd->lastptr) = fn; 2899 } 2900 2901 /* Handle OP_BINARY_FILES */ 2902 2903 else if (op->type == OP_BINFILES) 2904 { 2905 if (strcmp(option_data, "binary") == 0) 2906 binary_files = BIN_BINARY; 2907 else if (strcmp(option_data, "without-match") == 0) 2908 binary_files = BIN_NOMATCH; 2909 else if (strcmp(option_data, "text") == 0) 2910 binary_files = BIN_TEXT; 2911 else 2912 { 2913 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n", 2914 option_data); 2915 pcregrep_exit(usage(2)); 2916 } 2917 } 2918 2919 /* Otherwise, deal with a single string or numeric data value. */ 2920 2921 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER && 2922 op->type != OP_OP_NUMBER) 2923 { 2924 *((char **)op->dataptr) = option_data; 2925 } 2926 else 2927 { 2928 unsigned long int n = decode_number(option_data, op, longop); 2929 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n; 2930 else *((int *)op->dataptr) = n; 2931 } 2932 } 2933 2934/* Options have been decoded. If -C was used, its value is used as a default 2935for -A and -B. */ 2936 2937if (both_context > 0) 2938 { 2939 if (after_context == 0) after_context = both_context; 2940 if (before_context == 0) before_context = both_context; 2941 } 2942 2943/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. 2944However, all three set show_only_matching because they display, each in their 2945own way, only the data that has matched. 
*/ 2946 2947if ((only_matching != NULL && (file_offsets || line_offsets)) || 2948 (file_offsets && line_offsets)) 2949 { 2950 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets " 2951 "and/or --line-offsets\n"); 2952 pcregrep_exit(usage(2)); 2953 } 2954 2955if (only_matching != NULL || file_offsets || line_offsets) 2956 show_only_matching = TRUE; 2957 2958/* If a locale has not been provided as an option, see if the LC_CTYPE or 2959LC_ALL environment variable is set, and if so, use it. */ 2960 2961if (locale == NULL) 2962 { 2963 locale = getenv("LC_ALL"); 2964 locale_from = "LCC_ALL"; 2965 } 2966 2967if (locale == NULL) 2968 { 2969 locale = getenv("LC_CTYPE"); 2970 locale_from = "LC_CTYPE"; 2971 } 2972 2973/* If a locale is set, use it to generate the tables the PCRE needs. Otherwise, 2974pcretables==NULL, which causes the use of default tables. */ 2975 2976if (locale != NULL) 2977 { 2978 if (setlocale(LC_CTYPE, locale) == NULL) 2979 { 2980 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", 2981 locale, locale_from); 2982 goto EXIT2; 2983 } 2984 pcretables = pcre_maketables(); 2985 } 2986 2987/* Sort out colouring */ 2988 2989if (colour_option != NULL && strcmp(colour_option, "never") != 0) 2990 { 2991 if (strcmp(colour_option, "always") == 0) do_colour = TRUE; 2992 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); 2993 else 2994 { 2995 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", 2996 colour_option); 2997 goto EXIT2; 2998 } 2999 if (do_colour) 3000 { 3001 char *cs = getenv("PCREGREP_COLOUR"); 3002 if (cs == NULL) cs = getenv("PCREGREP_COLOR"); 3003 if (cs != NULL) colour_string = cs; 3004 } 3005 } 3006 3007/* Interpret the newline type; the default settings are Unix-like. 
*/ 3008 3009if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0) 3010 { 3011 pcre_options |= PCRE_NEWLINE_CR; 3012 endlinetype = EL_CR; 3013 } 3014else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0) 3015 { 3016 pcre_options |= PCRE_NEWLINE_LF; 3017 endlinetype = EL_LF; 3018 } 3019else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0) 3020 { 3021 pcre_options |= PCRE_NEWLINE_CRLF; 3022 endlinetype = EL_CRLF; 3023 } 3024else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0) 3025 { 3026 pcre_options |= PCRE_NEWLINE_ANY; 3027 endlinetype = EL_ANY; 3028 } 3029else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) 3030 { 3031 pcre_options |= PCRE_NEWLINE_ANYCRLF; 3032 endlinetype = EL_ANYCRLF; 3033 } 3034else 3035 { 3036 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); 3037 goto EXIT2; 3038 } 3039 3040/* Interpret the text values for -d and -D */ 3041 3042if (dee_option != NULL) 3043 { 3044 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; 3045 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; 3046 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; 3047 else 3048 { 3049 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); 3050 goto EXIT2; 3051 } 3052 } 3053 3054if (DEE_option != NULL) 3055 { 3056 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; 3057 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; 3058 else 3059 { 3060 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); 3061 goto EXIT2; 3062 } 3063 } 3064 3065/* Check the values for Jeffrey Friedl's debugging options. 
*/ 3066 3067#ifdef JFRIEDL_DEBUG 3068if (S_arg > 9) 3069 { 3070 fprintf(stderr, "pcregrep: bad value for -S option\n"); 3071 return 2; 3072 } 3073if (jfriedl_XT != 0 || jfriedl_XR != 0) 3074 { 3075 if (jfriedl_XT == 0) jfriedl_XT = 1; 3076 if (jfriedl_XR == 0) jfriedl_XR = 1; 3077 } 3078#endif 3079 3080/* Get memory for the main buffer. */ 3081 3082bufsize = 3*bufthird; 3083main_buffer = (char *)malloc(bufsize); 3084 3085if (main_buffer == NULL) 3086 { 3087 fprintf(stderr, "pcregrep: malloc failed\n"); 3088 goto EXIT2; 3089 } 3090 3091/* If no patterns were provided by -e, and there are no files provided by -f, 3092the first argument is the one and only pattern, and it must exist. */ 3093 3094if (patterns == NULL && pattern_files == NULL) 3095 { 3096 if (i >= argc) return usage(2); 3097 patterns = patterns_last = add_pattern(argv[i++], NULL); 3098 if (patterns == NULL) goto EXIT2; 3099 } 3100 3101/* Compile the patterns that were provided on the command line, either by 3102multiple uses of -e or as a single unkeyed pattern. We cannot do this until 3103after all the command-line options are read so that we know which PCRE options 3104to use. When -F is used, compile_pattern() may add another block into the 3105chain, so we must not access the next pointer till after the compile. */ 3106 3107for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) 3108 { 3109 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line", 3110 (j == 1 && patterns->next == NULL)? 0 : j)) 3111 goto EXIT2; 3112 } 3113 3114/* Read and compile the regular expressions that are provided in files. */ 3115 3116for (fn = pattern_files; fn != NULL; fn = fn->next) 3117 { 3118 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options)) 3119 goto EXIT2; 3120 } 3121 3122/* Study the regular expressions, as we will be running them many times. 
If an 3123extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is 3124returned, even if studying produces no data. */ 3125 3126if (match_limit > 0 || match_limit_recursion > 0) 3127 study_options |= PCRE_STUDY_EXTRA_NEEDED; 3128 3129/* Unless JIT has been explicitly disabled, arrange a stack for it to use. */ 3130 3131#ifdef SUPPORT_PCREGREP_JIT 3132if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0) 3133 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024); 3134#endif 3135 3136for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) 3137 { 3138 cp->hint = pcre_study(cp->compiled, study_options, &error); 3139 if (error != NULL) 3140 { 3141 char s[16]; 3142 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j); 3143 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); 3144 goto EXIT2; 3145 } 3146#ifdef SUPPORT_PCREGREP_JIT 3147 if (jit_stack != NULL && cp->hint != NULL) 3148 pcre_assign_jit_stack(cp->hint, NULL, jit_stack); 3149#endif 3150 } 3151 3152/* If --match-limit or --recursion-limit was set, put the value(s) into the 3153pcre_extra block for each pattern. There will always be an extra block because 3154of the use of PCRE_STUDY_EXTRA_NEEDED above. */ 3155 3156for (cp = patterns; cp != NULL; cp = cp->next) 3157 { 3158 if (match_limit > 0) 3159 { 3160 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT; 3161 cp->hint->match_limit = match_limit; 3162 } 3163 3164 if (match_limit_recursion > 0) 3165 { 3166 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 3167 cp->hint->match_limit_recursion = match_limit_recursion; 3168 } 3169 } 3170 3171/* If there are include or exclude patterns read from the command line, compile 3172them. -F, -w, and -x do not apply, so the third argument of compile_pattern is 31730. 
*/ 3174 3175for (j = 0; j < 4; j++) 3176 { 3177 int k; 3178 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next) 3179 { 3180 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j], 3181 (k == 1 && cp->next == NULL)? 0 : k)) 3182 goto EXIT2; 3183 } 3184 } 3185 3186/* Read and compile include/exclude patterns from files. */ 3187 3188for (fn = include_from; fn != NULL; fn = fn->next) 3189 { 3190 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0)) 3191 goto EXIT2; 3192 } 3193 3194for (fn = exclude_from; fn != NULL; fn = fn->next) 3195 { 3196 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0)) 3197 goto EXIT2; 3198 } 3199 3200/* If there are no files that contain lists of files to search, and there are 3201no file arguments, search stdin, and then exit. */ 3202 3203if (file_lists == NULL && i >= argc) 3204 { 3205 rc = pcregrep(stdin, FR_PLAIN, stdin_name, 3206 (filenames > FN_DEFAULT)? stdin_name : NULL); 3207 goto EXIT; 3208 } 3209 3210/* If any files that contains a list of files to search have been specified, 3211read them line by line and search the given files. */ 3212 3213for (fn = file_lists; fn != NULL; fn = fn->next) 3214 { 3215 char buffer[PATBUFSIZE]; 3216 FILE *fl; 3217 if (strcmp(fn->name, "-") == 0) fl = stdin; else 3218 { 3219 fl = fopen(fn->name, "rb"); 3220 if (fl == NULL) 3221 { 3222 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name, 3223 strerror(errno)); 3224 goto EXIT2; 3225 } 3226 } 3227 while (fgets(buffer, PATBUFSIZE, fl) != NULL) 3228 { 3229 int frc; 3230 char *end = buffer + (int)strlen(buffer); 3231 while (end > buffer && isspace(end[-1])) end--; 3232 *end = 0; 3233 if (*buffer != 0) 3234 { 3235 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE); 3236 if (frc > 1) rc = frc; 3237 else if (frc == 0 && rc == 1) rc = 0; 3238 } 3239 } 3240 if (fl != stdin) fclose(fl); 3241 } 3242 3243/* After handling file-list, work through remaining arguments. 
Pass in the fact 3244that there is only one argument at top level - this suppresses the file name if 3245the argument is not a directory and filenames are not otherwise forced. */ 3246 3247only_one_at_top = i == argc - 1 && file_lists == NULL; 3248 3249for (; i < argc; i++) 3250 { 3251 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, 3252 only_one_at_top); 3253 if (frc > 1) rc = frc; 3254 else if (frc == 0 && rc == 1) rc = 0; 3255 } 3256 3257EXIT: 3258#ifdef SUPPORT_PCREGREP_JIT 3259if (jit_stack != NULL) pcre_jit_stack_free(jit_stack); 3260#endif 3261 3262free(main_buffer); 3263free((void *)pcretables); 3264 3265free_pattern_chain(patterns); 3266free_pattern_chain(include_patterns); 3267free_pattern_chain(include_dir_patterns); 3268free_pattern_chain(exclude_patterns); 3269free_pattern_chain(exclude_dir_patterns); 3270 3271free_file_chain(exclude_from); 3272free_file_chain(include_from); 3273free_file_chain(pattern_files); 3274free_file_chain(file_lists); 3275 3276while (only_matching != NULL) 3277 { 3278 omstr *this = only_matching; 3279 only_matching = this->next; 3280 free(this); 3281 } 3282 3283pcregrep_exit(rc); 3284 3285EXIT2: 3286rc = 2; 3287goto EXIT; 3288} 3289 3290/* End of pcregrep */ 3291