13e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey/* $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $ */ 23e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey/* $FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */ 33e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 43e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey/*- 53e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 63e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Copyright (C) 2008 Gabor Kovesdan <gabor@FreeBSD.org> 73e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * All rights reserved. 83e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * 93e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Redistribution and use in source and binary forms, with or without 103e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * modification, are permitted provided that the following conditions 113e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * are met: 123e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * 1. Redistributions of source code must retain the above copyright 133e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * notice, this list of conditions and the following disclaimer. 143e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * 2. Redistributions in binary form must reproduce the above copyright 153e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * notice, this list of conditions and the following disclaimer in the 163e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * documentation and/or other materials provided with the distribution. 173e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * 183e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 193e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 203e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 213e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 223e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 233e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 243e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 253e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 263e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 273e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 283e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * SUCH DAMAGE. 293e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 303e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 313e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey/* 323e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * XXX: This file is a speed up for grep to cover the defects of the 333e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * regex library. These optimizations should practically be implemented 343e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * there keeping this code clean. This is a future TODO, but for the 353e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * meantime, we need to use this workaround. 363e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 373e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 383e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#if HAVE_NBTOOL_CONFIG_H 393e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include "nbtool_config.h" 403e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#endif 413e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 423e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <sys/cdefs.h> 433e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey__RCSID("$NetBSD: fastgrep.c,v 1.5 2011/04/18 03:27:40 joerg Exp $"); 443e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 453e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <limits.h> 463e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <stdbool.h> 473e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <stdlib.h> 483e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <string.h> 493e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <wchar.h> 503e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include <wctype.h> 513e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 523e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey#include "grep.h" 533e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 543e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeystatic inline int grep_cmp(const unsigned char *, const unsigned char *, size_t); 553e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeystatic inline void grep_revstr(unsigned char *, int); 563e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 573e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeyvoid 583e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeyfgrepcomp(fastgrep_t *fg, const char *pat) 593e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey{ 603e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey unsigned int i; 613e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 623e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Initialize. */ 633e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len = strlen(pat); 643e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->bol = false; 653e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->eol = false; 663e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->reversed = false; 673e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 683e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->pattern = (unsigned char *)grep_strdup(pat); 693e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 703e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Preprocess pattern. */ 713e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 0; i <= UCHAR_MAX; i++) 723e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->qsBc[i] = fg->len; 733e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 1; i < fg->len; i++) 743e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->qsBc[fg->pattern[i]] = fg->len - i; 753e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey} 763e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 773e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey/* 783e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Returns: -1 on failure, 0 on success 793e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 803e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeyint 813e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeyfastcomp(fastgrep_t *fg, const char *pat) 823e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey{ 833e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey unsigned int i; 843e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int firstHalfDot = -1; 853e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int firstLastHalfDot = -1; 863e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int hasDot = 0; 873e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int lastHalfDot = 0; 883e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int shiftPatternLen; 893e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 903e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Initialize. */ 913e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len = strlen(pat); 923e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->bol = false; 933e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->eol = false; 943e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->reversed = false; 953e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->word = wflag; 963e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 973e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Remove end-of-line character ('$'). */ 983e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (fg->len > 0 && pat[fg->len - 1] == '$') { 993e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->eol = true; 1003e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len--; 1013e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1023e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1033e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Remove beginning-of-line character ('^'). */ 1043e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (pat[0] == '^') { 1053e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->bol = true; 1063e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len--; 1073e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pat++; 1083e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1093e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1103e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (fg->len >= 14 && 1113e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey memcmp(pat, "[[:<:]]", 7) == 0 && 1123e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey memcmp(pat + fg->len - 7, "[[:>:]]", 7) == 0) { 1133e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len -= 14; 1143e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pat += 7; 1153e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Word boundary is handled separately in util.c */ 1163e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->word = true; 1173e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1183e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1193e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* 1203e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * pat has been adjusted earlier to not include '^', '$' or 1213e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * the word match character classes at the beginning and ending 1223e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * of the string respectively. 1233e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 1243e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->pattern = grep_malloc(fg->len + 1); 1253e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey memcpy(fg->pattern, pat, fg->len); 1263e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->pattern[fg->len] = '\0'; 1273e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1283e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Look for ways to cheat...er...avoid the full regex engine. */ 1293e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 0; i < fg->len; i++) { 1303e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Can still cheat? */ 1313e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (fg->pattern[i] == '.') { 1323e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey hasDot = i; 1333e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (i < fg->len / 2) { 1343e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (firstHalfDot < 0) 1353e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Closest dot to the beginning */ 1363e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey firstHalfDot = i; 1373e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } else { 1383e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Closest dot to the end of the pattern. */ 1393e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey lastHalfDot = i; 1403e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (firstLastHalfDot < 0) 1413e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey firstLastHalfDot = i; 1423e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1433e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } else { 1443e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Free memory and let others know this is empty. */ 1453e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey free(fg->pattern); 1463e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->pattern = NULL; 1473e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (-1); 1483e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1493e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1503e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1513e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* 1523e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Determine if a reverse search would be faster based on the placement 1533e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * of the dots. 1543e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 1553e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if ((!(lflag || cflag)) && ((!(fg->bol || fg->eol)) && 1563e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ((lastHalfDot) && ((firstHalfDot < 0) || 1573e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ((fg->len - (lastHalfDot + 1)) < (size_t)firstHalfDot)))) && 1583e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey !oflag && !color) { 1593e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->reversed = true; 1603e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey hasDot = fg->len - (firstHalfDot < 0 ? 1613e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey firstLastHalfDot : firstHalfDot) - 1; 1623e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey grep_revstr(fg->pattern, fg->len); 1633e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1643e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1653e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* 1663e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Normal Quick Search would require a shift based on the position the 1673e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * next character after the comparison is within the pattern. With 1683e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * wildcards, the position of the last dot effects the maximum shift 1693e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * distance. 1703e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * The closer to the end the wild card is the slower the search. A 1713e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * reverse version of this algorithm would be useful for wildcards near 1723e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * the end of the string. 1733e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * 1743e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Examples: 1753e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Pattern Max shift 1763e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * ------- --------- 1773e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * this 5 1783e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * .his 4 1793e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * t.is 3 1803e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * th.s 2 1813e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * thi. 1 1823e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 1833e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1843e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Adjust the shift based on location of the last dot ('.'). */ 1853e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey shiftPatternLen = fg->len - hasDot; 1863e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1873e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Preprocess pattern. */ 1883e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 0; i <= (signed)UCHAR_MAX; i++) 1893e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->qsBc[i] = shiftPatternLen; 1903e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = hasDot + 1; i < fg->len; i++) { 1913e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->qsBc[fg->pattern[i]] = fg->len - i; 1923e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 1933e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 1943e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* 1953e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Put pattern back to normal after pre-processing to allow for easy 1963e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * comparisons later. 1973e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 1983e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (fg->reversed) 1993e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey grep_revstr(fg->pattern, fg->len); 2003e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2013e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (0); 2023e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey} 2033e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2043e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeyint 2053e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeygrep_search(fastgrep_t *fg, const unsigned char *data, size_t len, regmatch_t *pmatch) 2063e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey{ 2073e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey unsigned int j; 2083e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int ret = REG_NOMATCH; 2093e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2103e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (pmatch->rm_so == (ssize_t)len) 2113e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (ret); 2123e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2133e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (fg->bol && pmatch->rm_so != 0) { 2143e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_so = len; 2153e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_eo = len; 2163e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (ret); 2173e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 2183e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2193e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* No point in going farther if we do not have enough data. */ 2203e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (len < fg->len) 2213e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (ret); 2223e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2233e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Only try once at the beginning or ending of the line. */ 2243e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (fg->bol || fg->eol) { 2253e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Simple text comparison. */ 2263e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Verify data is >= pattern length before searching on it. */ 2273e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (len >= fg->len) { 2283e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Determine where in data to start search at. */ 2293e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey j = fg->eol ? len - fg->len : 0; 2303e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (!((fg->bol && fg->eol) && (len != fg->len))) 2313e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (grep_cmp(fg->pattern, data + j, 2323e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len) == -1) { 2333e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_so = j; 2343e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_eo = j + fg->len; 2353e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ret = 0; 2363e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 2373e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 2383e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } else if (fg->reversed) { 2393e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Quick Search algorithm. */ 2403e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey j = len; 2413e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey do { 2423e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (grep_cmp(fg->pattern, data + j - fg->len, 2433e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey fg->len) == -1) { 2443e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_so = j - fg->len; 2453e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_eo = j; 2463e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ret = 0; 2473e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey break; 2483e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 2493e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Shift if within bounds, otherwise, we are done. */ 2503e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (j == fg->len) 2513e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey break; 2523e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey j -= fg->qsBc[data[j - fg->len - 1]]; 2533e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } while (j >= fg->len); 2543e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } else { 2553e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Quick Search algorithm. */ 2563e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey j = pmatch->rm_so; 2573e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey do { 2583e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (grep_cmp(fg->pattern, data + j, fg->len) == -1) { 2593e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_so = j; 2603e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pmatch->rm_eo = j + fg->len; 2613e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ret = 0; 2623e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey break; 2633e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 2643e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2653e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey /* Shift if within bounds, otherwise, we are done. */ 2663e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (j + fg->len == len) 2673e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey break; 2683e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey else 2693e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey j += fg->qsBc[data[j + fg->len]]; 2703e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } while (j <= (len - fg->len)); 2713e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 2723e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2733e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (ret); 2743e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey} 2753e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2763e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey/* 2773e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * Returns: i >= 0 on failure (position that it failed) 2783e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey * -1 on success 2793e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey */ 2803e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeystatic inline int 2813e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeygrep_cmp(const unsigned char *pat, const unsigned char *data, size_t len) 2823e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey{ 2833e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey size_t size; 2843e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey wchar_t *wdata, *wpat; 2853e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey unsigned int i; 2863e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2873e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (iflag) { 2883e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if ((size = mbstowcs(NULL, (const char *)data, 0)) == 2893e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ((size_t) - 1)) 2903e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (-1); 2913e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2923e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey wdata = grep_malloc(size * sizeof(wint_t)); 2933e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2943e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (mbstowcs(wdata, (const char *)data, size) == 2953e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ((size_t) - 1)) 2963e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (-1); 2973e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 2983e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if ((size = mbstowcs(NULL, (const char *)pat, 0)) == 2993e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ((size_t) - 1)) 3003e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (-1); 3013e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 3023e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey wpat = grep_malloc(size * sizeof(wint_t)); 3033e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 3043e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if (mbstowcs(wpat, (const char *)pat, size) == ((size_t) - 1)) 3053e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (-1); 3063e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 0; i < len; i++) { 3073e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if ((towlower(wpat[i]) == towlower(wdata[i])) || 3083e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey ((grepbehave != GREP_FIXED) && wpat[i] == L'.')) 3093e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey continue; 3103e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey free(wpat); 3113e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey free(wdata); 3123e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (i); 3133e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 3143e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } else { 3153e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 0; i < len; i++) { 3163e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey if ((pat[i] == data[i]) || ((grepbehave != GREP_FIXED) && 3173e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey pat[i] == '.')) 3183e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey continue; 3193e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (i); 3203e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 3213e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 3223e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey return (-1); 3233e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey} 3243e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 3253e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeystatic inline void 3263e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkeygrep_revstr(unsigned char *str, int len) 3273e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey{ 3283e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey int i; 3293e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey char c; 3303e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey 3313e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey for (i = 0; i < len / 2; i++) { 3323e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey c = str[i]; 3333e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey str[i] = str[len - i - 1]; 3343e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey str[len - i - 1] = c; 3353e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey } 3363e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9Jeff Sharkey} 337