// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29// 30// Author: Sanjay Ghemawat 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <vector> 37#include <assert.h> 38 39#include "pcrecpp_internal.h" 40#include "pcre_scanner.h" 41 42using std::vector; 43 44namespace pcrecpp { 45 46Scanner::Scanner() 47 : data_(), 48 input_(data_), 49 skip_(NULL), 50 should_skip_(false), 51 skip_repeat_(false), 52 save_comments_(false), 53 comments_(NULL), 54 comments_offset_(0) { 55} 56 57Scanner::Scanner(const string& in) 58 : data_(in), 59 input_(data_), 60 skip_(NULL), 61 should_skip_(false), 62 skip_repeat_(false), 63 save_comments_(false), 64 comments_(NULL), 65 comments_offset_(0) { 66} 67 68Scanner::~Scanner() { 69 delete skip_; 70 delete comments_; 71} 72 73void Scanner::SetSkipExpression(const char* re) { 74 delete skip_; 75 if (re != NULL) { 76 skip_ = new RE(re); 77 should_skip_ = true; 78 skip_repeat_ = true; 79 ConsumeSkip(); 80 } else { 81 skip_ = NULL; 82 should_skip_ = false; 83 skip_repeat_ = false; 84 } 85} 86 87void Scanner::Skip(const char* re) { 88 delete skip_; 89 if (re != NULL) { 90 skip_ = new RE(re); 91 should_skip_ = true; 92 skip_repeat_ = false; 93 ConsumeSkip(); 94 } else { 95 skip_ = NULL; 96 should_skip_ = false; 97 skip_repeat_ = false; 98 } 99} 100 101void Scanner::DisableSkip() { 102 assert(skip_ != NULL); 103 should_skip_ = false; 104} 105 106void Scanner::EnableSkip() { 107 assert(skip_ != NULL); 108 should_skip_ = true; 109 ConsumeSkip(); 110} 111 112int Scanner::LineNumber() const { 113 // TODO: Make it more efficient by keeping track of the last point 114 // where we computed line numbers and counting newlines since then. 115 // We could use std:count, but not all systems have it. 
:-( 116 int count = 1; 117 for (const char* p = data_.data(); p < input_.data(); ++p) 118 if (*p == '\n') 119 ++count; 120 return count; 121} 122 123int Scanner::Offset() const { 124 return (int)(input_.data() - data_.c_str()); 125} 126 127bool Scanner::LookingAt(const RE& re) const { 128 int consumed; 129 return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); 130} 131 132 133bool Scanner::Consume(const RE& re, 134 const Arg& arg0, 135 const Arg& arg1, 136 const Arg& arg2) { 137 const bool result = re.Consume(&input_, arg0, arg1, arg2); 138 if (result && should_skip_) ConsumeSkip(); 139 return result; 140} 141 142// helper function to consume *skip_ and honour save_comments_ 143void Scanner::ConsumeSkip() { 144 const char* start_data = input_.data(); 145 while (skip_->Consume(&input_)) { 146 if (!skip_repeat_) { 147 // Only one skip allowed. 148 break; 149 } 150 } 151 if (save_comments_) { 152 if (comments_ == NULL) { 153 comments_ = new vector<StringPiece>; 154 } 155 // already pointing one past end, so no need to +1 156 int length = (int)(input_.data() - start_data); 157 if (length > 0) { 158 comments_->push_back(StringPiece(start_data, length)); 159 } 160 } 161} 162 163 164void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { 165 // short circuit out if we've not yet initialized comments_ 166 // (e.g., when save_comments is false) 167 if (!comments_) { 168 return; 169 } 170 // TODO: if we guarantee that comments_ will contain StringPieces 171 // that are ordered by their start, then we can do a binary search 172 // for the first StringPiece at or past start and then scan for the 173 // ones contained in the range, quit early (use equal_range or 174 // lower_bound) 175 for (vector<StringPiece>::const_iterator it = comments_->begin(); 176 it != comments_->end(); ++it) { 177 if ((it->data() >= data_.c_str() + start && 178 it->data() + it->size() <= data_.c_str() + end)) { 179 ranges->push_back(*it); 180 } 181 } 182} 183 184 185void 
Scanner::GetNextComments(vector<StringPiece> *ranges) { 186 // short circuit out if we've not yet initialized comments_ 187 // (e.g., when save_comments is false) 188 if (!comments_) { 189 return; 190 } 191 for (vector<StringPiece>::const_iterator it = 192 comments_->begin() + comments_offset_; 193 it != comments_->end(); ++it) { 194 ranges->push_back(*it); 195 ++comments_offset_; 196 } 197} 198 199} // namespace pcrecpp 200