// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// 3065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// Author: Sanjay Ghemawat 3165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 3265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef HAVE_CONFIG_H 3365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "config.h" 3465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 3565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 3665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <vector> 3765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <assert.h> 3865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 3965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcrecpp_internal.h" 4065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcre_scanner.h" 4165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichusing std::vector; 4365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnamespace pcrecpp { 4565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichScanner::Scanner() 4765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich : data_(), 4865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich input_(data_), 4965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_(NULL), 5065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_(false), 5165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_repeat_(false), 5265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_comments_(false), 5365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_(NULL), 5465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_offset_(0) { 5565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 5665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
5765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichScanner::Scanner(const string& in) 5865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich : data_(in), 5965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich input_(data_), 6065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_(NULL), 6165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_(false), 6265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_repeat_(false), 6365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_comments_(false), 6465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_(NULL), 6565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_offset_(0) { 6665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 6765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 6865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichScanner::~Scanner() { 6965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich delete skip_; 7065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich delete comments_; 7165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 7265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 7365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::SetSkipExpression(const char* re) { 7465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich delete skip_; 7565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (re != NULL) { 7665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_ = new RE(re); 7765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_ = true; 7865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_repeat_ = true; 7965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ConsumeSkip(); 8065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } else { 8165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_ = NULL; 8265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_ = false; 8365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_repeat_ = false; 
8465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 8565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 8665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::Skip(const char* re) { 8865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich delete skip_; 8965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (re != NULL) { 9065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_ = new RE(re); 9165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_ = true; 9265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_repeat_ = false; 9365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ConsumeSkip(); 9465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } else { 9565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_ = NULL; 9665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_ = false; 9765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip_repeat_ = false; 9865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 9965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 10065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 10165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::DisableSkip() { 10265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich assert(skip_ != NULL); 10365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_ = false; 10465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 10565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 10665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::EnableSkip() { 10765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich assert(skip_ != NULL); 10865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_skip_ = true; 10965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ConsumeSkip(); 11065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 11165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
11265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint Scanner::LineNumber() const { 11365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // TODO: Make it more efficient by keeping track of the last point 11465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // where we computed line numbers and counting newlines since then. 11565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // We could use std:count, but not all systems have it. :-( 11665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int count = 1; 11765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (const char* p = data_.data(); p < input_.data(); ++p) 11865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*p == '\n') 11965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ++count; 12065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return count; 12165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 12265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint Scanner::Offset() const { 12465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (int)(input_.data() - data_.c_str()); 12565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 12665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbool Scanner::LookingAt(const RE& re) const { 12865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int consumed; 12965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); 13065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 13165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 13265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 13365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbool Scanner::Consume(const RE& re, 13465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const Arg& arg0, 13565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const Arg& arg1, 
13665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const Arg& arg2) { 13765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const bool result = re.Consume(&input_, arg0, arg1, arg2); 13865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (result && should_skip_) ConsumeSkip(); 13965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return result; 14065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 14165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// helper function to consume *skip_ and honour save_comments_ 14365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::ConsumeSkip() { 14465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char* start_data = input_.data(); 14565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (skip_->Consume(&input_)) { 14665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!skip_repeat_) { 14765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // Only one skip allowed. 
14865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 14965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 15065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 15165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (save_comments_) { 15265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (comments_ == NULL) { 15365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_ = new vector<StringPiece>; 15465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 15565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // already pointing one past end, so no need to +1 15665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int length = (int)(input_.data() - start_data); 15765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (length > 0) { 15865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_->push_back(StringPiece(start_data, length)); 15965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 16065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 16165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 16265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 16365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 16465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { 16565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // short circuit out if we've not yet initialized comments_ 16665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // (e.g., when save_comments is false) 16765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!comments_) { 16865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return; 16965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 17065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // TODO: if we guarantee that comments_ will contain StringPieces 17165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // that are ordered by their start, then we can do a binary 
search 17265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // for the first StringPiece at or past start and then scan for the 17365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // ones contained in the range, quit early (use equal_range or 17465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // lower_bound) 17565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (vector<StringPiece>::const_iterator it = comments_->begin(); 17665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it != comments_->end(); ++it) { 17765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((it->data() >= data_.c_str() + start && 17865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it->data() + it->size() <= data_.c_str() + end)) { 17965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ranges->push_back(*it); 18065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 18165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 18265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 18365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 18465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 18565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::GetNextComments(vector<StringPiece> *ranges) { 18665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // short circuit out if we've not yet initialized comments_ 18765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich // (e.g., when save_comments is false) 18865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!comments_) { 18965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return; 19065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 19165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (vector<StringPiece>::const_iterator it = 19265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comments_->begin() + comments_offset_; 19365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it != comments_->end(); ++it) { 
19465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ranges->push_back(*it); 19565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ++comments_offset_; 19665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 19765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 19865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 19965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} // namespace pcrecpp 200