165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// Copyright (c) 2005, Google Inc.
265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// All rights reserved.
365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//
465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// Redistribution and use in source and binary forms, with or without
565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// modification, are permitted provided that the following conditions are
665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// met:
765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//
865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//     * Redistributions of source code must retain the above copyright
965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// notice, this list of conditions and the following disclaimer.
1065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//     * Redistributions in binary form must reproduce the above
1165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// copyright notice, this list of conditions and the following disclaimer
1265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// in the documentation and/or other materials provided with the
1365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// distribution.
1465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//     * Neither the name of Google Inc. nor the names of its
1565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// contributors may be used to endorse or promote products derived from
1665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// this software without specific prior written permission.
1765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//
1865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich//
3065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// Author: Sanjay Ghemawat
3165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef HAVE_CONFIG_H
3365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "config.h"
3465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
3565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <vector>
3765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <assert.h>
3865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcrecpp_internal.h"
4065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcre_scanner.h"
4165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichusing std::vector;
4365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnamespace pcrecpp {
4565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichScanner::Scanner()
4765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  : data_(),
4865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    input_(data_),
4965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_(NULL),
5065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_skip_(false),
5165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_repeat_(false),
5265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    save_comments_(false),
5365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    comments_(NULL),
5465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    comments_offset_(0) {
5565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
5665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
5765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichScanner::Scanner(const string& in)
5865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  : data_(in),
5965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    input_(data_),
6065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_(NULL),
6165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_skip_(false),
6265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_repeat_(false),
6365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    save_comments_(false),
6465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    comments_(NULL),
6565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    comments_offset_(0) {
6665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
6765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
6865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichScanner::~Scanner() {
6965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  delete skip_;
7065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  delete comments_;
7165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
7265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
7365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::SetSkipExpression(const char* re) {
7465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  delete skip_;
7565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (re != NULL) {
7665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_ = new RE(re);
7765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_skip_ = true;
7865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_repeat_ = true;
7965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ConsumeSkip();
8065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  } else {
8165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_ = NULL;
8265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_skip_ = false;
8365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_repeat_ = false;
8465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
8565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
8665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::Skip(const char* re) {
8865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  delete skip_;
8965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (re != NULL) {
9065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_ = new RE(re);
9165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_skip_ = true;
9265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_repeat_ = false;
9365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ConsumeSkip();
9465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  } else {
9565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_ = NULL;
9665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_skip_ = false;
9765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip_repeat_ = false;
9865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
9965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
10065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
10165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::DisableSkip() {
10265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  assert(skip_ != NULL);
10365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  should_skip_ = false;
10465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
10565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
10665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::EnableSkip() {
10765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  assert(skip_ != NULL);
10865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  should_skip_ = true;
10965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ConsumeSkip();
11065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
11165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
11265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint Scanner::LineNumber() const {
11365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // TODO: Make it more efficient by keeping track of the last point
11465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // where we computed line numbers and counting newlines since then.
11565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // We could use std:count, but not all systems have it. :-(
11665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int count = 1;
11765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (const char* p = data_.data(); p < input_.data(); ++p)
11865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*p == '\n')
11965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ++count;
12065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return count;
12165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
12265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
12365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint Scanner::Offset() const {
12465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return (int)(input_.data() - data_.c_str());
12565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
12665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
12765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbool Scanner::LookingAt(const RE& re) const {
12865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int consumed;
12965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
13065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
13165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
13265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
13365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbool Scanner::Consume(const RE& re,
13465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                      const Arg& arg0,
13565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                      const Arg& arg1,
13665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                      const Arg& arg2) {
13765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const bool result = re.Consume(&input_, arg0, arg1, arg2);
13865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (result && should_skip_) ConsumeSkip();
13965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return result;
14065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
14165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
14265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich// helper function to consume *skip_ and honour save_comments_
14365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::ConsumeSkip() {
14465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const char* start_data = input_.data();
14565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  while (skip_->Consume(&input_)) {
14665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!skip_repeat_) {
14765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      // Only one skip allowed.
14865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
14965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
15065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
15165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (save_comments_) {
15265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (comments_ == NULL) {
15365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      comments_ = new vector<StringPiece>;
15465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
15565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    // already pointing one past end, so no need to +1
15665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int length = (int)(input_.data() - start_data);
15765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (length > 0) {
15865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      comments_->push_back(StringPiece(start_data, length));
15965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
16065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
16165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
16265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
16365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
16465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
16565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // short circuit out if we've not yet initialized comments_
16665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // (e.g., when save_comments is false)
16765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (!comments_) {
16865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return;
16965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
17065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // TODO: if we guarantee that comments_ will contain StringPieces
17165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // that are ordered by their start, then we can do a binary search
17265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // for the first StringPiece at or past start and then scan for the
17365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // ones contained in the range, quit early (use equal_range or
17465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // lower_bound)
17565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (vector<StringPiece>::const_iterator it = comments_->begin();
17665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       it != comments_->end(); ++it) {
17765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((it->data() >= data_.c_str() + start &&
17865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         it->data() + it->size() <= data_.c_str() + end)) {
17965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ranges->push_back(*it);
18065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
18165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
18265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
18365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
18465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
18565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvoid Scanner::GetNextComments(vector<StringPiece> *ranges) {
18665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // short circuit out if we've not yet initialized comments_
18765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  // (e.g., when save_comments is false)
18865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (!comments_) {
18965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return;
19065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
19165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (vector<StringPiece>::const_iterator it =
19265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         comments_->begin() + comments_offset_;
19365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       it != comments_->end(); ++it) {
19465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ranges->push_back(*it);
19565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ++comments_offset_;
19665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
19765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
19865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
19965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}   // namespace pcrecpp
200