12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2010 The RE2 Authors.  All Rights Reserved.
22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style
32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file.
42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/set.h"
62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h"
82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/stringpiece.h"
92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/prog.h"
102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/re2.h"
112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/regexp.h"
122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonusing namespace re2;
142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  options_.Copy(options);
172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  anchor_ = anchor;
182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  prog_ = NULL;
192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  compiled_ = false;
202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::Set::~Set() {
232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  for (int i = 0; i < re_.size(); i++)
242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    re_[i]->Decref();
252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete prog_;
262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint RE2::Set::Add(const StringPiece& pattern, string* error) {
292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (compiled_) {
302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    LOG(DFATAL) << "RE2::Set::Add after Compile";
312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return -1;
322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    options_.ParseFlags());
362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  RegexpStatus status;
382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (re == NULL) {
402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (error != NULL)
412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      *error = status.Text();
422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (options_.log_errors())
432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return -1;
452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Concatenate with match index and push on vector.
482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int n = re_.size();
492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (re->op() == kRegexpConcat) {
512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    int nsub = re->nsub();
522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    re2::Regexp** sub = new re2::Regexp*[nsub + 1];
532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (int i = 0; i < nsub; i++)
542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      sub[i] = re->sub()[i]->Incref();
552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    sub[nsub] = m;
562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    re->Decref();
572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    re = re2::Regexp::Concat(sub, nsub + 1, pf);
582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete[] sub;
592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  } else {
602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    re2::Regexp* sub[2];
612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    sub[0] = re;
622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    sub[1] = m;
632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    re = re2::Regexp::Concat(sub, 2, pf);
642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re_.push_back(re);
662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return n;
672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Set::Compile() {
702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (compiled_) {
712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    LOG(DFATAL) << "RE2::Set::Compile multiple times";
722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return false;
732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  compiled_ = true;
752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    options_.ParseFlags());
782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                                           re_.size(), pf);
802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re_.clear();
812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re2::Regexp* sre = re->Simplify();
822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re->Decref();
832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  re = sre;
842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (re == NULL) {
852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (options_.log_errors())
862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      LOG(ERROR) << "Error simplifying during Compile.";
872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return false;
882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  prog_ = Prog::CompileSet(options_, anchor_, re);
912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return prog_ != NULL;
922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (!compiled_) {
962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    LOG(DFATAL) << "RE2::Set::Match without Compile";
972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return false;
982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  v->clear();
1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  bool failed;
1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                              Prog::kManyMatch, NULL, &failed, v);
1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (failed)
1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (ret == false)
1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return false;
1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (v->size() == 0) {
1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return false;
1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return true;
1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
114