1// Copyright 2010 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "re2/set.h"
6
7#include "util/util.h"
8#include "re2/stringpiece.h"
9#include "re2/prog.h"
10#include "re2/re2.h"
11#include "re2/regexp.h"
12
13using namespace re2;
14
15RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
16  options_.Copy(options);
17  anchor_ = anchor;
18  prog_ = NULL;
19  compiled_ = false;
20}
21
22RE2::Set::~Set() {
23  for (int i = 0; i < re_.size(); i++)
24    re_[i]->Decref();
25  delete prog_;
26}
27
28int RE2::Set::Add(const StringPiece& pattern, string* error) {
29  if (compiled_) {
30    LOG(DFATAL) << "RE2::Set::Add after Compile";
31    return -1;
32  }
33
34  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
35    options_.ParseFlags());
36
37  RegexpStatus status;
38  re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
39  if (re == NULL) {
40    if (error != NULL)
41      *error = status.Text();
42    if (options_.log_errors())
43      LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
44    return -1;
45  }
46
47  // Concatenate with match index and push on vector.
48  int n = re_.size();
49  re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
50  if (re->op() == kRegexpConcat) {
51    int nsub = re->nsub();
52    re2::Regexp** sub = new re2::Regexp*[nsub + 1];
53    for (int i = 0; i < nsub; i++)
54      sub[i] = re->sub()[i]->Incref();
55    sub[nsub] = m;
56    re->Decref();
57    re = re2::Regexp::Concat(sub, nsub + 1, pf);
58    delete[] sub;
59  } else {
60    re2::Regexp* sub[2];
61    sub[0] = re;
62    sub[1] = m;
63    re = re2::Regexp::Concat(sub, 2, pf);
64  }
65  re_.push_back(re);
66  return n;
67}
68
69bool RE2::Set::Compile() {
70  if (compiled_) {
71    LOG(DFATAL) << "RE2::Set::Compile multiple times";
72    return false;
73  }
74  compiled_ = true;
75
76  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
77    options_.ParseFlags());
78  re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
79                                           re_.size(), pf);
80  re_.clear();
81  re2::Regexp* sre = re->Simplify();
82  re->Decref();
83  re = sre;
84  if (re == NULL) {
85    if (options_.log_errors())
86      LOG(ERROR) << "Error simplifying during Compile.";
87    return false;
88  }
89
90  prog_ = Prog::CompileSet(options_, anchor_, re);
91  return prog_ != NULL;
92}
93
94bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
95  if (!compiled_) {
96    LOG(DFATAL) << "RE2::Set::Match without Compile";
97    return false;
98  }
99  v->clear();
100  bool failed;
101  bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
102                              Prog::kManyMatch, NULL, &failed, v);
103  if (failed)
104    LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
105
106  if (ret == false)
107    return false;
108  if (v->size() == 0) {
109    LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
110    return false;
111  }
112  return true;
113}
114