1/*
2 * Copyright (c) 1995, 2001, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package sun.misc;
27import java.io.*;
28
29/**
30 * A class to represent a pool of regular expressions.  A string
31 * can be matched against the whole pool all at once.  It is much
32 * faster than doing individual regular expression matches one-by-one.
33 *
34 * @see java.misc.RegexpTarget
35 * @author  James Gosling
36 */
37
38public class RegexpPool {
39    private RegexpNode prefixMachine = new RegexpNode();
40    private RegexpNode suffixMachine = new RegexpNode();
41    private static final int BIG = 0x7FFFFFFF;
42    private int lastDepth = BIG;
43
44    public RegexpPool () {
45    }
46
47    /**
48     * Add a regular expression to the pool of regular expressions.
49     * @param   re  The regular expression to add to the pool.
50            For now, only handles strings that either begin or end with
51            a '*'.
52     * @param   ret The object to be returned when this regular expression is
53            matched.  If ret is an instance of the RegexpTarget class, ret.found
54            is called with the string fragment that matched the '*' as its
55            parameter.
56     * @exception REException error
57     */
58    public void add(String re, Object ret) throws REException {
59        add(re, ret, false);
60    }
61
62    /**
63     * Replace the target for the regular expression with a different
64     * target.
65     *
66     * @param   re  The regular expression to be replaced in the pool.
67     *      For now, only handles strings that either begin or end with
68     *      a '*'.
69     * @param   ret The object to be returned when this regular expression is
70     *      matched.  If ret is an instance of the RegexpTarget class, ret.found
71     *      is called with the string fragment that matched the '*' as its
72     *      parameter.
73     */
74    public void replace(String re, Object ret) {
75        try {
76            add(re, ret, true);
77        } catch(Exception e) {
78            // should never occur if replace is true
79        }
80    }
81
82    /**
83     * Delete the regular expression and its target.
84     * @param re The regular expression to be deleted from the pool.
85     *           must begin or end with a '*'
86     * @return target - the old target.
87     */
88    public Object delete(String re) {
89        Object o = null;
90        RegexpNode p = prefixMachine;
91        RegexpNode best = p;
92        int len = re.length() - 1;
93        int i;
94        boolean prefix = true;
95
96        if (!re.startsWith("*") ||
97            !re.endsWith("*"))
98            len++;
99
100        if (len <= 0)
101            return null;
102
103        /* March forward through the prefix machine */
104        for (i = 0; p != null; i++) {
105            if (p.result != null && p.depth < BIG
106                && (!p.exact || i == len)) {
107                best = p;
108            }
109            if (i >= len)
110                break;
111            p = p.find(re.charAt(i));
112        }
113
114        /* march backward through the suffix machine */
115        p = suffixMachine;
116        for (i = len; --i >= 0 && p != null;) {
117            if (p.result != null && p.depth < BIG) {
118                prefix = false;
119                best = p;
120            }
121            p = p.find(re.charAt(i));
122        }
123
124        // delete only if there is an exact match
125        if (prefix) {
126            if (re.equals(best.re)) {
127                o = best.result;
128                best.result = null;
129
130            }
131        } else {
132            if (re.equals(best.re)) {
133                o = best.result;
134                best.result = null;
135            }
136        }
137        return o;
138    }
139
140    /** Search for a match to a string & return the object associated
141        with it with the match.  When multiple regular expressions
142        would match the string, the best match is returned first.
143        The next best match is returned the next time matchNext is
144        called.
145        @param s    The string to match against the regular expressions
146                    in the pool.
147        @return     null on failure, otherwise the object associated with
148                    the regular expression when it was added to the pool.
149                    If the object is an instance of RegexpTarget, then
150                    the return value is the result from calling
151                    return.found(string_that_matched_wildcard).
152    */
153    public Object match(String s) {
154        return matchAfter(s, BIG);
155    }
156
157    /** Identical to match except that it will only find matches to
158        regular expressions that were added to the pool <i>after</i>
159        the last regular expression that matched in the last call
160        to match() or matchNext() */
161    public Object matchNext(String s) {
162        return matchAfter(s, lastDepth);
163    }
164
165    private void add(String re, Object ret, boolean replace) throws REException {
166        int len = re.length();
167        RegexpNode p;
168        if (re.charAt(0) == '*') {
169            p = suffixMachine;
170            while (len > 1)
171                p = p.add(re.charAt(--len));
172        } else {
173            boolean exact = false;
174            if (re.charAt(len - 1) == '*')
175                len--;
176            else
177                exact = true;
178            p = prefixMachine;
179            for (int i = 0; i < len; i++)
180                p = p.add(re.charAt(i));
181            p.exact = exact;
182        }
183
184        if (p.result != null && !replace)
185            throw new REException(re + " is a duplicate");
186
187        p.re = re;
188        p.result = ret;
189    }
190
191    private Object matchAfter(String s, int lastMatchDepth) {
192        RegexpNode p = prefixMachine;
193        RegexpNode best = p;
194        int bst = 0;
195        int bend = 0;
196        int len = s.length();
197        int i;
198        if (len <= 0)
199            return null;
200        /* March forward through the prefix machine */
201        for (i = 0; p != null; i++) {
202            if (p.result != null && p.depth < lastMatchDepth
203                && (!p.exact || i == len)) {
204                lastDepth = p.depth;
205                best = p;
206                bst = i;
207                bend = len;
208            }
209            if (i >= len)
210                break;
211            p = p.find(s.charAt(i));
212        }
213        /* march backward through the suffix machine */
214        p = suffixMachine;
215        for (i = len; --i >= 0 && p != null;) {
216            if (p.result != null && p.depth < lastMatchDepth) {
217                lastDepth = p.depth;
218                best = p;
219                bst = 0;
220                bend = i+1;
221            }
222            p = p.find(s.charAt(i));
223        }
224        Object o = best.result;
225        if (o != null && o instanceof RegexpTarget)
226            o = ((RegexpTarget) o).found(s.substring(bst, bend));
227        return o;
228    }
229
230    /** Resets the pool so that the next call to matchNext looks
231        at all regular expressions in the pool.  match(s); is equivalent
232        to reset(); matchNext(s);
233        <p><b>Multithreading note:</b> reset/nextMatch leave state in the
234        regular expression pool.  If multiple threads could be using this
235        pool this way, they should be syncronized to avoid race hazards.
236        match() was done in such a way that there are no such race
237        hazards: multiple threads can be matching in the same pool
238        simultaneously. */
239    public void reset() {
240        lastDepth = BIG;
241    }
242
243    /** Print this pool to standard output */
244    public void print(PrintStream out) {
245        out.print("Regexp pool:\n");
246        if (suffixMachine.firstchild != null) {
247            out.print(" Suffix machine: ");
248            suffixMachine.firstchild.print(out);
249            out.print("\n");
250        }
251        if (prefixMachine.firstchild != null) {
252            out.print(" Prefix machine: ");
253            prefixMachine.firstchild.print(out);
254            out.print("\n");
255        }
256    }
257
258}
259
260/* A node in a regular expression finite state machine. */
261class RegexpNode {
262    char c;
263    RegexpNode firstchild;
264    RegexpNode nextsibling;
265    int depth;
266    boolean exact;
267    Object result;
268    String re = null;
269
270    RegexpNode () {
271        c = '#';
272        depth = 0;
273    }
274    RegexpNode (char C, int depth) {
275        c = C;
276        this.depth = depth;
277    }
278    RegexpNode add(char C) {
279        RegexpNode p = firstchild;
280        if (p == null)
281            p = new RegexpNode (C, depth+1);
282        else {
283            while (p != null)
284                if (p.c == C)
285                    return p;
286                else
287                    p = p.nextsibling;
288            p = new RegexpNode (C, depth+1);
289            p.nextsibling = firstchild;
290        }
291        firstchild = p;
292        return p;
293    }
294    RegexpNode find(char C) {
295        for (RegexpNode p = firstchild;
296                p != null;
297                p = p.nextsibling)
298            if (p.c == C)
299                return p;
300        return null;
301    }
302    void print(PrintStream out) {
303        if (nextsibling != null) {
304            RegexpNode p = this;
305            out.print("(");
306            while (p != null) {
307                out.write(p.c);
308                if (p.firstchild != null)
309                    p.firstchild.print(out);
310                p = p.nextsibling;
311                out.write(p != null ? '|' : ')');
312            }
313        } else {
314            out.write(c);
315            if (firstchild != null)
316                firstchild.print(out);
317        }
318    }
319}
320