/*
 *******************************************************************************
 * Copyright (C) 2001-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass Quantifier implements UnicodeMatcher {
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UnicodeMatcher matcher;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minCount;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int maxCount;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Maximum count a quantifier can have.
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int MAX = Integer.MAX_VALUE;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Quantifier(UnicodeMatcher theMatcher,
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                      int theMinCount, int theMaxCount) {
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (theMatcher == null || theMinCount < 0 || theMaxCount < 0 || theMinCount > theMaxCount) {
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        matcher = theMatcher;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        minCount = theMinCount;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        maxCount = theMaxCount;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implement UnicodeMatcher API.
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int matches(Replaceable text,
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int[] offset,
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int limit,
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       boolean incremental) {
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int start = offset[0];
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = 0;
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (count < maxCount) {
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int pos = offset[0];
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int m = matcher.matches(text, offset, limit, incremental);
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (m == U_MATCH) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++count;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (pos == offset[0]) {
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If offset has not moved we have a zero-width match.
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Don't keep matching it infinitely.
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (incremental && m == U_PARTIAL_MATCH) {
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return U_PARTIAL_MATCH;
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (incremental && offset[0] == limit) {
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return U_PARTIAL_MATCH;
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (count >= minCount) {
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return U_MATCH;
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offset[0] = start;
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return U_MISMATCH;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implement UnicodeMatcher API
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String toPattern(boolean escapeUnprintable) {
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder result = new StringBuilder();
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.append(matcher.toPattern(escapeUnprintable));
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (minCount == 0) {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (maxCount == 1) {
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return result.append('?').toString();
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (maxCount == MAX) {
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return result.append('*').toString();
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // else fall through
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (minCount == 1 && maxCount == MAX) {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result.append('+').toString();
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.append('{');
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.append(Utility.hex(minCount,1));
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.append(',');
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (maxCount != MAX) {
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append(Utility.hex(maxCount,1));
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.append('}');
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result.toString();
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implement UnicodeMatcher API
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean matchesIndexValue(int v) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (minCount == 0) || matcher.matchesIndexValue(v);
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implementation of UnicodeMatcher API.  Union the set of all
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * characters that may be matched by this object into the given
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * set.
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param toUnionTo the set into which to union the source characters
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @returns a reference to toUnionTo
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addMatchSetTo(UnicodeSet toUnionTo) {
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (maxCount > 0) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            matcher.addMatchSetTo(toUnionTo);
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
//eof
116