17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2001-2011, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility; 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass Quantifier implements UnicodeMatcher { 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private UnicodeMatcher matcher; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int minCount; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int maxCount; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Maximum count a quantifier can have. 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int MAX = Integer.MAX_VALUE; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Quantifier(UnicodeMatcher theMatcher, 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int theMinCount, int theMaxCount) { 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (theMatcher == null || theMinCount < 0 || theMaxCount < 0 || theMinCount > theMaxCount) { 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException(); 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert matcher = theMatcher; 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert minCount = theMinCount; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert maxCount = theMaxCount; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implement UnicodeMatcher API. 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int matches(Replaceable text, 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int[] offset, 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit, 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean incremental) { 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int start = offset[0]; 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int count = 0; 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (count < maxCount) { 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos = offset[0]; 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int m = matcher.matches(text, offset, limit, incremental); 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (m == U_MATCH) { 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++count; 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (pos == offset[0]) { 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If offset has not moved we have a zero-width match. 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Don't keep matching it infinitely. 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (incremental && m == U_PARTIAL_MATCH) { 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_PARTIAL_MATCH; 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (incremental && offset[0] == limit) { 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_PARTIAL_MATCH; 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (count >= minCount) { 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_MATCH; 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offset[0] = start; 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_MISMATCH; 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implement UnicodeMatcher API 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String toPattern(boolean escapeUnprintable) { 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder result = new StringBuilder(); 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(matcher.toPattern(escapeUnprintable)); 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (minCount == 0) { 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (maxCount == 1) { 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.append('?').toString(); 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (maxCount == MAX) { 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.append('*').toString(); 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // else fall through 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (minCount == 1 && maxCount == MAX) { 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.append('+').toString(); 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append('{'); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(Utility.hex(minCount,1)); 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(','); 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (maxCount != MAX) { 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(Utility.hex(maxCount,1)); 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append('}'); 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.toString(); 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implement UnicodeMatcher API 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean matchesIndexValue(int v) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (minCount == 0) || matcher.matchesIndexValue(v); 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implementation of UnicodeMatcher API. Union the set of all 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters that may be matched by this object into the given 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * set. 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param toUnionTo the set into which to union the source characters 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @returns a reference to toUnionTo 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addMatchSetTo(UnicodeSet toUnionTo) { 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (maxCount > 0) { 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert matcher.addMatchSetTo(toUnionTo); 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//eof 116