/*
 *******************************************************************************
 * Copyright (C) 2002-2012, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

/**
 * Writes textual listings of collections and {@link UnicodeSet}s, one tabbed
 * line per item or code point range, and reports the differences between two
 * sets. Most setters return {@code this} so that calls can be chained.
 */
public class BagFormatter {
    static final boolean DEBUG = false;

    /** True when the system property {@code SHOW_FILES} is defined (and readable). */
    public static final boolean SHOW_FILES;
    static {
        boolean showFiles = false;
        try {
            showFiles = System.getProperty("SHOW_FILES") != null;
        } catch (SecurityException ignored) {
            // Deliberately ignored: if the property cannot be read, file logging stays off.
        }
        SHOW_FILES = showFiles;
    }

    /** Auto-flushing writer on {@code System.out}; used when no writer is supplied. */
    public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);

    private static PrintWriter log = CONSOLE;

    private boolean abbreviated = false;
    private String separator = ",";
    private String prefix = "[";
    private String suffix = "]";
    private UnicodeProperty.Factory source;
    private UnicodeLabel nameSource;
    private UnicodeLabel labelSource;
    private UnicodeLabel rangeBreakSource;
    private UnicodeLabel valueSource;
    private String propName = "";
    private boolean showCount = true;
    //private boolean suppressReserved = true;
    private boolean hexValue = false;
    /** Sentinel returned by {@link #getValue(int, boolean)} when the value source yields null. */
    private static final String NULL_VALUE = "_NULL_VALUE_";
    private int fullTotal = -1;
    private boolean showTotal = true;
    private String lineSeparator = "\r\n";
    private Tabber tabber = new Tabber.MonoTabber();

    /**
     * Compare two UnicodeSets, and show the differences
     * @param name1 name of first set to be compared
     * @param set1 first set
     * @param name2 name of second set to be compared
     * @param set2 second set
     * @return formatted string
     */
    public String showSetDifferences(
        String name1,
        UnicodeSet set1,
        String name2,
        UnicodeSet set2) {

        StringWriter result = new StringWriter();
        showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
        result.flush();
        return result.getBuffer().toString();
    }

    /**
     * Compare two collections, and show the differences
     * @param name1 name of first collection to be compared
     * @param set1 first collection
     * @param name2 name of second collection to be compared
     * @param set2 second collection
     * @return formatted string
     */
    public String showSetDifferences(
        String name1,
        Collection set1,
        String name2,
        Collection set2) {

        StringWriter result = new StringWriter();
        showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
        result.flush();
        return result.getBuffer().toString();
    }

    /**
     * Compare two UnicodeSets, and write all three difference sections
     * (equivalent to passing {@code -1} as the flags).
     * @param pw destination; if null, {@link #CONSOLE} is used
     * @param name1 name of first set to be compared
     * @param set1 first set
     * @param name2 name of second set to be compared
     * @param set2 second set
     */
    public void showSetDifferences(
        PrintWriter pw,
        String name1,
        UnicodeSet set1,
        String name2,
        UnicodeSet set2) {
        showSetDifferences(pw, name1, set1, name2, set2, -1);
    }

    /**
     * Compare two UnicodeSets, and show the differences
     * @param pw destination; if null, {@link #CONSOLE} is used
     * @param name1 name of first set to be compared
     * @param set1 first set
     * @param name2 name of second set to be compared
     * @param set2 second set
     * @param flags bit mask selecting the sections to write:
     *        1 = in set1 but not set2, 2 = in set2 but not set1, 4 = in both
     */
    public void showSetDifferences(
        PrintWriter pw,
        String name1,
        UnicodeSet set1,
        String name2,
        UnicodeSet set2,
        int flags)
    {
        if (pw == null) pw = CONSOLE;
        String[] names = { name1, name2 };

        UnicodeSet temp;

        if ((flags & 1) != 0) {
            temp = new UnicodeSet(set1).removeAll(set2);
            pw.print(lineSeparator);
            pw.print(inOut.format(names));
            pw.print(lineSeparator);
            showSetNames(pw, temp);
        }

        if ((flags & 2) != 0) {
            temp = new UnicodeSet(set2).removeAll(set1);
            pw.print(lineSeparator);
            pw.print(outIn.format(names));
            pw.print(lineSeparator);
            showSetNames(pw, temp);
        }

        if ((flags & 4) != 0) {
            temp = new UnicodeSet(set2).retainAll(set1);
            pw.print(lineSeparator);
            pw.print(inIn.format(names));
            pw.print(lineSeparator);
            showSetNames(pw, temp);
        }
        pw.flush();
    }

    /**
     * Compare two collections, and write the three difference sections
     * (only in the first, only in the second, in both).
     * @param pw destination; if null, {@link #CONSOLE} is used
     * @param name1 name of first collection to be compared
     * @param set1 first collection
     * @param name2 name of second collection to be compared
     * @param set2 second collection
     */
    public void showSetDifferences(
        PrintWriter pw,
        String name1,
        Collection set1,
        String name2,
        Collection set2) {

        if (pw == null) pw = CONSOLE;
        String[] names = { name1, name2 };
        // Collection has no clone, so the working copy is a HashSet,
        // even though that may not preserve order and duplicates
        Collection temp = new HashSet(set1);
        temp.removeAll(set2);
        pw.println();
        pw.println(inOut.format(names));
        showSetNames(pw, temp);

        temp.clear();
        temp.addAll(set2);
        temp.removeAll(set1);
        pw.println();
        pw.println(outIn.format(names));
        showSetNames(pw, temp);

        temp.clear();
        temp.addAll(set1);
        temp.retainAll(set2);
        pw.println();
        pw.println(inIn.format(names));
        showSetNames(pw, temp);
    }

    /**
     * Returns a listing of the items in the collection.
     * Each item must not be null; its toString() is called for a printable representation
     * @param c source collection
     * @return a String representation of the list
     */
    public String showSetNames(Object c) {
        StringWriter buffer = new StringWriter();
        PrintWriter output = new PrintWriter(buffer);
        showSetNames(output, c);
        return buffer.toString();
    }

    /**
     * Writes a listing of the items in the collection, then flushes the writer.
     * Each item must not be null; its toString() is called for a printable representation
     * @param output destination to which to write names
     * @param c source collection
     */
    public void showSetNames(PrintWriter output, Object c) {
        mainVisitor.doAt(c, output);
        output.flush();
    }

    /**
     * Writes a listing of the items in the collection to a UTF-8 encoded file.
     * Each item must not be null; its toString() is called for a printable representation
     * @param filename destination to which to write names
     * @param c source collection
     * @throws IOException if the file cannot be opened for writing
     */
    public void showSetNames(String filename, Object c) throws IOException {
        PrintWriter pw = new PrintWriter(
            new OutputStreamWriter(
                new FileOutputStream(filename), "utf-8"));
        try {
            // Write to the file writer (previously this wrote to the log,
            // leaving the newly created file empty).
            showSetNames(pw, c);
        } finally {
            pw.close(); // release the file even if formatting fails
        }
    }

    /**
     * Abbreviates {@code src} relative to {@code pattern}: the two strings are
     * split at the start of their shared trailing portion (as located by
     * {@code NameIterator.findMatchingEnd}), and within each part every
     * space-delimited word of {@code src} that equals the word at the same
     * position in {@code pattern} is replaced by {@code substitute}.
     * @param src the string to abbreviate
     * @param pattern the string to compare against
     * @param substitute replacement for each matching word
     * @return the abbreviated form of {@code src}
     */
    public String getAbbreviatedName(
        String src,
        String pattern,
        String substitute) {

        int matchEnd = NameIterator.findMatchingEnd(src, pattern);
        int sdiv = src.length() - matchEnd;
        int pdiv = pattern.length() - matchEnd;
        StringBuffer result = new StringBuffer();
        addMatching(
            src.substring(0, sdiv),
            pattern.substring(0, pdiv),
            substitute,
            result);
        addMatching(
            src.substring(sdiv),
            pattern.substring(pdiv),
            substitute,
            result);
        return result.toString();
    }

    /** Supplies a string describing the relation between two strings. */
    abstract public static class Relation {
        abstract public String getRelation(String a, String b);
    }

    /** Relation that always yields the empty string. */
    static class NullRelation extends Relation {
        public String getRelation(String a, String b) { return ""; }
    }

    private Relation r = new NullRelation();

    /**
     * @param r the relation to use
     * @return this (for chaining)
     */
    public BagFormatter setRelation(Relation r) {
        this.r = r;
        return this; // for chaining
    }

    /**
     * @return the current relation
     */
    public Relation getRelation() {
        return r;
    }

    /*
     r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
    */
    /*
    static final UnicodeSet NO_NAME =
        new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
    static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
    static final UnicodeSet NAME_CHARACTERS =
        new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");

    public UnicodeSet getSetForName(String namePattern) {
        UnicodeSet result = new UnicodeSet();
        Matcher m = Pattern.compile(namePattern).matcher("");
        // check for no-name items, and add in bulk
        m.reset("<no name>");
        if (m.matches()) {
            result.addAll(NO_NAME);
        }
        // check all others
        UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
        while (usi.next()) {
            String name = getName(usi.codepoint);
            if (name == null)
                continue;
            m.reset(name);
            if (m.matches()) {
                result.add(usi.codepoint);
            }
        }
        // Note: if Regex had some API so that if we could tell that
        // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
        // then we could optimize by skipping whole swathes of characters
        return result;
    }
    */

    /**
     * @param in true to list runs of code points as ranges, false to list
     *        every code point on its own line
     * @return this (for chaining)
     */
    public BagFormatter setMergeRanges(boolean in) {
        mergeRanges = in;
        return this;
    }

    /**
     * @param b true to also write the container itself (prefixed by "#")
     *        before its contents when it is a UnicodeSet
     * @return this (for chaining)
     */
    public BagFormatter setShowSetAlso(boolean b) {
        showSetAlso = b;
        return this;
    }

    /**
     * @param codePoint the code point to name
     * @return the name of the code point, without a leading separator
     */
    public String getName(int codePoint) {
        return getName("", codePoint, codePoint);
    }

    /**
     * Returns the name of a code point range, preceded by {@code sep}.
     * @param sep text to put before the name
     * @param start first code point of the range
     * @param end last code point of the range (inclusive)
     * @return the empty string if the name source is {@code UnicodeLabel.NULL};
     *         otherwise {@code sep} followed by the name of {@code start} and,
     *         for a true range, ".." and the (possibly abbreviated) name of {@code end}
     */
    public String getName(String sep, int start, int end) {
        if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) return "";
        String result = getName(start, false);
        if (start == end) return sep + result;
        String endString = getName(end, false);
        if (result.length() == 0 && endString.length() == 0) return sep;
        if (abbreviated) endString = getAbbreviatedName(endString, result, "~");
        return sep + result + ".." + endString;
    }

    /**
     * @param s the string to name
     * @return the name of the string, without code point prefixes
     */
    public String getName(String s) {
        return getName(s, false);
    }

    /**
     * Label that supplies a character name, falling back to a bracketed
     * placeholder for code points for which the Name property returns null.
     */
    public static class NameLabel extends UnicodeLabel {
        UnicodeProperty nameProp;
        UnicodeSet control;
        UnicodeSet private_use;
        UnicodeSet noncharacter;
        UnicodeSet surrogate;

        /**
         * @param source factory from which the Name property and the
         *        control, private-use, surrogate and noncharacter sets are fetched
         */
        public NameLabel(UnicodeProperty.Factory source) {
            nameProp = source.getProperty("Name");
            control = source.getSet("gc=Cc");
            private_use = source.getSet("gc=Co");
            surrogate = source.getSet("gc=Cs");
            noncharacter = source.getSet("noncharactercodepoint=yes");
        }

        /**
         * @param codePoint the code point to name
         * @param isShort if false, named and reserved code points get a
         *        "U+XXXX " prefix; the control, private-use, surrogate and
         *        noncharacter placeholders never carry the prefix
         * @return the name or a placeholder such as {@code <control-0009>}
         */
        public String getValue(int codePoint, boolean isShort) {
            String hcp = !isShort
                ? "U+" + Utility.hex(codePoint, 4) + " "
                : "";
            String result = nameProp.getValue(codePoint);
            if (result != null)
                return hcp + result;
            if (control.contains(codePoint)) {
                return "<control-" + Utility.hex(codePoint, 4) + ">";
            }
            if (private_use.contains(codePoint)) {
                return "<private-use-" + Utility.hex(codePoint, 4) + ">";
            }
            if (surrogate.contains(codePoint)) {
                return "<surrogate-" + Utility.hex(codePoint, 4) + ">";
            }
            if (noncharacter.contains(codePoint)) {
                return "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
            }
            //if (suppressReserved) return "";
            return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";
        }

    }

    // refactored
    /**
     * @param codePoint the code point to name
     * @param withCodePoint true to request the long form from the name source
     * @return the name, passed through the fixName transliterator if one is set
     */
    public String getName(int codePoint, boolean withCodePoint) {
        String result = getNameSource().getValue(codePoint, !withCodePoint);
        return fixName == null ? result : fixName.transliterate(result);
    }

    /**
     * @param s the string to name
     * @param withCodePoint true to request the long form from the name source
     * @return the name as produced by the name source using the current
     *         separator, passed through the fixName transliterator if one is set
     */
    public String getName(String s, boolean withCodePoint) {
        String result = getNameSource().getValue(s, separator, !withCodePoint);
        return fixName == null ? result : fixName.transliterate(result);
    }

    /**
     * @param s the string to convert
     * @return the hex form of the string, using the current separator
     */
    public String hex(String s) {
        return hex(s, separator);
    }

    /**
     * @param s the string to convert
     * @param sep separator passed to {@code UnicodeLabel.HEX}
     * @return the value produced by {@code UnicodeLabel.HEX} for the string
     */
    public String hex(String s, String sep) {
        return UnicodeLabel.HEX.getValue(s, sep, true);
    }

    /**
     * @param start first code point
     * @param end last code point (inclusive)
     * @return the hex of {@code start} (at least 4 digits), followed by ".."
     *         and the hex of {@code end} when the two differ
     */
    public String hex(int start, int end) {
        String s = Utility.hex(start, 4);
        if (start == end) return s;
        return s + ".." + Utility.hex(end, 4);
    }

    /**
     * @param source the property factory to use
     * @return this (for chaining)
     */
    public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
        this.source = source;
        return this;
    }

    /**
     * @return the property factory; created with {@code ICUPropertyFactory.make()}
     *         on first use if none has been set
     */
    public UnicodeProperty.Factory getUnicodePropertyFactory() {
        if (source == null) source = ICUPropertyFactory.make();
        return source;
    }

    /** Creates a formatter whose property factory is created lazily. */
    public BagFormatter () {
    }

    /**
     * @param source the property factory to use
     */
    public BagFormatter (UnicodeProperty.Factory source) {
        setUnicodePropertyFactory(source);
    }

    /**
     * @param o the object to join
     * @return the items of {@code o}, wrapped in the prefix and suffix and
     *         delimited by the separator
     */
    public String join(Object o) {
        return labelVisitor.join(o);
    }

    // ===== PRIVATES =====

    private Join labelVisitor = new Join();

    private boolean mergeRanges = true;
    private Transliterator showLiteral = null;
    private Transliterator fixName = null;
    private boolean showSetAlso = false;

    private RangeFinder rf = new RangeFinder();

    private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
    private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
    private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");

    private MyVisitor mainVisitor = new MyVisitor();

    /*
    private String getLabels(int start, int end) {
        Set names = new TreeSet();
        for (int cp = start; cp <= end; ++cp) {
            names.add(getLabel(cp));
        }
        return labelVisitor.join(names);
    }
    */

    /**
     * Appends the words of {@code src} to {@code result}, separated by single
     * spaces, replacing each word that equals the word at the same position
     * in {@code pattern} with {@code substitute}.
     */
    private void addMatching(
        String src,
        String pattern,
        String substitute,
        StringBuffer result) {
        NameIterator n1 = new NameIterator(src);
        NameIterator n2 = new NameIterator(pattern);
        boolean first = true;
        while (true) {
            String s1 = n1.next();
            if (s1 == null)
                break;
            // s2 is null once pattern runs out of words; equals(null) is then false
            String s2 = n2.next();
            if (!first)
                result.append(" ");
            first = false;
            if (s1.equals(s2))
                result.append(substitute);
            else
                result.append(s1);
        }
    }

    private static NumberFormat nf =
        NumberFormat.getIntegerInstance(Locale.ENGLISH);
    static {
        nf.setGroupingUsed(false);
    }

    private int maxWidthOverride = -1;
    private int maxLabelWidthOverride = -1;

    /**
     * @param maxWidthOverride width of the value column; values &lt;= 0 mean
     *        the width is taken from the value source
     * @return this (for chaining)
     */
    public BagFormatter setValueWidthOverride(int maxWidthOverride) {
        this.maxWidthOverride = maxWidthOverride;
        return this;
    }

    /**
     * @return the value column width override
     */
    public int getValueWidthOverride() {
        return maxWidthOverride;
    }

    /**
     * @param maxWidthOverride width of the label column; values &lt;= 0 mean
     *        the width is taken from the label source
     * @return this (for chaining)
     */
    public BagFormatter setLabelWidthOverride(int maxWidthOverride) {
        this.maxLabelWidthOverride = maxWidthOverride;
        return this;
    }

    /**
     * @return the label column width override
     */
    public int getLabelWidthOverride() {
        return maxLabelWidthOverride;
    }


    /**
     * Visitor that writes one tabbed line per item or code point range to a
     * PrintWriter, followed by a total line when showTotal is set.
     */
    private class MyVisitor extends Visitor {
        private PrintWriter output;
        String commentSeparator;
        int counter;
        int valueSize;
        int labelSize;
        boolean isHtml;
        boolean inTable = false;

        /** Writes a free-standing line; in HTML mode closes any open table and wraps the text in a paragraph. */
        public void toOutput(String s) {
            if (isHtml) {
                if (inTable) {
                    output.print("</table>");
                    inTable = false;
                }
                output.print("<p>");
            }
            output.print(s);
            if (isHtml)
                output.println("</p>");
            else
                output.print(lineSeparator);
        }

        /** Writes a line through the tabber; in HTML mode opens a table first if none is open. */
        public void toTable(String s) {
            if (isHtml && !inTable) {
                output.print("<table>");
                inTable = true;
            }
            output.print(tabber.process(s) + lineSeparator);
        }

        /**
         * Sets the destination, resets the counter, configures the tabber
         * columns from the current settings, and then visits {@code c}.
         */
        public void doAt(Object c, PrintWriter out) {
            output = out;
            isHtml = tabber instanceof Tabber.HTMLTabber;
            counter = 0;

            tabber.clear();
            // old:
            // 0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
            // new
            // 0009..000D    ; White_Space #Cc  [5] <control>..<control>
            tabber.add(mergeRanges ? 14 : 6, Tabber.LEFT);

            if (propName.length() > 0) {
                tabber.add(propName.length() + 2, Tabber.LEFT);
            }

            valueSize = maxWidthOverride > 0 ? maxWidthOverride : getValueSource().getMaxWidth(shortValue);

            if (DEBUG) System.out.println("ValueSize: " + valueSize);
            if (valueSize > 0) {
                tabber.add(valueSize + 2, Tabber.LEFT); // value
            }

            tabber.add(3, Tabber.LEFT); // comment character

            labelSize = maxLabelWidthOverride > 0 ? maxLabelWidthOverride : getLabelSource(true).getMaxWidth(shortLabel);
            if (labelSize > 0) {
                tabber.add(labelSize + 1, Tabber.LEFT); // label
            }

            if (mergeRanges && showCount) {
                tabber.add(5, Tabber.RIGHT);
            }

            if (showLiteral != null) {
                tabber.add(4, Tabber.LEFT);
            }
            //myTabber.add(7,Tabber.LEFT);

            commentSeparator = (showCount || showLiteral != null
                    || getLabelSource(true) != UnicodeLabel.NULL
                    || getNameSource() != UnicodeLabel.NULL)
                ? "\t #" : "";

            if (DEBUG) System.out.println("Tabber: " + tabber.toString());
            if (DEBUG) System.out.println("Tabber: " + tabber.process(
                "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER"));
            doAt(c);
        }

        /**
         * Formats {@code o} into a string.
         * @return the listing that visiting {@code o} produces
         */
        @SuppressWarnings("unused")
        public String format(Object o) {
            StringWriter sw = new StringWriter();
            PrintWriter pw = new PrintWriter(sw);
            // Route the output into pw (previously doAt(o) was called, which
            // never wrote to this writer, so the result was always empty).
            doAt(o, pw);
            pw.flush();
            String result = sw.getBuffer().toString();
            pw.close();
            return result;
        }

        protected void doBefore(Object container, Object o) {
            if (showSetAlso && container instanceof UnicodeSet) {
                toOutput("#" + container);
            }
        }

        protected void doBetween(Object container, Object lastItem, Object nextItem) {
        }

        /**
         * Writes the total lines when showTotal is set. A fullTotal that
         * differs from the counted items is reported once and then reset to -1.
         */
        protected void doAfter(Object container, Object o) {
            if (fullTotal != -1 && fullTotal != counter) {
                if (showTotal) {
                    toOutput("");
                    toOutput("# The above property value applies to " + nf.format(fullTotal - counter) + " code points not listed here.");
                    toOutput("# Total code points: " + nf.format(fullTotal));
                }
                fullTotal = -1;
            } else if (showTotal) {
                toOutput("");
                toOutput("# Total code points: " + nf.format(counter));
            }
        }

        protected void doSimpleAt(Object o) {
            if (o instanceof Map.Entry) {
                Map.Entry oo = (Map.Entry) o;
                Object key = oo.getKey();
                Object value = oo.getValue();
                doBefore(o, key);
                doAt(key);
                output.println("\u2192");
                doAt(value);
                doAfter(o, value);
                counter++;
            } else if (o instanceof Visitor.CodePointRange) {
                doAt((Visitor.CodePointRange) o);
            } else {
                String thing = o.toString();
                String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true);
                if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value;
                String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true);
                if (label.length() != 0) label = " " + label;
                toTable(
                    hex(thing)
                    + value
                    + commentSeparator
                    + label
                    + insertLiteral(thing)
                    + "\t"
                    + getName(thing));
                counter++;
            }
        }

        /** Writes a code point range either one code point per line or split into runs by the RangeFinder. */
        protected void doAt(Visitor.CodePointRange usi) {
            if (!mergeRanges) {
                for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
                    showLine(cp, cp);
                }
            } else {
                rf.reset(usi.codepoint, usi.codepointEnd + 1);
                while (rf.next()) {
                    showLine(rf.start, rf.limit - 1);
                }
            }
        }

        /**
         * Writes the line for the inclusive range start..end, using the label
         * and value of {@code start}. Nothing is written (and nothing counted)
         * when the value source returned null for {@code start}.
         * @throws IllegalArgumentException if a non-empty value or label is
         *         produced although the corresponding column width is not positive
         */
        private void showLine(int start, int end) {
            String label = getLabelSource(true).getValue(start, shortLabel);
            String value = getValue(start, shortValue);
            // Identity comparison is intentional: NULL_VALUE is the sentinel
            // object handed back by getValue.
            if (value == NULL_VALUE) return;

            counter += end - start + 1;
            String pn = propName;
            if (pn.length() != 0) {
                pn = "\t; " + pn;
            }
            if (valueSize > 0) {
                value = "\t; " + value;
            } else if (value.length() > 0) {
                throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
            }
            if (labelSize > 0) {
                label = "\t" + label;
            } else if (label.length() > 0) {
                throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel));
            }

            String count = "";
            if (mergeRanges && showCount) {
                if (end == start) count = "\t";
                else count = "\t [" + nf.format(end - start + 1) + "]";
            }

            toTable(
                hex(start, end)
                + pn
                + value
                + commentSeparator
                + label
                + count
                + insertLiteral(start, end)
                + getName("\t ", start, end));
        }

        private String insertLiteral(String thing) {
            return (showLiteral == null ? ""
                :  " \t(" + showLiteral.transliterate(thing) + ") ");
        }

        private String insertLiteral(int start, int end) {
            return (showLiteral == null ? "" :
                " \t(" + showLiteral.transliterate(UTF16.valueOf(start))
                        + ((start != end)
                            ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))
                            : "")
                + ") ");
        }
        /*
        private String insertLiteral(int cp) {
            return (showLiteral == null ? ""
                :  " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
        }
        */
    }

    /**
     * Iterate through a string, breaking at words.
     * @author Davis
     */
    private static class NameIterator {
        String source;
        int position;
        int limit;

        NameIterator(String source) {
            this.source = source;
            this.limit = source.length();
        }

        /**
         * Find next word; words are delimited by a single space, which is
         * skipped and not included in the result
         * @return the next word, or null when the string is exhausted
         */
        String next() {
            if (position >= limit)
                return null;
            int pos = source.indexOf(' ', position);
            if (pos < 0 || pos >= limit)
                pos = limit;
            String result = source.substring(position, pos);
            position = pos + 1;
            return result;
        }

        /**
         * Finds the common suffix of the two strings, trimmed so that it
         * begins at a space.
         * @return the number of trailing characters of {@code s1}, starting at
         *         the first space within the common suffix, or 0 if the common
         *         suffix contains no space
         */
        static int findMatchingEnd(String s1, String s2) {
            int i = s1.length();
            int j = s2.length();
            // Walk backwards while both indices are valid and the characters agree.
            do {
                --i;
                --j;
            } while (i >= 0 && j >= 0 && s1.charAt(i) == s2.charAt(j));

            ++i; // step back onto the first character of the common suffix
            i = s1.indexOf(' ', i); // move forward to space
            if (i < 0)
                return 0;
            return s1.length() - i;
        }
    }

    /**
     * Splits a code point range into maximal runs whose label, value and
     * range-break value are all equal.
     */
    private class RangeFinder {
        int start, limit;
        private int veryLimit;
        //String label, value;

        /**
         * @param rangeStart first code point of the range
         * @param rangeLimit one past the last code point of the range
         */
        void reset(int rangeStart, int rangeLimit) {
            limit = rangeStart;
            veryLimit = rangeLimit;
        }

        /**
         * Advances to the next run, setting {@code start} (inclusive) and
         * {@code limit} (exclusive).
         * @return false when the range is exhausted
         */
        boolean next() {
            if (limit >= veryLimit)
                return false;
            start = limit; // set to end of last
            String label = getLabelSource(false).getValue(limit, true);
            String value = getValue(limit, true);
            String breaker = getRangeBreakSource().getValue(limit, true);
            if (DEBUG && limit < 0x7F) System.out.println("Label: " + label + ", Value: " + value + ", Break: " + breaker);
            limit++;
            for (; limit < veryLimit; limit++) {
                String s = getLabelSource(false).getValue(limit, true);
                String v = getValue(limit, true);
                String b = getRangeBreakSource().getValue(limit, true);
                if (DEBUG && limit < 0x7F) System.out.println("*Label: " + s + ", Value: " + v + ", Break: " + b);
                if (!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) break;
            }
            // at this point, limit is the first item that has a different label than source
            // OR, we got to the end, and limit == veryLimit
            return true;
        }
    }

    /** Null-safe equality: true if both are the same reference (including both null) or {@code a.equals(b)}. */
    boolean equalTo(Object a, Object b) {
        if (a == b) return true;
        if (a == null) return false;
        return a.equals(b);
    }

    boolean shortLabel = true;
    boolean shortValue = true;

    /**
     * @return the prefix written by {@link #join(Object)} before a container
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * @return the suffix written by {@link #join(Object)} after a container
     */
    public String getSuffix() {
        return suffix;
    }

    /**
     * @param string the new prefix
     * @return this (for chaining)
     */
    public BagFormatter setPrefix(String string) {
        prefix = string;
        return this;
    }

    /**
     * @param string the new suffix
     * @return this (for chaining)
     */
    public BagFormatter setSuffix(String string) {
        suffix = string;
        return this;
    }

    /**
     * @return true if the end name of a range is abbreviated against the start name
     */
    public boolean isAbbreviated() {
        return abbreviated;
    }

    /**
     * @param b true to abbreviate the end name of a range against the start name
     * @return this (for chaining)
     */
    public BagFormatter setAbbreviated(boolean b) {
        abbreviated = b;
        return this;
    }

    /**
     * Returns the label source. If none has been set, one is created on
     * first use: the General_Category property filtered so that Lu, Lt and
     * Ll are all mapped to "L&amp;".
     * @param visible not used by this implementation
     * @return the label source
     */
    public UnicodeLabel getLabelSource(boolean visible) {
        if (labelSource == null) {
            Map labelMap = new HashMap();
            //labelMap.put("Lo","L&");
            labelMap.put("Lu", "L&");
            labelMap.put("Lt", "L&");
            labelMap.put("Ll", "L&");
            labelSource = new UnicodeProperty.FilteredProperty(
                getUnicodePropertyFactory().getProperty("General_Category"),
                new UnicodeProperty.MapFilter(labelMap)
            ).setAllowValueAliasCollisions(true);
        }
        return labelSource;
    }

    /**
     * Adds the contents of {@code source} to {@code target}.
     * @deprecated call {@code source.addAllTo(target)} directly
     */
    @Deprecated
    public static void addAll(UnicodeSet source, Collection target) {
        source.addAllTo(target);
    }

    // UTILITIES

    public static final Transliterator hex = Transliterator.getInstance(
        "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");

    /**
     * @param dir directory, or the empty string to use {@code filename} alone
     * @param filename name of the file to open
     * @return a buffered UTF-8 reader on the file
     * @throws IOException if the file cannot be opened
     */
    public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
        return openReader(dir, filename, "UTF-8");
    }

    /**
     * @param dir directory, or the empty string to use {@code filename} alone
     * @param filename name of the file to open
     * @param encoding name of the character encoding
     * @return a buffered reader on the file; the path is logged first when
     *         {@link #SHOW_FILES} is set and a log writer is present
     * @throws IOException if the file cannot be opened or the encoding is unsupported
     */
    public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
        File file = dir.length() == 0 ? new File(filename) : new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Opening File: "
                + file.getCanonicalPath());
        }
        return new BufferedReader(
            new InputStreamReader(
                new FileInputStream(file),
                encoding),
            4 * 1024);
    }

    /**
     * @param dir directory in which to create the file
     * @param filename name of the file to create
     * @return a buffered UTF-8 writer on the file
     * @throws IOException if the file cannot be created
     */
    public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
        return openWriter(dir, filename, "UTF-8");
    }

    /**
     * Opens a file for writing, creating its parent directories if needed.
     * @param dir directory in which to create the file
     * @param filename name of the file to create
     * @param encoding name of the character encoding
     * @return a buffered writer on the file; the path is logged first when
     *         {@link #SHOW_FILES} is set and a log writer is present
     * @throws IOException if the file cannot be created or the encoding is unsupported
     */
    public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
        File file = new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Creating File: "
                + file.getCanonicalPath());
        }
        String parentName = file.getParent();
        if (parentName != null) {
            File parent = new File(parentName);
            parent.mkdirs();
        }
        return new PrintWriter(
            new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream(file),
                    encoding),
                4 * 1024));
    }

    /**
     * @return the shared log writer
     */
    public static PrintWriter getLog() {
        return log;
    }

    /**
     * Sets the log writer. Note that the log is static, so this affects all
     * BagFormatter instances.
     * @param writer the new log writer
     * @return this (for chaining)
     */
    public BagFormatter setLog(PrintWriter writer) {
        log = writer;
        return this;
    }

    /**
     * @return the separator used between items
     */
    public String getSeparator() {
        return separator;
    }

    /**
     * @param string the new separator
     * @return this (for chaining)
     */
    public BagFormatter setSeparator(String string) {
        separator = string;
        return this;
    }

    /**
     * @return the transliterator applied to literals, or null if literals are not shown
     */
    public Transliterator getShowLiteral() {
        return showLiteral;
    }

    /**
     * @param transliterator the transliterator applied to literals, or null
     *        to suppress the literal column
     * @return this (for chaining)
     */
    public BagFormatter setShowLiteral(Transliterator transliterator) {
        showLiteral = transliterator;
        return this;
    }

    // ===== CONVENIENCES =====
    /**
     * Visitor that concatenates items into a string, wrapping each container
     * in the prefix and suffix and delimiting items with the separator.
     */
    private class Join extends Visitor {
        StringBuffer output = new StringBuffer();
        @SuppressWarnings("unused")
        int depth = 0;

        String join (Object o) {
            output.setLength(0);
            doAt(o);
            return output.toString();
        }

        protected void doBefore(Object container, Object item) {
            ++depth;
            output.append(prefix);
        }

        protected void doAfter(Object container, Object item) {
            output.append(suffix);
            --depth;
        }

        protected void doBetween(Object container, Object lastItem, Object nextItem) {
            output.append(separator);
        }

        protected void doSimpleAt(Object o) {
            if (o != null) output.append(o.toString());
        }
    }

    /**
     * @param label the label source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setLabelSource(UnicodeLabel label) {
        if (label == null) label = UnicodeLabel.NULL;
        labelSource = label;
        return this;
    }

    /**
     * @return the name source; a {@link NameLabel} on the property factory is
     *         created on first use if none has been set
     */
    public UnicodeLabel getNameSource() {
        if (nameSource == null) {
            nameSource = new NameLabel(getUnicodePropertyFactory());
        }
        return nameSource;
    }

    /**
     * @param label the name source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setNameSource(UnicodeLabel label) {
        if (label == null) label = UnicodeLabel.NULL;
        nameSource = label;
        return this;
    }

    /**
     * @return the UnicodeLabel representing the value; {@code UnicodeLabel.NULL}
     *         if none has been set
     */
    public UnicodeLabel getValueSource() {
        if (valueSource == null) valueSource = UnicodeLabel.NULL;
        return valueSource;
    }

    /**
     * @return the value for the code point, converted to hex when hexValue is
     *         set, or the {@link #NULL_VALUE} sentinel when the source returns null
     */
    private String getValue(int cp, boolean shortVal) {
        String result = getValueSource().getValue(cp, shortVal);
        if (result == null) return NULL_VALUE;
        if (hexValue) result = hex(result, " ");
        return result;
    }

    /**
     * @param label the value source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setValueSource(UnicodeLabel label) {
        if (label == null) label = UnicodeLabel.NULL;
        valueSource = label;
        return this;
    }

    /**
     * @param label constant text to use as the value
     * @return this (for chaining)
     */
    public BagFormatter setValueSource(String label) {
        return setValueSource(new UnicodeLabel.Constant(label));
    }

    /**
     * @return true if showCount is true
     */
    public boolean isShowCount() {
        return showCount;
    }

    /**
     * @param b true to show the count
     * @return this (for chaining)
     */
    public BagFormatter setShowCount(boolean b) {
        showCount = b;
        return this;
    }

    /**
     * @return the property name
     */
    public String getPropName() {
        return propName;
    }

    /**
     * @param string the property name; null is treated as the empty string
     * @return this (for chaining)
     */
    public BagFormatter setPropName(String string) {
        if (string == null) string = "";
        propName = string;
        return this;
    }

    /**
     * @return true if this is a hexValue
     */
    public boolean isHexValue() {
        return hexValue;
    }

    /**
     * @param b true to convert values to hex
     * @return this (for chaining)
     */
    public BagFormatter setHexValue(boolean b) {
        hexValue = b;
        return this;
    }

    /**
     * @return the full total
     */
    public int getFullTotal() {
        return fullTotal;
    }

    /**
     * @param i set the full total
     * @return this (for chaining)
     */
    public BagFormatter setFullTotal(int i) {
        fullTotal = i;
        return this;
    }

    /**
     * @return the line separator
     */
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * @param string the line separator
     * @return this (for chaining)
     */
    public BagFormatter setLineSeparator(String string) {
        lineSeparator = string;
        return this;
    }

    /**
     * @return the UnicodeLabel representing the range break source; if none
     *         has been set, one is created on first use from the
     *         General_Category property with the values below mapped to
     *         "G&amp;" and Zl/Zp mapped to "Cf"
     */
    public UnicodeLabel getRangeBreakSource() {
        if (rangeBreakSource == null) {
            Map labelMap = new HashMap();
            // reflects the code point types on p 25
            String[] graphicCategories = {
                "Lo", "Lm", "Lu", "Lt", "Ll",
                "Mn", "Me", "Mc",
                "Nd", "Nl", "No",
                "Zs",
                "Pd", "Ps", "Pe", "Pc", "Po", "Pi", "Pf",
                "Sm", "Sc", "Sk", "So",
            };
            for (String category : graphicCategories) {
                labelMap.put(category, "G&");
            }

            labelMap.put("Zl", "Cf");
            labelMap.put("Zp", "Cf");

            rangeBreakSource =
                new UnicodeProperty
                    .FilteredProperty(
                        getUnicodePropertyFactory().getProperty(
                            "General_Category"),
                        new UnicodeProperty.MapFilter(labelMap))
                    .setAllowValueAliasCollisions(true);

            /*
            "Cn", // = Other, Not Assigned 0
            "Cc", // = Other, Control 15
            "Cf", // = Other, Format 16
            UnicodeProperty.UNUSED, // missing
            "Co", // = Other, Private Use 18
            "Cs", // = Other, Surrogate 19
            */
        }
        return rangeBreakSource;
    }

    /**
     * @param label the range break source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setRangeBreakSource(UnicodeLabel label) {
        if (label == null) label = UnicodeLabel.NULL;
        rangeBreakSource = label;
        return this;
    }

    /**
     * @return Returns the fixName.
     */
    public Transliterator getFixName() {
        return fixName;
    }

    /**
     * @param fixName The fixName to set.
     * @return this (for chaining)
     */
    public BagFormatter setFixName(Transliterator fixName) {
        this.fixName = fixName;
        return this;
    }

    /**
     * @return the tabber used to lay out table lines
     */
    public Tabber getTabber() {
        return tabber;
    }

    /**
     * @param tabber the tabber used to lay out table lines
     */
    public void setTabber(Tabber tabber) {
        this.tabber = tabber;
    }

    /**
     * @return true if the total lines are written after a listing
     */
    public boolean isShowTotal() {
        return showTotal;
    }

    /**
     * @param showTotal true to write the total lines after a listing
     */
    public void setShowTotal(boolean showTotal) {
        this.showTotal = showTotal;
    }
}