1/* 2 ******************************************************************************* 3 * Copyright (C) 1996-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7package com.ibm.icu.dev.test.rbbi; 8 9import java.util.ListResourceBundle; 10 11import com.ibm.icu.dev.test.TestFmwk; 12import com.ibm.icu.text.BreakIterator; 13import com.ibm.icu.text.RuleBasedBreakIterator; 14 15// TODO: {dlf} this test currently doesn't test anything! 16// You'll notice that the resource that uses the dictionary isn't even on the resource path, 17// so the dictionary never gets used. Good thing, too, because it would throw a security 18// exception if run with a security manager. Not that it would matter, the dictionary 19// resource isn't even in the icu source tree! 20// In order to fix this: 21// 1) make sure english.dict matches the current dictionary format required by dbbi 22// 2) make sure english.dict gets included in icu4jtests.jar 23// 3) have this test use getResourceAsStream to get a stream on the dictionary, and 24// directly instantiate a DictionaryBasedBreakIterator. It can use the rules from 25// the appropriate section of ResourceBundle_en_US_TEST. I'd suggest just copying 26// the rules into this file. 27// 4) change the test text by inserting '|' at word breaks, and '||' at line breaks. 28// 5) process this text to a) create tables of break indices, and b) clean up the test 29// for the break iterator to work on 30// 31// This would NOT test the ability to load dictionary-based break iterators through our 32// normal resource mechanism. One could install such a break iterator and its 33// resources into the icu4j jar, and it would work, but there's no way to register entire 34// resources from outside yet. Even if there were, the access restrictions are a bit 35// difficult to manage, if one wanted to register a break iterator whose code and data 36// resides outside the icu4j jar. Since the code to instantiate would be going through 37// two protection domains, each domain would have to allow access to the data-- but 38// icu4j's domain wouldn't know about ours. So we could instantiate before registering 39// the break iterator, but this would mean we'd have to fully initialize the dictionary(s) 40// at instantiation time, rather than let this be deferred until they are actually needed. 41// 42// I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the 43// dictionary builder crashes. So for now I'm disabling this test. This is not 44// that important, since we have a thai dictionary that we do test thoroughly. 45// 46 47public class SimpleBITest extends TestFmwk{ 48 public static final String testText = 49// "The rain in Spain stays mainly on the plain. The plains in Spain are mainly pained with rain."; 50//"one-two now-- Hah! You owe me exactly $1,345.67... Pay up, huh? By the way, why don't I send you my re\u0301sume\u0301? This is a line\r\nbreak."; 51//"nowisthetimeforallgoodmen... tocometothehelpoftheircountry"; 52"When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have " 53//"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave" 54+ "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws" 55+ "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe" 56+ "causeswhichimpelthemtotheseparation\n" 57+ "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain" 58+ "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare" 59+ "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment" 60+ "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying" 61+ "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety" 62+ "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient" 63+ "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than" 64+ "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations," 65+ "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty," 66+ "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof" 67+ "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory" 68+ "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe" 69+ "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n" 70+ "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n" 71+ "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill" 72+ "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n" 73+ "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish" 74+ "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n" 75+ "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic" 76+ "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n" 77+ "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n" 78+ "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers," 79+ "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed" 80+ "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n" 81+ "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof" 82+ "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof" 83+ "lands.\n" 84+ "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n" 85+ "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n" 86+ "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir" 87+ "substance.\n" 88+ "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n" 89+ "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n" 90+ "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;" 91+ "givinghisassenttotheiractsofpretendedlegislation:\n" 92+ "Forquarteringlargebodiesofarmedtroopsamongus:\n" 93+ "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese" 94+ "states:\n" 95+ "Forcuttingoffourtradewithallpartsoftheworld:\n" 96+ "Forimposingtaxesonuswithoutourconsent:\n" 97+ "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n" 98+ "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n" 99+ "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and" 100+ "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese" 101+ "colonies:\n" 102+ "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n" 103+ "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n" 104+ "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n" 105+ "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n" 106+ "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny," 107+ "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth" 108+ "theheadofacivilizednation.\n" 109+ "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe" 110+ "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n" 111+ "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the" 112+ "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n" 113+ "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave" 114+ "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is" 115+ "unfittobetherulerofafreepeople.\n" 116+ "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir" 117+ "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration" 118+ "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour" 119+ "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We" 120+ "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind," 121+ "enemiesinwar,inpeacefriends.\n" 122+ "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe" 123+ "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof" 124+ "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent" 125+ "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe" 126+ "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto" 127+ "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent" 128+ "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we" 129+ "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n"; 130 131 public static void main(String[] args) throws Exception { 132 new SimpleBITest().run(args); 133 } 134 135 protected boolean validate() { 136 // TODO: remove when english.dict gets fixed 137 return false; 138 } 139 140 private BreakIterator createTestIterator(int kind) { 141 final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST"; 142 143 BreakIterator iter = null; 144 145 ListResourceBundle bundle = null; 146 try { 147 Class cls = Class.forName(bname); 148 bundle = (ListResourceBundle)cls.newInstance(); 149 } 150 catch (Exception e) { 151 errln("could not create bundle: " + bname + "exception: " + e.getMessage()); 152 return null; 153 } 154 155 final String[] kindNames = { 156 "Character", "Word", "Line", "Sentence" 157 }; 158 String rulesName = kindNames[kind] + "BreakRules"; 159 160 String[] classNames = bundle.getStringArray("BreakIteratorClasses"); 161 String rules = bundle.getString(rulesName); 162 if (classNames[kind].equals("RuleBasedBreakIterator")) { 163 iter = new RuleBasedBreakIterator(rules); 164 } 165 if (iter == null) { 166 errln("could not create iterator"); 167 } 168 169 return iter; 170 } 171 172 public void testWordBreak() throws Exception { 173 BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD); 174 int breaks = doTest(wordBreak); 175 logln(String.valueOf(breaks)); 176 } 177 178 public void testLineBreak() throws Exception { 179 BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE); 180 int breaks = doTest(lineBreak); 181 logln(String.valueOf(breaks)); 182 } 183 184 public void testSentenceBreak() throws Exception { 185 BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE); 186 int breaks = doTest(sentenceBreak); 187 logln(String.valueOf(breaks)); 188 } 189 190 private int doTest(BreakIterator bi) { 191 // forward 192 bi.setText(testText); 193 int p = bi.first(); 194 int lastP = p; 195 String fragment; 196 int breaks = 0; 197 logln("Forward..."); 198 while (p != BreakIterator.DONE) { 199 p = bi.next(); 200 if (p != BreakIterator.DONE) { 201 fragment = testText.substring(lastP, p); 202 } else { 203 fragment = testText.substring(lastP); 204 } 205 debugPrintln(": >" + fragment + "<"); 206 ++breaks; 207 lastP = p; 208 } 209 return breaks; 210 } 211 212 private void debugPrintln(String s) { 213 final String zeros = "0000"; 214 String temp; 215 StringBuffer out = new StringBuffer(); 216 for (int i = 0; i < s.length(); i++) { 217 char c = s.charAt(i); 218 if (c >= ' ' && c < '\u007f') 219 out.append(c); 220 else { 221 out.append("\\u"); 222 temp = Integer.toHexString((int)c); 223 out.append(zeros.substring(0, 4 - temp.length())); 224 out.append(temp); 225 } 226 } 227 logln(out.toString()); 228 } 229 230/* private void debugPrintln2(String s) { 231 StringBuffer out = new StringBuffer(); 232 for (int i = 0; i < s.length(); i++) { 233 char c = s.charAt(i); 234 if (c >= '\u0100') 235 out.append("<" + ((int)c - 0x100) + ">"); 236 else 237 out.append(c); 238 } 239 logln(out.toString()); 240 }*/ 241} 242 243