1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7package com.ibm.icu.dev.test.rbbi;
8
9import java.util.ListResourceBundle;
10
11import com.ibm.icu.dev.test.TestFmwk;
12import com.ibm.icu.text.BreakIterator;
13import com.ibm.icu.text.RuleBasedBreakIterator;
14
15// TODO: {dlf} this test currently doesn't test anything!
16// You'll notice that the resource that uses the dictionary isn't even on the resource path,
17// so the dictionary never gets used.  Good thing, too, because it would throw a security
18// exception if run with a security manager.  Not that it would matter, the dictionary
19// resource isn't even in the icu source tree!
20// In order to fix this:
21// 1) make sure english.dict matches the current dictionary format required by dbbi
22// 2) make sure english.dict gets included in icu4jtests.jar
23// 3) have this test use getResourceAsStream to get a stream on the dictionary, and
24//    directly instantiate a DictionaryBasedBreakIterator.  It can use the rules from
25//    the appropriate section of ResourceBundle_en_US_TEST.  I'd suggest just copying
26//    the rules into this file.
27// 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
28// 5) process this text to a) create tables of break indices, and b) clean up the test
29//    for the break iterator to work on
30//
31// This would NOT test the ability to load dictionary-based break iterators through our
32// normal resource mechanism.  One could install such a break iterator and its
33// resources into the icu4j jar, and it would work, but there's no way to register entire
34// resources from outside yet.  Even if there were, the access restrictions are a bit
// difficult to manage, if one wanted to register a break iterator whose code and data
// reside outside the icu4j jar.  Since the code to instantiate would be going through
37// two protection domains, each domain would have to allow access to the data-- but
38// icu4j's domain wouldn't know about ours.  So we could instantiate before registering
39// the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
40// at instantiation time, rather than let this be deferred until they are actually needed.
41//
42// I've done items 2 and 3 above.  Unfortunately, since I haven't done item 1, the
43// dictionary builder crashes.  So for now I'm disabling this test.  This is not
44// that important, since we have a thai dictionary that we do test thoroughly.
45//
46
47public class SimpleBITest extends TestFmwk{
48    public static final String testText =
49//        "The rain in Spain stays mainly on the plain.  The plains in Spain are mainly pained with rain.";
50//"one-two now--  Hah!  You owe me exactly $1,345.67...  Pay up, huh?  By the way, why don't I send you my re\u0301sume\u0301?  This is a line\r\nbreak.";
51//"nowisthetimeforallgoodmen...  tocometothehelpoftheircountry";
52"When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "
53//"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"
54+ "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"
55+ "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"
56+ "causeswhichimpelthemtotheseparation\n"
57+ "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"
58+ "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"
59+ "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"
60+ "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"
61+ "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"
62+ "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"
63+ "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"
64+ "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"
65+ "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"
66+ "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"
67+ "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"
68+ "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"
69+ "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"
70+ "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"
71+ "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"
72+ "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"
73+ "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"
74+ "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"
75+ "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"
76+ "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"
77+ "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"
78+ "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"
79+ "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"
80+ "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"
81+ "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"
82+ "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"
83+ "lands.\n"
84+ "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"
85+ "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"
86+ "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"
87+ "substance.\n"
88+ "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"
89+ "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"
90+ "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"
91+ "givinghisassenttotheiractsofpretendedlegislation:\n"
92+ "Forquarteringlargebodiesofarmedtroopsamongus:\n"
93+ "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"
94+ "states:\n"
95+ "Forcuttingoffourtradewithallpartsoftheworld:\n"
96+ "Forimposingtaxesonuswithoutourconsent:\n"
97+ "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"
98+ "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"
99+ "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"
100+ "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"
101+ "colonies:\n"
102+ "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"
103+ "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"
104+ "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"
105+ "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"
106+ "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"
107+ "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"
108+ "theheadofacivilizednation.\n"
109+ "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"
110+ "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"
111+ "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"
112+ "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"
113+ "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"
114+ "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"
115+ "unfittobetherulerofafreepeople.\n"
116+ "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"
117+ "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"
118+ "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"
119+ "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"
120+ "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"
121+ "enemiesinwar,inpeacefriends.\n"
122+ "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"
123+ "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"
124+ "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"
125+ "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"
126+ "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"
127+ "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"
128+ "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"
129+ "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";
130
131    public static void main(String[] args) throws Exception {
132        new SimpleBITest().run(args);
133    }
134
135    protected boolean validate() {
136        // TODO: remove when english.dict gets fixed
137        return false;
138    }
139
140    private BreakIterator createTestIterator(int kind) {
141        final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";
142
143        BreakIterator iter = null;
144
145        ListResourceBundle bundle = null;
146        try {
147            Class cls = Class.forName(bname);
148            bundle = (ListResourceBundle)cls.newInstance();
149        }
150        catch (Exception e) {
151            errln("could not create bundle: " + bname + "exception: " + e.getMessage());
152            return null;
153        }
154
155        final String[] kindNames = {
156            "Character", "Word", "Line", "Sentence"
157        };
158        String rulesName = kindNames[kind] + "BreakRules";
159
160        String[] classNames = bundle.getStringArray("BreakIteratorClasses");
161        String rules = bundle.getString(rulesName);
162        if (classNames[kind].equals("RuleBasedBreakIterator")) {
163            iter = new RuleBasedBreakIterator(rules);
164        }
165        if (iter == null) {
166            errln("could not create iterator");
167        }
168
169        return iter;
170    }
171
172    public void testWordBreak() throws Exception {
173        BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
174        int breaks = doTest(wordBreak);
175        logln(String.valueOf(breaks));
176    }
177
178    public void testLineBreak() throws Exception {
179        BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
180        int breaks = doTest(lineBreak);
181        logln(String.valueOf(breaks));
182    }
183
184    public void testSentenceBreak() throws Exception {
185        BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
186        int breaks = doTest(sentenceBreak);
187        logln(String.valueOf(breaks));
188    }
189
190    private int doTest(BreakIterator bi) {
191        // forward
192        bi.setText(testText);
193        int p = bi.first();
194        int lastP = p;
195        String fragment;
196        int breaks = 0;
197        logln("Forward...");
198        while (p != BreakIterator.DONE) {
199            p = bi.next();
200            if (p != BreakIterator.DONE) {
201                fragment = testText.substring(lastP, p);
202            } else {
203                fragment = testText.substring(lastP);
204            }
205            debugPrintln(": >" + fragment + "<");
206            ++breaks;
207            lastP = p;
208        }
209        return breaks;
210    }
211
212    private void debugPrintln(String s) {
213        final String zeros = "0000";
214        String temp;
215        StringBuffer out = new StringBuffer();
216        for (int i = 0; i < s.length(); i++) {
217            char c = s.charAt(i);
218            if (c >= ' ' && c < '\u007f')
219                out.append(c);
220            else {
221                out.append("\\u");
222                temp = Integer.toHexString((int)c);
223                out.append(zeros.substring(0, 4 - temp.length()));
224                out.append(temp);
225            }
226        }
227        logln(out.toString());
228    }
229
230/*    private void debugPrintln2(String s) {
231        StringBuffer out = new StringBuffer();
232        for (int i = 0; i < s.length(); i++) {
233            char c = s.charAt(i);
234            if (c >= '\u0100')
235                out.append("<" + ((int)c - 0x100) + ">");
236            else
237                out.append(c);
238        }
239        logln(out.toString());
240    }*/
241}
242
243