2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.test.rbbi;
9 import java.util.ListResourceBundle;
11 import com.ibm.icu.dev.test.TestFmwk;
12 import com.ibm.icu.text.BreakIterator;
13 import com.ibm.icu.text.RuleBasedBreakIterator;
15 // TODO: {dlf} this test currently doesn't test anything!
16 // You'll notice that the resource that uses the dictionary isn't even on the resource path,
17 // so the dictionary never gets used. Good thing, too, because it would throw a security
18 // exception if run with a security manager. Not that it would matter: the dictionary
19 // resource isn't even in the icu source tree!
20 // In order to fix this:
21 // 1) make sure english.dict matches the current dictionary format required by dbbi
22 // 2) make sure english.dict gets included in icu4jtests.jar
23 // 3) have this test use getResourceAsStream to get a stream on the dictionary, and
24 // directly instantiate a DictionaryBasedBreakIterator. It can use the rules from
25 // the appropriate section of ResourceBundle_en_US_TEST. I'd suggest just copying
26 // the rules into this file.
27 // 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
28 // 5) process this text to a) create tables of break indices, and b) clean up the test text
29 // for the break iterator to work on
31 // This would NOT test the ability to load dictionary-based break iterators through our
32 // normal resource mechanism. One could install such a break iterator and its
33 // resources into the icu4j jar, and it would work, but there's no way to register entire
34 // resources from outside yet. Even if there were, the access restrictions are a bit
35 // difficult to manage, if one wanted to register a break iterator whose code and data
36 // resides outside the icu4j jar. Since the code to instantiate would be going through
37 // two protection domains, each domain would have to allow access to the data-- but
38 // icu4j's domain wouldn't know about ours. So we could instantiate before registering
39 // the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
40 // at instantiation time, rather than let this be deferred until they are actually needed.
42 // I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the
43 // dictionary builder crashes. So for now I'm disabling this test. This is not
44 // that important, since we have a thai dictionary that we do test thoroughly.
47 public class SimpleBITest extends TestFmwk{
/**
 * Sample text used by this class: doTest() sets it on the iterator under test
 * and slices the logged fragments out of it.
 * Only the first segment keeps the spaces between words; the segments that
 * follow are run together with the spaces removed, and "\n" ends each paragraph.
 * NOTE(review): presumably the unspaced form is there so that word boundaries
 * have to come from a dictionary, as the notes at the top of this file discuss -- confirm.
 * The commented-out literals are alternative texts that are not in use.
 */
48 public static final String testText =
49 // "The rain in Spain stays mainly on the plain. The plains in Spain are mainly pained with rain.";
50 //"one-two now-- Hah! You owe me exactly $1,345.67... Pay up, huh? By the way, why don't I send you my re\u0301sume\u0301? This is a line\r\nbreak.";
51 //"nowisthetimeforallgoodmen... tocometothehelpoftheircountry";
52 "When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "
53 //"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"
54 + "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"
55 + "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"
56 + "causeswhichimpelthemtotheseparation\n"
57 + "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"
58 + "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"
59 + "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"
60 + "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"
61 + "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"
62 + "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"
63 + "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"
64 + "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"
65 + "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"
66 + "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"
67 + "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"
68 + "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"
69 + "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"
70 + "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"
71 + "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"
72 + "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"
73 + "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"
74 + "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"
75 + "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"
76 + "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"
77 + "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"
78 + "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"
79 + "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"
80 + "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"
81 + "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"
82 + "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"
84 + "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"
85 + "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"
86 + "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"
88 + "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"
89 + "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"
90 + "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"
91 + "givinghisassenttotheiractsofpretendedlegislation:\n"
92 + "Forquarteringlargebodiesofarmedtroopsamongus:\n"
93 + "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"
95 + "Forcuttingoffourtradewithallpartsoftheworld:\n"
96 + "Forimposingtaxesonuswithoutourconsent:\n"
97 + "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"
98 + "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"
99 + "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"
100 + "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"
102 + "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"
103 + "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"
104 + "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"
105 + "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"
106 + "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"
107 + "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"
108 + "theheadofacivilizednation.\n"
109 + "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"
110 + "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"
111 + "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"
112 + "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"
113 + "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"
114 + "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"
115 + "unfittobetherulerofafreepeople.\n"
116 + "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"
117 + "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"
118 + "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"
119 + "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"
120 + "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"
121 + "enemiesinwar,inpeacefriends.\n"
122 + "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"
123 + "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"
124 + "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"
125 + "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"
126 + "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"
127 + "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"
128 + "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"
129 + "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";
/**
 * Command-line entry point: creates a SimpleBITest and hands the command-line
 * arguments to its run(String[]) method. run() is not defined in the visible
 * part of this file; presumably it is inherited from TestFmwk -- confirm.
 *
 * @param args command-line arguments, forwarded unchanged to run()
 * @throws Exception declared by the signature
 */
131 public static void main(String[] args) throws Exception {
132 new SimpleBITest().run(args);
/**
 * Boolean validation hook. The statement that follows the TODO below is not
 * visible in this view, so the value returned cannot be stated here.
 * NOTE(review): the notes at the top of this file say the test is disabled
 * until english.dict is fixed; presumably this method is how that is done -- confirm.
 */
135 protected boolean validate() {
136 // TODO: remove when english.dict gets fixed
/**
 * Builds a break iterator of the requested kind from rule strings held in the
 * test resource bundle class named by bname.
 *
 * The bundle class is loaded by name and instantiated reflectively. The rule
 * string is read from the key "<Kind>BreakRules" and the implementing class
 * name from element [kind] of the "BreakIteratorClasses" string array. The
 * visible code constructs an iterator only when that class name is
 * "RuleBasedBreakIterator"; otherwise "could not create iterator" is reported.
 *
 * @param kind index into kindNames and into the bundle's "BreakIteratorClasses"
 *             array; callers in this file pass BreakIterator.KIND_WORD,
 *             KIND_LINE and KIND_SENTENCE. Assumes the KIND_* constants are
 *             0..3 in the order Character, Word, Line, Sentence -- TODO confirm.
 * @return a BreakIterator; the return statements are not visible in this view,
 *         presumably iter (null unless the RuleBasedBreakIterator branch ran) -- confirm
 */
140 private BreakIterator createTestIterator(int kind) {
141 final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";
143 BreakIterator iter = null;
145 ListResourceBundle bundle = null;
// Load the bundle class by name and create an instance of it reflectively.
// NOTE(review): raw Class type, and Class.newInstance() is deprecated in newer JDKs.
147 Class cls = Class.forName(bname);
148 bundle = (ListResourceBundle)cls.newInstance();
// Any failure while loading or instantiating the bundle is reported through errln.
150 catch (Exception e) {
// NOTE(review): there is no separator between bname and "exception: ", so the
// two run together in the reported message; only e.getMessage() is kept.
151 errln("could not create bundle: " + bname + "exception: " + e.getMessage());
// Prefixes of the rule keys; the entry selected by kind is joined with "BreakRules".
155 final String[] kindNames = {
156 "Character", "Word", "Line", "Sentence"
158 String rulesName = kindNames[kind] + "BreakRules";
// Look up which iterator class the bundle names for this kind, and its rules.
160 String[] classNames = bundle.getStringArray("BreakIteratorClasses");
161 String rules = bundle.getString(rulesName);
162 if (classNames[kind].equals("RuleBasedBreakIterator")) {
163 iter = new RuleBasedBreakIterator(rules);
// Reported when the class name was not "RuleBasedBreakIterator" (the else line is not visible here).
166 errln("could not create iterator");
/**
 * Builds the word break iterator with createTestIterator(BreakIterator.KIND_WORD),
 * runs doTest over it and logs the int that doTest returns.
 *
 * @throws Exception declared by the signature
 */
172 public void testWordBreak() throws Exception {
173 BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
174 int breaks = doTest(wordBreak);
175 logln(String.valueOf(breaks));
/**
 * Builds the line break iterator with createTestIterator(BreakIterator.KIND_LINE),
 * runs doTest over it and logs the int that doTest returns.
 *
 * @throws Exception declared by the signature
 */
178 public void testLineBreak() throws Exception {
179 BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
180 int breaks = doTest(lineBreak);
181 logln(String.valueOf(breaks));
/**
 * Builds the sentence break iterator with
 * createTestIterator(BreakIterator.KIND_SENTENCE), runs doTest over it and
 * logs the int that doTest returns.
 *
 * @throws Exception declared by the signature
 */
184 public void testSentenceBreak() throws Exception {
185 BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
186 int breaks = doTest(sentenceBreak);
187 logln(String.valueOf(breaks));
/**
 * Sets testText on the given iterator and loops until the position p equals
 * BreakIterator.DONE, logging each fragment of testText through debugPrintln.
 * The declarations of p, lastP and fragment, the statement that advances p,
 * and the return statement are not visible in this view.
 *
 * @param bi iterator to exercise; its text is replaced with testText
 * @return an int that the callers store as "breaks" and log; presumably the
 *         number of boundaries visited -- confirm against the full method
 */
190 private int doTest(BreakIterator bi) {
192 bi.setText(testText);
198 while (p != BreakIterator.DONE) {
// While a boundary is available the fragment is the text from lastP up to p ...
200 if (p != BreakIterator.DONE) {
201 fragment = testText.substring(lastP, p);
// ... otherwise it is everything from lastP to the end of the text
// (the else line itself is not visible here).
203 fragment = testText.substring(lastP);
// Log the fragment between ": >" and "<" so that its exact extent is visible.
205 debugPrintln(": >" + fragment + "<");
/**
 * Logs s through logln after rebuilding it character by character.
 * Characters outside the range ' ' (inclusive) to '\u007f' (exclusive) are
 * rendered as hexadecimal, left-padded with zeros to four digits. The
 * statements that append the printable characters and the hex digits (and
 * any escape prefix) are not visible in this view; presumably printable
 * characters are copied through unchanged -- confirm.
 *
 * @param s text to log
 */
212 private void debugPrintln(String s) {
// Source of the leading zeros used to pad a hex value to four digits.
213 final String zeros = "0000";
215 StringBuffer out = new StringBuffer();
216 for (int i = 0; i < s.length(); i++) {
217 char c = s.charAt(i);
// Printable-ASCII test: space up to, but not including, DEL.
218 if (c >= ' ' && c < '\u007f')
222 temp = Integer.toHexString((int)c);
// A char is at most 0xffff, so temp has 1 to 4 digits and 4 - length is never negative.
223 out.append(zeros.substring(0, 4 - temp.length()));
227 logln(out.toString());
230 /* private void debugPrintln2(String s) {
231 StringBuffer out = new StringBuffer();
232 for (int i = 0; i < s.length(); i++) {
233 char c = s.charAt(i);
235 out.append("<" + ((int)c - 0x100) + ">");
239 logln(out.toString());