2 *******************************************************************************
\r
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.rbbi;
\r
9 import java.io.IOException;
\r
10 import java.io.InputStream;
\r
11 import java.util.ListResourceBundle;
\r
12 import java.util.MissingResourceException;
\r
14 import com.ibm.icu.dev.test.TestFmwk;
\r
15 import com.ibm.icu.text.BreakIterator;
\r
16 import com.ibm.icu.text.DictionaryBasedBreakIterator;
\r
17 import com.ibm.icu.text.RuleBasedBreakIterator;
\r
19 // TODO: {dlf} this test currently doesn't test anything!
\r
20 // You'll notice that the resource that uses the dictionary isn't even on the resource path,
\r
21 // so the dictionary never gets used. Good thing, too, because it would throw a security
\r
22 // exception if run with a security manager. Not that it would matter, the dictionary
\r
23 // resource isn't even in the icu source tree!
\r
24 // In order to fix this:
\r
25 // 1) make sure english.dict matches the current dictionary format required by dbbi
\r
26 // 2) make sure english.dict gets included in icu4jtests.jar
\r
27 // 3) have this test use getResourceAsStream to get a stream on the dictionary, and
\r
28 // directly instantiate a DictionaryBasedBreakIterator. It can use the rules from
\r
29 // the appropriate section of ResourceBundle_en_US_TEST. I'd suggest just copying
\r
30 // the rules into this file.
\r
31 // 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
\r
32 // 5) process this text to a) create tables of break indices, and b) clean up the test
\r
33 // for the break iterator to work on
\r
35 // This would NOT test the ability to load dictionary-based break iterators through our
\r
36 // normal resource mechanism. One could install such a break iterator and its
\r
37 // resources into the icu4j jar, and it would work, but there's no way to register entire
\r
38 // resources from outside yet. Even if there were, the access restrictions are a bit
\r
39 // difficult to manage, if one wanted to register a break iterator whose code and data
\r
40 // reside outside the icu4j jar. Since the code to instantiate would be going through
\r
41 // two protection domains, each domain would have to allow access to the data-- but
\r
42 // icu4j's domain wouldn't know about ours. So we could instantiate before registering
\r
43 // the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
\r
44 // at instantiation time, rather than let this be deferred until they are actually needed.
\r
46 // I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the
\r
47 // dictionary builder crashes. So for now I'm disabling this test. This is not
\r
48 // that important, since we have a thai dictionary that we do test thoroughly.
\r
51 public class SimpleBITest extends TestFmwk{
\r
52 public static final String testText =
\r
53 // "The rain in Spain stays mainly on the plain. The plains in Spain are mainly pained with rain.";
\r
54 //"one-two now-- Hah! You owe me exactly $1,345.67... Pay up, huh? By the way, why don't I send you my re\u0301sume\u0301? This is a line\r\nbreak.";
\r
55 //"nowisthetimeforallgoodmen... tocometothehelpoftheircountry";
\r
56 "When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "
\r
57 //"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"
\r
58 + "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"
\r
59 + "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"
\r
60 + "causeswhichimpelthemtotheseparation\n"
\r
61 + "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"
\r
62 + "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"
\r
63 + "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"
\r
64 + "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"
\r
65 + "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"
\r
66 + "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"
\r
67 + "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"
\r
68 + "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"
\r
69 + "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"
\r
70 + "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"
\r
71 + "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"
\r
72 + "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"
\r
73 + "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"
\r
74 + "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"
\r
75 + "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"
\r
76 + "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"
\r
77 + "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"
\r
78 + "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"
\r
79 + "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"
\r
80 + "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"
\r
81 + "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"
\r
82 + "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"
\r
83 + "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"
\r
84 + "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"
\r
85 + "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"
\r
86 + "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"
\r
88 + "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"
\r
89 + "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"
\r
90 + "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"
\r
92 + "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"
\r
93 + "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"
\r
94 + "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"
\r
95 + "givinghisassenttotheiractsofpretendedlegislation:\n"
\r
96 + "Forquarteringlargebodiesofarmedtroopsamongus:\n"
\r
97 + "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"
\r
99 + "Forcuttingoffourtradewithallpartsoftheworld:\n"
\r
100 + "Forimposingtaxesonuswithoutourconsent:\n"
\r
101 + "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"
\r
102 + "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"
\r
103 + "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"
\r
104 + "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"
\r
106 + "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"
\r
107 + "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"
\r
108 + "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"
\r
109 + "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"
\r
110 + "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"
\r
111 + "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"
\r
112 + "theheadofacivilizednation.\n"
\r
113 + "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"
\r
114 + "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"
\r
115 + "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"
\r
116 + "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"
\r
117 + "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"
\r
118 + "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"
\r
119 + "unfittobetherulerofafreepeople.\n"
\r
120 + "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"
\r
121 + "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"
\r
122 + "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"
\r
123 + "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"
\r
124 + "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"
\r
125 + "enemiesinwar,inpeacefriends.\n"
\r
126 + "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"
\r
127 + "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"
\r
128 + "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"
\r
129 + "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"
\r
130 + "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"
\r
131 + "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"
\r
132 + "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"
\r
133 + "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";
\r
135 public static void main(String[] args) throws Exception {
\r
136 new SimpleBITest().run(args);
\r
139 protected boolean validate() {
\r
140 // TODO: remove when english.dict gets fixed
\r
144 private BreakIterator createTestIterator(int kind) {
\r
145 final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";
\r
147 BreakIterator iter = null;
\r
149 ListResourceBundle bundle = null;
\r
151 Class cls = Class.forName(bname);
\r
152 bundle = (ListResourceBundle)cls.newInstance();
\r
154 catch (Exception e) {
\r
155 errln("could not create bundle: " + bname + "exception: " + e.getMessage());
\r
159 final String[] kindNames = {
\r
160 "Character", "Word", "Line", "Sentence"
\r
162 String rulesName = kindNames[kind] + "BreakRules";
\r
163 String dictionaryName = kindNames[kind] + "BreakDictionary";
\r
165 String[] classNames = bundle.getStringArray("BreakIteratorClasses");
\r
166 String rules = bundle.getString(rulesName);
\r
167 if (classNames[kind].equals("RuleBasedBreakIterator")) {
\r
168 iter = new RuleBasedBreakIterator(rules);
\r
170 else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
\r
172 String dictionaryPath = bundle.getString(dictionaryName);
\r
173 InputStream dictionary = bundle.getClass().getResourceAsStream(dictionaryPath);
\r
174 System.out.println("looking for " + dictionaryPath + " from " + bundle.getClass() + " returned " + dictionary);
\r
175 iter = new DictionaryBasedBreakIterator(rules, dictionary);
\r
177 catch(IOException e) {
\r
178 e.printStackTrace();
\r
179 errln(e.getMessage());
\r
180 System.out.println(e); // debug
\r
182 catch(MissingResourceException e) {
\r
183 errln(e.getMessage());
\r
184 System.out.println(e); // debug
\r
187 if (iter == null) {
\r
188 errln("could not create iterator");
\r
194 public void testWordBreak() throws Exception {
\r
195 BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
\r
196 int breaks = doTest(wordBreak);
\r
197 logln(String.valueOf(breaks));
\r
200 public void testLineBreak() throws Exception {
\r
201 BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
\r
202 int breaks = doTest(lineBreak);
\r
203 logln(String.valueOf(breaks));
\r
206 public void testSentenceBreak() throws Exception {
\r
207 BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
\r
208 int breaks = doTest(sentenceBreak);
\r
209 logln(String.valueOf(breaks));
\r
212 private int doTest(BreakIterator bi) {
\r
214 bi.setText(testText);
\r
215 int p = bi.first();
\r
219 logln("Forward...");
\r
220 while (p != BreakIterator.DONE) {
\r
222 if (p != BreakIterator.DONE) {
\r
223 fragment = testText.substring(lastP, p);
\r
225 fragment = testText.substring(lastP);
\r
227 debugPrintln(": >" + fragment + "<");
\r
234 private void debugPrintln(String s) {
\r
235 final String zeros = "0000";
\r
237 StringBuffer out = new StringBuffer();
\r
238 for (int i = 0; i < s.length(); i++) {
\r
239 char c = s.charAt(i);
\r
240 if (c >= ' ' && c < '\u007f')
\r
244 temp = Integer.toHexString((int)c);
\r
245 out.append(zeros.substring(0, 4 - temp.length()));
\r
249 logln(out.toString());
\r
252 /* private void debugPrintln2(String s) {
\r
253 StringBuffer out = new StringBuffer();
\r
254 for (int i = 0; i < s.length(); i++) {
\r
255 char c = s.charAt(i);
\r
257 out.append("<" + ((int)c - 0x100) + ">");
\r
261 logln(out.toString());
\r