]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/core/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / rbbi / SimpleBITest.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2006, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.rbbi;\r
8 \r
9 import java.io.IOException;\r
10 import java.io.InputStream;\r
11 import java.util.ListResourceBundle;\r
12 import java.util.MissingResourceException;\r
13 \r
14 import com.ibm.icu.dev.test.TestFmwk;\r
15 import com.ibm.icu.text.BreakIterator;\r
16 import com.ibm.icu.text.DictionaryBasedBreakIterator;\r
17 import com.ibm.icu.text.RuleBasedBreakIterator;\r
18 \r
19 // TODO: {dlf} this test currently doesn't test anything!\r
20 // You'll notice that the resource that uses the dictionary isn't even on the resource path,\r
21 // so the dictionary never gets used.  Good thing, too, because it would throw a security\r
22 // exception if run with a security manager.  Not that it would matter, the dictionary \r
23 // resource isn't even in the icu source tree!\r
24 // In order to fix this:\r
25 // 1) make sure english.dict matches the current dictionary format required by dbbi\r
26 // 2) make sure english.dict gets included in icu4jtests.jar\r
27 // 3) have this test use getResourceAsStream to get a stream on the dictionary, and\r
28 //    directly instantiate a DictionaryBasedBreakIterator.  It can use the rules from\r
29 //    the appropriate section of ResourceBundle_en_US_TEST.  I'd suggest just copying\r
30 //    the rules into this file.\r
31 // 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.  \r
32 // 5) process this text to a) create tables of break indices, and b) clean up the test\r
33 //    for the break iterator to work on\r
34 // \r
35 // This would NOT test the ability to load dictionary-based break iterators through our\r
36 // normal resource mechanism.  One could install such a break iterator and its\r
37 // resources into the icu4j jar, and it would work, but there's no way to register entire\r
38 // resources from outside yet.  Even if there were, the access restrictions are a bit\r
39 // difficult to manage, if one wanted to register a break iterator whose code and data\r
40 // resides outside the icu4j jar.  Since the code to instantiate would be going through \r
41 // two protection domains, each domain would have to allow access to the data-- but \r
42 // icu4j's domain wouldn't know about ours.  So we could instantiate before registering\r
43 // the break iterator, but this would mean we'd have to fully initialize the dictionary(s)\r
44 // at instantiation time, rather than let this be deferred until they are actually needed.\r
45 //\r
46 // I've done items 2 and 3 above.  Unfortunately, since I haven't done item 1, the\r
47 // dictionary builder crashes.  So for now I'm disabling this test.  This is not\r
48 // that important, since we have a thai dictionary that we do test thoroughly.\r
49 //\r
50 \r
51 public class SimpleBITest extends TestFmwk{\r
52     public static final String testText =\r
        // Sample input shared by every test in this class: text of the United States
        // Declaration of Independence.  Only the first fragment below keeps the spaces
        // between words; every later fragment is run together with no spaces.
        // NOTE(review): presumably the unspaced text exists to exercise dictionary-based
        // word breaking -- confirm against the dictionary test plan in the file header.
        // NOTE(review): the literal contains apparent misspellings ("totalyunworth",
        // "leveywar"); they are runtime data and are deliberately left untouched here.
        // The commented-out lines are alternative sample texts kept for reference.
53 //        "The rain in Spain stays mainly on the plain.  The plains in Spain are mainly pained with rain.";\r
54 //"one-two now--  Hah!  You owe me exactly $1,345.67...  Pay up, huh?  By the way, why don't I send you my re\u0301sume\u0301?  This is a line\r\nbreak.";\r
55 //"nowisthetimeforallgoodmen...  tocometothehelpoftheircountry";\r
56 "When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "\r
57 //"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"\r
58 + "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"\r
59 + "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"\r
60 + "causeswhichimpelthemtotheseparation\n"\r
61 + "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"\r
62 + "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"\r
63 + "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"\r
64 + "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"\r
65 + "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"\r
66 + "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"\r
67 + "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"\r
68 + "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"\r
69 + "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"\r
70 + "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"\r
71 + "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"\r
72 + "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"\r
73 + "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"\r
74 + "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"\r
75 + "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"\r
76 + "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"\r
77 + "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"\r
78 + "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"\r
79 + "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"\r
80 + "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"\r
81 + "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"\r
82 + "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"\r
83 + "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"\r
84 + "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"\r
85 + "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"\r
86 + "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"\r
87 + "lands.\n"\r
88 + "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"\r
89 + "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"\r
90 + "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"\r
91 + "substance.\n"\r
92 + "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"\r
93 + "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"\r
94 + "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"\r
95 + "givinghisassenttotheiractsofpretendedlegislation:\n"\r
96 + "Forquarteringlargebodiesofarmedtroopsamongus:\n"\r
97 + "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"\r
98 + "states:\n"\r
99 + "Forcuttingoffourtradewithallpartsoftheworld:\n"\r
100 + "Forimposingtaxesonuswithoutourconsent:\n"\r
101 + "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"\r
102 + "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"\r
103 + "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"\r
104 + "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"\r
105 + "colonies:\n"\r
106 + "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"\r
107 + "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"\r
108 + "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"\r
109 + "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"\r
110 + "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"\r
111 + "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"\r
112 + "theheadofacivilizednation.\n"\r
113 + "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"\r
114 + "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"\r
115 + "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"\r
116 + "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"\r
117 + "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"\r
118 + "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"\r
119 + "unfittobetherulerofafreepeople.\n"\r
120 + "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"\r
121 + "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"\r
122 + "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"\r
123 + "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"\r
124 + "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"\r
125 + "enemiesinwar,inpeacefriends.\n"\r
126 + "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"\r
127 + "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"\r
128 + "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"\r
129 + "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"\r
130 + "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"\r
131 + "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"\r
132 + "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"\r
133 + "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";\r
134 \r
135     public static void main(String[] args) throws Exception {\r
136         new SimpleBITest().run(args);\r
137     }\r
138     \r
139     protected boolean validate() {\r
140         // TODO: remove when english.dict gets fixed\r
141         return false;\r
142     }\r
143 \r
144     private BreakIterator createTestIterator(int kind) {\r
145         final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";\r
146 \r
147         BreakIterator iter = null;\r
148 \r
149         ListResourceBundle bundle = null;\r
150         try {\r
151             Class cls = Class.forName(bname);\r
152             bundle = (ListResourceBundle)cls.newInstance();\r
153         }\r
154         catch (Exception e) {\r
155             errln("could not create bundle: " + bname + "exception: " + e.getMessage());\r
156             return null;\r
157         }\r
158         \r
159         final String[] kindNames = {\r
160             "Character", "Word", "Line", "Sentence"\r
161         };\r
162         String rulesName = kindNames[kind] + "BreakRules";\r
163         String dictionaryName = kindNames[kind] + "BreakDictionary";\r
164         \r
165         String[] classNames = bundle.getStringArray("BreakIteratorClasses");\r
166         String rules = bundle.getString(rulesName);\r
167         if (classNames[kind].equals("RuleBasedBreakIterator")) {\r
168             iter = new RuleBasedBreakIterator(rules);\r
169         }\r
170         else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {\r
171             try {\r
172                 String dictionaryPath = bundle.getString(dictionaryName);\r
173                 InputStream dictionary = bundle.getClass().getResourceAsStream(dictionaryPath);\r
174                 System.out.println("looking for " + dictionaryPath + " from " + bundle.getClass() + " returned " + dictionary);\r
175                 iter = new DictionaryBasedBreakIterator(rules, dictionary);\r
176             }\r
177             catch(IOException e) {\r
178                 e.printStackTrace();\r
179                 errln(e.getMessage());\r
180                 System.out.println(e); // debug\r
181             }\r
182             catch(MissingResourceException e) {\r
183                 errln(e.getMessage());\r
184                 System.out.println(e); // debug\r
185             }\r
186         }\r
187         if (iter == null) {\r
188             errln("could not create iterator");\r
189         }\r
190         \r
191         return iter;\r
192     }\r
193     \r
194     public void testWordBreak() throws Exception {\r
195         BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);\r
196         int breaks = doTest(wordBreak);\r
197         logln(String.valueOf(breaks));\r
198     }\r
199 \r
200     public void testLineBreak() throws Exception {\r
201         BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);\r
202         int breaks = doTest(lineBreak);\r
203         logln(String.valueOf(breaks));\r
204     }\r
205 \r
206     public void testSentenceBreak() throws Exception {\r
207         BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);\r
208         int breaks = doTest(sentenceBreak);\r
209         logln(String.valueOf(breaks));\r
210     }\r
211 \r
212     private int doTest(BreakIterator bi) {\r
213         // forward\r
214         bi.setText(testText);\r
215         int p = bi.first();\r
216         int lastP = p;\r
217         String fragment;\r
218         int breaks = 0;\r
219         logln("Forward...");\r
220         while (p != BreakIterator.DONE) {\r
221             p = bi.next();\r
222             if (p != BreakIterator.DONE) {\r
223                 fragment = testText.substring(lastP, p);\r
224             } else {\r
225                 fragment = testText.substring(lastP);\r
226             }\r
227             debugPrintln(": >" + fragment + "<");\r
228             ++breaks;\r
229             lastP = p;\r
230         }\r
231         return breaks;\r
232     }\r
233 \r
234     private void debugPrintln(String s) {\r
235         final String zeros = "0000";\r
236         String temp;\r
237         StringBuffer out = new StringBuffer();\r
238         for (int i = 0; i < s.length(); i++) {\r
239             char c = s.charAt(i);\r
240             if (c >= ' ' && c < '\u007f')\r
241                 out.append(c);\r
242             else {\r
243                 out.append("\\u");\r
244                 temp = Integer.toHexString((int)c);\r
245                 out.append(zeros.substring(0, 4 - temp.length()));\r
246                 out.append(temp);\r
247             }\r
248         }\r
249         logln(out.toString());\r
250     }\r
251 \r
252 /*    private void debugPrintln2(String s) {\r
253         StringBuffer out = new StringBuffer();\r
254         for (int i = 0; i < s.length(); i++) {\r
255             char c = s.charAt(i);\r
256             if (c >= '\u0100')\r
257                 out.append("<" + ((int)c - 0x100) + ">");\r
258             else\r
259                 out.append(c);\r
260         }\r
261         logln(out.toString());\r
262     }*/\r
263 }\r
264 \r