/* ******************************************************************************* * Copyright (C) 1996-2006, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.rbbi; import java.io.IOException; import java.io.InputStream; import java.util.ListResourceBundle; import java.util.MissingResourceException; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.DictionaryBasedBreakIterator; import com.ibm.icu.text.RuleBasedBreakIterator; // TODO: {dlf} this test currently doesn't test anything! // You'll notice that the resource that uses the dictionary isn't even on the resource path, // so the dictionary never gets used. Good thing, too, because it would throw a security // exception if run with a security manager. Not that it would matter, the dictionary // resource isn't even in the icu source tree! // In order to fix this: // 1) make sure english.dict matches the current dictionary format required by dbbi // 2) make sure english.dict gets included in icu4jtests.jar // 3) have this test use getResourceAsStream to get a stream on the dictionary, and // directly instantiate a DictionaryBasedBreakIterator. It can use the rules from // the appropriate section of ResourceBundle_en_US_TEST. I'd suggest just copying // the rules into this file. // 4) change the test text by inserting '|' at word breaks, and '||' at line breaks. // 5) process this text to a) create tables of break indices, and b) clean up the test // for the break iterator to work on // // This would NOT test the ability to load dictionary-based break iterators through our // normal resource mechanism. One could install such a break iterator and its // resources into the icu4j jar, and it would work, but there's no way to register entire // resources from outside yet. Even if there were, the access restrictions are a bit // difficult to manage, if one wanted to register a break iterator whose code and data // resides outside the icu4j jar. Since the code to instantiate would be going through // two protection domains, each domain would have to allow access to the data-- but // icu4j's domain wouldn't know about ours. So we could instantiate before registering // the break iterator, but this would mean we'd have to fully initialize the dictionary(s) // at instantiation time, rather than let this be deferred until they are actually needed. // // I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the // dictionary builder crashes. So for now I'm disabling this test. This is not // that important, since we have a thai dictionary that we do test thoroughly. // public class SimpleBITest extends TestFmwk{ public static final String testText = // "The rain in Spain stays mainly on the plain. The plains in Spain are mainly pained with rain."; //"one-two now-- Hah! You owe me exactly $1,345.67... Pay up, huh? By the way, why don't I send you my re\u0301sume\u0301? This is a line\r\nbreak."; //"nowisthetimeforallgoodmen... tocometothehelpoftheircountry"; "When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have " //"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave" + "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws" + "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe" + "causeswhichimpelthemtotheseparation\n" + "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain" + "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare" + "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment" + "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying" + "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety" + "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient" + "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than" + "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations," + "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty," + "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof" + "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory" + "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe" + "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n" + "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n" + "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill" + "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n" + "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish" + "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n" + "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic" + "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n" + "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n" + "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers," + "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed" + "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n" + "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof" + "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof" + "lands.\n" + "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n" + "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n" + "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir" + "substance.\n" + "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n" + "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n" + "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;" + "givinghisassenttotheiractsofpretendedlegislation:\n" + "Forquarteringlargebodiesofarmedtroopsamongus:\n" + "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese" + "states:\n" + "Forcuttingoffourtradewithallpartsoftheworld:\n" + "Forimposingtaxesonuswithoutourconsent:\n" + "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n" + "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n" + "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and" + "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese" + "colonies:\n" + "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n" + "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n" + "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n" + "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n" + "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny," + "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth" + "theheadofacivilizednation.\n" + "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe" + "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n" + "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the" + "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n" + "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave" + "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is" + "unfittobetherulerofafreepeople.\n" + "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir" + "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration" + "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour" + "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We" + "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind," + "enemiesinwar,inpeacefriends.\n" + "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe" + "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof" + "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent" + "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe" + "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto" + "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent" + "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we" + "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n"; public static void main(String[] args) throws Exception { new SimpleBITest().run(args); } protected boolean validate() { // TODO: remove when english.dict gets fixed return false; } private BreakIterator createTestIterator(int kind) { final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST"; BreakIterator iter = null; ListResourceBundle bundle = null; try { Class cls = Class.forName(bname); bundle = (ListResourceBundle)cls.newInstance(); } catch (Exception e) { errln("could not create bundle: " + bname + "exception: " + e.getMessage()); return null; } final String[] kindNames = { "Character", "Word", "Line", "Sentence" }; String rulesName = kindNames[kind] + "BreakRules"; String dictionaryName = kindNames[kind] + "BreakDictionary"; String[] classNames = bundle.getStringArray("BreakIteratorClasses"); String rules = bundle.getString(rulesName); if (classNames[kind].equals("RuleBasedBreakIterator")) { iter = new RuleBasedBreakIterator(rules); } else if (classNames[kind].equals("DictionaryBasedBreakIterator")) { try { String dictionaryPath = bundle.getString(dictionaryName); InputStream dictionary = bundle.getClass().getResourceAsStream(dictionaryPath); System.out.println("looking for " + dictionaryPath + " from " + bundle.getClass() + " returned " + dictionary); iter = new DictionaryBasedBreakIterator(rules, dictionary); } catch(IOException e) { e.printStackTrace(); errln(e.getMessage()); System.out.println(e); // debug } catch(MissingResourceException e) { errln(e.getMessage()); System.out.println(e); // debug } } if (iter == null) { errln("could not create iterator"); } return iter; } public void testWordBreak() throws Exception { BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD); int breaks = doTest(wordBreak); logln(String.valueOf(breaks)); } public void testLineBreak() throws Exception { BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE); int breaks = doTest(lineBreak); logln(String.valueOf(breaks)); } public void testSentenceBreak() throws Exception { BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE); int breaks = doTest(sentenceBreak); logln(String.valueOf(breaks)); } private int doTest(BreakIterator bi) { // forward bi.setText(testText); int p = bi.first(); int lastP = p; String fragment; int breaks = 0; logln("Forward..."); while (p != BreakIterator.DONE) { p = bi.next(); if (p != BreakIterator.DONE) { fragment = testText.substring(lastP, p); } else { fragment = testText.substring(lastP); } debugPrintln(": >" + fragment + "<"); ++breaks; lastP = p; } return breaks; } private void debugPrintln(String s) { final String zeros = "0000"; String temp; StringBuffer out = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c >= ' ' && c < '\u007f') out.append(c); else { out.append("\\u"); temp = Integer.toHexString((int)c); out.append(zeros.substring(0, 4 - temp.length())); out.append(temp); } } logln(out.toString()); } /* private void debugPrintln2(String s) { StringBuffer out = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c >= '\u0100') out.append("<" + ((int)c - 0x100) + ">"); else out.append(c); } logln(out.toString()); }*/ }