2 *******************************************************************************
3 * Copyright (C) 2011-2012, Google, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
7 package com.ibm.icu.dev.test.util;
9 import java.util.ArrayList;
10 import java.util.Arrays;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.List;
16 import java.util.Map.Entry;
17 import java.util.TreeMap;
19 import com.ibm.icu.dev.test.TestFmwk;
20 import com.ibm.icu.dev.test.util.TrieMap.Style;
21 import com.ibm.icu.dev.util.Timer;
22 import com.ibm.icu.impl.Row;
23 import com.ibm.icu.impl.Row.R3;
24 import com.ibm.icu.impl.Utility;
25 import com.ibm.icu.lang.UCharacter;
26 import com.ibm.icu.lang.UScript;
27 import com.ibm.icu.text.DecimalFormat;
28 import com.ibm.icu.text.UnicodeSet;
29 import com.ibm.icu.util.StringTrieBuilder.Option;
30 import com.ibm.icu.util.ULocale;
32 public class TrieMapTest extends TestFmwk {
33 static final boolean SHORT = false;
34 static final boolean HACK_TO_MAKE_TESTS_PASS = false;
35 static final int MASK = 0x3;
37 private Map<String, Integer> unicodeTestMap = new HashMap<String, Integer>();
38 private boolean useSmallList = true;
40 private Timer t = new Timer();
41 private DecimalFormat nf = t.getNumberFormat();
42 private DecimalFormat pf = t.getPercentFormat();
44 pf.setMaximumFractionDigits(0);
48 protected void init() throws Exception {
50 if (unicodeTestMap.size() == 0) {
51 if (getInclusion() < 5) {
52 logln("\tShort version, timing for 1s:\t to get more accurate figures and test for reasonable times, use -e5 or more");
53 t.setTimingPeriod(1*Timer.SECONDS);
55 int seconds = getInclusion();
56 logln("\tExhaustive version, timing for " + seconds + "s");
57 t.setTimingPeriod(seconds*Timer.SECONDS);
62 UnicodeSet testSet = new UnicodeSet("[[:^C:]-[:sc=han:]]");
63 for (String s : testSet) {
64 int codePoint = s.codePointAt(0);
65 String extendedName = UCharacter.getExtendedName(codePoint);
66 if (!unicodeTestMap.containsKey(extendedName)) {
67 unicodeTestMap.put(extendedName, i++);
69 if (i > 500 && useSmallList) break;
71 ULocale[] locales = useSmallList ? new ULocale[] {new ULocale("zh"), new ULocale("el")} : ULocale.getAvailableLocales();
72 for (ULocale locale : locales) {
73 if (locale.getDisplayCountry().length() != 0) {
77 for (String languageCode : ULocale.getISOLanguages()) {
78 localeName = ULocale.getDisplayName(languageCode, locale);
79 if (!localeName.equals(languageCode)) {
80 if (!unicodeTestMap.containsKey(localeName)) {
81 unicodeTestMap.put(localeName, MASK & i++);
86 for (String countryCode : ULocale.getISOCountries()) {
87 localeName = ULocale.getDisplayCountry("und-" + countryCode, locale);
88 if (!localeName.equals(countryCode)) {
89 if (!unicodeTestMap.containsKey(localeName)) {
90 unicodeTestMap.put(localeName, MASK & i++);
97 for (String key : unicodeTestMap.keySet()) {
98 charCount += key.length();
100 logln("\tTest Data Elements:\t\t\t" + nf.format(unicodeTestMap.size()));
101 logln("\tTotal chars:\t\t\t" + nf.format(charCount));
105 public static void main(String[] args) {
106 new TrieMapTest().run(args);
109 public void TestByteConversion() {
110 byte bytes[] = new byte[200];
111 for (Entry<String, Integer> entry : unicodeTestMap.entrySet()) {
112 String source = entry.getKey();
113 int limit = TrieMap.ByteConverter.getBytes(source, bytes, 0);
114 //logln(source + "\t=> " + Utility.hex(source, " ") + "\t=> " + Utility.hex(bytes, 0, limit, " "));
115 String recovered = TrieMap.ByteConverter.getChars(bytes, 0, limit);
116 if (!source.equals(recovered)) {
117 assertEquals("Char/Byte Conversion", source, recovered);
122 public void TestGet() {
123 checkGet(unicodeTestMap, TrieMap.Style.BYTES);
124 checkGet(unicodeTestMap, TrieMap.Style.CHARS);
127 private void checkGet(Map<String, Integer> testmap, TrieMap.Style style) {
128 if (testmap.size() == 0) {
131 TrieMap<Integer> trieMap = TrieMap.Builder.with(style, Option.SMALL, testmap).build();
132 //logln(trieMap.toString());
133 for (Entry<String, Integer> entry : testmap.entrySet()) {
134 Integer value = entry.getValue();
135 String key = entry.getKey();
136 Integer foundValue = trieMap.get(key);
137 if (!value.equals(foundValue)) {
139 if (!HACK_TO_MAKE_TESTS_PASS || 39497 != value) {
140 assertEquals(style + "\tGet of '" + key + "' = {" + Utility.hex(key) + "}", value, foundValue);
146 public void TestTimeIteration() {
147 long comparisonTime = timeIteration(unicodeTestMap, 0, null, 0);
148 timeIteration(unicodeTestMap, comparisonTime, null, 0);
149 timeIteration(unicodeTestMap, comparisonTime, Style.BYTES, 5);
150 timeIteration(unicodeTestMap, comparisonTime, Style.CHARS, 3);
153 @SuppressWarnings("unused")
154 public long timeIteration(Map<String, Integer> testMap, long comparisonTime, Style style, double ratioToMap) {
155 TrieMap<Integer> trieMap = TrieMap.BytesBuilder.with(style, Option.SMALL, testMap).build();
156 TreeMap<String,Integer> expected = new TreeMap<String, Integer>(testMap);
161 Map<String, Integer> map = comparisonTime == 0 ? new TreeMap<String, Integer>(testMap) : new HashMap<String, Integer>(testMap);
163 long mapTime = t.timeIterations(new MyLoop() {
164 public void time(int repeat) {
165 for (int tt = 0; tt < repeat; ++tt) {
166 for (Entry<String, Integer> entry : map.entrySet()) {
167 String key = entry.getKey();
168 Integer value = entry.getValue();
173 if (comparisonTime == 0) {
174 logln("\titeration time\tTREEMAP\tn/a\t" + t.toString(testMap.size()) + "\t\titerations=" + t.getIterations());
176 logln("\titeration time\tHASHMAP\tn/a\t" + t.toString(testMap.size(), comparisonTime) + "\titerations=" + t.getIterations());
180 long trieTime = t.timeIterations(new MyLoop() {
181 public void time(int repeat) {
182 for (int tt = 0; tt < repeat; ++tt) {
183 for (Entry<CharSequence, Integer> entry : trieMap) {
184 CharSequence key = entry.getKey();
185 Integer value = entry.getValue();
190 logln("\titeration time\t" + style + "\tn/a\t" + t.toString(testMap.size(), comparisonTime) + "\titerations=" + t.getIterations());
191 if (!useSmallList && trieTime > ratioToMap * comparisonTime) {
192 errln(style + "\tTime iteration takes too long. Expected:\t< " + ratioToMap * comparisonTime + ", Actual:\t" + trieTime);
198 public void TestContents() {
199 checkContents(unicodeTestMap, Style.BYTES);
200 checkContents(unicodeTestMap, Style.CHARS);
203 public void checkContents(Map<String, Integer> testMap, Style style) {
204 if (testMap.size() == 0) {
207 TrieMap<Integer> trieMap = TrieMap.BytesBuilder.with(style, Option.SMALL, testMap).build();
208 TreeMap<String,Integer> expected = new TreeMap<String, Integer>(testMap);
209 Iterator<Entry<CharSequence, Integer>> trieIterator = trieMap.iterator();
210 Iterator<Entry<String, Integer>> mapIterator = expected.entrySet().iterator();
212 boolean trieOk = trieIterator.hasNext();
213 boolean mapOk = mapIterator.hasNext();
215 assertEquals("Iterators end at same point", mapOk, trieOk);
219 Entry<CharSequence, Integer> trieEntry = trieIterator.next();
220 Entry<String, Integer> mapEntry = mapIterator.next();
221 String mapKey = mapEntry.getKey();
222 CharSequence trieKey = trieEntry.getKey();
223 if (!mapKey.contentEquals(trieKey)) {
224 assertEquals(style + "\tKeys match", mapKey, trieKey.toString());
226 Integer mapValue = mapEntry.getValue();
227 Integer trieValue = trieEntry.getValue();
228 if (!mapValue.equals(trieValue)) {
229 assertEquals(style + "\tValues match", mapValue, trieValue);
234 public void TestSearch() {
235 checkSearch(Style.BYTES);
236 checkSearch(Style.CHARS);
239 public void checkSearch(Style style) {
241 TrieMap<String> trieMap = TrieMap.BytesBuilder.with(style, Option.SMALL, "abc", "first")
242 .add("cdab", "fifth")
243 .add("abcde", "second")
244 .add("abdfg", "third").build();
246 String string = "xabcdab abcde abdfg";
247 @SuppressWarnings("unchecked")
248 Row.R3<Integer, Integer, String>[] expected = new Row.R3[] {
251 Row.of(8,11,"first"),
252 Row.of(8,13,"second"),
253 Row.of(14,19,"third"),
255 List<R3<Integer, Integer, String>> expectedList = Arrays.asList(expected);
256 List<R3<Integer, Integer, String>> actualList = new ArrayList<R3<Integer, Integer, String>>();
258 TrieMap.Matcher<String> matcher = trieMap.getMatcher();
259 matcher.set(string, 0);
263 hasMore = matcher.next();
264 String value = matcher.getValue();
266 int start = matcher.getStart();
267 int end = matcher.getEnd();
268 actualList.add(Row.of(start,end,value));
271 } while (matcher.nextStart());
272 assertEquals(style + "\tTrieMap matcher", expectedList, actualList);
273 // logln(bytes + "\tValue <" + value + "> at "
274 // + start + ".." + end + ", "
275 // + string.substring(0, start) + "|"
276 // + string.substring(start, end) + "|"
277 // + string.substring(end)
281 public void TestTimeBuilding() {
282 long comparisonTime = timeBuilding(unicodeTestMap, 0, null, Option.SMALL, 0);
283 timeBuilding(unicodeTestMap, comparisonTime, null, Option.SMALL, 0);
284 timeBuilding(unicodeTestMap, comparisonTime, Style.BYTES, Option.SMALL, 20);
285 timeBuilding(unicodeTestMap, comparisonTime, Style.BYTES, Option.FAST, 20);
286 timeBuilding(unicodeTestMap, comparisonTime, Style.CHARS, Option.SMALL, 20);
287 timeBuilding(unicodeTestMap, comparisonTime, Style.CHARS, Option.FAST, 20);
290 @SuppressWarnings("unused")
291 public long timeBuilding(Map<String, Integer> testmap, long comparisonTime, Style style, Option option, double ratioToMap) {
295 if (comparisonTime == 0) {
296 long mapTime = t.timeIterations(new MyLoop() {
297 public void time(int repeat) {
298 for (int tt = 0; tt < repeat; ++tt) {
299 Map<String, Integer> map2 = new TreeMap<String, Integer>(map);
303 logln("\tbuild time\tTREEMAP\tn/a\t" + t.toString(testmap.size()) + "\t\titerations=" + t.getIterations());
306 long mapTime = t.timeIterations(new MyLoop() {
307 public void time(int repeat) {
308 for (int tt = 0; tt < repeat; ++tt) {
309 Map<String, Integer> map2 = new HashMap<String, Integer>(map);
313 logln("\tbuild time\tHASHMAP\tn/a\t" + t.toString(testmap.size(), comparisonTime) + "\titerations=" + t.getIterations());
317 long trieTime = t.timeIterations(new MyLoop() {
318 public void time(int repeat) {
319 for (int tt = 0; tt < repeat; ++tt) {
320 trieMap = TrieMap.BytesBuilder.with(style, option, map).build();
323 }, null, testmap, style, option);
325 logln("\tbuild time\t" + style + "\t" + option + "\t" + t.toString(testmap.size(), comparisonTime) + "\titerations=" + t.getIterations());
326 if (!useSmallList && trieTime > ratioToMap * comparisonTime) {
327 errln(style + "\t" + option + "\tTrie build takes too long. Expected:\t< " + nf.format(ratioToMap * comparisonTime) + ", Actual:\t" + nf.format(trieTime));
333 public void TestSize() {
334 int size = checkSize(0, null, Option.SMALL, 0);
335 checkSize(size, Style.BYTES, Option.SMALL, 0.20);
336 checkSize(size, Style.BYTES, Option.FAST, 0.20);
337 checkSize(size, Style.CHARS, Option.SMALL, 0.30);
338 checkSize(size, Style.CHARS, Option.FAST, 0.30);
343 * @param ratioToMap TODO
346 private int checkSize(int comparisonSize, Style style, Option option, double ratioToMap) {
348 int mapKeyByteSize = 0;
349 TreeMap<String, Integer> map = new TreeMap<String, Integer>(unicodeTestMap);
350 for (Entry<String, Integer> entry : map.entrySet()) {
351 mapKeyByteSize += 8 * (int) ((((entry.getKey().length()) * 2) + 45) / 8);
353 logln("\tkey byte size\tTREEMAP\tn/a\t" + nf.format(mapKeyByteSize));
354 return mapKeyByteSize;
356 TrieMap<Integer> trieMap = TrieMap.BytesBuilder.with(style, option, unicodeTestMap).build();
358 int trieKeyByteSize = trieMap.keyByteSize();
359 logln("\tkey byte size\t" + style + "\t" + option + "\t" + nf.format(trieKeyByteSize) + "\t\t" + pf.format(trieKeyByteSize/(double)comparisonSize - 1D) + "");
362 if (!useSmallList && trieKeyByteSize > ratioToMap * comparisonSize) {
363 errln(style + "\t" + option + "\ttrieKeyByteSize too large. Expected:\t< " + nf.format(ratioToMap * comparisonSize) + ", Actual:\t" + nf.format(trieKeyByteSize));
365 return trieKeyByteSize;
369 public void TestTimeGet() {
370 HashSet<String> keySet = new HashSet<String>(unicodeTestMap.keySet());
371 ULocale[] locales = ULocale.getAvailableLocales();
373 for (ULocale locale : locales) {
374 if (locale.getDisplayCountry().length() != 0) {
378 for (int scriptCodeInt = 0; scriptCodeInt < UScript.CODE_LIMIT; ++scriptCodeInt) {
379 String scriptCode = UScript.getShortName(scriptCodeInt);
380 localeName = ULocale.getDisplayScript("und-" + scriptCode, locale);
381 if (!localeName.equals(scriptCode)) {
382 if (!keySet.contains(localeName)) {
383 keySet.add(localeName);
390 logln("\tExtra Key Elements\t" + i);
392 ArrayList<String> keys = new ArrayList<String>(keySet);
394 long comparisonTime = timeGet(keys, unicodeTestMap, 0, null, 0);
395 timeGet(keys, unicodeTestMap, comparisonTime, null, 0);
396 timeGet(keys, unicodeTestMap, comparisonTime, Style.BYTES, 3);
397 timeGet(keys, unicodeTestMap, comparisonTime, Style.CHARS, 3);
400 @SuppressWarnings("unused")
401 public long timeGet(ArrayList<String> keys, Map<String, Integer> testmap, long comparisonTime, Style style, int ratioToMap) {
403 TrieMap<Integer> trieMap = TrieMap.Builder.with(style, Option.SMALL, testmap).build();
406 Map<String, Integer> map = comparisonTime == 0 ? new TreeMap<String, Integer>(testmap) : new HashMap<String, Integer>(testmap);
408 long mapTime = t.timeIterations(new MyLoop() {
409 public void time(int repeat) {
410 for (int tt = 0; tt < repeat; ++tt) {
411 for (String key : keys) {
412 Integer foundValue = map.get(key);
417 if (comparisonTime == 0) {
418 logln("\tget() time\tTREEMAP\tn/a\t" + t.toString(keys.size()) + "\t\titerations=" + t.getIterations());
420 logln("\tget() time\tHASHMAP\tn/a\t" + t.toString(keys.size(), comparisonTime) + "\titerations=" + t.getIterations());
424 long trieTime = t.timeIterations(new MyLoop() {
425 public void time(int repeat) {
426 for (int tt = 0; tt < repeat; ++tt) {
427 for (String key : keys) {
428 Integer foundValue = trieMap.get(key);
436 // for (int tt = 0; tt < repeat; ++tt) {
437 // for (String key : keys) {
438 // Integer foundValue = trieMap.get(key);
441 // long trieTime = t.getDuration();
442 logln("\tget() time\t" + style + "\tn/a\t" + t.toString(keys.size(), comparisonTime) + "\titerations=" + t.getIterations());
443 if (!useSmallList && trieTime > ratioToMap * comparisonTime) {
444 errln(style + "\tTime iteration takes too long. Expected:\t< " + ratioToMap * comparisonTime + ", Actual:\t" + trieTime);
450 static abstract class MyLoop extends Timer.Loop {
451 ArrayList<String> keys;
452 TrieMap<Integer> trieMap;
453 Map<String, Integer> map;
456 public void init(Object... params) {
457 if (params.length > 0) {
458 keys = (ArrayList<String>) params[0];
460 if (params.length > 1) {
461 if (params[1] instanceof Map) {
462 map = (Map<String, Integer>) params[1];
464 trieMap = (TrieMap<Integer>) params[1];
467 if (params.length > 2) {
468 style = (Style) params[2];
470 if (params.length > 3) {
471 option = (Option) params[3];
474 abstract public void time(int repeat);
477 // static class Storage {
481 // public Storage(int initialCapacity) {
482 // buffer = new char[initialCapacity];
485 // public CharSequence add(CharSequence input) {
486 // int start = limit;
487 // int length = input.length();
488 // for (int i = 0; i < length; ++i) {
490 // buffer[limit++] = input.charAt(i);
491 // } catch (Exception e) {
492 // // we failed to add (limit-1)
493 // int newCapacity = buffer.length * 3 / 2 + length;
494 // //System.out.println(buffer.length + " => " + newCapacity);
495 // char[] temp = new char[newCapacity];
496 // System.arraycopy(buffer, 0, temp, 0, buffer.length);
498 // buffer[limit - 1] = input.charAt(i);
501 // return new StorageCharSequence(start, limit);
504 // final class StorageCharSequence implements CharSequence, Comparable<CharSequence> {
505 // private int start;
508 // public StorageCharSequence(int start, int limit) {
509 // if (start < 0 || start > limit || limit > buffer.length) {
510 // throw new ArrayIndexOutOfBoundsException();
512 // this.start = start;
513 // this.len = limit - start;
515 // public char charAt(int arg0) {
516 // return arg0 < 0 || arg0 >= len ? buffer[-1] : buffer[arg0 + start];
518 // public int length() {
521 // public CharSequence subSequence(int start, int limit) {
522 // return new StorageCharSequence(this.start + start, this.start + limit);
524 // public String toString() {
525 // return String.valueOf(buffer, start, len);
527 // public int hashCode() {
529 // int limit = start + len;
530 // for (int i = start; i < limit; ++i) {
536 // public boolean equals(Object other) {
538 // StorageCharSequence that = (StorageCharSequence) other;
540 // return CharSequences.equalsChars(this, that);
541 // } catch (Exception e) {
545 // public int compareTo(CharSequence other) {
547 // return CharSequences.compare(this, other);
552 // public void TestStorage() {
553 // ArrayList<String> keys = new ArrayList<String>(unicodeTestMap.keySet());
554 // int repeat = REPEAT * 10;
557 // for (int tt = 0; tt < repeat; ++tt) {
558 // Set<CharSequence> store = new HashSet<CharSequence>();
559 // // Storage storage = new Storage(1024);
560 // for (String key : keys) {
562 // // CharSequence item = storage.add(key);
563 // // if (!store.contains(item)) {
564 // // store.add(item);
566 // // if (!CharSequences.equalsChars(key, item)) {
567 // // throw new IllegalArgumentException(key);
570 // CharSequence[] raw = store.toArray(new CharSequence[store.size()]);
573 // long comparisonTime = t.getDuration();
574 // logln("\tget() time\tHashSet,sort\tn/a\t" + t.toString(repeat*keys.size()));
578 // for (int tt = 0; tt < repeat; ++tt) {
579 // Set<CharSequence> store = new TreeSet<CharSequence>();
580 // for (String key : keys) {
583 // CharSequence[] raw = store.toArray(new CharSequence[store.size()]);
585 // long trieTime = t.getDuration();
586 // logln("\tget() time\tTreeSet\tn/a\t" + t.toString(repeat*keys.size(), comparisonTime));