2 *******************************************************************************
3 * Copyright (C) 2011, Google, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
7 package com.ibm.icu.dev.test.util;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.Iterator;
12 import java.util.List;
14 import java.util.Map.Entry;
16 import com.ibm.icu.impl.Utility;
17 import com.ibm.icu.util.BytesTrie;
18 import com.ibm.icu.util.BytesTrie.Result;
19 import com.ibm.icu.util.BytesTrieBuilder;
20 import com.ibm.icu.util.CharsTrie;
21 import com.ibm.icu.util.CharsTrieBuilder;
22 import com.ibm.icu.util.StringTrieBuilder.Option;
24 // would be nice to have a BytesTrieBuilder.add(aByte);
25 // question: can bytetrie store <"",x>?
26 // can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error,
27 // should happen on add, not on build.
28 // the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something.
29 // need class description; examples of usage; which method can/should be called after which others.
31 public abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> {
37 private static final boolean DEBUG = true;
39 protected final V[] intToValue;
40 protected final int size;
42 protected TrieMap(V[] intToValue, int size) {
43 this.intToValue = intToValue;
47 public int keyByteSize() {
51 public static abstract class Builder<V> {
53 protected List<V> intToValueTemp = new ArrayList<V>();
54 protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>();
56 static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) {
57 return with(style, Option.SMALL, keyValuePairs);
60 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) {
61 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
62 : new CharsTrieMap.CharsBuilder<V>();
63 result.option = option;
64 return result.addAll(keyValuePairs);
67 static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) {
68 return with(style, Option.SMALL, key, value);
71 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) {
72 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
73 : new CharsTrieMap.CharsBuilder<V>();
74 result.option = option;
75 return result.add(key, value);
78 public abstract Builder<V> add(CharSequence key, V value);
80 public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs);
82 public abstract TrieMap<V> build();
85 abstract public V get(CharSequence test);
88 * Warning: the entry contents are only valid until the next next() call!!
90 abstract public Iterator<Entry<CharSequence, V>> iterator();
92 abstract public Matcher<V> getMatcher();
94 public abstract static class Matcher<V> {
95 protected CharSequence text = "";
96 protected int start = 0;
97 protected int current = 0;
99 abstract void set(CharSequence string, int i);
101 abstract boolean next();
103 abstract V getValue();
105 abstract int getStart();
107 abstract int getEnd();
109 abstract boolean nextStart();
112 private static class BytesTrieMap<V> extends TrieMap<V> {
113 private final BytesTrie bytesTrie;
114 private byte[] bytes = new byte[3];
116 private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) {
117 super(intToValue, size);
118 this.bytesTrie = bytesTrie;
121 public V get(CharSequence test) {
122 int length = test.length();
125 return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null;
127 Result result = Result.NO_VALUE;
128 for (int i = 0; i < length; ++i) {
129 if (!result.hasNext()) {
132 char c = test.charAt(i);
133 int limit = ByteConverter.getBytes(c, bytes, 0);
134 result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit);
136 return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
138 // int length = test.length();
139 // if (length == 0) {
142 // bytesTrie.reset();
143 // Result result = null;
144 // byte[] bytes = new byte[3];
145 // for (int i = 0; i < length; ++i) {
146 // char c = test.charAt(i);
147 // int limit = ByteConverter.getBytes(c, bytes, 0);
148 // for (int j = 0; j < limit; ++j) {
149 // result = bytesTrie.next(bytes[j]&0xFF);
150 // if (!result.matches()) {
155 // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
158 public String toString() {
159 return toString(bytesTrie, " : ", "\n");
163 * Warning: the entry contents are only valid until the next next() call!!
165 public Iterator<Entry<CharSequence, V>> iterator() {
166 // TODO Auto-generated method stub
167 return new BytesIterator();
170 public TrieMap.Matcher<V> getMatcher() {
171 return new BytesMatcher();
174 private class BytesIterator implements Iterator<Entry<CharSequence, V>> {
175 BytesTrie.Iterator iterator = bytesTrie.iterator();
176 BytesEntry entry = new BytesEntry();
178 public boolean hasNext() {
179 return iterator.hasNext();
182 public Entry<CharSequence, V> next() {
183 entry.bytesEntry = iterator.next();
187 public void remove() {
188 throw new UnsupportedOperationException();
192 private class BytesEntry implements Entry<CharSequence, V> {
193 public BytesTrie.Entry bytesEntry;
194 StringBuilder buffer = new StringBuilder();
196 public CharSequence getKey() {
198 ByteConverter.getChars(bytesEntry, buffer);
202 public V getValue() {
203 return intToValue[bytesEntry.value];
206 public V setValue(V value) {
207 throw new UnsupportedOperationException();
211 public class BytesMatcher extends Matcher<V> {
212 private byte[] bytes = new byte[3];
213 private V value = null;
215 public void set(CharSequence text, int start) {
218 this.current = start;
221 public int getStart() {
225 public int getEnd() {
230 * Finds the next match. Returns false when there are no possible further matches from the current start
231 * point. Once that happens, call nextStart(); Call getValue to get the current value.
233 * @return false when done. There may be a value, however.
235 public boolean next() {
236 while (current < text.length()) {
237 char c = text.charAt(current++);
238 int limit = ByteConverter.getBytes(c, bytes, 0);
239 for (int j = 0; j < limit; ++j) {
240 Result result = bytesTrie.next(bytes[j]);
241 if (result.hasValue()) {
243 throw new IllegalArgumentException("Data corrupt");
245 value = intToValue[bytesTrie.getValue()];
246 return result.hasNext();
247 } else if (!result.matches()) {
257 public boolean nextStart() {
258 if (start >= text.length()) {
267 public V getValue() {
273 public static class BytesBuilder<V> extends Builder<V> {
274 BytesTrieBuilder builder = new BytesTrieBuilder();
275 byte[] bytes = new byte[200];
276 List<String> debugBytes = DEBUG ? new ArrayList<String>() : null;
278 public BytesBuilder<V> add(CharSequence key, V value) {
279 // traverse the values, and get a mapping of a byte string to list of
280 // integers, and a mapping from those integers to a set of values
282 if (option == Option.SMALL) {
283 index = valueToIntegerTemp.get(value);
285 index = intToValueTemp.size();
286 intToValueTemp.add(value);
287 valueToIntegerTemp.put(value, index);
290 index = intToValueTemp.size();
291 intToValueTemp.add(value);
293 // dumb implementation for now
294 // the buffer size is at most 3 * number_of_chars
295 if (bytes.length < key.length() * 3) {
296 bytes = new byte[64 + key.length() * 3];
299 for (int i = 0; i < key.length(); ++i) {
300 char c = key.charAt(i);
301 limit = ByteConverter.getBytes(c, bytes, limit);
304 builder.add(bytes, limit, index);
306 } catch (Exception e) {
307 ArrayList<String> list = new ArrayList<String>();
308 for (int i = 0; i < limit; ++i) {
309 list.add(Utility.hex(bytes[i]));
311 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e);
315 public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) {
316 for (Entry<K, V> entry : keyValuePairs.entrySet()) {
317 add(entry.getKey(), entry.getValue());
322 public TrieMap<V> build() {
323 int size = builder.buildByteBuffer(option).remaining();
324 BytesTrie bytesTrie = builder.build(option);
325 @SuppressWarnings("unchecked")
326 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
327 return new BytesTrieMap<V>(bytesTrie, intToValueArray, size);
331 private static class CharsTrieMap<V> extends TrieMap<V> {
332 private final CharsTrie charsTrie;
334 private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) {
335 super(intToValue, size);
336 this.charsTrie = charsTrie;
339 public V get(CharSequence test) {
340 Result result = charsTrie.reset().next(test, 0, test.length());
341 return result.hasValue() ? intToValue[charsTrie.getValue()] : null;
344 public String toString() {
345 return toString(charsTrie, " : ", "\n");
349 * Warning: the entry contents are only valid until the next next() call!!
351 public Iterator<Entry<CharSequence, V>> iterator() {
352 // TODO Auto-generated method stub
353 return new CharsIterator();
356 public TrieMap.Matcher<V> getMatcher() {
357 return new CharsMatcher();
360 private class CharsIterator implements Iterator<Entry<CharSequence, V>> {
361 CharsTrie.Iterator iterator = charsTrie.iterator();
362 CharsEntry entry = new CharsEntry();
364 public boolean hasNext() {
365 return iterator.hasNext();
368 public Entry<CharSequence, V> next() {
369 entry.charsEntry = iterator.next();
373 public void remove() {
374 throw new UnsupportedOperationException();
379 private class CharsEntry implements Entry<CharSequence, V> {
380 public CharsTrie.Entry charsEntry;
382 public CharSequence getKey() {
383 return charsEntry.chars;
386 public V getValue() {
387 return intToValue[charsEntry.value];
390 public V setValue(V value) {
391 throw new UnsupportedOperationException();
395 public class CharsMatcher extends Matcher<V> {
396 private V value = null;
398 public void set(CharSequence text, int start) {
401 this.current = start;
404 public int getStart() {
408 public int getEnd() {
413 * Finds the next match. Returns false when there are no possible further matches from the current start
414 * point. Once that happens, call nextStart(); Call getValue to get the current value.
416 * @return false when done. There may be a value, however.
418 public boolean next() {
419 while (current < text.length()) {
420 char c = text.charAt(current++);
421 Result result = charsTrie.next(c);
422 if (result.hasValue()) {
423 value = intToValue[charsTrie.getValue()];
424 return result.hasNext();
425 } else if (!result.matches()) {
435 public boolean nextStart() {
436 if (start >= text.length()) {
445 public V getValue() {
451 public static class CharsBuilder<V> extends Builder<V> {
452 CharsTrieBuilder builder = new CharsTrieBuilder();
454 public CharsBuilder<V> add(CharSequence key, V value) {
455 // traverse the values, and get a mapping of a byte string to list of
456 // integers, and a mapping from those integers to a set of values
458 if (option == Option.SMALL) {
459 index = valueToIntegerTemp.get(value);
461 index = intToValueTemp.size();
462 intToValueTemp.add(value);
463 valueToIntegerTemp.put(value, index);
466 index = intToValueTemp.size();
467 intToValueTemp.add(value);
470 builder.add(key.toString(), index);
472 } catch (Exception e) {
473 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e);
477 public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) {
478 for (Entry<K, V> entry : keyValuePairs.entrySet()) {
479 add(entry.getKey(), entry.getValue());
484 public TrieMap<V> build() {
485 CharSequence buildCharSequence = builder.buildCharSequence(option);
486 int size = 2 * buildCharSequence.length();
487 // CharsTrie charsTrie = builder.build(option);
488 CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0);
489 @SuppressWarnings("unchecked")
490 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
491 return new CharsTrieMap<V>(charsTrie, intToValueArray, size);
496 * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and
497 * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended
501 * 0000..007F - 0xxx xxxx
502 * 0000..7EFF - 1yyy yyyy xxxx xxxx
503 * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx
506 static class ByteConverter {
507 public static int getBytes(char source, byte[] bytes, int limit) {
509 bytes[limit++] = (byte) source;
510 } else if (source < 0x7F00) {
511 bytes[limit++] = (byte) (0x80 | (source >> 8));
512 bytes[limit++] = (byte) source;
514 bytes[limit++] = (byte) -1;
515 bytes[limit++] = (byte) (source >> 8);
516 bytes[limit++] = (byte) source;
522 * Transform the string into a sequence of bytes, appending them after start, and return the new limit.
524 public static int getBytes(CharSequence source, byte[] bytes, int limit) {
525 for (int i = 0; i < source.length(); ++i) {
526 limit = getBytes(source.charAt(i), bytes, limit);
532 * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking.
534 public static String getChars(byte[] bytes, int start, int limit) {
535 StringBuilder buffer = new StringBuilder();
536 char[] output = new char[1];
537 for (int i = start; i < limit;) {
538 i = getChar(bytes, i, output);
539 buffer.append(output[0]);
541 return buffer.toString();
544 public static int getChar(byte[] bytes, int start, char[] output) {
545 byte b = bytes[start++];
547 output[0] = (char) b;
548 } else if (b != (byte) -1) { // 2 bytes
550 int b2 = 0xFF & bytes[start++];
551 output[0] = (char) ((b1 << 8) | b2);
553 int b2 = bytes[start++];
554 int b3 = 0xFF & bytes[start++];
555 output[0] = (char) ((b2 << 8) | b3);
560 private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) {
561 int len = entry.bytesLength();
562 for (int i = 0; i < len;) {
563 byte b = entry.byteAt(i++);
565 stringBuilder.append((char) b);
566 } else if (b != (byte) -1) { // 2 bytes
568 int b2 = 0xFF & entry.byteAt(i++);
569 stringBuilder.append((char) ((b1 << 8) | b2));
571 int b2 = entry.byteAt(i++);
572 int b3 = 0xFF & entry.byteAt(i++);
573 stringBuilder.append((char) ((b2 << 8) | b3));
579 public static String toString(BytesTrie bytesTrie2) {
580 return toString(bytesTrie2, " : ", "\n");
583 public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
584 StringBuilder buffer = new StringBuilder();
585 BytesTrie.Iterator iterator = bytesTrie2.iterator();
586 while (iterator.hasNext()) {
587 BytesTrie.Entry bytesEntry = iterator.next();
588 int len = bytesEntry.bytesLength();
589 byte[] bytes = new byte[len];
590 bytesEntry.copyBytesTo(bytes, 0);
591 buffer.append(Utility.hex(bytes, 0, len, " "))
592 .append(keyValueSeparator)
593 .append(bytesEntry.value)
594 .append(itemSeparator);
596 return buffer.toString();
599 public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
600 StringBuilder buffer = new StringBuilder();
601 CharsTrie.Iterator iterator = bytesTrie2.iterator();
602 while (iterator.hasNext()) {
603 CharsTrie.Entry bytesEntry = iterator.next();
604 buffer.append(Utility.hex(bytesEntry.chars))
605 .append(keyValueSeparator)
606 .append(bytesEntry.value)
607 .append(itemSeparator);
609 return buffer.toString();