2 //#if defined(FOUNDATION10) || defined(J2SE13)
5 *******************************************************************************
6 * Copyright (C) 1996-2009, International Business Machines Corporation and *
7 * others. All Rights Reserved. *
8 *******************************************************************************
10 package com.ibm.icu.dev.test.util;
15 import com.ibm.icu.impl.Utility;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
18 import com.ibm.icu.text.UnicodeSetIterator;
19 import com.ibm.icu.util.Freezable;
21 * Class for mapping Unicode characters to values
22 * Much smaller storage than using HashMap, and much faster and more compact than
23 * a list of UnicodeSets.
27 public final class UnicodeMap implements Cloneable, Freezable, Externalizable {
31 private static final long serialVersionUID = -6540936876295804105L;
32 static final boolean ASSERTIONS = false;
33 static final long GROWTH_PERCENT = 200; // 100 is no growth!
34 static final long GROWTH_GAP = 10; // extra bump!
37 // two parallel arrays to save memory. Wish Java had structs.
38 private int[] transitions;
39 private Object[] values;
41 private LinkedHashSet availableValues = new LinkedHashSet();
42 private transient boolean staleAvailableValues;
44 private transient boolean errorOnReset;
45 private transient boolean locked;
46 private int lastIndex;
50 public UnicodeMap clear() {
51 if (locked) throw new UnsupportedOperationException("Attempt to modify locked object");
53 transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0};
54 values = new Object[10];
56 availableValues.clear();
57 staleAvailableValues = false;
65 public boolean equals(Object other) {
66 if (other == null) return false;
68 UnicodeMap that = (UnicodeMap) other;
69 if (length != that.length) return false;
70 for (int i = 0; i < length-1; ++i) {
71 if (transitions[i] != that.transitions[i]) return false;
72 if (!areEqual(values[i], that.values[i])) return false;
75 } catch (ClassCastException e) {
80 public int getHashCode(Object o) {
85 public static boolean areEqual(Object a , Object b) {
86 if (a == b) return true;
87 if (a == null || b == null) return false;
91 public int hashCode() {
93 // TODO might want to abbreviate this for speed.
94 for (int i = 0; i < length-1; ++i) {
95 result = 37*result + transitions[i];
96 result = 37*result + getHashCode(values[i]);
102 * Standard clone. Warning, as with Collections, does not do deep clone.
104 public Object cloneAsThawed() {
105 UnicodeMap that = new UnicodeMap();
106 that.length = length;
107 that.transitions = (int[]) transitions.clone();
108 that.values = (Object[]) values.clone();
109 that.availableValues = new LinkedHashSet(availableValues);
114 /* for internal consistency checking */
116 void _checkInvariants() {
118 || length > transitions.length
119 || transitions.length != values.length) {
120 throw new IllegalArgumentException("Invariant failed: Lengths bad");
122 for (int i = 1; i < length-1; ++i) {
123 if (areEqual(values[i-1], values[i])) {
124 throw new IllegalArgumentException("Invariant failed: values shared at "
125 + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
126 + "\t" + Utility.hex(i) + ": <" + values[i] + ">"
130 if (transitions[0] != 0 || transitions[length-1] != 0x110000) {
131 throw new IllegalArgumentException("Invariant failed: bounds set wrong");
133 for (int i = 1; i < length-1; ++i) {
134 if (transitions[i-1] >= transitions[i]) {
135 throw new IllegalArgumentException("Invariant failed: not monotonic"
136 + "\t" + Utility.hex(i-1) + ": " + transitions[i-1]
137 + "\t" + Utility.hex(i) + ": " + transitions[i]
144 * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
145 * Assumes that 0 <= codepoint <= 0x10FFFF
149 private int _findIndex(int c) {
152 int i = (lo + hi) >>> 1;
153 // invariant: c >= list[lo]
154 // invariant: c < list[hi]
156 if (c < transitions[i]) {
163 if (ASSERTIONS) _checkFind(c, lo);
167 private void _checkFind(int codepoint, int value) {
168 int other = __findIndex(codepoint);
169 if (other != value) {
170 throw new IllegalArgumentException("Invariant failed: binary search"
171 + "\t" + Utility.hex(codepoint) + ": " + value
172 + "\tshould be: " + other);
176 private int __findIndex(int codepoint) {
177 // TODO use binary search
178 for (int i = length-1; i > 0; --i) {
179 if (transitions[i] <= codepoint) return i;
187 static final int SHIFT = 8;
188 int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found
189 boolean startsValid = false;
190 private int findIndex(int codepoint) {
193 for (int i = 1; i < length; ++i) {
197 for (int i = length-1; i > 0; --i) {
198 if (transitions[i] <= codepoint) return i;
205 * Remove the items from index through index+count-1.
206 * Logically reduces the size of the internal arrays.
210 private void _removeAt(int index, int count) {
211 for (int i = index + count; i < length; ++i) {
212 transitions[i-count] = transitions[i];
213 values[i-count] = values[i];
218 * Add a gap from index to index+count-1.
219 * The values there are undefined, and must be set.
220 * Logically grows arrays to accomodate. Actual growth is limited
224 private void _insertGapAt(int index, int count) {
225 int newLength = length + count;
226 int[] oldtransitions = transitions;
227 Object[] oldvalues = values;
228 if (newLength > transitions.length) {
229 int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100);
230 transitions = new int[allocation];
231 values = new Object[allocation];
232 for (int i = 0; i < index; ++i) {
233 transitions[i] = oldtransitions[i];
234 values[i] = oldvalues[i];
237 for (int i = length - 1; i >= index; --i) {
238 transitions[i+count] = oldtransitions[i];
239 values[i+count] = oldvalues[i];
245 * Associates code point with value. Removes any previous association.
248 * @return this, for chaining
250 private UnicodeMap _put(int codepoint, Object value) {
251 // Warning: baseIndex is an invariant; must
252 // be defined such that transitions[baseIndex] < codepoint
253 // at end of this routine.
255 if (transitions[lastIndex] <= codepoint
256 && codepoint < transitions[lastIndex+1]) {
257 baseIndex = lastIndex;
259 baseIndex = _findIndex(codepoint);
261 int limitIndex = baseIndex + 1;
262 // cases are (a) value is already set
263 if (areEqual(values[baseIndex], value)) return this;
264 if (locked) throw new UnsupportedOperationException("Attempt to modify locked object");
265 if (errorOnReset && values[baseIndex] != null) {
266 throw new IllegalArgumentException("Attempt to reset value for " + Utility.hex(codepoint)
267 + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
270 // adjust the available values
271 staleAvailableValues = true;
272 availableValues.add(value); // add if not there already
274 int baseCP = transitions[baseIndex];
275 int limitCP = transitions[limitIndex];
276 // we now start walking through the difference case,
277 // based on whether we are at the start or end of range
278 // and whether the range is a single character or multiple
280 if (baseCP == codepoint) {
281 // CASE: At very start of range
282 boolean connectsWithPrevious =
283 baseIndex != 0 && areEqual(value, values[baseIndex-1]);
285 if (limitCP == codepoint + 1) {
286 // CASE: Single codepoint range
287 boolean connectsWithFollowing =
288 baseIndex < length - 1 && areEqual(value, values[limitIndex]);
290 if (connectsWithPrevious) {
291 // A1a connects with previous & following, so remove index
292 if (connectsWithFollowing) {
293 _removeAt(baseIndex, 2);
295 _removeAt(baseIndex, 1); // extend previous
297 --baseIndex; // fix up
298 } else if (connectsWithFollowing) {
299 _removeAt(baseIndex, 1); // extend following backwards
300 transitions[baseIndex] = codepoint;
302 // doesn't connect on either side, just reset
303 values[baseIndex] = value;
305 } else if (connectsWithPrevious) {
306 // A.1: start of multi codepoint range
308 ++transitions[baseIndex]; // extend previous
310 // otherwise insert new transition
311 transitions[baseIndex] = codepoint+1; // fix following range
312 _insertGapAt(baseIndex, 1);
313 values[baseIndex] = value;
314 transitions[baseIndex] = codepoint;
316 } else if (limitCP == codepoint + 1) {
317 // CASE: at end of range
318 // if connects, just back up range
319 boolean connectsWithFollowing =
320 baseIndex < length - 1 && areEqual(value, values[limitIndex]);
322 if (connectsWithFollowing) {
323 --transitions[limitIndex];
326 _insertGapAt(limitIndex, 1);
327 transitions[limitIndex] = codepoint;
328 values[limitIndex] = value;
331 // CASE: in middle of range
332 // insert gap, then set the new range
333 _insertGapAt(++baseIndex,2);
334 transitions[baseIndex] = codepoint;
335 values[baseIndex] = value;
336 transitions[baseIndex+1] = codepoint + 1;
337 values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
339 lastIndex = baseIndex; // store for next time
342 private UnicodeMap _putAll(int startCodePoint, int endCodePoint, Object value) {
343 for (int i = startCodePoint; i <= endCodePoint; ++i) {
349 * Sets the codepoint value.
352 * @return this (for chaining)
354 public UnicodeMap put(int codepoint, Object value) {
355 if (codepoint < 0 || codepoint > 0x10FFFF) {
356 throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
358 _put(codepoint, value);
359 if (ASSERTIONS) _checkInvariants();
363 * Adds bunch o' codepoints; otherwise like put.
366 * @return this (for chaining)
368 public UnicodeMap putAll(UnicodeSet codepoints, Object value) {
370 UnicodeSetIterator it = new UnicodeSetIterator(codepoints);
371 while (it.nextRange()) {
372 _putAll(it.codepoint, it.codepointEnd, value);
378 * Adds bunch o' codepoints; otherwise like add.
379 * @param startCodePoint
380 * @param endCodePoint
382 * @return this (for chaining)
384 public UnicodeMap putAll(int startCodePoint, int endCodePoint, Object value) {
385 if (startCodePoint < 0 || endCodePoint > 0x10FFFF) {
386 throw new IllegalArgumentException("Codepoint out of range: "
387 + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint));
390 for (int i = startCodePoint; i <= endCodePoint; ++i) {
396 * Add all the (main) values from a Unicode property
397 * @param prop the property to add to the map
398 * @return this (for chaining)
400 public UnicodeMap putAll(UnicodeProperty prop) {
402 for (int i = 0; i <= 0x10FFFF; ++i) {
403 _put(i, prop.getValue(i));
409 * Add all the (main) values from a Unicode property
410 * @param prop the property to add to the map
411 * @return this (for chaining)
413 public UnicodeMap putAll(UnicodeMap prop) {
415 for (int i = 0; i <= 0x10FFFF; ++i) {
416 _put(i, prop.getValue(i));
422 * Set the currently unmapped Unicode code points to the given value.
423 * @param value the value to set
424 * @return this (for chaining)
426 public UnicodeMap setMissing(Object value) {
427 // fast path, if value not yet present
428 if (!getAvailableValues().contains(value)) {
429 staleAvailableValues = true;
430 availableValues.add(value);
431 for (int i = 0; i < length; ++i) {
432 if (values[i] == null) values[i] = value;
436 return putAll(getSet(null), value);
440 * Returns the set associated with a given value. Deposits into
441 * result if it is not null. Remember to clear if you just want
447 public UnicodeSet getSet(Object value, UnicodeSet result) {
448 if (result == null) result = new UnicodeSet();
449 for (int i = 0; i < length - 1; ++i) {
450 if (areEqual(value, values[i])) {
451 result.add(transitions[i], transitions[i+1]-1);
457 public UnicodeSet getSet(Object value) {
458 return getSet(value,null);
461 public UnicodeSet keySet() {
462 return getSet(null,null).complement();
465 * Returns the list of possible values. Deposits each non-null value into
466 * result. Creates result if it is null. Remember to clear result if
467 * you are not appending to existing collection.
471 public Collection getAvailableValues(Collection result) {
472 if (staleAvailableValues) {
473 // collect all the current values
474 // retain them in the availableValues
475 Set temp = new HashSet();
476 for (int i = 0; i < length - 1; ++i) {
477 if (values[i] != null) temp.add(values[i]);
479 availableValues.retainAll(temp);
480 staleAvailableValues = false;
482 if (result == null) result = new ArrayList(availableValues.size());
483 result.addAll(availableValues);
490 public Collection getAvailableValues() {
491 return getAvailableValues(null);
494 * Gets the value associated with a given code point.
495 * Returns null, if there is no such value.
499 public Object getValue(int codepoint) {
500 if (codepoint < 0 || codepoint > 0x10FFFF) {
501 throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
503 return values[_findIndex(codepoint)];
507 * Change a new string from the source string according to the mappings. For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString()
511 public String fold(String source) {
512 StringBuffer result = new StringBuffer();
514 for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
515 cp = UTF16.charAt(source, i);
516 Object mResult = getValue(cp);
517 if (mResult != null) {
518 result.append(mResult);
520 UTF16.append(result, cp);
523 return result.toString();
526 public interface Composer {
527 Object compose(int codePoint, Object a, Object b);
530 public UnicodeMap composeWith(UnicodeMap other, Composer composer) {
531 for (int i = 0; i <= 0x10FFFF; ++i) {
532 Object v1 = getValue(i);
533 Object v2 = other.getValue(i);
534 Object v3 = composer.compose(i, v1, v2);
535 if (v1 != v3 && (v1 == null || !v1.equals(v3))) put(i, v3);
540 public UnicodeMap composeWith(UnicodeSet set, Object value, Composer composer) {
541 for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
542 int i = it.codepoint;
543 Object v1 = getValue(i);
544 Object v3 = composer.compose(i, v1, value);
545 if (v1 != v3 && (v1 == null || !v1.equals(v3))) put(i, v3);
551 * Follow the style used by UnicodeSetIterator
553 public static class MapIterator {
554 public int codepoint;
555 public int codepointEnd;
558 private UnicodeMap map;
560 private int startRange;
561 private int endRange;
562 private Object lastValue;
564 public MapIterator(UnicodeMap map) {
567 // note: length of 2 means {0, 110000}. Only want to index up to 0!
568 public boolean nextRange() {
569 if (index < 0 || index >= map.length - 1) return false;
570 value = map.values[index];
571 codepoint = startRange = map.transitions[index++];
572 codepointEnd = endRange = map.transitions[index] - 1; // -1 to make limit into end
575 public boolean next() {
576 if (startRange > endRange) {
577 //System.out.println("***" + Utility.hex(startRange) + ".." + Utility.hex(endRange));
578 if (!nextRange()) return false;
579 // index now points AFTER the start of the range
580 lastValue = map.values[index-1];
581 //System.out.println("***" + Utility.hex(codepoint) + ".." + Utility.hex(codepointEnd) + " => " + lastValue);
584 codepoint = codepointEnd = startRange++; // set to first, and iterate
588 public MapIterator reset() {
594 public MapIterator reset(UnicodeMap newMap) {
600 public String toString() {
601 return toString(null);
603 public String toString(Comparator collected) {
604 StringBuffer result = new StringBuffer();
605 if (collected == null) {
606 for (int i = 0; i < length-1; ++i) {
607 Object value = values[i];
608 if (value == null) continue;
609 int start = transitions[i];
610 int end = transitions[i+1]-1;
611 result.append(Utility.hex(start));
612 if (start != end) result.append("..")
613 .append(Utility.hex(end));
614 result.append("\t=> ")
615 .append(values[i] == null ? "null" : values[i].toString())
619 Set set = (Set) getAvailableValues(new TreeSet(collected));
620 for (Iterator it = set.iterator(); it.hasNext();) {
621 Object value = it.next();
622 UnicodeSet s = getSet(value);
625 .append(s.toPattern(true))
629 return result.toString();
632 * @return Returns the errorOnReset.
634 public boolean getErrorOnReset() {
638 * @param errorOnReset The errorOnReset to set.
640 public void setErrorOnReset(boolean errorOnReset) {
641 this.errorOnReset = errorOnReset;
645 * @see com.ibm.icu.dev.test.util.Lockable#isLocked()
647 public boolean isFrozen() {
648 // TODO Auto-generated method stub
653 * @see com.ibm.icu.dev.test.util.Lockable#lock()
655 public Object freeze() {
660 static final boolean DEBUG_WRITE = false;
662 // TODO Fix to serialize more than just strings.
663 // Only if all the items are strings will we do the following compression
664 // Otherwise we'll just use Java Serialization, bulky as it is
665 public void writeExternal(ObjectOutput out1) throws IOException {
666 DataOutputCompressor sc = new DataOutputCompressor(out1);
667 // if all objects are strings
668 Collection availableVals = getAvailableValues();
669 boolean allStrings = allAreString(availableVals);
670 sc.writeBoolean(allStrings);
671 Map object_index = new LinkedHashMap();
672 if (allAreString(availableVals)) {
673 sc.writeStringSet(new TreeSet(availableVals), object_index);
675 sc.writeCollection(availableVals, object_index);
677 sc.writeUInt(length);
678 int lastTransition = -1;
679 int lastValueNumber = 0;
680 if (DEBUG_WRITE) System.out.println("Trans count: " + length);
681 for (int i = 0; i < length; ++i) {
682 int valueNumber = ((Integer)object_index.get(values[i])).intValue();
683 if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber);
685 int deltaTransition = transitions[i] - lastTransition;
686 lastTransition = transitions[i];
687 int deltaValueNumber = valueNumber - lastValueNumber;
688 lastValueNumber = valueNumber;
690 deltaValueNumber <<= 1; // make room for one bit
691 boolean canCombine = deltaTransition == 1;
692 if (canCombine) deltaValueNumber |= 1;
693 sc.writeInt(deltaValueNumber);
694 if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber);
696 sc.writeUInt(deltaTransition);
697 if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
706 private boolean allAreString(Collection availableValues2) {
707 //if (true) return false;
708 for (Iterator it = availableValues2.iterator(); it.hasNext();) {
709 if (!(it.next() instanceof String)) return false;
714 public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException {
715 DataInputCompressor sc = new DataInputCompressor(in1);
716 boolean allStrings = sc.readBoolean();
718 availableValues = new LinkedHashSet();
720 valuesList = sc.readStringSet(availableValues);
722 valuesList = sc.readCollection(availableValues);
724 length = sc.readUInt();
725 transitions = new int[length];
726 if (DEBUG_WRITE) System.out.println("Trans count: " + length);
727 values = new Object[length];
728 int currentTransition = -1;
729 int currentValue = 0;
731 for (int i = 0; i < length; ++i) {
732 int temp = sc.readInt();
733 if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp);
734 boolean combined = (temp & 1) != 0;
736 values[i] = valuesList[currentValue += temp];
738 deltaTransition = sc.readUInt();
739 if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
743 transitions[i] = currentTransition += deltaTransition; // delta value
744 if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue);
751 static int findCommon(String last, String s) {
752 int minLen = Math.min(last.length(), s.length());
753 for (int i = 0; i < minLen; ++i) {
754 if (last.charAt(i) != s.charAt(i)) return i;
762 // * @throws IOException
765 // private void showSize(String title, ObjectOutput out, StreamCompressor sc) throws IOException {
766 // sc.showSize(this, title, out);
768 // //public void readObject(ObjectInputStream in) throws IOException {
769 // public static class StreamCompressor {
770 // transient byte[] buffer = new byte[1];
771 // transient StringBuffer stringBuffer = new StringBuffer();
773 // transient byte[] readWriteBuffer = new byte[8];
780 // * @throws IOException
782 // public void writeInt(int i) throws IOException {
784 // if (position == readWriteBuffer.length) {
785 // out.write(readWriteBuffer);
788 // if ((i & ~0x7F) == 0) {
789 // readWriteBuffer[position++] = (byte)i;
792 // readWriteBuffer[position++] = (byte)(0x80 | i);
797 // * @throws IOException
800 // public int readNInt(ObjectInput in) throws IOException {
801 // int result = readInt(in);
802 // boolean negative = (result & 1) != 0;
804 // if (negative) result = ~result;
808 // * @throws IOException
811 // public void writeNInt(int input) throws IOException {
817 // input = (input << 1) | flag;
818 // writeInt(out, input);
821 // * @throws IOException
824 // public void flush() throws IOException {
825 // out.write(readWriteBuffer);
829 // int readPosition = readWriteBuffer.length;
831 // public int readInt(ObjectInput in) throws IOException {
835 // if (readPosition == readWriteBuffer.length) {
836 // in.read(readWriteBuffer);
839 // //in.read(buffer);
840 // int input = readWriteBuffer[readPosition++]; // buffer[0];
841 // result |= (input & 0x7F) << offset;
842 // if ((input & 0x80) == 0) {
850 // * @throws IOException
853 // public void writeString(String s) throws IOException {
854 // writeInt(UTF16.countCodePoint(s));
855 // writeCodePoints(s);
860 // private void writeCodePoints(String s) throws IOException {
862 // for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
863 // cp = UTF16.charAt(s, i);
868 // * @throws IOException
871 // public String readString() throws IOException {
872 // int len = readInt(in);
873 // return readCodePoints(in, len);
878 // private String readCodePoints(int len) throws IOException {
879 // stringBuffer.setLength(0);
880 // for (int i = 0; i < len; ++i) {
881 // int cp = readInt(in);
882 // UTF16.append(stringBuffer, cp);
884 // return stringBuffer.toString();
888 // * @throws IOException
891 // private void showSize(UnicodeMap map, String title, ObjectOutput out) throws IOException {
893 // System.out.println(title + ": " + (map.debugOut.size() + position));