2 *******************************************************************************
\r
3 * Copyright (C) 1996-2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.util;
\r
10 import java.util.Comparator;
\r
11 import java.util.Iterator;
\r
12 import java.util.Set;
\r
13 import java.util.TreeSet;
\r
15 import com.ibm.icu.dev.test.util.CollectionUtilities.MultiComparator;
\r
16 import com.ibm.icu.impl.Utility;
\r
17 import com.ibm.icu.lang.UCharacter;
\r
18 import com.ibm.icu.text.Collator;
\r
19 import com.ibm.icu.text.RuleBasedCollator;
\r
20 import com.ibm.icu.text.Transliterator;
\r
21 import com.ibm.icu.text.UTF16;
\r
22 import com.ibm.icu.text.UnicodeSet;
\r
23 import com.ibm.icu.text.UnicodeSetIterator;
\r
24 import com.ibm.icu.util.ULocale;
\r
26 /** Provides more flexible formatting of UnicodeSet patterns.
\r
28 public class PrettyPrinter {
\r
29 private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
\r
30 private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
\r
32 private boolean first = true;
\r
33 private StringBuffer target = new StringBuffer();
\r
34 private int firstCodePoint = -2;
\r
35 private int lastCodePoint = -2;
\r
36 private boolean compressRanges = true;
\r
37 private String lastString = "";
\r
38 private UnicodeSet toQuote = new UnicodeSet(patternWhitespace);
\r
39 private Transliterator quoter = null;
\r
41 private Comparator ordering;
\r
42 private Comparator spaceComp = Collator.getInstance(ULocale.ROOT);
\r
44 setOrdering(Collator.getInstance(ULocale.ROOT));
\r
45 ((RuleBasedCollator)spaceComp).setStrength(RuleBasedCollator.PRIMARY);
\r
48 public Transliterator getQuoter() {
\r
52 public PrettyPrinter setQuoter(Transliterator quoter) {
\r
53 this.quoter = quoter;
\r
54 return this; // for chaining
\r
57 public boolean isCompressRanges() {
\r
58 return compressRanges;
\r
62 * @param compressRanges if you want abcde instead of a-e, make this false
\r
65 public PrettyPrinter setCompressRanges(boolean compressRanges) {
\r
66 this.compressRanges = compressRanges;
\r
70 public Comparator getOrdering() {
\r
75 * @param ordering the resulting ordering of the list of characters in the pattern
\r
78 public PrettyPrinter setOrdering(Comparator ordering) {
\r
79 this.ordering = new MultiComparator(new Comparator[] {ordering, new UTF16.StringComparator(true,false,0)});
\r
83 public Comparator getSpaceComparator() {
\r
88 * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
\r
89 * @return this, for chaining
\r
91 public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
\r
92 this.spaceComp = spaceComp;
\r
96 public UnicodeSet getToQuote() {
\r
101 * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
\r
104 public PrettyPrinter setToQuote(UnicodeSet toQuote) {
\r
105 toQuote = (UnicodeSet)toQuote.clone();
\r
106 toQuote.addAll(patternWhitespace);
\r
107 this.toQuote = toQuote;
\r
112 * Get the pattern for a particular set.
\r
114 * @return formatted UnicodeSet
\r
116 public String toPattern(UnicodeSet uset) {
\r
118 UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
\r
119 // make sure that comparison separates all strings, even canonically equivalent ones
\r
120 Set orderedStrings = new TreeSet(ordering);
\r
121 for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
\r
122 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
\r
123 orderedStrings.add(it.string);
\r
125 for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
\r
126 if (!putAtEnd.contains(i)) {
\r
127 orderedStrings.add(UTF16.valueOf(i));
\r
132 target.setLength(0);
\r
133 target.append("[");
\r
134 for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
\r
135 appendUnicodeSetItem((String) it.next());
\r
137 for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
\r
138 appendUnicodeSetItem(it.codepoint);
\r
141 target.append("]");
\r
142 String sresult = target.toString();
\r
144 // double check the results. This can be removed once we have more tests.
\r
146 // UnicodeSet doubleCheck = new UnicodeSet(sresult);
\r
147 // if (!uset.equals(doubleCheck)) {
\r
148 // throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
\r
150 // } catch (RuntimeException e) {
\r
151 // throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
\r
156 private PrettyPrinter appendUnicodeSetItem(String s) {
\r
158 if (UTF16.hasMoreCodePointsThan(s, 1)) {
\r
161 target.append("{");
\r
162 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
\r
163 appendQuoted(cp = UTF16.charAt(s, i));
\r
165 target.append("}");
\r
168 appendUnicodeSetItem(UTF16.charAt(s, 0));
\r
173 private void appendUnicodeSetItem(int cp) {
\r
174 if (!compressRanges)
\r
176 if (cp == lastCodePoint + 1) {
\r
177 lastCodePoint = cp; // continue range
\r
178 } else { // start range
\r
180 firstCodePoint = lastCodePoint = cp;
\r
186 private void addSpace(String s) {
\r
189 } else if (spaceComp.compare(s, lastString) != 0) {
\r
190 target.append(' ');
\r
192 int cp = UTF16.charAt(s,0);
\r
193 int type = UCharacter.getType(cp);
\r
194 if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
\r
195 target.append(' ');
\r
196 } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
\r
197 target.append(' '); // make sure we don't accidentally merge two surrogates
\r
202 private void flushLast() {
\r
203 if (lastCodePoint >= 0) {
\r
204 addSpace(UTF16.valueOf(firstCodePoint));
\r
205 if (firstCodePoint != lastCodePoint) {
\r
206 appendQuoted(firstCodePoint);
\r
207 target.append(firstCodePoint + 1 == lastCodePoint ? ' ' : '-');
\r
209 appendQuoted(lastCodePoint);
\r
210 lastString = UTF16.valueOf(lastCodePoint);
\r
211 firstCodePoint = lastCodePoint = -2;
\r
214 PrettyPrinter appendQuoted(int codePoint) {
\r
215 if (toQuote.contains(codePoint)) {
\r
216 if (quoter != null) {
\r
217 target.append(quoter.transliterate(UTF16.valueOf(codePoint)));
\r
220 if (codePoint > 0xFFFF) {
\r
221 target.append("\\U");
\r
222 target.append(Utility.hex(codePoint,8));
\r
224 target.append("\\u");
\r
225 target.append(Utility.hex(codePoint,4));
\r
229 switch (codePoint) {
\r
230 case '[': // SET_OPEN:
\r
231 case ']': // SET_CLOSE:
\r
232 case '-': // HYPHEN:
\r
233 case '^': // COMPLEMENT:
\r
234 case '&': // INTERSECTION:
\r
235 case '\\': //BACKSLASH:
\r
240 target.append('\\');
\r
243 // Escape whitespace
\r
244 if (patternWhitespace.contains(codePoint)) {
\r
245 target.append('\\');
\r
249 UTF16.append(target, codePoint);
\r
252 // Appender append(String s) {
\r
253 // target.append(s);
\r
256 // public String toString() {
\r
257 // return target.toString();
\r