]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/translit/src/com/ibm/icu/dev/test/util/PrettyPrinter.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / translit / src / com / ibm / icu / dev / test / util / PrettyPrinter.java
1 /**\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  **********************************************************************\r
6  * Author: Mark Davis\r
7  **********************************************************************\r
8  */\r
9 \r
10 package com.ibm.icu.dev.test.util;\r
11 \r
12 import java.io.IOException;\r
13 import java.text.FieldPosition;\r
14 import java.util.Comparator;\r
15 import java.util.TreeSet;\r
16 \r
17 import com.ibm.icu.impl.Utility;\r
18 import com.ibm.icu.lang.UCharacter;\r
19 import com.ibm.icu.text.StringTransform;\r
20 import com.ibm.icu.text.UTF16;\r
21 import com.ibm.icu.text.UnicodeSet;\r
22 import com.ibm.icu.text.UnicodeSetIterator;\r
23 import com.ibm.icu.text.UTF16.StringComparator;\r
24 \r
25 /** Provides more flexible formatting of UnicodeSet patterns.\r
26  */\r
27 public class PrettyPrinter {\r
28     private static final StringComparator CODEPOINT_ORDER = new UTF16.StringComparator(true,false,0);\r
29     private static final UnicodeSet PATTERN_WHITESPACE = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();\r
30     private static final UnicodeSet SORT_AT_END = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();\r
31     private static final UnicodeSet QUOTED_SYNTAX = (UnicodeSet) new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();\r
32 \r
33     private boolean first = true;\r
34     private StringBuffer target = new StringBuffer();\r
35     private int firstCodePoint = -2;\r
36     private int lastCodePoint = -2;\r
37     private boolean compressRanges = true;\r
38     private String lastString = "";\r
39     private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);\r
40     private StringTransform quoter = null;\r
41 \r
42     private Comparator<String> ordering;\r
43     private Comparator<String> spaceComp;\r
44 \r
45     public PrettyPrinter() {\r
46     }\r
47 \r
48     public StringTransform getQuoter() {\r
49         return quoter;\r
50     }\r
51 \r
52     public PrettyPrinter setQuoter(StringTransform quoter) {\r
53         this.quoter = quoter;\r
54         return this; // for chaining\r
55     }\r
56 \r
57     public boolean isCompressRanges() {\r
58         return compressRanges;\r
59     }\r
60 \r
61     /**\r
62      * @param compressRanges if you want abcde instead of a-e, make this false\r
63      * @return\r
64      */\r
65     public PrettyPrinter setCompressRanges(boolean compressRanges) {\r
66         this.compressRanges = compressRanges;\r
67         return this;\r
68     }\r
69 \r
70     public Comparator<String> getOrdering() {\r
71         return ordering;\r
72     }\r
73 \r
74     /**\r
75      * @param ordering the resulting  ordering of the list of characters in the pattern\r
76      * @return\r
77      */\r
78     public PrettyPrinter setOrdering(Comparator ordering) {\r
79         this.ordering = ordering == null ? CODEPOINT_ORDER : new com.ibm.icu.impl.MultiComparator<String>(ordering, CODEPOINT_ORDER);\r
80         return this;\r
81     }\r
82 \r
83     public Comparator<String> getSpaceComparator() {\r
84         return spaceComp;\r
85     }\r
86 \r
87     /**\r
88      * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters\r
89      * @return this, for chaining\r
90      */\r
91     public PrettyPrinter setSpaceComparator(Comparator spaceComp) {\r
92         this.spaceComp = spaceComp;\r
93         return this;\r
94     }\r
95 \r
96     public UnicodeSet getToQuote() {\r
97         return toQuote;\r
98     }\r
99 \r
100     /**\r
101      * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)\r
102      * @param toQuote\r
103      */\r
104     public PrettyPrinter setToQuote(UnicodeSet toQuote) {\r
105         if (toQuote != null) {\r
106             toQuote = (UnicodeSet)toQuote.cloneAsThawed();\r
107             toQuote.addAll(PATTERN_WHITESPACE);\r
108             this.toQuote = toQuote;\r
109         }\r
110         return this;\r
111     }\r
112 \r
113 \r
114     /**\r
115      * Get the pattern for a particular set.\r
116      * @param uset\r
117      * @return formatted UnicodeSet\r
118      */\r
119     public String format(UnicodeSet uset) {\r
120         first = true;\r
121         UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END); // remove all the unassigned gorp for now\r
122         // make sure that comparison separates all strings, even canonically equivalent ones\r
123         TreeSet<String> orderedStrings = new TreeSet<String>(ordering);\r
124         for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {\r
125             if (it.codepoint == UnicodeSetIterator.IS_STRING) {\r
126                 orderedStrings.add(it.string);\r
127             } else {\r
128                 for (int i = it.codepoint; i <= it.codepointEnd; ++i) {\r
129                     if (!putAtEnd.contains(i)) {\r
130                         orderedStrings.add(UTF16.valueOf(i));\r
131                     }\r
132                 }\r
133             }\r
134         }\r
135         target.setLength(0);\r
136         target.append("[");\r
137         for (String item : orderedStrings) {\r
138             appendUnicodeSetItem(item);\r
139         }\r
140         for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp\r
141             appendUnicodeSetItem(it.codepoint); // we know that these are only codepoints, not strings, so this is safe\r
142         }\r
143         flushLast();\r
144         target.append("]");\r
145         String sresult = target.toString();\r
146 \r
147         // double check the results. This can be removed once we have more tests.\r
148         //        try {\r
149         //            UnicodeSet  doubleCheck = new UnicodeSet(sresult);\r
150         //            if (!uset.equals(doubleCheck)) {\r
151         //                throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + Utility.LINE_SEPARATOR + " source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) +  Utility.LINE_SEPARATOR + " result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));\r
152         //            }\r
153         //        } catch (RuntimeException e) {\r
154         //            throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);\r
155         //        }\r
156         return sresult;\r
157     }\r
158 \r
159     private PrettyPrinter appendUnicodeSetItem(String s) {\r
160         if (UTF16.hasMoreCodePointsThan(s, 1)) {\r
161             flushLast();\r
162             addSpaceAsNeededBefore(s);\r
163             appendQuoted(s);\r
164             lastString = s;\r
165         } else {\r
166             appendUnicodeSetItem(UTF16.charAt(s, 0));\r
167         }\r
168         return this;\r
169     }\r
170 \r
171     private void appendUnicodeSetItem(int cp) {\r
172         if (!compressRanges)\r
173             flushLast();\r
174         if (cp == lastCodePoint + 1) {\r
175             lastCodePoint = cp; // continue range\r
176         } else { // start range\r
177             flushLast();\r
178             firstCodePoint = lastCodePoint = cp;\r
179         }\r
180     }\r
181     /**\r
182      * \r
183      */\r
184     private void addSpaceAsNeededBefore(String s) {\r
185         if (first) {\r
186             first = false;\r
187         } else if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {\r
188             target.append(' ');\r
189         } else {\r
190             int cp = UTF16.charAt(s,0);\r
191             if (!toQuote.contains(cp) && !QUOTED_SYNTAX.contains(cp)) {\r
192                 int type = UCharacter.getType(cp);\r
193                 if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {\r
194                     target.append(' ');\r
195                 } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {\r
196                     target.append(' '); // make sure we don't accidentally merge two surrogates\r
197                 }\r
198             }\r
199         }\r
200     }\r
201 \r
202     private void addSpaceAsNeededBefore(int codepoint) {\r
203         addSpaceAsNeededBefore(UTF16.valueOf(codepoint));\r
204     }\r
205 \r
206     private void flushLast() {\r
207         if (lastCodePoint >= 0) {\r
208             addSpaceAsNeededBefore(firstCodePoint);\r
209             if (firstCodePoint != lastCodePoint) {\r
210                 appendQuoted(firstCodePoint);\r
211                 if (firstCodePoint + 1 != lastCodePoint) {\r
212                     target.append('-');\r
213                 } else {\r
214                     addSpaceAsNeededBefore(lastCodePoint);\r
215                 }\r
216             }\r
217             appendQuoted(lastCodePoint);\r
218             lastString = UTF16.valueOf(lastCodePoint);\r
219             firstCodePoint = lastCodePoint = -2;\r
220         }\r
221     }\r
222 \r
223 \r
224     private void appendQuoted(String s) {\r
225         if (toQuote.containsSome(s) && quoter != null) {\r
226             target.append(quoter.transform(s));\r
227         } else {\r
228             int cp;\r
229             target.append("{");\r
230             for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {\r
231                 appendQuoted(cp = UTF16.charAt(s, i));\r
232             }\r
233             target.append("}");\r
234         }\r
235     }\r
236 \r
237     PrettyPrinter appendQuoted(int codePoint) {\r
238         if (toQuote.contains(codePoint)) {\r
239             if (quoter != null) {\r
240                 target.append(quoter.transform(UTF16.valueOf(codePoint)));\r
241                 return this;\r
242             }\r
243             if (codePoint > 0xFFFF) {\r
244                 target.append("\\U");\r
245                 target.append(Utility.hex(codePoint,8));\r
246             } else {\r
247                 target.append("\\u");\r
248                 target.append(Utility.hex(codePoint,4));                    \r
249             }\r
250             return this;\r
251         }\r
252         switch (codePoint) {\r
253         case '[': // SET_OPEN:\r
254         case ']': // SET_CLOSE:\r
255         case '-': // HYPHEN:\r
256         case '^': // COMPLEMENT:\r
257         case '&': // INTERSECTION:\r
258         case '\\': //BACKSLASH:\r
259         case '{':\r
260         case '}':\r
261         case '$':\r
262         case ':':\r
263             target.append('\\');\r
264             break;\r
265         default:\r
266             // Escape whitespace\r
267             if (PATTERN_WHITESPACE.contains(codePoint)) {\r
268                 target.append('\\');\r
269             }\r
270             break;\r
271         }\r
272         UTF16.append(target, codePoint);\r
273         return this;\r
274     }        \r
275     //  Appender append(String s) {\r
276     //  target.append(s);\r
277     //  return this;\r
278     //  }\r
279     //  public String toString() {\r
280     //  return target.toString();\r
281     //  }\r
282 \r
283     public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {\r
284         try {\r
285             return toAppendTo.append(format(obj));\r
286         } catch (IOException e) {\r
287             throw new IllegalArgumentException(e);\r
288         }\r
289     }\r
290 }\r