jars/icu4j-4_4_2-src/main/tests/translit/src/com/ibm/icu/dev/test/util/PrettyPrinter.java

   1 /**\r
   2  *******************************************************************************\r
   3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
   4  * others. All Rights Reserved.                                                *\r
   5  **********************************************************************\r
   6  * Author: Mark Davis\r
   7  **********************************************************************\r
   8  */\r
   9 \r
  10 package com.ibm.icu.dev.test.util;\r
  11 \r
  12 import java.io.IOException;\r
  13 import java.text.FieldPosition;\r
  14 import java.util.Comparator;\r
  15 import java.util.TreeSet;\r
  16 \r
  17 import com.ibm.icu.impl.Utility;\r
  18 import com.ibm.icu.lang.UCharacter;\r
  19 import com.ibm.icu.text.StringTransform;\r
  20 import com.ibm.icu.text.UTF16;\r
  21 import com.ibm.icu.text.UnicodeSet;\r
  22 import com.ibm.icu.text.UnicodeSetIterator;\r
  23 import com.ibm.icu.text.UTF16.StringComparator;\r
  24 \r
  25 /** Provides more flexible formatting of UnicodeSet patterns.\r
  26  */\r
  27 public class PrettyPrinter {\r
  28     private static final StringComparator CODEPOINT_ORDER = new UTF16.StringComparator(true,false,0);\r
  29     private static final UnicodeSet PATTERN_WHITESPACE = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();\r
  30     private static final UnicodeSet SORT_AT_END = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();\r
  31     private static final UnicodeSet QUOTED_SYNTAX = (UnicodeSet) new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();\r
  32 \r
  33     private boolean first = true;\r
  34     private StringBuffer target = new StringBuffer();\r
  35     private int firstCodePoint = -2;\r
  36     private int lastCodePoint = -2;\r
  37     private boolean compressRanges = true;\r
  38     private String lastString = "";\r
  39     private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);\r
  40     private StringTransform quoter = null;\r
  41 \r
  42     private Comparator<String> ordering;\r
  43     private Comparator<String> spaceComp;\r
  44 \r
  45     public PrettyPrinter() {\r
  46     }\r
  47 \r
  48     public StringTransform getQuoter() {\r
  49         return quoter;\r
  50     }\r
  51 \r
  52     public PrettyPrinter setQuoter(StringTransform quoter) {\r
  53         this.quoter = quoter;\r
  54         return this; // for chaining\r
  55     }\r
  56 \r
  57     public boolean isCompressRanges() {\r
  58         return compressRanges;\r
  59     }\r
  60 \r
  61     /**\r
  62      * @param compressRanges if you want abcde instead of a-e, make this false\r
  63      * @return\r
  64      */\r
  65     public PrettyPrinter setCompressRanges(boolean compressRanges) {\r
  66         this.compressRanges = compressRanges;\r
  67         return this;\r
  68     }\r
  69 \r
  70     public Comparator<String> getOrdering() {\r
  71         return ordering;\r
  72     }\r
  73 \r
  74     /**\r
  75      * @param ordering the resulting  ordering of the list of characters in the pattern\r
  76      * @return\r
  77      */\r
  78     public PrettyPrinter setOrdering(Comparator ordering) {\r
  79         this.ordering = ordering == null ? CODEPOINT_ORDER : new com.ibm.icu.impl.MultiComparator<String>(ordering, CODEPOINT_ORDER);\r
  80         return this;\r
  81     }\r
  82 \r
  83     public Comparator<String> getSpaceComparator() {\r
  84         return spaceComp;\r
  85     }\r
  86 \r
  87     /**\r
  88      * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters\r
  89      * @return this, for chaining\r
  90      */\r
  91     public PrettyPrinter setSpaceComparator(Comparator spaceComp) {\r
  92         this.spaceComp = spaceComp;\r
  93         return this;\r
  94     }\r
  95 \r
  96     public UnicodeSet getToQuote() {\r
  97         return toQuote;\r
  98     }\r
  99 \r
 100     /**\r
 101      * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)\r
 102      * @param toQuote\r
 103      */\r
 104     public PrettyPrinter setToQuote(UnicodeSet toQuote) {\r
 105         if (toQuote != null) {\r
 106             toQuote = (UnicodeSet)toQuote.cloneAsThawed();\r
 107             toQuote.addAll(PATTERN_WHITESPACE);\r
 108             this.toQuote = toQuote;\r
 109         }\r
 110         return this;\r
 111     }\r
 112 \r
 113 \r
 114     /**\r
 115      * Get the pattern for a particular set.\r
 116      * @param uset\r
 117      * @return formatted UnicodeSet\r
 118      */\r
 119     public String format(UnicodeSet uset) {\r
 120         first = true;\r
 121         UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END); // remove all the unassigned gorp for now\r
 122         // make sure that comparison separates all strings, even canonically equivalent ones\r
 123         TreeSet<String> orderedStrings = new TreeSet<String>(ordering);\r
 124         for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {\r
 125             if (it.codepoint == UnicodeSetIterator.IS_STRING) {\r
 126                 orderedStrings.add(it.string);\r
 127             } else {\r
 128                 for (int i = it.codepoint; i <= it.codepointEnd; ++i) {\r
 129                     if (!putAtEnd.contains(i)) {\r
 130                         orderedStrings.add(UTF16.valueOf(i));\r
 131                     }\r
 132                 }\r
 133             }\r
 134         }\r
 135         target.setLength(0);\r
 136         target.append("[");\r
 137         for (String item : orderedStrings) {\r
 138             appendUnicodeSetItem(item);\r
 139         }\r
 140         for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp\r
 141             appendUnicodeSetItem(it.codepoint); // we know that these are only codepoints, not strings, so this is safe\r
 142         }\r
 143         flushLast();\r
 144         target.append("]");\r
 145         String sresult = target.toString();\r
 146 \r
 147         // double check the results. This can be removed once we have more tests.\r
 148         //        try {\r
 149         //            UnicodeSet  doubleCheck = new UnicodeSet(sresult);\r
 150         //            if (!uset.equals(doubleCheck)) {\r
 151         //                throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + Utility.LINE_SEPARATOR + " source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) +  Utility.LINE_SEPARATOR + " result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));\r
 152         //            }\r
 153         //        } catch (RuntimeException e) {\r
 154         //            throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);\r
 155         //        }\r
 156         return sresult;\r
 157     }\r
 158 \r
 159     private PrettyPrinter appendUnicodeSetItem(String s) {\r
 160         if (UTF16.hasMoreCodePointsThan(s, 1)) {\r
 161             flushLast();\r
 162             addSpaceAsNeededBefore(s);\r
 163             appendQuoted(s);\r
 164             lastString = s;\r
 165         } else {\r
 166             appendUnicodeSetItem(UTF16.charAt(s, 0));\r
 167         }\r
 168         return this;\r
 169     }\r
 170 \r
 171     private void appendUnicodeSetItem(int cp) {\r
 172         if (!compressRanges)\r
 173             flushLast();\r
 174         if (cp == lastCodePoint + 1) {\r
 175             lastCodePoint = cp; // continue range\r
 176         } else { // start range\r
 177             flushLast();\r
 178             firstCodePoint = lastCodePoint = cp;\r
 179         }\r
 180     }\r
 181     /**\r
 182      * \r
 183      */\r
 184     private void addSpaceAsNeededBefore(String s) {\r
 185         if (first) {\r
 186             first = false;\r
 187         } else if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {\r
 188             target.append(' ');\r
 189         } else {\r
 190             int cp = UTF16.charAt(s,0);\r
 191             if (!toQuote.contains(cp) && !QUOTED_SYNTAX.contains(cp)) {\r
 192                 int type = UCharacter.getType(cp);\r
 193                 if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {\r
 194                     target.append(' ');\r
 195                 } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {\r
 196                     target.append(' '); // make sure we don't accidentally merge two surrogates\r
 197                 }\r
 198             }\r
 199         }\r
 200     }\r
 201 \r
 202     private void addSpaceAsNeededBefore(int codepoint) {\r
 203         addSpaceAsNeededBefore(UTF16.valueOf(codepoint));\r
 204     }\r
 205 \r
 206     private void flushLast() {\r
 207         if (lastCodePoint >= 0) {\r
 208             addSpaceAsNeededBefore(firstCodePoint);\r
 209             if (firstCodePoint != lastCodePoint) {\r
 210                 appendQuoted(firstCodePoint);\r
 211                 if (firstCodePoint + 1 != lastCodePoint) {\r
 212                     target.append('-');\r
 213                 } else {\r
 214                     addSpaceAsNeededBefore(lastCodePoint);\r
 215                 }\r
 216             }\r
 217             appendQuoted(lastCodePoint);\r
 218             lastString = UTF16.valueOf(lastCodePoint);\r
 219             firstCodePoint = lastCodePoint = -2;\r
 220         }\r
 221     }\r
 222 \r
 223 \r
 224     private void appendQuoted(String s) {\r
 225         if (toQuote.containsSome(s) && quoter != null) {\r
 226             target.append(quoter.transform(s));\r
 227         } else {\r
 228             int cp;\r
 229             target.append("{");\r
 230             for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {\r
 231                 appendQuoted(cp = UTF16.charAt(s, i));\r
 232             }\r
 233             target.append("}");\r
 234         }\r
 235     }\r
 236 \r
 237     PrettyPrinter appendQuoted(int codePoint) {\r
 238         if (toQuote.contains(codePoint)) {\r
 239             if (quoter != null) {\r
 240                 target.append(quoter.transform(UTF16.valueOf(codePoint)));\r
 241                 return this;\r
 242             }\r
 243             if (codePoint > 0xFFFF) {\r
 244                 target.append("\\U");\r
 245                 target.append(Utility.hex(codePoint,8));\r
 246             } else {\r
 247                 target.append("\\u");\r
 248                 target.append(Utility.hex(codePoint,4));                    \r
 249             }\r
 250             return this;\r
 251         }\r
 252         switch (codePoint) {\r
 253         case '[': // SET_OPEN:\r
 254         case ']': // SET_CLOSE:\r
 255         case '-': // HYPHEN:\r
 256         case '^': // COMPLEMENT:\r
 257         case '&': // INTERSECTION:\r
 258         case '\\': //BACKSLASH:\r
 259         case '{':\r
 260         case '}':\r
 261         case '$':\r
 262         case ':':\r
 263             target.append('\\');\r
 264             break;\r
 265         default:\r
 266             // Escape whitespace\r
 267             if (PATTERN_WHITESPACE.contains(codePoint)) {\r
 268                 target.append('\\');\r
 269             }\r
 270             break;\r
 271         }\r
 272         UTF16.append(target, codePoint);\r
 273         return this;\r
 274     }        \r
 275     //  Appender append(String s) {\r
 276     //  target.append(s);\r
 277     //  return this;\r
 278     //  }\r
 279     //  public String toString() {\r
 280     //  return target.toString();\r
 281     //  }\r
 282 \r
 283     public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {\r
 284         try {\r
 285             return toAppendTo.append(format(obj));\r
 286         } catch (IOException e) {\r
 287             throw new IllegalArgumentException(e);\r
 288         }\r
 289     }\r
 290 }\r