jars/icu4j-52_1/main/tests/framework/src/com/ibm/icu/dev/util/PrettyPrinter.java

   1 /**
   2  *******************************************************************************
   3  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
   4  * others. All Rights Reserved.                                                *
   5  **********************************************************************
   6  * Author: Mark Davis
   7  **********************************************************************
   8  */
   9
  10 package com.ibm.icu.dev.util;
  11
  12 import java.io.IOException;
  13 import java.text.FieldPosition;
  14 import java.util.Comparator;
  15 import java.util.TreeSet;
  16
  17 import com.ibm.icu.impl.Utility;
  18 import com.ibm.icu.lang.UCharacter;
  19 import com.ibm.icu.text.StringTransform;
  20 import com.ibm.icu.text.UTF16;
  21 import com.ibm.icu.text.UTF16.StringComparator;
  22 import com.ibm.icu.text.UnicodeSet;
  23 import com.ibm.icu.text.UnicodeSetIterator;
  24
  25 /** Provides more flexible formatting of UnicodeSet patterns.
  26  */
  27 public class PrettyPrinter {
  28     private static final StringComparator CODEPOINT_ORDER = new UTF16.StringComparator(true,false,0);
  29     private static final UnicodeSet PATTERN_WHITESPACE = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
  30     private static final UnicodeSet SORT_AT_END = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
  31     private static final UnicodeSet QUOTED_SYNTAX = (UnicodeSet) new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();
  32
  33     private boolean first = true;
  34     private StringBuffer target = new StringBuffer();
  35     private int firstCodePoint = -2;
  36     private int lastCodePoint = -2;
  37     private boolean compressRanges = true;
  38     private String lastString = "";
  39     private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);
  40     private StringTransform quoter = null;
  41
  42     private Comparator<String> ordering;
  43     private Comparator<String> spaceComp;
  44
  45     public PrettyPrinter() {
  46     }
  47
  48     public StringTransform getQuoter() {
  49         return quoter;
  50     }
  51
  52     public PrettyPrinter setQuoter(StringTransform quoter) {
  53         this.quoter = quoter;
  54         return this; // for chaining
  55     }
  56
  57     public boolean isCompressRanges() {
  58         return compressRanges;
  59     }
  60
  61     /**
  62      * @param compressRanges if you want abcde instead of a-e, make this false
  63      * @return
  64      */
  65     public PrettyPrinter setCompressRanges(boolean compressRanges) {
  66         this.compressRanges = compressRanges;
  67         return this;
  68     }
  69
  70     public Comparator<String> getOrdering() {
  71         return ordering;
  72     }
  73
  74     /**
  75      * @param ordering the resulting  ordering of the list of characters in the pattern
  76      * @return
  77      */
  78     public PrettyPrinter setOrdering(Comparator ordering) {
  79         this.ordering = ordering == null ? CODEPOINT_ORDER : new com.ibm.icu.impl.MultiComparator<String>(ordering, CODEPOINT_ORDER);
  80         return this;
  81     }
  82
  83     public Comparator<String> getSpaceComparator() {
  84         return spaceComp;
  85     }
  86
  87     /**
  88      * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
  89      * @return this, for chaining
  90      */
  91     public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
  92         this.spaceComp = spaceComp;
  93         return this;
  94     }
  95
  96     public UnicodeSet getToQuote() {
  97         return toQuote;
  98     }
  99
 100     /**
 101      * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
 102      * @param toQuote
 103      */
 104     public PrettyPrinter setToQuote(UnicodeSet toQuote) {
 105         if (toQuote != null) {
 106             toQuote = (UnicodeSet)toQuote.cloneAsThawed();
 107             toQuote.addAll(PATTERN_WHITESPACE);
 108             this.toQuote = toQuote;
 109         }
 110         return this;
 111     }
 112
 113
 114     /**
 115      * Get the pattern for a particular set.
 116      * @param uset
 117      * @return formatted UnicodeSet
 118      */
 119     public String format(UnicodeSet uset) {
 120         first = true;
 121         UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END); // remove all the unassigned gorp for now
 122         // make sure that comparison separates all strings, even canonically equivalent ones
 123         TreeSet<String> orderedStrings = new TreeSet<String>(ordering);
 124         for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
 125             if (it.codepoint == UnicodeSetIterator.IS_STRING) {
 126                 orderedStrings.add(it.string);
 127             } else {
 128                 for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
 129                     if (!putAtEnd.contains(i)) {
 130                         orderedStrings.add(UTF16.valueOf(i));
 131                     }
 132                 }
 133             }
 134         }
 135         target.setLength(0);
 136         target.append("[");
 137         for (String item : orderedStrings) {
 138             appendUnicodeSetItem(item);
 139         }
 140         for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
 141             appendUnicodeSetItem(it.codepoint); // we know that these are only codepoints, not strings, so this is safe
 142         }
 143         flushLast();
 144         target.append("]");
 145         String sresult = target.toString();
 146
 147         // double check the results. This can be removed once we have more tests.
 148         //        try {
 149         //            UnicodeSet  doubleCheck = new UnicodeSet(sresult);
 150         //            if (!uset.equals(doubleCheck)) {
 151         //                throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + Utility.LINE_SEPARATOR + " source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) +  Utility.LINE_SEPARATOR + " result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
 152         //            }
 153         //        } catch (RuntimeException e) {
 154         //            throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
 155         //        }
 156         return sresult;
 157     }
 158
 159     private PrettyPrinter appendUnicodeSetItem(String s) {
 160         if (UTF16.hasMoreCodePointsThan(s, 1)) {
 161             flushLast();
 162             addSpaceAsNeededBefore(s);
 163             appendQuoted(s);
 164             lastString = s;
 165         } else {
 166             appendUnicodeSetItem(UTF16.charAt(s, 0));
 167         }
 168         return this;
 169     }
 170
 171     private void appendUnicodeSetItem(int cp) {
 172         if (!compressRanges)
 173             flushLast();
 174         if (cp == lastCodePoint + 1) {
 175             lastCodePoint = cp; // continue range
 176         } else { // start range
 177             flushLast();
 178             firstCodePoint = lastCodePoint = cp;
 179         }
 180     }
 181     /**
 182      *
 183      */
 184     private void addSpaceAsNeededBefore(String s) {
 185         if (first) {
 186             first = false;
 187         } else if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {
 188             target.append(' ');
 189         } else {
 190             int cp = UTF16.charAt(s,0);
 191             if (!toQuote.contains(cp) && !QUOTED_SYNTAX.contains(cp)) {
 192                 int type = UCharacter.getType(cp);
 193                 if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
 194                     target.append(' ');
 195                 } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
 196                     target.append(' '); // make sure we don't accidentally merge two surrogates
 197                 }
 198             }
 199         }
 200     }
 201
 202     private void addSpaceAsNeededBefore(int codepoint) {
 203         addSpaceAsNeededBefore(UTF16.valueOf(codepoint));
 204     }
 205
 206     private void flushLast() {
 207         if (lastCodePoint >= 0) {
 208             addSpaceAsNeededBefore(firstCodePoint);
 209             if (firstCodePoint != lastCodePoint) {
 210                 appendQuoted(firstCodePoint);
 211                 if (firstCodePoint + 1 != lastCodePoint) {
 212                     target.append('-');
 213                 } else {
 214                     addSpaceAsNeededBefore(lastCodePoint);
 215                 }
 216             }
 217             appendQuoted(lastCodePoint);
 218             lastString = UTF16.valueOf(lastCodePoint);
 219             firstCodePoint = lastCodePoint = -2;
 220         }
 221     }
 222
 223
 224     private void appendQuoted(String s) {
 225         if (toQuote.containsSome(s) && quoter != null) {
 226             target.append(quoter.transform(s));
 227         } else {
 228             int cp;
 229             target.append("{");
 230             for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
 231                 appendQuoted(cp = UTF16.charAt(s, i));
 232             }
 233             target.append("}");
 234         }
 235     }
 236
 237     PrettyPrinter appendQuoted(int codePoint) {
 238         if (toQuote.contains(codePoint)) {
 239             if (quoter != null) {
 240                 target.append(quoter.transform(UTF16.valueOf(codePoint)));
 241                 return this;
 242             }
 243             if (codePoint > 0xFFFF) {
 244                 target.append("\\U");
 245                 target.append(Utility.hex(codePoint,8));
 246             } else {
 247                 target.append("\\u");
 248                 target.append(Utility.hex(codePoint,4));
 249             }
 250             return this;
 251         }
 252         switch (codePoint) {
 253         case '[': // SET_OPEN:
 254         case ']': // SET_CLOSE:
 255         case '-': // HYPHEN:
 256         case '^': // COMPLEMENT:
 257         case '&': // INTERSECTION:
 258         case '\\': //BACKSLASH:
 259         case '{':
 260         case '}':
 261         case '$':
 262         case ':':
 263             target.append('\\');
 264             break;
 265         default:
 266             // Escape whitespace
 267             if (PATTERN_WHITESPACE.contains(codePoint)) {
 268                 target.append('\\');
 269             }
 270             break;
 271         }
 272         UTF16.append(target, codePoint);
 273         return this;
 274     }
 275     //  Appender append(String s) {
 276     //  target.append(s);
 277     //  return this;
 278     //  }
 279     //  public String toString() {
 280     //  return target.toString();
 281     //  }
 282
 283     public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {
 284         try {
 285             return toAppendTo.append(format(obj));
 286         } catch (IOException e) {
 287             throw new IllegalArgumentException(e);
 288         }
 289     }
 290 }