-//##header\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//#else\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 2002-2009, International Business Machines Corporation and *\r
- * others. All Rights Reserved. *\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.dev.test.util;\r
-\r
-import java.io.BufferedReader;\r
-import java.io.BufferedWriter;\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.FileOutputStream;\r
-import java.io.IOException;\r
-import java.io.InputStreamReader;\r
-import java.io.OutputStreamWriter;\r
-import java.io.PrintWriter;\r
-import java.io.StringWriter;\r
-import java.text.MessageFormat;\r
-import java.util.Collection;\r
-import java.util.HashMap;\r
-import java.util.HashSet;\r
-import java.util.Locale;\r
-import java.util.Map;\r
-\r
-import com.ibm.icu.impl.Utility;\r
-import com.ibm.icu.text.NumberFormat;\r
-import com.ibm.icu.text.Transliterator;\r
-import com.ibm.icu.text.UTF16;\r
-import com.ibm.icu.text.UnicodeSet;\r
-\r
-public class BagFormatter {\r
- static final boolean DEBUG = false;\r
- public static final boolean SHOW_FILES;\r
- static {\r
- boolean showFiles = false;\r
- try {\r
- showFiles = System.getProperty("SHOW_FILES") != null;\r
- }\r
- catch (SecurityException e) {\r
- }\r
- SHOW_FILES = showFiles;\r
- }\r
-\r
- public static final PrintWriter CONSOLE = new PrintWriter(System.out,true);\r
-\r
- private static PrintWriter log = CONSOLE;\r
-\r
- private boolean abbreviated = false;\r
- private String separator = ",";\r
- private String prefix = "[";\r
- private String suffix = "]";\r
- private UnicodeProperty.Factory source;\r
- private UnicodeLabel nameSource;\r
- private UnicodeLabel labelSource;\r
- private UnicodeLabel rangeBreakSource;\r
- private UnicodeLabel valueSource;\r
- private String propName = "";\r
- private boolean showCount = true;\r
- //private boolean suppressReserved = true;\r
- private boolean hexValue = false;\r
- private static final String NULL_VALUE = "_NULL_VALUE_";\r
- private int fullTotal = -1;\r
- private boolean showTotal = true;\r
- private String lineSeparator = "\r\n";\r
- private Tabber tabber = new Tabber.MonoTabber();\r
-\r
- /**\r
- * Compare two UnicodeSets, and show the differences\r
- * @param name1 name of first set to be compared\r
- * @param set1 first set\r
- * @param name2 name of second set to be compared\r
- * @param set2 second set\r
- * @return formatted string\r
- */\r
- public String showSetDifferences(\r
- String name1,\r
- UnicodeSet set1,\r
- String name2,\r
- UnicodeSet set2) {\r
-\r
- StringWriter result = new StringWriter();\r
- showSetDifferences(new PrintWriter(result),name1,set1,name2,set2);\r
- result.flush();\r
- return result.getBuffer().toString();\r
- }\r
-\r
- public String showSetDifferences(\r
- String name1,\r
- Collection set1,\r
- String name2,\r
- Collection set2) {\r
-\r
- StringWriter result = new StringWriter();\r
- showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);\r
- result.flush();\r
- return result.getBuffer().toString();\r
- }\r
-\r
- public void showSetDifferences(\r
- PrintWriter pw,\r
- String name1,\r
- UnicodeSet set1,\r
- String name2,\r
- UnicodeSet set2) {\r
- showSetDifferences(pw, name1, set1, name2, set2, -1);\r
- }\r
- /**\r
- * Compare two UnicodeSets, and show the differences\r
- * @param name1 name of first set to be compared\r
- * @param set1 first set\r
- * @param name2 name of second set to be compared\r
- * @param set2 second set\r
- */\r
- public void showSetDifferences(\r
- PrintWriter pw,\r
- String name1,\r
- UnicodeSet set1,\r
- String name2,\r
- UnicodeSet set2,\r
- int flags) \r
- {\r
- if (pw == null) pw = CONSOLE;\r
- String[] names = { name1, name2 };\r
-\r
- UnicodeSet temp;\r
- \r
- if ((flags&1) != 0) {\r
- temp = new UnicodeSet(set1).removeAll(set2);\r
- pw.print(lineSeparator);\r
- pw.print(inOut.format(names));\r
- pw.print(lineSeparator);\r
- showSetNames(pw, temp);\r
- }\r
-\r
- if ((flags&2) != 0) {\r
- temp = new UnicodeSet(set2).removeAll(set1);\r
- pw.print(lineSeparator);\r
- pw.print(outIn.format(names));\r
- pw.print(lineSeparator);\r
- showSetNames(pw, temp);\r
- }\r
-\r
- if ((flags&4) != 0) {\r
- temp = new UnicodeSet(set2).retainAll(set1);\r
- pw.print(lineSeparator);\r
- pw.print(inIn.format(names));\r
- pw.print(lineSeparator);\r
- showSetNames(pw, temp);\r
- }\r
- pw.flush();\r
- }\r
-\r
- public void showSetDifferences(\r
- PrintWriter pw,\r
- String name1,\r
- Collection set1,\r
- String name2,\r
- Collection set2) {\r
-\r
- if (pw == null) pw = CONSOLE;\r
- String[] names = { name1, name2 };\r
- // damn'd collection doesn't have a clone, so\r
- // we go with Set, even though that\r
- // may not preserve order and duplicates\r
- Collection temp = new HashSet(set1);\r
- temp.removeAll(set2);\r
- pw.println();\r
- pw.println(inOut.format(names));\r
- showSetNames(pw, temp);\r
-\r
- temp.clear();\r
- temp.addAll(set2);\r
- temp.removeAll(set1);\r
- pw.println();\r
- pw.println(outIn.format(names));\r
- showSetNames(pw, temp);\r
-\r
- temp.clear();\r
- temp.addAll(set1);\r
- temp.retainAll(set2);\r
- pw.println();\r
- pw.println(inIn.format(names));\r
- showSetNames(pw, temp);\r
- }\r
-\r
- /**\r
- * Returns a list of items in the collection, with each separated by the separator.\r
- * Each item must not be null; its toString() is called for a printable representation\r
- * @param c source collection\r
- * @return a String representation of the list\r
- * @internal\r
- */\r
- public String showSetNames(Object c) {\r
- StringWriter buffer = new StringWriter();\r
- PrintWriter output = new PrintWriter(buffer);\r
- showSetNames(output,c);\r
- return buffer.toString();\r
- }\r
-\r
- /**\r
- * Returns a list of items in the collection, with each separated by the separator.\r
- * Each item must not be null; its toString() is called for a printable representation\r
- * @param output destination to which to write names\r
- * @param c source collection\r
- * @internal\r
- */\r
- public void showSetNames(PrintWriter output, Object c) {\r
- mainVisitor.doAt(c, output);\r
- output.flush();\r
- }\r
-\r
- /**\r
- * Returns a list of items in the collection, with each separated by the separator.\r
- * Each item must not be null; its toString() is called for a printable representation\r
- * @param filename destination to which to write names\r
- * @param c source collection\r
- * @internal\r
- */\r
- public void showSetNames(String filename, Object c) throws IOException {\r
- PrintWriter pw = new PrintWriter(\r
- new OutputStreamWriter(\r
- new FileOutputStream(filename),"utf-8"));\r
- showSetNames(log,c);\r
- pw.close();\r
- }\r
-\r
- public String getAbbreviatedName(\r
- String src,\r
- String pattern,\r
- String substitute) {\r
-\r
- int matchEnd = NameIterator.findMatchingEnd(src, pattern);\r
- int sdiv = src.length() - matchEnd;\r
- int pdiv = pattern.length() - matchEnd;\r
- StringBuffer result = new StringBuffer();\r
- addMatching(\r
- src.substring(0, sdiv),\r
- pattern.substring(0, pdiv),\r
- substitute,\r
- result);\r
- addMatching(\r
- src.substring(sdiv),\r
- pattern.substring(pdiv),\r
- substitute,\r
- result);\r
- return result.toString();\r
- }\r
-\r
- abstract public static class Relation {\r
- abstract public String getRelation(String a, String b);\r
- }\r
-\r
- static class NullRelation extends Relation {\r
- public String getRelation(String a, String b) { return ""; }\r
- }\r
-\r
- private Relation r = new NullRelation();\r
-\r
- public BagFormatter setRelation(Relation r) {\r
- this.r = r;\r
- return this; // for chaining\r
- }\r
-\r
- public Relation getRelation() {\r
- return r;\r
- }\r
-\r
- /*\r
- r.getRelati on(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)\r
- */\r
- /*\r
- static final UnicodeSet NO_NAME =\r
- new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");\r
- static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();\r
- static final UnicodeSet NAME_CHARACTERS =\r
- new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");\r
-\r
- public UnicodeSet getSetForName(String namePattern) {\r
- UnicodeSet result = new UnicodeSet();\r
- Matcher m = Pattern.compile(namePattern).matcher("");\r
- // check for no-name items, and add in bulk\r
- m.reset("<no name>");\r
- if (m.matches()) {\r
- result.addAll(NO_NAME);\r
- }\r
- // check all others\r
- UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);\r
- while (usi.next()) {\r
- String name = getName(usi.codepoint);\r
- if (name == null)\r
- continue;\r
- m.reset(name);\r
- if (m.matches()) {\r
- result.add(usi.codepoint);\r
- }\r
- }\r
- // Note: if Regex had some API so that if we could tell that\r
- // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"\r
- // then we could optimize by skipping whole swathes of characters\r
- return result;\r
- }\r
- */\r
-\r
- public BagFormatter setMergeRanges(boolean in) {\r
- mergeRanges = in;\r
- return this;\r
- }\r
- public BagFormatter setShowSetAlso(boolean b) {\r
- showSetAlso = b;\r
- return this;\r
- }\r
-\r
- public String getName(int codePoint) {\r
- return getName("", codePoint, codePoint);\r
- }\r
-\r
- public String getName(String sep, int start, int end) {\r
- if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) return "";\r
- String result = getName(start, false);\r
- if (start == end) return sep + result;\r
- String endString = getName(end, false);\r
- if (result.length() == 0 && endString.length() == 0) return sep;\r
- if (abbreviated) endString = getAbbreviatedName(endString,result,"~");\r
- return sep + result + ".." + endString;\r
- }\r
-\r
- public String getName(String s) {\r
- return getName(s, false);\r
- }\r
-\r
- public static class NameLabel extends UnicodeLabel {\r
- UnicodeProperty nameProp;\r
- UnicodeSet control;\r
- UnicodeSet private_use;\r
- UnicodeSet noncharacter;\r
- UnicodeSet surrogate;\r
-\r
- public NameLabel(UnicodeProperty.Factory source) {\r
- nameProp = source.getProperty("Name");\r
- control = source.getSet("gc=Cc");\r
- private_use = source.getSet("gc=Co");\r
- surrogate = source.getSet("gc=Cs");\r
- noncharacter = source.getSet("noncharactercodepoint=yes");\r
- }\r
-\r
- public String getValue(int codePoint, boolean isShort) {\r
- String hcp = !isShort\r
- ? "U+" + Utility.hex(codePoint, 4) + " "\r
- : "";\r
- String result = nameProp.getValue(codePoint);\r
- if (result != null)\r
- return hcp + result;\r
- if (control.contains(codePoint)) {\r
- return "<control-" + Utility.hex(codePoint, 4) + ">";\r
- }\r
- if (private_use.contains(codePoint)) {\r
- return "<private-use-" + Utility.hex(codePoint, 4) + ">";\r
- }\r
- if (surrogate.contains(codePoint)) {\r
- return "<surrogate-" + Utility.hex(codePoint, 4) + ">";\r
- }\r
- if (noncharacter.contains(codePoint)) {\r
- return "<noncharacter-" + Utility.hex(codePoint, 4) + ">";\r
- }\r
- //if (suppressReserved) return "";\r
- return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";\r
- }\r
-\r
- }\r
-\r
- // refactored\r
- public String getName(int codePoint, boolean withCodePoint) {\r
- String result = getNameSource().getValue(codePoint, !withCodePoint);\r
- return fixName == null ? result : fixName.transliterate(result);\r
- }\r
-\r
- public String getName(String s, boolean withCodePoint) {\r
- String result = getNameSource().getValue(s, separator, !withCodePoint);\r
- return fixName == null ? result : fixName.transliterate(result);\r
- }\r
-\r
- public String hex(String s) {\r
- return hex(s,separator);\r
- }\r
-\r
- public String hex(String s, String sep) {\r
- return UnicodeLabel.HEX.getValue(s, sep, true);\r
- }\r
-\r
- public String hex(int start, int end) {\r
- String s = Utility.hex(start,4);\r
- if (start == end) return s;\r
- return s + ".." + Utility.hex(end,4);\r
- }\r
-\r
- public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {\r
- this.source = source;\r
- return this;\r
- }\r
-\r
- public UnicodeProperty.Factory getUnicodePropertyFactory() {\r
- if (source == null) source = ICUPropertyFactory.make();\r
- return source;\r
- }\r
-\r
- public BagFormatter () {\r
- }\r
-\r
- public BagFormatter (UnicodeProperty.Factory source) {\r
- setUnicodePropertyFactory(source);\r
- }\r
-\r
- public String join(Object o) {\r
- return labelVisitor.join(o);\r
- }\r
-\r
- // ===== PRIVATES =====\r
-\r
- private Join labelVisitor = new Join();\r
-\r
- private boolean mergeRanges = true;\r
- private Transliterator showLiteral = null;\r
- private Transliterator fixName = null;\r
- private boolean showSetAlso = false;\r
-\r
- private RangeFinder rf = new RangeFinder();\r
-\r
- private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");\r
- private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");\r
- private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");\r
-\r
- private MyVisitor mainVisitor = new MyVisitor();\r
-\r
- /*\r
- private String getLabels(int start, int end) {\r
- Set names = new TreeSet();\r
- for (int cp = start; cp <= end; ++cp) {\r
- names.add(getLabel(cp));\r
- }\r
- return labelVisitor.join(names);\r
- }\r
- */\r
-\r
- private void addMatching(\r
- String src,\r
- String pattern,\r
- String substitute,\r
- StringBuffer result) {\r
- NameIterator n1 = new NameIterator(src);\r
- NameIterator n2 = new NameIterator(pattern);\r
- boolean first = true;\r
- while (true) {\r
- String s1 = n1.next();\r
- if (s1 == null)\r
- break;\r
- String s2 = n2.next();\r
- if (!first)\r
- result.append(" ");\r
- first = false;\r
- if (s1.equals(s2))\r
- result.append(substitute);\r
- else\r
- result.append(s1);\r
- }\r
- }\r
-\r
- private static NumberFormat nf =\r
- NumberFormat.getIntegerInstance(Locale.ENGLISH);\r
- static {\r
- nf.setGroupingUsed(false);\r
- }\r
-\r
- private class MyVisitor extends Visitor {\r
- private PrintWriter output;\r
- String commentSeparator;\r
- int counter;\r
- int valueSize;\r
- int labelSize;\r
- boolean isHtml;\r
- boolean inTable = false;\r
- \r
- public void toOutput(String s) {\r
- if (isHtml) {\r
- if (inTable) {\r
- output.print("</table>");\r
- inTable = false;\r
- }\r
- output.print("<p>");\r
- }\r
- output.print(s);\r
- if (isHtml)\r
- output.println("</p>");\r
- else\r
- output.print(lineSeparator);\r
- }\r
- \r
- public void toTable(String s) {\r
- if (isHtml && !inTable) {\r
- output.print("<table>");\r
- inTable = true;\r
- }\r
- output.print(tabber.process(s) + lineSeparator);\r
- }\r
-\r
- public void doAt(Object c, PrintWriter out) {\r
- output = out;\r
- isHtml = tabber instanceof Tabber.HTMLTabber;\r
- counter = 0;\r
- \r
- tabber.clear();\r
- // old:\r
- // 0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D>\r
- // new\r
- // 0009..000D ; White_Space #Cc [5] <control>..<control>\r
- tabber.add(mergeRanges ? 14 : 6,Tabber.LEFT);\r
-\r
- if (propName.length() > 0) {\r
- tabber.add(propName.length() + 2,Tabber.LEFT);\r
- }\r
-\r
- valueSize = getValueSource().getMaxWidth(shortValue);\r
- if (DEBUG) System.out.println("ValueSize: " + valueSize);\r
- if (valueSize > 0) {\r
- tabber.add(valueSize + 2,Tabber.LEFT); // value\r
- }\r
-\r
- tabber.add(3,Tabber.LEFT); // comment character\r
-\r
- labelSize = getLabelSource(true).getMaxWidth(shortLabel);\r
- if (labelSize > 0) {\r
- tabber.add(labelSize + 1,Tabber.LEFT); // value\r
- }\r
-\r
- if (mergeRanges && showCount) {\r
- tabber.add(5,Tabber.RIGHT);\r
- }\r
-\r
- if (showLiteral != null) {\r
- tabber.add(4,Tabber.LEFT);\r
- }\r
- //myTabber.add(7,Tabber.LEFT);\r
-\r
- commentSeparator = (showCount || showLiteral != null\r
- || getLabelSource(true) != UnicodeLabel.NULL\r
- || getNameSource() != UnicodeLabel.NULL)\r
- ? "\t #" : "";\r
-\r
- if (DEBUG) System.out.println("Tabber: " + tabber.toString());\r
- if (DEBUG) System.out.println("Tabber: " + tabber.process(\r
- "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER"));\r
- doAt(c);\r
- }\r
-\r
- public String format(Object o) {\r
- StringWriter sw = new StringWriter();\r
- PrintWriter pw = new PrintWriter(sw);\r
- doAt(o);\r
- pw.flush();\r
- String result = sw.getBuffer().toString();\r
- pw.close();\r
- return result;\r
- }\r
-\r
- protected void doBefore(Object container, Object o) {\r
- if (showSetAlso && container instanceof UnicodeSet) {\r
- toOutput("#" + container);\r
- }\r
- }\r
-\r
- protected void doBetween(Object container, Object lastItem, Object nextItem) {\r
- }\r
-\r
- protected void doAfter(Object container, Object o) {\r
- if (fullTotal != -1 && fullTotal != counter) {\r
- if (showTotal) {\r
- toOutput("");\r
- toOutput("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here.");\r
- toOutput("# Total code points: " + nf.format(fullTotal));\r
- }\r
- fullTotal = -1;\r
- } else if (showTotal) {\r
- toOutput("");\r
- toOutput("# Total code points: " + nf.format(counter));\r
- }\r
- }\r
-\r
- protected void doSimpleAt(Object o) {\r
- if (o instanceof Map.Entry) {\r
- Map.Entry oo = (Map.Entry)o;\r
- Object key = oo.getKey();\r
- Object value = oo.getValue();\r
- doBefore(o, key);\r
- doAt(key);\r
- output.println("\u2192");\r
- doAt(value);\r
- doAfter(o, value);\r
- counter++;\r
- } else if (o instanceof Visitor.CodePointRange) {\r
- doAt((Visitor.CodePointRange) o);\r
- } else {\r
- String thing = o.toString();\r
- String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true);\r
- if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value;\r
- String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true);\r
- if (label.length() != 0) label = " " + label;\r
- toTable(\r
- hex(thing)\r
- + value\r
- + commentSeparator\r
- + label\r
- + insertLiteral(thing)\r
- + "\t"\r
- + getName(thing));\r
- counter++;\r
- }\r
- }\r
-\r
- protected void doAt(Visitor.CodePointRange usi) {\r
- if (!mergeRanges) {\r
- for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {\r
- showLine(cp, cp);\r
- }\r
- } else {\r
- rf.reset(usi.codepoint, usi.codepointEnd + 1);\r
- while (rf.next()) {\r
- showLine(rf.start, rf.limit - 1);\r
- }\r
- }\r
- }\r
-\r
- private void showLine(int start, int end) {\r
- String label = getLabelSource(true).getValue(start, shortLabel);\r
- String value = getValue(start, shortValue);\r
- if (value == NULL_VALUE) return;\r
-\r
- counter += end - start + 1;\r
- String pn = propName;\r
- if (pn.length() != 0) {\r
- pn = "\t; " + pn;\r
- }\r
- if (valueSize > 0) {\r
- value = "\t; " + value;\r
- } else if (value.length() > 0) {\r
- throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));\r
- }\r
- if (labelSize > 0) {\r
- label = "\t" + label;\r
- } else if (label.length() > 0) {\r
- throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel));\r
- }\r
-\r
- String count = "";\r
- if (mergeRanges && showCount) {\r
- if (end == start) count = "\t";\r
- else count = "\t ["+ nf.format(end - start + 1)+ "]";\r
- }\r
-\r
- toTable(\r
- hex(start, end)\r
- + pn\r
- + value\r
- + commentSeparator\r
- + label\r
- + count\r
- + insertLiteral(start, end)\r
- + getName("\t ", start, end));\r
- }\r
-\r
- private String insertLiteral(String thing) {\r
- return (showLiteral == null ? ""\r
- : " \t(" + showLiteral.transliterate(thing) + ") ");\r
- }\r
-\r
- private String insertLiteral(int start, int end) {\r
- return (showLiteral == null ? "" :\r
- " \t(" + showLiteral.transliterate(UTF16.valueOf(start))\r
- + ((start != end)\r
- ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))\r
- : "")\r
- + ") ");\r
- }\r
- /*\r
- private String insertLiteral(int cp) {\r
- return (showLiteral == null ? ""\r
- : " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");\r
- }\r
- */\r
- }\r
-\r
- /**\r
- * Iterate through a string, breaking at words.\r
- * @author Davis\r
- */\r
- private static class NameIterator {\r
- String source;\r
- int position;\r
- int start;\r
- int limit;\r
-\r
- NameIterator(String source) {\r
- this.source = source;\r
- this.start = 0;\r
- this.limit = source.length();\r
- }\r
- /**\r
- * Find next word, including trailing spaces\r
- * @return the next word\r
- */\r
- String next() {\r
- if (position >= limit)\r
- return null;\r
- int pos = source.indexOf(' ', position);\r
- if (pos < 0 || pos >= limit)\r
- pos = limit;\r
- String result = source.substring(position, pos);\r
- position = pos + 1;\r
- return result;\r
- }\r
-\r
- static int findMatchingEnd(String s1, String s2) {\r
- int i = s1.length();\r
- int j = s2.length();\r
- try {\r
- while (true) {\r
- --i; // decrement both before calling function!\r
- --j;\r
- if (s1.charAt(i) != s2.charAt(j))\r
- break;\r
- }\r
- } catch (Exception e) {} // run off start\r
-\r
- ++i; // counteract increment\r
- i = s1.indexOf(' ', i); // move forward to space\r
- if (i < 0)\r
- return 0;\r
- return s1.length() - i;\r
- }\r
- }\r
-\r
- private class RangeFinder {\r
- int start, limit;\r
- private int veryLimit;\r
- //String label, value;\r
- void reset(int rangeStart, int rangeLimit) {\r
- limit = rangeStart;\r
- veryLimit = rangeLimit;\r
- }\r
- boolean next() {\r
- if (limit >= veryLimit)\r
- return false;\r
- start = limit; // set to end of last\r
- String label = getLabelSource(false).getValue(limit, true);\r
- String value = getValue(limit, true);\r
- String breaker = getRangeBreakSource().getValue(limit,true);\r
- if (DEBUG && limit < 0x7F) System.out.println("Label: " + label + ", Value: " + value + ", Break: " + breaker);\r
- limit++;\r
- for (; limit < veryLimit; limit++) {\r
- String s = getLabelSource(false).getValue(limit, true);\r
- String v = getValue(limit, true);\r
- String b = getRangeBreakSource().getValue(limit, true);\r
- if (DEBUG && limit < 0x7F) System.out.println("*Label: " + label + ", Value: " + value + ", Break: " + breaker);\r
- if (!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) break;\r
- }\r
- // at this point, limit is the first item that has a different label than source\r
- // OR, we got to the end, and limit == veryLimit\r
- return true;\r
- }\r
- }\r
-\r
- boolean equalTo(Object a, Object b) {\r
- if (a == b) return true;\r
- if (a == null) return false;\r
- return a.equals(b);\r
- }\r
-\r
- boolean shortLabel = true;\r
- boolean shortValue = true;\r
-\r
- public String getPrefix() {\r
- return prefix;\r
- }\r
-\r
- public String getSuffix() {\r
- return suffix;\r
- }\r
-\r
- public BagFormatter setPrefix(String string) {\r
- prefix = string;\r
- return this;\r
- }\r
-\r
- public BagFormatter setSuffix(String string) {\r
- suffix = string;\r
- return this;\r
- }\r
-\r
- public boolean isAbbreviated() {\r
- return abbreviated;\r
- }\r
-\r
- public BagFormatter setAbbreviated(boolean b) {\r
- abbreviated = b;\r
- return this;\r
- }\r
-\r
- public UnicodeLabel getLabelSource(boolean visible) {\r
- if (labelSource == null) {\r
- Map labelMap = new HashMap();\r
- //labelMap.put("Lo","L&");\r
- labelMap.put("Lu","L&");\r
- labelMap.put("Lt","L&");\r
- labelMap.put("Ll","L&");\r
- labelSource = new UnicodeProperty.FilteredProperty(\r
- getUnicodePropertyFactory().getProperty("General_Category"),\r
- new UnicodeProperty.MapFilter(labelMap)\r
- ).setAllowValueAliasCollisions(true);\r
- }\r
- return labelSource;\r
- }\r
-\r
- /**\r
- * @deprecated\r
- */\r
- public static void addAll(UnicodeSet source, Collection target) {\r
- source.addAllTo(target);\r
- }\r
-\r
- // UTILITIES\r
-\r
- public static final Transliterator hex = Transliterator.getInstance(\r
- "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");\r
-\r
- public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {\r
- return openReader(dir,filename,"UTF-8");\r
- }\r
-\r
- public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {\r
- File file = new File(dir, filename);\r
- if (SHOW_FILES && log != null) {\r
- log.println("Opening File: "\r
- + file.getCanonicalPath());\r
- }\r
- return new BufferedReader(\r
- new InputStreamReader(\r
- new FileInputStream(file),\r
- encoding),\r
- 4*1024);\r
- }\r
-\r
- public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {\r
- return openWriter(dir,filename,"UTF-8");\r
- }\r
-\r
- public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {\r
- File file = new File(dir, filename);\r
- if (SHOW_FILES && log != null) {\r
- log.println("Creating File: "\r
- + file.getCanonicalPath());\r
- }\r
- String parentName = file.getParent();\r
- if (parentName != null) {\r
- File parent = new File(parentName);\r
- parent.mkdirs();\r
- }\r
- return new PrintWriter(\r
- new BufferedWriter(\r
- new OutputStreamWriter(\r
- new FileOutputStream(file),\r
- encoding),\r
- 4*1024));\r
- }\r
- public static PrintWriter getLog() {\r
- return log;\r
- }\r
- public BagFormatter setLog(PrintWriter writer) {\r
- log = writer;\r
- return this;\r
- }\r
- public String getSeparator() {\r
- return separator;\r
- }\r
- public BagFormatter setSeparator(String string) {\r
- separator = string;\r
- return this;\r
- }\r
- public Transliterator getShowLiteral() {\r
- return showLiteral;\r
- }\r
- public BagFormatter setShowLiteral(Transliterator transliterator) {\r
- showLiteral = transliterator;\r
- return this;\r
- }\r
-\r
- // ===== CONVENIENCES =====\r
- private class Join extends Visitor {\r
- StringBuffer output = new StringBuffer();\r
- int depth = 0;\r
- String join (Object o) {\r
- output.setLength(0);\r
- doAt(o);\r
- return output.toString();\r
- }\r
- protected void doBefore(Object container, Object item) {\r
- ++depth;\r
- output.append(prefix);\r
- }\r
- protected void doAfter(Object container, Object item) {\r
- output.append(suffix);\r
- --depth;\r
- }\r
- protected void doBetween(Object container, Object lastItem, Object nextItem) {\r
- output.append(separator);\r
- }\r
- protected void doSimpleAt(Object o) {\r
- if (o != null) output.append(o.toString());\r
- }\r
- }\r
-\r
- /**\r
- * @param label\r
- */\r
- public BagFormatter setLabelSource(UnicodeLabel label) {\r
- if (label == null) label = UnicodeLabel.NULL;\r
- labelSource = label;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return the NameLable representing the source\r
- */\r
- public UnicodeLabel getNameSource() {\r
- if (nameSource == null) {\r
- nameSource = new NameLabel(getUnicodePropertyFactory());\r
- }\r
- return nameSource;\r
- }\r
-\r
- /**\r
- * @param label\r
- */\r
- public BagFormatter setNameSource(UnicodeLabel label) {\r
- if (label == null) label = UnicodeLabel.NULL;\r
- nameSource = label;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return the UnicodeLabel representing the value\r
- */\r
- public UnicodeLabel getValueSource() {\r
- if (valueSource == null) valueSource = UnicodeLabel.NULL;\r
- return valueSource;\r
- }\r
-\r
- private String getValue(int cp, boolean shortVal) {\r
- String result = getValueSource().getValue(cp, shortVal);\r
- if (result == null) return NULL_VALUE;\r
- if (hexValue) result = hex(result, " ");\r
- return result;\r
- }\r
-\r
- /**\r
- * @param label\r
- */\r
- public BagFormatter setValueSource(UnicodeLabel label) {\r
- if (label == null) label = UnicodeLabel.NULL;\r
- valueSource = label;\r
- return this;\r
- }\r
-\r
- public BagFormatter setValueSource(String label) {\r
- return setValueSource(new UnicodeLabel.Constant(label));\r
- }\r
-\r
- /**\r
- * @return true if showCount is true\r
- */\r
- public boolean isShowCount() {\r
- return showCount;\r
- }\r
-\r
- /**\r
- * @param b true to show the count\r
- * @return this (for chaining)\r
- */\r
- public BagFormatter setShowCount(boolean b) {\r
- showCount = b;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return the property name\r
- */\r
- public String getPropName() {\r
- return propName;\r
- }\r
-\r
- /**\r
- * @param string\r
- * @return this (for chaining)\r
- */\r
- public BagFormatter setPropName(String string) {\r
- if (string == null) string = "";\r
- propName = string;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return true if this is a hexValue\r
- */\r
- public boolean isHexValue() {\r
- return hexValue;\r
- }\r
-\r
- /**\r
- * @param b\r
- * @return this (for chaining)\r
- */\r
- public BagFormatter setHexValue(boolean b) {\r
- hexValue = b;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return the full total\r
- */\r
- public int getFullTotal() {\r
- return fullTotal;\r
- }\r
-\r
- /**\r
- * @param i set the full total\r
- * @return this (for chaining)\r
- */\r
- public BagFormatter setFullTotal(int i) {\r
- fullTotal = i;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return the line separator\r
- */\r
- public String getLineSeparator() {\r
- return lineSeparator;\r
- }\r
-\r
- /**\r
- * @param string\r
- * @return this (for chaining)\r
- */\r
- public BagFormatter setLineSeparator(String string) {\r
- lineSeparator = string;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return the UnicodeLabel representing the range break source\r
- */\r
- public UnicodeLabel getRangeBreakSource() {\r
- if (rangeBreakSource == null) {\r
- Map labelMap = new HashMap();\r
- // reflects the code point types on p 25\r
- labelMap.put("Lo", "G&");\r
- labelMap.put("Lm", "G&");\r
- labelMap.put("Lu", "G&");\r
- labelMap.put("Lt", "G&");\r
- labelMap.put("Ll", "G&");\r
- labelMap.put("Mn", "G&");\r
- labelMap.put("Me", "G&");\r
- labelMap.put("Mc", "G&");\r
- labelMap.put("Nd", "G&");\r
- labelMap.put("Nl", "G&");\r
- labelMap.put("No", "G&");\r
- labelMap.put("Zs", "G&");\r
- labelMap.put("Pd", "G&");\r
- labelMap.put("Ps", "G&");\r
- labelMap.put("Pe", "G&");\r
- labelMap.put("Pc", "G&");\r
- labelMap.put("Po", "G&");\r
- labelMap.put("Pi", "G&");\r
- labelMap.put("Pf", "G&");\r
- labelMap.put("Sm", "G&");\r
- labelMap.put("Sc", "G&");\r
- labelMap.put("Sk", "G&");\r
- labelMap.put("So", "G&");\r
-\r
- labelMap.put("Zl", "Cf");\r
- labelMap.put("Zp", "Cf");\r
-\r
- rangeBreakSource =\r
- new UnicodeProperty\r
- .FilteredProperty(\r
- getUnicodePropertyFactory().getProperty(\r
- "General_Category"),\r
- new UnicodeProperty.MapFilter(labelMap))\r
- .setAllowValueAliasCollisions(true);\r
-\r
- /*\r
- "Cn", // = Other, Not Assigned 0\r
- "Cc", // = Other, Control 15\r
- "Cf", // = Other, Format 16\r
- UnicodeProperty.UNUSED, // missing\r
- "Co", // = Other, Private Use 18\r
- "Cs", // = Other, Surrogate 19\r
- */\r
- }\r
- return rangeBreakSource;\r
- }\r
-\r
- /**\r
- * @param label\r
- */\r
- public BagFormatter setRangeBreakSource(UnicodeLabel label) {\r
- if (label == null) label = UnicodeLabel.NULL;\r
- rangeBreakSource = label;\r
- return this;\r
- }\r
-\r
- /**\r
- * @return Returns the fixName.\r
- */\r
- public Transliterator getFixName() {\r
- return fixName;\r
- }\r
- /**\r
- * @param fixName The fixName to set.\r
- */\r
- public void setFixName(Transliterator fixName) {\r
- this.fixName = fixName;\r
- }\r
-\r
- public Tabber getTabber() {\r
- return tabber;\r
- }\r
-\r
- public void setTabber(Tabber tabber) {\r
- this.tabber = tabber;\r
- }\r
-\r
- public boolean isShowTotal() {\r
- return showTotal;\r
- }\r
-\r
- public void setShowTotal(boolean showTotal) {\r
- this.showTotal = showTotal;\r
- }\r
-}\r
-//#endif\r
+//##header J2SE15
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//#else
+/*
+ *******************************************************************************
+ * Copyright (C) 2002-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.text.MessageFormat;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
+
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+public class BagFormatter {
+ static final boolean DEBUG = false;
+ public static final boolean SHOW_FILES;
+ static {
+ boolean showFiles = false;
+ try {
+ showFiles = System.getProperty("SHOW_FILES") != null;
+ }
+ catch (SecurityException e) {
+ }
+ SHOW_FILES = showFiles;
+ }
+
+ public static final PrintWriter CONSOLE = new PrintWriter(System.out,true);
+
+ private static PrintWriter log = CONSOLE;
+
+ private boolean abbreviated = false;
+ private String separator = ",";
+ private String prefix = "[";
+ private String suffix = "]";
+ private UnicodeProperty.Factory source;
+ private UnicodeLabel nameSource;
+ private UnicodeLabel labelSource;
+ private UnicodeLabel rangeBreakSource;
+ private UnicodeLabel valueSource;
+ private String propName = "";
+ private boolean showCount = true;
+ //private boolean suppressReserved = true;
+ private boolean hexValue = false;
+ private static final String NULL_VALUE = "_NULL_VALUE_";
+ private int fullTotal = -1;
+ private boolean showTotal = true;
+ private String lineSeparator = "\r\n";
+ private Tabber tabber = new Tabber.MonoTabber();
+
+ /**
+  * Compares two UnicodeSets and returns the differences as formatted text.
+  * The work is delegated to the PrintWriter overload, whose output is
+  * captured in memory.
+  * @param name1 name of first set to be compared
+  * @param set1 first set
+  * @param name2 name of second set to be compared
+  * @param set2 second set
+  * @return formatted string
+  */
+ public String showSetDifferences(
+         String name1,
+         UnicodeSet set1,
+         String name2,
+         UnicodeSet set2) {
+
+     StringWriter captured = new StringWriter();
+     PrintWriter sink = new PrintWriter(captured);
+     showSetDifferences(sink, name1, set1, name2, set2);
+     captured.flush();
+     return captured.toString();
+ }
+
+ /**
+  * Compares two collections and returns the differences as formatted text.
+  * The work is delegated to the PrintWriter overload, whose output is
+  * captured in memory.
+  * @param name1 name of first collection to be compared
+  * @param set1 first collection
+  * @param name2 name of second collection to be compared
+  * @param set2 second collection
+  * @return formatted string
+  */
+ public String showSetDifferences(
+         String name1,
+         Collection set1,
+         String name2,
+         Collection set2) {
+
+     StringWriter captured = new StringWriter();
+     PrintWriter sink = new PrintWriter(captured);
+     showSetDifferences(sink, name1, set1, name2, set2);
+     captured.flush();
+     return captured.toString();
+ }
+
+ /**
+ * Compare two UnicodeSets, and write the differences to a writer.
+ * Calls the six-argument overload with a flags value of -1, i.e. with
+ * every flag bit set.
+ * @param pw destination writer
+ * @param name1 name of first set to be compared
+ * @param set1 first set
+ * @param name2 name of second set to be compared
+ * @param set2 second set
+ */
+ public void showSetDifferences(
+ PrintWriter pw,
+ String name1,
+ UnicodeSet set1,
+ String name2,
+ UnicodeSet set2) {
+ showSetDifferences(pw, name1, set1, name2, set2, -1);
+ }
+ /**
+  * Compare two UnicodeSets, and show the differences
+  * @param pw destination writer; CONSOLE is used when this is null
+  * @param name1 name of first set to be compared
+  * @param set1 first set
+  * @param name2 name of second set to be compared
+  * @param set2 second set
+  * @param flags bit mask selecting the sections to print:
+  *   1 = items in set1 but not in set2,
+  *   2 = items in set2 but not in set1,
+  *   4 = items in both sets
+  */
+ public void showSetDifferences(
+         PrintWriter pw,
+         String name1,
+         UnicodeSet set1,
+         String name2,
+         UnicodeSet set2,
+         int flags)
+ {
+     PrintWriter out = (pw != null) ? pw : CONSOLE;
+     String[] names = { name1, name2 };
+
+     if ((flags & 1) != 0) {
+         showDifferenceSection(out, inOut.format(names),
+                 new UnicodeSet(set1).removeAll(set2));
+     }
+     if ((flags & 2) != 0) {
+         showDifferenceSection(out, outIn.format(names),
+                 new UnicodeSet(set2).removeAll(set1));
+     }
+     if ((flags & 4) != 0) {
+         showDifferenceSection(out, inIn.format(names),
+                 new UnicodeSet(set2).retainAll(set1));
+     }
+     out.flush();
+ }
+
+ /** Prints one heading, framed by line separators, followed by the listing of its members. */
+ private void showDifferenceSection(PrintWriter out, String heading, UnicodeSet members) {
+     out.print(lineSeparator);
+     out.print(heading);
+     out.print(lineSeparator);
+     showSetNames(out, members);
+ }
+
+ /**
+  * Compare two collections, and show the differences: items only in the
+  * first, items only in the second, and items in both.
+  * @param pw destination writer; CONSOLE is used when this is null
+  * @param name1 name of first collection to be compared
+  * @param set1 first collection
+  * @param name2 name of second collection to be compared
+  * @param set2 second collection
+  */
+ public void showSetDifferences(
+         PrintWriter pw,
+         String name1,
+         Collection set1,
+         String name2,
+         Collection set2) {
+
+     PrintWriter out = (pw == null) ? CONSOLE : pw;
+     String[] names = { name1, name2 };
+
+     // Collection offers no clone(), so a HashSet working copy is used;
+     // order and duplicates of the inputs may therefore not be preserved.
+     Collection scratch = new HashSet(set1);
+
+     // in set1 only
+     scratch.removeAll(set2);
+     out.println();
+     out.println(inOut.format(names));
+     showSetNames(out, scratch);
+
+     // in set2 only
+     scratch.clear();
+     scratch.addAll(set2);
+     scratch.removeAll(set1);
+     out.println();
+     out.println(outIn.format(names));
+     showSetNames(out, scratch);
+
+     // in both
+     scratch.clear();
+     scratch.addAll(set1);
+     scratch.retainAll(set2);
+     out.println();
+     out.println(inIn.format(names));
+     showSetNames(out, scratch);
+ }
+
+ /**
+  * Returns a list of items in the collection, with each separated by the separator.
+  * Each item must not be null; its toString() is called for a printable representation
+  * @param c source collection
+  * @return a String representation of the list
+  * @internal
+  */
+ public String showSetNames(Object c) {
+     StringWriter captured = new StringWriter();
+     // The PrintWriter overload flushes before returning, so the buffer is complete.
+     showSetNames(new PrintWriter(captured), c);
+     return captured.toString();
+ }
+
+ /**
+ * Writes a listing of the items in the collection to the given writer, then flushes it.
+ * Each item must not be null; its toString() is called for a printable representation
+ * @param output destination to which to write names
+ * @param c source collection
+ * @internal
+ */
+ public void showSetNames(PrintWriter output, Object c) {
+ mainVisitor.doAt(c, output);
+ output.flush();
+ }
+
+ /**
+  * Writes a listing of the items in the collection to a UTF-8 encoded file,
+  * in the same format as {@link #showSetNames(PrintWriter, Object)}.
+  * Each item must not be null; its toString() is called for a printable representation
+  * @param filename destination to which to write names
+  * @param c source collection
+  * @throws IOException if the file cannot be opened for writing
+  * @internal
+  */
+ public void showSetNames(String filename, Object c) throws IOException {
+     PrintWriter pw = new PrintWriter(
+         new OutputStreamWriter(
+             new FileOutputStream(filename),"utf-8"));
+     try {
+         // Write to the file that was just opened; the listing previously went
+         // to the shared log, which left the file empty.
+         showSetNames(pw, c);
+     } finally {
+         // Release the file handle even if formatting throws.
+         pw.close();
+     }
+ }
+
+ /**
+  * Abbreviates src relative to pattern: words of src that equal the word at
+  * the same position in pattern are replaced by substitute. The two strings
+  * are split at the point returned by NameIterator.findMatchingEnd and the
+  * leading and trailing parts are compared separately.
+  * @param src the name to abbreviate
+  * @param pattern the name to compare against
+  * @param substitute replacement text for each matching word
+  * @return the abbreviated name
+  */
+ public String getAbbreviatedName(
+         String src,
+         String pattern,
+         String substitute) {
+
+     int tailLength = NameIterator.findMatchingEnd(src, pattern);
+     int srcSplit = src.length() - tailLength;
+     int patternSplit = pattern.length() - tailLength;
+     StringBuffer out = new StringBuffer();
+
+     // leading parts
+     String srcHead = src.substring(0, srcSplit);
+     String patternHead = pattern.substring(0, patternSplit);
+     addMatching(srcHead, patternHead, substitute, out);
+
+     // trailing parts
+     String srcTail = src.substring(srcSplit);
+     String patternTail = pattern.substring(patternSplit);
+     addMatching(srcTail, patternTail, substitute, out);
+
+     return out.toString();
+ }
+
+ abstract public static class Relation {
+ abstract public String getRelation(String a, String b);
+ }
+
+ static class NullRelation extends Relation {
+ public String getRelation(String a, String b) { return ""; }
+ }
+
+ private Relation r = new NullRelation();
+
+ public BagFormatter setRelation(Relation r) {
+ this.r = r;
+ return this; // for chaining
+ }
+
+ public Relation getRelation() {
+ return r;
+ }
+
+ /*
+ r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
+ */
+ /*
+ static final UnicodeSet NO_NAME =
+ new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
+ static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
+ static final UnicodeSet NAME_CHARACTERS =
+ new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
+
+ public UnicodeSet getSetForName(String namePattern) {
+ UnicodeSet result = new UnicodeSet();
+ Matcher m = Pattern.compile(namePattern).matcher("");
+ // check for no-name items, and add in bulk
+ m.reset("<no name>");
+ if (m.matches()) {
+ result.addAll(NO_NAME);
+ }
+ // check all others
+ UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
+ while (usi.next()) {
+ String name = getName(usi.codepoint);
+ if (name == null)
+ continue;
+ m.reset(name);
+ if (m.matches()) {
+ result.add(usi.codepoint);
+ }
+ }
+ // Note: if Regex had some API so that if we could tell that
+ // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
+ // then we could optimize by skipping whole swathes of characters
+ return result;
+ }
+ */
+
+ public BagFormatter setMergeRanges(boolean in) {
+ mergeRanges = in;
+ return this;
+ }
+ public BagFormatter setShowSetAlso(boolean b) {
+ showSetAlso = b;
+ return this;
+ }
+
+ public String getName(int codePoint) {
+ return getName("", codePoint, codePoint);
+ }
+
+ /**
+  * Returns the name text for a code point range, prefixed by sep.
+  * @param sep text placed before the name(s)
+  * @param start first code point of the range
+  * @param end last code point of the range (inclusive)
+  * @return "" when there is no name source; otherwise sep followed by the
+  *   single name (start == end) or by "first..last", where last is
+  *   abbreviated against first when abbreviation is enabled
+  */
+ public String getName(String sep, int start, int end) {
+     UnicodeLabel names = getNameSource();
+     if (names == null || names == UnicodeLabel.NULL) {
+         return "";
+     }
+     String first = getName(start, false);
+     if (start == end) {
+         return sep + first;
+     }
+     String last = getName(end, false);
+     if (first.length() == 0 && last.length() == 0) {
+         return sep;
+     }
+     if (abbreviated) {
+         last = getAbbreviatedName(last, first, "~");
+     }
+     return sep + first + ".." + last;
+ }
+
+ public String getName(String s) {
+ return getName(s, false);
+ }
+
+ /**
+  * Label that yields a character name for a code point, falling back to a
+  * bracketed placeholder (control, private-use, surrogate, noncharacter or
+  * reserved) when the Name property has no value.
+  */
+ public static class NameLabel extends UnicodeLabel {
+     UnicodeProperty nameProp;
+     UnicodeSet control;
+     UnicodeSet private_use;
+     UnicodeSet noncharacter;
+     UnicodeSet surrogate;
+
+     public NameLabel(UnicodeProperty.Factory source) {
+         nameProp = source.getProperty("Name");
+         control = source.getSet("gc=Cc");
+         private_use = source.getSet("gc=Co");
+         surrogate = source.getSet("gc=Cs");
+         noncharacter = source.getSet("noncharactercodepoint=yes");
+     }
+
+     public String getValue(int codePoint, boolean isShort) {
+         // The long form is prefixed with "U+XXXX "; the short form has no prefix.
+         String hcp;
+         if (isShort) {
+             hcp = "";
+         } else {
+             hcp = "U+" + Utility.hex(codePoint, 4) + " ";
+         }
+
+         String name = nameProp.getValue(codePoint);
+         if (name != null) {
+             return hcp + name;
+         }
+
+         // No name: build a placeholder. Only the reserved case carries the prefix.
+         String hex4 = Utility.hex(codePoint, 4);
+         if (control.contains(codePoint)) {
+             return "<control-" + hex4 + ">";
+         } else if (private_use.contains(codePoint)) {
+             return "<private-use-" + hex4 + ">";
+         } else if (surrogate.contains(codePoint)) {
+             return "<surrogate-" + hex4 + ">";
+         } else if (noncharacter.contains(codePoint)) {
+             return "<noncharacter-" + hex4 + ">";
+         }
+         //if (suppressReserved) return "";
+         return hcp + "<reserved-" + hex4 + ">";
+     }
+
+ }
+
+ // refactored
+ public String getName(int codePoint, boolean withCodePoint) {
+ String result = getNameSource().getValue(codePoint, !withCodePoint);
+ return fixName == null ? result : fixName.transliterate(result);
+ }
+
+ public String getName(String s, boolean withCodePoint) {
+ String result = getNameSource().getValue(s, separator, !withCodePoint);
+ return fixName == null ? result : fixName.transliterate(result);
+ }
+
+ public String hex(String s) {
+ return hex(s,separator);
+ }
+
+ public String hex(String s, String sep) {
+ return UnicodeLabel.HEX.getValue(s, sep, true);
+ }
+
+ public String hex(int start, int end) {
+ String s = Utility.hex(start,4);
+ if (start == end) return s;
+ return s + ".." + Utility.hex(end,4);
+ }
+
+ public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
+ this.source = source;
+ return this;
+ }
+
+ public UnicodeProperty.Factory getUnicodePropertyFactory() {
+ if (source == null) source = ICUPropertyFactory.make();
+ return source;
+ }
+
+ public BagFormatter () {
+ }
+
+ public BagFormatter (UnicodeProperty.Factory source) {
+ setUnicodePropertyFactory(source);
+ }
+
+ public String join(Object o) {
+ return labelVisitor.join(o);
+ }
+
+ // ===== PRIVATES =====
+
+ private Join labelVisitor = new Join();
+
+ private boolean mergeRanges = true;
+ private Transliterator showLiteral = null;
+ private Transliterator fixName = null;
+ private boolean showSetAlso = false;
+
+ private RangeFinder rf = new RangeFinder();
+
+ private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
+ private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
+ private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");
+
+ private MyVisitor mainVisitor = new MyVisitor();
+
+ /*
+ private String getLabels(int start, int end) {
+ Set names = new TreeSet();
+ for (int cp = start; cp <= end; ++cp) {
+ names.add(getLabel(cp));
+ }
+ return labelVisitor.join(names);
+ }
+ */
+
+ /**
+  * Appends the words of src to result, separated by single spaces, replacing
+  * each word that equals the word at the same position in pattern with
+  * substitute.
+  */
+ private void addMatching(
+         String src,
+         String pattern,
+         String substitute,
+         StringBuffer result) {
+     NameIterator srcWords = new NameIterator(src);
+     NameIterator patternWords = new NameIterator(pattern);
+     boolean needSpace = false;
+     for (String word = srcWords.next(); word != null; word = srcWords.next()) {
+         // patternWord is null once pattern runs out of words; equals() then fails.
+         String patternWord = patternWords.next();
+         if (needSpace) {
+             result.append(" ");
+         }
+         needSpace = true;
+         result.append(word.equals(patternWord) ? substitute : word);
+     }
+ }
+
+ private static NumberFormat nf =
+ NumberFormat.getIntegerInstance(Locale.ENGLISH);
+ static {
+ nf.setGroupingUsed(false);
+ }
+
+ private class MyVisitor extends Visitor {
+ private PrintWriter output;
+ String commentSeparator;
+ int counter;
+ int valueSize;
+ int labelSize;
+ boolean isHtml;
+ boolean inTable = false;
+
+ public void toOutput(String s) {
+ if (isHtml) {
+ if (inTable) {
+ output.print("</table>");
+ inTable = false;
+ }
+ output.print("<p>");
+ }
+ output.print(s);
+ if (isHtml)
+ output.println("</p>");
+ else
+ output.print(lineSeparator);
+ }
+
+ public void toTable(String s) {
+ if (isHtml && !inTable) {
+ output.print("<table>");
+ inTable = true;
+ }
+ output.print(tabber.process(s) + lineSeparator);
+ }
+
+ public void doAt(Object c, PrintWriter out) {
+ output = out;
+ isHtml = tabber instanceof Tabber.HTMLTabber;
+ counter = 0;
+
+ tabber.clear();
+ // old:
+ // 0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D>
+ // new
+ // 0009..000D ; White_Space #Cc [5] <control>..<control>
+ tabber.add(mergeRanges ? 14 : 6,Tabber.LEFT);
+
+ if (propName.length() > 0) {
+ tabber.add(propName.length() + 2,Tabber.LEFT);
+ }
+
+ valueSize = getValueSource().getMaxWidth(shortValue);
+ if (DEBUG) System.out.println("ValueSize: " + valueSize);
+ if (valueSize > 0) {
+ tabber.add(valueSize + 2,Tabber.LEFT); // value
+ }
+
+ tabber.add(3,Tabber.LEFT); // comment character
+
+ labelSize = getLabelSource(true).getMaxWidth(shortLabel);
+ if (labelSize > 0) {
+ tabber.add(labelSize + 1,Tabber.LEFT); // value
+ }
+
+ if (mergeRanges && showCount) {
+ tabber.add(5,Tabber.RIGHT);
+ }
+
+ if (showLiteral != null) {
+ tabber.add(4,Tabber.LEFT);
+ }
+ //myTabber.add(7,Tabber.LEFT);
+
+ commentSeparator = (showCount || showLiteral != null
+ || getLabelSource(true) != UnicodeLabel.NULL
+ || getNameSource() != UnicodeLabel.NULL)
+ ? "\t #" : "";
+
+ if (DEBUG) System.out.println("Tabber: " + tabber.toString());
+ if (DEBUG) System.out.println("Tabber: " + tabber.process(
+ "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER"));
+ doAt(c);
+ }
+
+ /**
+  * Formats the object into a string using this visitor's layout.
+  * @param o the object (collection, set, ...) to format
+  * @return the formatted text
+  */
+ public String format(Object o) {
+     StringWriter sw = new StringWriter();
+     PrintWriter pw = new PrintWriter(sw);
+     // Remember the current destination so that formatting into a string
+     // does not leave this visitor pointing at a closed writer.
+     PrintWriter savedOutput = output;
+     try {
+         // Route the visit into pw; calling doAt(o) alone wrote to whatever
+         // writer was used last and left the string empty.
+         doAt(o, pw);
+         pw.flush();
+     } finally {
+         output = savedOutput;
+         pw.close();
+     }
+     return sw.getBuffer().toString();
+ }
+
+ protected void doBefore(Object container, Object o) {
+ if (showSetAlso && container instanceof UnicodeSet) {
+ toOutput("#" + container);
+ }
+ }
+
+ protected void doBetween(Object container, Object lastItem, Object nextItem) {
+ }
+
+ /**
+  * Writes the trailing total lines, when showTotal is set. If a full total
+  * was supplied and differs from the number of code points listed, the
+  * remainder is reported as well and the full total is reset to -1.
+  */
+ protected void doAfter(Object container, Object o) {
+     boolean partialListing = fullTotal != -1 && fullTotal != counter;
+     if (!partialListing) {
+         if (showTotal) {
+             toOutput("");
+             toOutput("# Total code points: " + nf.format(counter));
+         }
+         return;
+     }
+     if (showTotal) {
+         toOutput("");
+         toOutput("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here.");
+         toOutput("# Total code points: " + nf.format(fullTotal));
+     }
+     fullTotal = -1;
+ }
+
+ protected void doSimpleAt(Object o) {
+ if (o instanceof Map.Entry) {
+ Map.Entry oo = (Map.Entry)o;
+ Object key = oo.getKey();
+ Object value = oo.getValue();
+ doBefore(o, key);
+ doAt(key);
+ output.println("\u2192");
+ doAt(value);
+ doAfter(o, value);
+ counter++;
+ } else if (o instanceof Visitor.CodePointRange) {
+ doAt((Visitor.CodePointRange) o);
+ } else {
+ String thing = o.toString();
+ String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true);
+ if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value;
+ String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true);
+ if (label.length() != 0) label = " " + label;
+ toTable(
+ hex(thing)
+ + value
+ + commentSeparator
+ + label
+ + insertLiteral(thing)
+ + "\t"
+ + getName(thing));
+ counter++;
+ }
+ }
+
+ /**
+  * Emits the lines for one code point range: a line per sub-range found by
+  * the RangeFinder when ranges are merged, otherwise a line per code point.
+  */
+ protected void doAt(Visitor.CodePointRange usi) {
+     if (mergeRanges) {
+         // RangeFinder works on a half-open interval, hence the +1 / -1.
+         rf.reset(usi.codepoint, usi.codepointEnd + 1);
+         while (rf.next()) {
+             showLine(rf.start, rf.limit - 1);
+         }
+         return;
+     }
+     for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
+         showLine(cp, cp);
+     }
+ }
+
+ private void showLine(int start, int end) {
+ String label = getLabelSource(true).getValue(start, shortLabel);
+ String value = getValue(start, shortValue);
+ if (value == NULL_VALUE) return;
+
+ counter += end - start + 1;
+ String pn = propName;
+ if (pn.length() != 0) {
+ pn = "\t; " + pn;
+ }
+ if (valueSize > 0) {
+ value = "\t; " + value;
+ } else if (value.length() > 0) {
+ throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
+ }
+ if (labelSize > 0) {
+ label = "\t" + label;
+ } else if (label.length() > 0) {
+ throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel));
+ }
+
+ String count = "";
+ if (mergeRanges && showCount) {
+ if (end == start) count = "\t";
+ else count = "\t ["+ nf.format(end - start + 1)+ "]";
+ }
+
+ toTable(
+ hex(start, end)
+ + pn
+ + value
+ + commentSeparator
+ + label
+ + count
+ + insertLiteral(start, end)
+ + getName("\t ", start, end));
+ }
+
+ private String insertLiteral(String thing) {
+ return (showLiteral == null ? ""
+ : " \t(" + showLiteral.transliterate(thing) + ") ");
+ }
+
+ private String insertLiteral(int start, int end) {
+ return (showLiteral == null ? "" :
+ " \t(" + showLiteral.transliterate(UTF16.valueOf(start))
+ + ((start != end)
+ ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))
+ : "")
+ + ") ");
+ }
+ /*
+ private String insertLiteral(int cp) {
+ return (showLiteral == null ? ""
+ : " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
+ }
+ */
+ }
+
+ /**
+  * Iterate through a string, breaking at words.
+  * @author Davis
+  */
+ private static class NameIterator {
+     String source;
+     int position;
+     int start;
+     int limit;
+
+     NameIterator(String source) {
+         this.source = source;
+         this.start = 0;
+         this.limit = source.length();
+     }
+     /**
+      * Find the next space-delimited word. The delimiting space is skipped
+      * but is not part of the returned word.
+      * @return the next word, or null when the string is exhausted
+      */
+     String next() {
+         if (position >= limit)
+             return null;
+         int pos = source.indexOf(' ', position);
+         if (pos < 0 || pos >= limit)
+             pos = limit;
+         String result = source.substring(position, pos);
+         position = pos + 1;
+         return result;
+     }
+
+     /**
+      * Computes how many trailing characters of s1, starting at a space,
+      * also end s2.
+      * @param s1 first string
+      * @param s2 second string
+      * @return the length of the part of s1 that starts at the first space
+      *   inside the common suffix of s1 and s2, or 0 if there is no such space
+      */
+     static int findMatchingEnd(String s1, String s2) {
+         int i = s1.length();
+         int j = s2.length();
+         // Walk backwards over the common suffix; stop at the first mismatch
+         // or as soon as either string is exhausted.
+         do {
+             --i;
+             --j;
+         } while (i >= 0 && j >= 0 && s1.charAt(i) == s2.charAt(j));
+
+         ++i; // counteract the last decrement: first index of the common suffix
+         i = s1.indexOf(' ', i); // move forward to space
+         if (i < 0)
+             return 0;
+         return s1.length() - i;
+     }
+ }
+
+ /**
+ * Splits a code point interval into consecutive sub-ranges whose label,
+ * value and range-break value are all equal. After each successful call to
+ * next(), the sub-range is [start, limit).
+ */
+ private class RangeFinder {
+ int start, limit;
+ // exclusive end of the whole interval being split
+ private int veryLimit;
+ //String label, value;
+ /** Prepares to split the half-open interval [rangeStart, rangeLimit). */
+ void reset(int rangeStart, int rangeLimit) {
+ limit = rangeStart;
+ veryLimit = rangeLimit;
+ }
+ /**
+ * Advances to the next sub-range.
+ * @return false when the interval is exhausted, true otherwise
+ */
+ boolean next() {
+ if (limit >= veryLimit)
+ return false;
+ start = limit; // set to end of last
+ // properties of the first code point define the sub-range
+ String label = getLabelSource(false).getValue(limit, true);
+ String value = getValue(limit, true);
+ String breaker = getRangeBreakSource().getValue(limit,true);
+ if (DEBUG && limit < 0x7F) System.out.println("Label: " + label + ", Value: " + value + ", Break: " + breaker);
+ limit++;
+ for (; limit < veryLimit; limit++) {
+ String s = getLabelSource(false).getValue(limit, true);
+ String v = getValue(limit, true);
+ String b = getRangeBreakSource().getValue(limit, true);
+ // NOTE(review): this debug line prints the range-start values (label, value, breaker),
+ // not the current s, v, b -- looks like a copy/paste slip; confirm before relying on it.
+ if (DEBUG && limit < 0x7F) System.out.println("*Label: " + label + ", Value: " + value + ", Break: " + breaker);
+ if (!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) break;
+ }
+ // at this point, limit is the first item that has a different label than source
+ // OR, we got to the end, and limit == veryLimit
+ return true;
+ }
+ }
+
+ /**
+  * Null-safe equality: true when both references are identical (including
+  * both null) or when a is non-null and a.equals(b).
+  */
+ boolean equalTo(Object a, Object b) {
+     return a == b || (a != null && a.equals(b));
+ }
+
+ boolean shortLabel = true;
+ boolean shortValue = true;
+
+ public String getPrefix() {
+ return prefix;
+ }
+
+ public String getSuffix() {
+ return suffix;
+ }
+
+ public BagFormatter setPrefix(String string) {
+ prefix = string;
+ return this;
+ }
+
+ public BagFormatter setSuffix(String string) {
+ suffix = string;
+ return this;
+ }
+
+ public boolean isAbbreviated() {
+ return abbreviated;
+ }
+
+ public BagFormatter setAbbreviated(boolean b) {
+ abbreviated = b;
+ return this;
+ }
+
+ /**
+ * Returns the label source, creating a default one on first use. The
+ * default is the General_Category property filtered so that Lu, Lt and Ll
+ * are all reported as "L&".
+ * @param visible not used by this method
+ * @return the UnicodeLabel representing the label
+ */
+ public UnicodeLabel getLabelSource(boolean visible) {
+ if (labelSource == null) {
+ Map labelMap = new HashMap();
+ //labelMap.put("Lo","L&");
+ labelMap.put("Lu","L&");
+ labelMap.put("Lt","L&");
+ labelMap.put("Ll","L&");
+ labelSource = new UnicodeProperty.FilteredProperty(
+ getUnicodePropertyFactory().getProperty("General_Category"),
+ new UnicodeProperty.MapFilter(labelMap)
+ ).setAllowValueAliasCollisions(true);
+ }
+ return labelSource;
+ }
+
+ /**
+ * @deprecated
+ */
+ public static void addAll(UnicodeSet source, Collection target) {
+ source.addAllTo(target);
+ }
+
+ // UTILITIES
+
+ public static final Transliterator hex = Transliterator.getInstance(
+ "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");
+
+ public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
+ return openReader(dir,filename,"UTF-8");
+ }
+
+ public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
+ File file = new File(dir, filename);
+ if (SHOW_FILES && log != null) {
+ log.println("Opening File: "
+ + file.getCanonicalPath());
+ }
+ return new BufferedReader(
+ new InputStreamReader(
+ new FileInputStream(file),
+ encoding),
+ 4*1024);
+ }
+
+ public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
+ return openWriter(dir,filename,"UTF-8");
+ }
+
+ public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
+ File file = new File(dir, filename);
+ if (SHOW_FILES && log != null) {
+ log.println("Creating File: "
+ + file.getCanonicalPath());
+ }
+ String parentName = file.getParent();
+ if (parentName != null) {
+ File parent = new File(parentName);
+ parent.mkdirs();
+ }
+ return new PrintWriter(
+ new BufferedWriter(
+ new OutputStreamWriter(
+ new FileOutputStream(file),
+ encoding),
+ 4*1024));
+ }
+ public static PrintWriter getLog() {
+ return log;
+ }
+ /**
+ * Sets the log writer. Note that the log field is static, so this call
+ * changes the log for every BagFormatter, not only this instance.
+ * @param writer the new log writer
+ * @return this (for chaining)
+ */
+ public BagFormatter setLog(PrintWriter writer) {
+ log = writer;
+ return this;
+ }
+ public String getSeparator() {
+ return separator;
+ }
+ public BagFormatter setSeparator(String string) {
+ separator = string;
+ return this;
+ }
+ public Transliterator getShowLiteral() {
+ return showLiteral;
+ }
+ public BagFormatter setShowLiteral(Transliterator transliterator) {
+ showLiteral = transliterator;
+ return this;
+ }
+
+ // ===== CONVENIENCES =====
+ private class Join extends Visitor {
+ StringBuffer output = new StringBuffer();
+ int depth = 0;
+ String join (Object o) {
+ output.setLength(0);
+ doAt(o);
+ return output.toString();
+ }
+ protected void doBefore(Object container, Object item) {
+ ++depth;
+ output.append(prefix);
+ }
+ protected void doAfter(Object container, Object item) {
+ output.append(suffix);
+ --depth;
+ }
+ protected void doBetween(Object container, Object lastItem, Object nextItem) {
+ output.append(separator);
+ }
+ protected void doSimpleAt(Object o) {
+ if (o != null) output.append(o.toString());
+ }
+ }
+
+ /**
+ * @param label
+ */
+ public BagFormatter setLabelSource(UnicodeLabel label) {
+ if (label == null) label = UnicodeLabel.NULL;
+ labelSource = label;
+ return this;
+ }
+
+ /**
+ * Returns the name source, creating a NameLabel from the current
+ * Unicode property factory on first use.
+ * @return the UnicodeLabel used to look up names
+ */
+ public UnicodeLabel getNameSource() {
+ if (nameSource == null) {
+ nameSource = new NameLabel(getUnicodePropertyFactory());
+ }
+ return nameSource;
+ }
+
+ /**
+ * @param label
+ */
+ public BagFormatter setNameSource(UnicodeLabel label) {
+ if (label == null) label = UnicodeLabel.NULL;
+ nameSource = label;
+ return this;
+ }
+
+ /**
+ * @return the UnicodeLabel representing the value
+ */
+ public UnicodeLabel getValueSource() {
+ if (valueSource == null) valueSource = UnicodeLabel.NULL;
+ return valueSource;
+ }
+
+ /**
+  * Looks up the value for a code point in the value source.
+  * @return the NULL_VALUE sentinel when the source has no value; otherwise
+  *   the value, converted to space-separated hex when hexValue is set
+  */
+ private String getValue(int cp, boolean shortVal) {
+     String raw = getValueSource().getValue(cp, shortVal);
+     if (raw == null) {
+         return NULL_VALUE;
+     }
+     return hexValue ? hex(raw, " ") : raw;
+ }
+
+ /**
+ * @param label
+ */
+ public BagFormatter setValueSource(UnicodeLabel label) {
+ if (label == null) label = UnicodeLabel.NULL;
+ valueSource = label;
+ return this;
+ }
+
+ public BagFormatter setValueSource(String label) {
+ return setValueSource(new UnicodeLabel.Constant(label));
+ }
+
+ /**
+ * @return true if showCount is true
+ */
+ public boolean isShowCount() {
+ return showCount;
+ }
+
+ /**
+ * @param b true to show the count
+ * @return this (for chaining)
+ */
+ public BagFormatter setShowCount(boolean b) {
+ showCount = b;
+ return this;
+ }
+
+ /**
+ * @return the property name
+ */
+ public String getPropName() {
+ return propName;
+ }
+
+ /**
+ * @param string
+ * @return this (for chaining)
+ */
+ public BagFormatter setPropName(String string) {
+ if (string == null) string = "";
+ propName = string;
+ return this;
+ }
+
+ /**
+ * @return true if this is a hexValue
+ */
+ public boolean isHexValue() {
+ return hexValue;
+ }
+
+ /**
+ * @param b
+ * @return this (for chaining)
+ */
+ public BagFormatter setHexValue(boolean b) {
+ hexValue = b;
+ return this;
+ }
+
+ /**
+ * @return the full total
+ */
+ public int getFullTotal() {
+ return fullTotal;
+ }
+
+ /**
+ * @param i set the full total
+ * @return this (for chaining)
+ */
+ public BagFormatter setFullTotal(int i) {
+ fullTotal = i;
+ return this;
+ }
+
+ /**
+ * @return the line separator
+ */
+ public String getLineSeparator() {
+ return lineSeparator;
+ }
+
+ /**
+ * @param string
+ * @return this (for chaining)
+ */
+ public BagFormatter setLineSeparator(String string) {
+ lineSeparator = string;
+ return this;
+ }
+
+ /**
+ * @return the UnicodeLabel representing the range break source
+ */
+ public UnicodeLabel getRangeBreakSource() {
+ if (rangeBreakSource == null) {
+ Map labelMap = new HashMap();
+ // reflects the code point types on p 25
+ labelMap.put("Lo", "G&");
+ labelMap.put("Lm", "G&");
+ labelMap.put("Lu", "G&");
+ labelMap.put("Lt", "G&");
+ labelMap.put("Ll", "G&");
+ labelMap.put("Mn", "G&");
+ labelMap.put("Me", "G&");
+ labelMap.put("Mc", "G&");
+ labelMap.put("Nd", "G&");
+ labelMap.put("Nl", "G&");
+ labelMap.put("No", "G&");
+ labelMap.put("Zs", "G&");
+ labelMap.put("Pd", "G&");
+ labelMap.put("Ps", "G&");
+ labelMap.put("Pe", "G&");
+ labelMap.put("Pc", "G&");
+ labelMap.put("Po", "G&");
+ labelMap.put("Pi", "G&");
+ labelMap.put("Pf", "G&");
+ labelMap.put("Sm", "G&");
+ labelMap.put("Sc", "G&");
+ labelMap.put("Sk", "G&");
+ labelMap.put("So", "G&");
+
+ labelMap.put("Zl", "Cf");
+ labelMap.put("Zp", "Cf");
+
+ rangeBreakSource =
+ new UnicodeProperty
+ .FilteredProperty(
+ getUnicodePropertyFactory().getProperty(
+ "General_Category"),
+ new UnicodeProperty.MapFilter(labelMap))
+ .setAllowValueAliasCollisions(true);
+
+ /*
+ "Cn", // = Other, Not Assigned 0
+ "Cc", // = Other, Control 15
+ "Cf", // = Other, Format 16
+ UnicodeProperty.UNUSED, // missing
+ "Co", // = Other, Private Use 18
+ "Cs", // = Other, Surrogate 19
+ */
+ }
+ return rangeBreakSource;
+ }
+
+ /**
+ * @param label
+ */
+ public BagFormatter setRangeBreakSource(UnicodeLabel label) {
+ if (label == null) label = UnicodeLabel.NULL;
+ rangeBreakSource = label;
+ return this;
+ }
+
+ /**
+ * @return Returns the fixName.
+ */
+ public Transliterator getFixName() {
+ return fixName;
+ }
+ /**
+ * @param fixName The fixName to set.
+ */
+ public void setFixName(Transliterator fixName) {
+ this.fixName = fixName;
+ }
+
+ public Tabber getTabber() {
+ return tabber;
+ }
+
+ public void setTabber(Tabber tabber) {
+ this.tabber = tabber;
+ }
+
+ public boolean isShowTotal() {
+ return showTotal;
+ }
+
+ public void setShowTotal(boolean showTotal) {
+ this.showTotal = showTotal;
+ }
+}
+//#endif