2 **********************************************************************
3 * Copyright (c) 2009-2012, Google, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 **********************************************************************
9 package com.ibm.icu.dev.tool.cldr;
11 import java.awt.GraphicsEnvironment;
12 import java.awt.Shape;
13 import java.awt.font.FontRenderContext;
14 import java.awt.font.GlyphVector;
15 import java.awt.geom.AffineTransform;
16 import java.awt.geom.PathIterator;
17 import java.awt.geom.Rectangle2D;
18 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.PrintWriter;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Comparator;
25 import java.util.HashMap;
26 import java.util.HashSet;
28 import java.util.Map.Entry;
30 import java.util.TreeMap;
31 import java.util.TreeSet;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
35 import com.ibm.icu.dev.util.BagFormatter;
36 import com.ibm.icu.dev.util.Tabber.HTMLTabber;
37 import com.ibm.icu.dev.util.TransliteratorUtilities;
38 import com.ibm.icu.dev.util.UnicodeMap;
39 import com.ibm.icu.dev.util.UnicodeMap.Composer;
40 import com.ibm.icu.dev.util.UnicodeMapIterator;
41 import com.ibm.icu.dev.util.XEquivalenceClass.SetMaker;
42 import com.ibm.icu.impl.Row;
43 import com.ibm.icu.impl.Row.R2;
44 import com.ibm.icu.impl.Utility;
45 import com.ibm.icu.lang.UCharacter;
46 import com.ibm.icu.lang.UScript;
47 import com.ibm.icu.text.Collator;
48 import com.ibm.icu.text.Normalizer;
49 import com.ibm.icu.text.UTF16;
50 import com.ibm.icu.text.UnicodeSet;
51 import com.ibm.icu.text.UnicodeSetIterator;
54 public class CheckSystemFonts {
// Directory that receives every report file; assigned in main() from the second command-line argument.
56 static String outputDirectoryName;
// Font names collected by loadSkipShapes(); getCoverage() skips the glyph-shape analysis for any font listed here.
57 static Set<String> SKIP_SHAPES = new HashSet<String>();
// Command-line entry point. args[0] is a regular expression selecting fonts by name; args[1] is an
// existing directory that receives the report files.
// Throws IllegalArgumentException when fewer than two arguments are given or args[1] is not a directory.
// NOTE(review): the embedded numbering skips 65-66, 72-74, 78-81 and others, so any statements on those
// lines are not visible in this listing.
59 public static void main(String[] args) throws IOException {
60 System.out.println("Arguments:\t" + Arrays.asList(args));
// Both arguments are mandatory.
61 if (args.length < 2) {
62 throw new IllegalArgumentException("Need command-line args:" +
63 "\n\t\tfont-name-regex" +
64 "\n\t\toutput-directory"
// The font-name pattern is compiled case-insensitively; getFontData() resets the matcher for each font name.
67 Matcher nameMatcher = Pattern.compile(args[0], Pattern.CASE_INSENSITIVE).matcher("");
68 outputDirectoryName = args[1].trim();
69 File outputDirectory = new File(outputDirectoryName);
70 if (!outputDirectory.isDirectory()) {
71 throw new IllegalArgumentException("2nd arg must be valid directory");
// data: coverage set -> names of the fonts having exactly that coverage; fontMap: font name -> Font.
// Both maps are filled by getFontData().
75 Map<UnicodeSet,Set<String>> data = new TreeMap<UnicodeSet, Set<String>>();
76 Map<String, Font> fontMap = new TreeMap<String, Font>();
77 getFontData(nameMatcher, data, fontMap);
// Write the reports: equivalent-coverage list, raw coverage per font group, combo names, per-range coverage.
82 UnicodeMap<Set<String>> map = showEquivalentCoverage(data);
84 showRawCoverage(data);
86 Map<Set<String>, String> toShortName = showRawCoverage(map);
88 showFullCoverage(map, toShortName);
// Fills SKIP_SHAPES from the file skip_fonts.txt in the output directory: every whitespace-separated
// token of every line read is added as a font name.
// NOTE(review): the enclosing try/loop lines and any close() of the reader fall on lines missing from
// this listing (92, 94, 100-102) — confirm the reader is closed.
91 private static void loadSkipShapes() {
93 BufferedReader in = BagFormatter.openUTF8Reader(outputDirectoryName, "skip_fonts.txt");
95 String line = in.readLine();
// readLine() returns null at end of file.
96 if (line == null) break;
// NOTE(review): a blank line splits into a single empty token, so "" would be added to the set — confirm that is acceptable.
97 String[] fonts = line.trim().split("\\s+");
98 for (String font : fonts) {
99 SKIP_SHAPES.add(font);
// An I/O failure is reported on stderr; no rethrow is visible in this listing.
103 } catch (IOException e) {
104 System.err.println("Couldn't open:\t" + outputDirectoryName + "/" + "skip_fonts.txt");
// Collator used to order font names in the TreeSets built by this class.
// NOTE(review): despite the name, Collator.getInstance() without arguments uses the default locale — confirm that is intended.
109 private static final Collator English = Collator.getInstance();
// Comparison strength is set to SECONDARY (presumably from a static initializer whose braces are not visible in this listing).
112 English.setStrength(Collator.SECONDARY);
// Code points that are ignored: general categories Cn (unassigned), Co (private use) and Cs (surrogate).
115 public static final UnicodeSet DONT_CARE = new UnicodeSet("[[:cn:][:co:][:cs:]]").freeze();
// Everything else: the code points whose font coverage is examined (complement of DONT_CARE).
116 public static final UnicodeSet COVERAGE = new UnicodeSet(DONT_CARE).complement().freeze();
118 private static final Comparator<String> SHORTER_FIRST = new Comparator<String>() {
119 public int compare(String n1, String n2) {
120 int result = n1.length() - n2.length();
121 if (result != 0) return result;
122 return n1.compareTo(n2);
126 private static final Comparator<UnicodeSet> LONGER_SET_FIRST = new Comparator<UnicodeSet>() {
127 public int compare(UnicodeSet n1, UnicodeSet n2) {
128 int result = n1.size() - n2.size();
129 if (result != 0) return -result;
130 return n1.compareTo(n2);
134 private static final Comparator<Collection> SHORTER_COLLECTION_FIRST = new Comparator<Collection>() {
135 public int compare(Collection n1, Collection n2) {
136 int result = n1.size() - n2.size();
137 if (result != 0) return result;
138 return UnicodeSet.compare(n1, n2);
142 private static final HashSet SKIP_TERMS = new HashSet(Arrays.asList("black", "blackitalic", "bold", "boldit", "bolditalic", "bolditalicmt", "boldmt",
143 "boldob", "boldoblique", "boldslanted", "book", "bookitalic", "condensed", "condensedblack", "condensedbold", "condensedextrabold",
144 "condensedlight", "condensedmedium", "extracondensed", "extralight", "heavy", "italic", "italicmt", "light", "lightit", "lightitalic", "medium",
145 "mediumitalic", "oblique", "regular", "roman", "semibold", "semibolditalic", "shadow", "slanted", "ultrabold", "ultralight", "ultralightitalic"
// Composer handed to UnicodeMap.composeWith() in showEquivalentCoverage() to combine two font-name sets.
// NOTE(review): the body of compose() (lines 151-154) and part of intern() (160-161, 163+) are not visible in this listing.
148 private static Composer<Set<String>> composer = new Composer<Set<String>>() {
// Cache keyed by the pair (a, b), so a combination that was already built is reused instead of rebuilt.
149 Map<R2<Set<String>, Set<String>>,Set<String>> cache = new HashMap<R2<Set<String>, Set<String>>,Set<String>>();
150 public Set<String> compose(int codePoint, String string, Set<String> a, Set<String> b) {
// Looks the pair up in the cache; on a miss a new set ordered by the English collator is created and stored.
// Presumably the missing lines fill it with the union of a and b — confirm against the full source.
155 private Set<String> intern(Set<String> a, Set<String> b) {
156 R2<Set<String>, Set<String>> row = Row.of(a, b);
157 Set<String> result = cache.get(row);
158 if (result == null) {
159 result = new TreeSet<String>(English);
162 cache.put(row, result);
// Writes coverage.txt. First, one line per code point range of map: the range, the short name of the
// font set covering it (looked up in toShortName) and the character name(s). Then a MISSING section
// listing the code points of COVERAGE that are not keys of map, labelled by script name and Unicode age.
// NOTE(review): closing braces and any close() of the writer fall on lines missing from this listing.
169 private static void showFullCoverage(UnicodeMap<Set<String>> map, Map<Set<String>, String> toShortName) throws IOException {
170 System.out.println("\n***COVERAGE:\t" + map.keySet().size() + "\n");
171 PrintWriter out = BagFormatter.openUTF8Writer(outputDirectoryName, "coverage.txt");
// NOTE(review): the iterator is declared UnicodeMapIterator<String> although map holds Set<String> values — confirm the intended type argument.
173 for (UnicodeMapIterator<String> it = new UnicodeMapIterator<String>(map); it.nextRange();) {
174 String codes = "U+" + Utility.hex(it.codepoint);
175 String names = UCharacter.getExtendedName(it.codepoint);
// A range of more than one code point shows both end points.
176 if (it.codepointEnd != it.codepoint) {
177 codes += "..U+" + Utility.hex(it.codepointEnd);
178 names += ".." + UCharacter.getExtendedName(it.codepointEnd);
180 out.println(codes + "\t" + toShortName.get(map.get(it.codepoint)) + "\t" + names);
// Everything in COVERAGE that is absent from the map's key set.
183 UnicodeSet missing = new UnicodeSet(COVERAGE).removeAll(map.keySet());
184 out.println("\nMISSING:\t" + missing.size() + "\n");
// Label each missing code point "<script name>-<short age>".
186 UnicodeMap<String> missingMap = new UnicodeMap<String>();
187 for (UnicodeSetIterator it = new UnicodeSetIterator(missing); it.next();) {
188 missingMap.put(it.codepoint, UScript.getName(UScript.getScript(it.codepoint)) + "-" + getShortAge(it.codepoint));
// Emit the missing ranges grouped by label, with the labels ordered by the English collator.
191 Set<String> sorted = new TreeSet<String>(English);
192 sorted.addAll(missingMap.values());
193 for (String value : sorted) {
194 UnicodeSet items = missingMap.getSet(value);
195 for (UnicodeSetIterator it = new UnicodeSetIterator(items); it.nextRange();) {
196 String codes = "U+" + Utility.hex(it.codepoint);
197 String names = UCharacter.getExtendedName(it.codepoint);
198 if (it.codepointEnd != it.codepoint) {
199 codes += "..U+" + Utility.hex(it.codepointEnd);
200 names += ".." + UCharacter.getExtendedName(it.codepointEnd);
202 out.println(codes + "\t" + value + "\t" + names);
// Writes combo_names.txt and builds the map from each distinct font-name set in map to a generated
// short name ("combo" followed by a counter); main() passes that map on to showFullCoverage().
// Sets are processed smallest first. When getLargestContained() finds an already named set inside the
// current one, the line shows that set's short name plus the remaining font names; otherwise the whole set.
// NOTE(review): the declarations of count and valueName, the else line, the return and any close() of
// the writer fall on lines missing from this listing.
209 private static Map<Set<String>, String> showRawCoverage(UnicodeMap<Set<String>> map) throws IOException {
210 System.out.println("\n***COMBO NAMES\n");
211 PrintWriter out = BagFormatter.openUTF8Writer(outputDirectoryName, "combo_names.txt");
214 Map<Set<String>, String> toShortName = new HashMap<Set<String>, String>();
// Smaller sets come first, so they are already named when a larger set containing them is reached.
215 TreeSet<Set<String>> sortedValues = new TreeSet<Set<String>>(SHORTER_COLLECTION_FIRST);
216 sortedValues.addAll(map.values());
217 for (Set<String> value : sortedValues) {
218 String shortName = "combo" + count++;
219 Set<String> contained = getLargestContained(value, toShortName.keySet());
221 if (contained != null) {
// remainder = the font names of value that are not in the contained set.
222 Set<String> remainder = new TreeSet<String>();
223 remainder.addAll(value);
224 remainder.removeAll(contained);
225 valueName = toShortName.get(contained) + " + " + remainder;
227 valueName = value.toString();
229 toShortName.put(value, shortName);
230 out.println(shortName + "\t" + valueName);
// Writes raw_coverage.txt: for each coverage set in data, one line with the first font name of its
// group, the number of code points in the set, and the set after addBridges(DONT_CARE)
// (the console heading describes this as "bridging unassigned").
// NOTE(review): no close() of the writer is visible in this listing (lines 246-249 are missing) — confirm.
236 private static void showRawCoverage(Map<UnicodeSet, Set<String>> data) throws IOException {
237 System.out.println("\n***RAW COVERAGE (bridging unassigned)\n");
238 PrintWriter out = BagFormatter.openUTF8Writer(outputDirectoryName, "raw_coverage.txt");
240 for (Entry<UnicodeSet, Set<String>> entry : data.entrySet()) {
241 UnicodeSet s = entry.getKey();
242 Set<String> nameSet = entry.getValue();
// The first name in the group's iteration order stands for the whole group.
243 String name = nameSet.iterator().next();
// Work on a copy so the key stored in data is not modified.
244 UnicodeSet bridged = new UnicodeSet(s).addBridges(DONT_CARE);
245 out.println(name + "\t" + s.size() + "\t" + bridged);
// Builds the map from code point to font-name set that main() passes on to the later reports, and
// writes equiv_coverage.txt.
// For each coverage set, a per-name set (temp2) is composed into the map over the covered code points
// using composer. Groups holding more than one font are additionally listed in the output file.
// NOTE(review): the null check and filling of temp2 (lines 264, 266-267), line 272, the return and any
// close() of the writer fall on lines missing from this listing.
250 private static UnicodeMap<Set<String>> showEquivalentCoverage(Map<UnicodeSet, Set<String>> data) throws IOException {
251 System.out.println("\n***EQUIVALENT COVERAGE\n");
252 PrintWriter out = BagFormatter.openUTF8Writer(outputDirectoryName, "equiv_coverage.txt");
254 UnicodeMap<Set<String>> map = new UnicodeMap<Set<String>>();
// Lookup of one set per representative font name (presumably a singleton set holding that name, per the variable name).
256 Map<String,Set<String>> nameToSingleton = new HashMap<String,Set<String>>();
258 for (Entry<UnicodeSet, Set<String>> entry : data.entrySet()) {
259 UnicodeSet s = entry.getKey();
260 Set<String> nameSet = entry.getValue();
// The first name in the group's iteration order stands for the whole group.
261 String name = nameSet.iterator().next();
262 //System.out.println(s);
263 Set<String> temp2 = nameToSingleton.get(name);
265 temp2 = new TreeSet<String>(English);
268 map.composeWith(s, temp2, composer);
// Only groups with several fonts of identical coverage are written to the file.
269 if (nameSet.size() > 1) {
270 TreeSet<String> temp = new TreeSet<String>(English);
271 temp.addAll(nameSet);
273 out.println(name + "\t" + temp);
// Writes the pairs recorded in equivalences (code points found to share a glyph shape) to two files:
// same_glyphs.txt as tab-separated text and same_glyphs.html as an HTML table built with HTMLTabber.
// NOTE(review): several lines are missing from this listing (289, 302, 309, 314, 320-321, 323, 325-329,
// 331-332, 336, 338+), including the tails of the two concatenations, the font loop body and any close() calls.
280 private static void showSameGlyphs() throws IOException {
281 System.out.println("\n***Visual Equivalences");
282 PrintWriter out = BagFormatter.openUTF8Writer(outputDirectoryName, "same_glyphs.txt");
283 PrintWriter out2 = BagFormatter.openUTF8Writer(outputDirectoryName, "same_glyphs.html");
// NOTE(review): the meta line already emits </HEAD>, and </head> is emitted again two lines later, after the stylesheet link — confirm the intended markup.
284 out2.println("<html><head>");
285 out2.println("<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
286 out2.println("<link rel='stylesheet' href='index.css' type='text/css'>");
287 out2.println("</head><body><table>");
288 HTMLTabber tabber = new HTMLTabber();
// The header row is processed before any per-column parameters are set.
// NOTE(review): the labels repeat "NFC1" and "Ch1" where "NFC2" and "Ch2" look intended.
290 out2.println(tabber.process("Code1\tCode2\tNFC1\tNFC1\tCh1\tCh1\tCh1/F\tCh2/F\tName1\tName2\tFonts"));
// Per-column cell attributes for the data rows; columns 6 and 7 are set per row further down.
291 tabber.setParameters(0, "class='c'");
292 tabber.setParameters(1, "class='c'");
293 tabber.setParameters(2, "class='nf'");
294 tabber.setParameters(3, "class='nf'");
295 tabber.setParameters(4, "class='p'");
296 tabber.setParameters(5, "class='p'");
297 //tabber.setParameters(6, "class='q'");
298 //tabber.setParameters(7, "class='q'");
299 tabber.setParameters(8, "class='n'");
300 tabber.setParameters(9, "class='n'");
301 tabber.setParameters(10, "class='f'");
303 for (R2<Integer,Integer> sample : equivalences.keySet()) {
304 final Set<String> reasonSet = equivalences.get(sample);
// The printable form of the reasons is capped at 100 characters plus an ellipsis.
305 String reasons = reasonSet.toString();
306 if (reasons.length() > 100) reasons = reasons.substring(0,100) + "...";
307 final Integer codepoint1 = sample.get0();
308 final Integer codepoint2 = sample.get1();
// Plain-text row: codes, NFC markers, the characters themselves, names.
310 out.println("U+" + Utility.hex(codepoint1) + "\t" + "U+" + Utility.hex(codepoint2)
311 + "\t" + showNfc(codepoint1) + "\t" + showNfc(codepoint2)
312 + "\t" + showChar(codepoint1, false) + "\t" + showChar(codepoint2, false)
313 + "\t" + UCharacter.getExtendedName(codepoint1) + "\t" + UCharacter.getExtendedName(codepoint2)
// HTML row: the characters appear twice; the second pair lands in columns 6 and 7, which get the font-family style.
// NOTE(review): codepoint1 is passed html=false while codepoint2 is passed html=true — confirm codepoint1 should not be HTML-escaped as well.
315 String line = "U+" + Utility.hex(codepoint1) + "\t" + "U+" + Utility.hex(codepoint2)
316 + "\t" + showNfc(codepoint1) + "\t" + showNfc(codepoint2)
317 + "\t" + showChar(codepoint1, false) + "\t" + showChar(codepoint2, true)
318 + "\t" + showChar(codepoint1, false) + "\t" + showChar(codepoint2, true)
319 + "\t" + UCharacter.getExtendedName(codepoint1) + "\t" + UCharacter.getExtendedName(codepoint2)
// Start of the cell attribute naming fonts from reasonSet; the loop stops once maxCount is used up
// (its declaration and decrement are on lines not visible here).
322 String fonts = "class='q' style='font-family:";
324 for (String font : reasonSet) {
330 if (maxCount <= 0) break;
333 tabber.setParameters(6, fonts);
334 tabber.setParameters(7, fonts);
335 out2.println(tabber.process(line));
337 out2.println("</table></body>");
// Writes invisibles.txt: one line per entry of the invisibles map with the code in hex, the character
// itself, its NFC marker from showNfc() and its extended name.
// NOTE(review): the tail of the println expression and any close() of the writer fall on lines missing
// from this listing (353-358); whether reasons is printed cannot be seen here.
342 private static void showInvisibles() throws IOException {
343 System.out.println("\n***Invisibles Equivalences");
344 PrintWriter out = BagFormatter.openUTF8Writer(outputDirectoryName, "invisibles.txt");
345 for (String sample : invisibles) {
// The printable form of the stored set is capped at 100 characters plus an ellipsis.
346 String reasons = invisibles.get(sample).toString();
347 if (reasons.length() > 100) reasons = reasons.substring(0,100) + "...";
// Only the first code point of the key string is used for the character, NFC marker and name.
348 int codepoint = sample.codePointAt(0);
349 out.println("U+" + Utility.hex(sample)
350 + "\t" + showChar(codepoint, false)
351 + "\t" + showNfc(codepoint)
352 + "\t" + UCharacter.getExtendedName(codepoint)
// Collects the system fonts into fontMap (font name -> Font), then computes each font's coverage and
// groups font names by identical coverage in data (coverage set -> font names).
// Three checks are applied to each font: whether it is plain, whether the part of its name after the
// last '-' is in SKIP_TERMS, and whether nameMatcher finds a match in the name.
// NOTE(review): the bodies of those checks (presumably "continue"), the else line and the statement
// adding the name to sameFonts fall on lines missing from this listing.
359 private static void getFontData(Matcher nameMatcher, Map<UnicodeSet, Set<String>> data, Map<String, Font> fontMap) {
360 GraphicsEnvironment env = GraphicsEnvironment.getLocalGraphicsEnvironment();
361 Font[] fonts = env.getAllFonts();
362 for (Font font : fonts) {
363 if (!font.isPlain()) {
366 String name = font.getName();
// term = lower-cased suffix after the last '-', or "" when the name has no '-'.
367 int lastDash = name.lastIndexOf('-');
368 String term = lastDash < 0 ? "" : name.substring(lastDash+1).toLowerCase();
369 if (SKIP_TERMS.contains(term)) {
// A null matcher disables the name filter.
372 if (nameMatcher != null && !nameMatcher.reset(name).find()) {
375 fontMap.put(name,font);
// Second pass over the collected fonts, in the iteration order of fontMap.
377 for (Entry<String, Font> entry : fontMap.entrySet()) {
378 String name = entry.getKey();
379 Font font = entry.getValue();
380 System.out.println(name);
381 UnicodeSet coverage = getCoverage(font);
382 Set<String> sameFonts = data.get(coverage);
383 if (sameFonts == null) {
// Each group is ordered shortest name first, so iterator().next() yields the shortest name.
384 data.put(coverage, sameFonts = new TreeSet<String>(SHORTER_FIRST));
386 System.out.println("\tNote: same coverage as " + sameFonts.iterator().next());
392 static Comparator<Integer> NFCLower = new Comparator<Integer>() {
393 public int compare(Integer o1, Integer o2) {
394 boolean n1 = Normalizer.isNormalized(o1, Normalizer.NFC, 0);
395 boolean n2 = Normalizer.isNormalized(o2, Normalizer.NFC, 0);
396 if (n1 != n2) return n1 ? -1 : 1;
397 n1 = Normalizer.isNormalized(o1, Normalizer.NFKC, 0);
398 n2 = Normalizer.isNormalized(o2, Normalizer.NFKC, 0);
399 if (n1 != n2) return n1 ? -1 : 1;
400 return o1.compareTo(o2);
404 static Comparator<R2<Integer,Integer>> NFCLowerR2 = new Comparator<R2<Integer,Integer>>() {
405 public int compare(R2<Integer, Integer> o1, R2<Integer, Integer> o2) {
406 int diff = NFCLower.compare(o1.get0(), o2.get0());
407 if (diff != 0) return diff;
408 return NFCLower.compare(o1.get1(), o2.get1());
412 private static String showNfc(int codepoint) {
413 return Normalizer.isNormalized(codepoint, Normalizer.NFC, 0) ? ""
414 : Normalizer.isNormalized(codepoint, Normalizer.NFKC, 0) ? "!C" : "!K";
417 private static String showChar(Integer item, boolean html) {
418 return rtlProtect(UTF16.valueOf(item), html);
// Code points whose Bidi_Class is R, AL or AN.
420 static UnicodeSet RTL = new UnicodeSet("[[:bc=R:][:bc=AL:][:bc=AN:]]").freeze();
// Control characters (Cc) plus line separators (Zl) and paragraph separators (Zp).
421 static UnicodeSet CONTROLS = new UnicodeSet("[[:cc:][:Zl:][:Zp:]]").freeze();
// Code points with the Default_Ignorable_Code_Point property.
422 static UnicodeSet INVISIBLES = new UnicodeSet("[:di:]").freeze();
// U+200E LEFT-TO-RIGHT MARK; rtlProtect() wraps text in it.
423 static final char LRM = '\u200E';
// Prepares a string for display in a report. Visible here: text that contains RTL characters, or that
// starts with a double quote, is wrapped in LRM marks; when html is true the result is passed through
// TransliteratorUtilities.toHTML, otherwise it is returned as is.
// NOTE(review): the statements of the CONTROLS and INVISIBLES branches (lines 427 and 429) are not
// visible in this listing.
425 private static String rtlProtect(String source, boolean html) {
// The branches are mutually exclusive and checked in this order: controls, invisibles, then RTL/quote.
426 if (CONTROLS.containsSome(source)) {
428 } else if (INVISIBLES.containsSome(source)) {
430 } else if (RTL.containsSome(source) || source.startsWith("\"")) {
431 source = LRM + source + LRM;
433 return html ? TransliteratorUtilities.toHTML.transform(source) : source;
// Searches collection for a set whose elements are all contained in value; the caller
// (showRawCoverage) treats a null result as "nothing found".
// NOTE(review): only the two guards are visible in this listing. Presumably candidates smaller than
// the current best are skipped, best is replaced on each containment hit and best is returned — confirm
// against the full source (lines 441-442 and 444-449 are missing).
437 private static Set<String> getLargestContained(Set<String> value, Collection<Set<String>> collection) {
438 Set<String> best = null;
439 for (Set<String> set : collection) {
// Guard on candidates that are smaller than the best found so far.
440 if (best != null && best.size() > set.size()) {
// Guard on candidates that are wholly contained in value.
443 if (value.containsAll(set)) {
450 private static String getShortAge(int i) {
451 String age = UCharacter.getAge(i).toString();
452 return age.substring(0,age.indexOf('.',age.indexOf('.') + 1));
// SetMaker implementation; the only statement visible in this listing returns a new, empty TreeSet.
// NOTE(review): the method header (line 456) is not visible here, and no use of setMaker appears in this listing.
455 static SetMaker setMaker = new SetMaker() {
457 return new TreeSet();
// Code point -> set of strings (presumably font names — confirm) recorded by getCoverage() when the
// glyph outline for that code point is empty; reported by showInvisibles().
461 static UnicodeMap<Set<String>> invisibles = new UnicodeMap();
// Pair of code points (smaller value first) that getCoverage() found sharing a glyph shape -> set of
// reasons (presumably font names — confirm); keys are ordered by NFCLowerR2. Reported by showSameGlyphs().
462 static Map<R2<Integer,Integer>, Set<String>> equivalences = new TreeMap<R2<Integer,Integer>, Set<String>>(NFCLowerR2);
463 // static Set<String> SKIP_SHAPES = new HashSet<String>(Arrays.asList(
466 // "DFKaiShu-SB-Estd-BF",
472 // "SIL-Hei-Med-Jian",
473 // "SIL-Kai-Reg-Jian",
476 // "HelveticaCYBoldOblique",
477 // "HelveticaCYOblique",
478 // "HelveticaCYPlain",
479 // "HoeflerText-Ornaments",
481 // "MSReferenceSpecialty",
489 // "RosewoodStd-Fill",
507 // bug on Mac: http://forums.sun.com/thread.jspa?threadID=5209611
// Computes the set of code points in COVERAGE that the font can really display and returns it frozen.
// Unless the font is listed in SKIP_SHAPES, it also records which code points have an empty glyph
// outline (in invisibles) and which code points share an identical glyph shape (in equivalences).
// NOTE(review): many lines are missing from this listing (518, 521-522, 524-526, 538, 540-542, 545, 547,
// 549, 551, 554, 556-558, 575-581), among them the declaration of temp and the statements that add
// entries to the recorded sets.
508 private static UnicodeSet getCoverage(Font font) {
// The skip list is looked up by getFontName(), whereas getFontData() keys its map by getName().
509 String name = font.getFontName();
510 boolean skipShapes = SKIP_SHAPES.contains(name);
511 UnicodeSet result = new UnicodeSet();
512 final FontRenderContext fontRenderContext = new FontRenderContext(null, false, false);
// Reusable buffers: one char for BMP code points, two chars for a surrogate pair.
513 char[] array = new char[1];
514 char[] array2 = new char[2];
// Visual bounds -> (outline shape -> code points having that shape). The outer map is ordered by
// ShapeComparator, not RectComparator; its keys are Rectangle2D objects, which are Shapes.
515 Map<Rectangle2D,Map<Shape,UnicodeSet>> boundsToData = new TreeMap<Rectangle2D,Map<Shape,UnicodeSet>>(ShapeComparator);
516 for (UnicodeSetIterator it = new UnicodeSetIterator(COVERAGE); it.next();) {
517 if (font.canDisplay(it.codepoint)) {
519 if (it.codepoint <= 0xFFFF) {
520 array[0] = (char) it.codepoint;
523 Character.toChars(it.codepoint, array2, 0);
// canDisplay() alone is not trusted: the code point only counts when its first glyph code is
// greater than 0 (see the "bug on Mac" link above this method).
527 GlyphVector glyphVector = font.createGlyphVector(fontRenderContext, temp);
528 int glyphCode = glyphVector.getGlyphCode(0);
529 boolean validchar = (glyphCode > 0);
530 if (!validchar) continue;
532 result.add(it.codepoint);
// Fonts on the skip list contribute coverage only; no shape analysis is done for them.
534 if (skipShapes) continue;
535 Shape shape = glyphVector.getOutline();
// An outline with no path segments is recorded in invisibles.
536 if (isInvisible(shape)) {
537 Set<String> set = invisibles.get(it.codepoint);
539 invisibles.put(it.codepoint, set = new TreeSet<String>());
// Otherwise bucket the code point first by visual bounds, then by exact outline.
543 Rectangle2D bounds = glyphVector.getVisualBounds();
544 Map<Shape, UnicodeSet> map = boundsToData.get(bounds);
546 boundsToData.put(bounds, map = new TreeMap<Shape,UnicodeSet>(ShapeComparator));
548 UnicodeSet set = map.get(shape);
550 map.put(shape, set = new UnicodeSet());
// Debug output, disabled by the constant false.
552 if (false && set.size() != 0) {
553 System.out.println("Adding " + Utility.hex(it.codepoint) + "\t" + UTF16.valueOf(it.codepoint) + "\tto " + set.toPattern(false));
555 set.add(it.codepoint);
559 //System.out.println(result.size() + "\t" + result);
// Second phase: every bucket of code points with identical bounds and outline yields equivalence pairs.
560 for (Rectangle2D bounds : boundsToData.keySet()) {
561 Map<Shape, UnicodeSet> map = boundsToData.get(bounds);
562 for (UnicodeSet set : map.values()) {
// Control characters are left out of the equivalences.
563 set.removeAll(CONTROLS);
// A bucket of exactly one code point has no pairs; an empty bucket simply produces no iterations.
564 if (set.size() != 1) {
565 //System.out.println(set.toPattern(false));
566 for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
567 for (UnicodeSetIterator it2 = new UnicodeSetIterator(set); it2.next();) {
568 int cp = it.codepoint;
569 int cp2 = it2.codepoint;
// Keep each unordered pair once, smaller code point first.
570 if (cp >= cp2) continue;
571 R2<Integer, Integer> r = Row.of(cp, cp2);
572 Set<String> reasons = equivalences.get(r);
573 if (reasons == null) {
574 equivalences.put(r, reasons = new TreeSet());
582 return result.freeze();
// Orders rectangles by x, then y, then width, then height, each compared with compareDiff(double, double).
// NOTE(review): the declaration of diff (line 588) and the final return (line 593, presumably 0 when
// all four values match) are not visible in this listing. No use of this comparator appears in the
// listing; getCoverage() builds its bounds map with ShapeComparator.
585 static Comparator<Rectangle2D> RectComparator = new Comparator<Rectangle2D>() {
587 public int compare(Rectangle2D r1, Rectangle2D r2) {
589 if (0 != (diff = compareDiff(r1.getX(),r2.getX()))) return diff;
590 if (0 != (diff = compareDiff(r1.getY(),r2.getY()))) return diff;
591 if (0 != (diff = compareDiff(r1.getWidth(),r2.getWidth()))) return diff;
592 if (0 != (diff = compareDiff(r1.getHeight(),r2.getHeight()))) return diff;
598 static final AffineTransform IDENTITY = new AffineTransform();
600 static boolean isInvisible(Shape shape) {
601 return shape.getPathIterator(IDENTITY).isDone();
// Orders shapes by walking their paths in parallel and comparing them segment by segment: a path that
// ends first sorts before a longer one, and two paths that end together compare equal.
// NOTE(review): the loop header, the declaration of diff (presumably derived from the segment types t1
// and t2), the switch header and the iterator advance fall on lines missing from this listing
// (609, 612-613, 616-617, 620, 622, 627-628, 641+).
604 static Comparator<Shape> ShapeComparator = new Comparator<Shape>() {
// Scratch buffers for currentSegment(). They are fields of the single comparator instance, so
// concurrent compare() calls would share them.
605 float[] coords1 = new float[6];
606 float[] coords2 = new float[6];
608 public int compare(Shape s1, Shape s2) {
610 PathIterator p1 = s1.getPathIterator(IDENTITY);
611 PathIterator p2 = s2.getPathIterator(IDENTITY);
614 return p2.isDone() ? 0 : -1;
615 } else if (p2.isDone()) {
// currentSegment() fills the buffer with the segment's coordinates and returns the segment type.
618 int t1 = p1.currentSegment(coords1);
619 int t2 = p2.currentSegment(coords2);
621 if (diff != 0) return diff;
623 * SEG_MOVETO and SEG_LINETO types returns one point,
624 * SEG_QUADTO returns two points,
625 * SEG_CUBICTO returns 3 points
626 * and SEG_CLOSE does not return any points.
// The cases fall through on purpose: a cubic segment compares coordinates 5..0, a quadratic one 3..0,
// a move/line 1..0, and a close compares nothing.
629 case PathIterator.SEG_CUBICTO:
630 if (0 != (diff = compareDiff(coords1[5],coords2[5]))) return diff;
631 if (0 != (diff = compareDiff(coords1[4],coords2[4]))) return diff;
632 case PathIterator.SEG_QUADTO:
633 if (0 != (diff = compareDiff(coords1[3],coords2[3]))) return diff;
634 if (0 != (diff = compareDiff(coords1[2],coords2[2]))) return diff;
635 case PathIterator.SEG_MOVETO:
636 case PathIterator.SEG_LINETO:
637 if (0 != (diff = compareDiff(coords1[1],coords2[1]))) return diff;
638 if (0 != (diff = compareDiff(coords1[0],coords2[0]))) return diff;
639 case PathIterator.SEG_CLOSE: break;
640 default: throw new IllegalArgumentException();
648 private static int compareDiff(float f, float g) {
649 return f < g ? -1 : f > g ? 1 : 0;
651 private static int compareDiff(double f, double g) {
652 return f < g ? -1 : f > g ? 1 : 0;