jars/icu4j-4_4_2-src/main/classes/translit/src/com/ibm/icu/text/TransliteratorIDParser.java

   1 /*\r
   2 **********************************************************************\r
   3 *   Copyright (c) 2002-2010, International Business Machines Corporation\r
   4 *   and others.  All Rights Reserved.\r
   5 **********************************************************************\r
   6 *   Date        Name        Description\r
   7 *   01/14/2002  aliu        Creation.\r
   8 **********************************************************************\r
   9 */\r
  10 \r
  11 package com.ibm.icu.text;\r
  12 \r
  13 import java.text.ParsePosition;\r
  14 import java.util.Hashtable;\r
  15 import java.util.Vector;\r
  16 \r
  17 import com.ibm.icu.impl.Utility;\r
  18 import com.ibm.icu.util.CaseInsensitiveString;\r
  19 \r
  20 /**\r
  21  * Parsing component for transliterator IDs.  This class contains only\r
  22  * static members; it cannot be instantiated.  Methods in this class\r
  23  * parse various ID formats, including the following:\r
  24  *\r
  25  * A basic ID, which contains source, target, and variant, but no\r
  26  * filter and no explicit inverse.  Examples include\r
  27  * "Latin-Greek/UNGEGN" and "Null".\r
  28  *\r
  29  * A single ID, which is a basic ID plus optional filter and optional\r
  30  * explicit inverse.  Examples include "[a-zA-Z] Latin-Greek" and\r
  31  * "Lower (Upper)".\r
  32  *\r
  33  * A compound ID, which is a sequence of one or more single IDs,\r
  34  * separated by semicolons, with optional forward and reverse global\r
  35  * filters.  The global filters are UnicodeSet patterns prepended or\r
  36  * appended to the IDs, separated by semicolons.  An appended filter\r
  37  * must be enclosed in parentheses and applies in the reverse\r
  38  * direction.\r
  39  *\r
  40  * @author Alan Liu\r
  41  */\r
  42 class TransliteratorIDParser {\r
  43 \r
  // Separator between the single IDs (and global filters) of a compound ID.
  private static final char ID_DELIM = ';';

  // Separator between the source and the target of a basic ID ("S-T").
  private static final char TARGET_SEP = '-';

  // Separator that introduces the variant of a basic ID ("S-T/V").
  private static final char VARIANT_SEP = '/';

  // Opens the parenthesized element of an ID, e.g. "Lower (Upper)".
  private static final char OPEN_REV = '(';

  // Closes the parenthesized element of an ID.
  private static final char CLOSE_REV = ')';

  // Source that is substituted when an ID does not name one explicitly.
  private static final String ANY = "Any";

  // Local aliases for the Transliterator direction constants.
  private static final int FORWARD = Transliterator.FORWARD;

  private static final int REVERSE = Transliterator.REVERSE;

  // Maps a target (compared case-insensitively) to the target of its
  // special inverse.  Entries are added by registerSpecialInverse() and
  // looked up by specsToSpecialInverse().
  private static final Hashtable<CaseInsensitiveString, String> SPECIAL_INVERSES =
      new Hashtable<CaseInsensitiveString, String>();
  62 \r
  63     /**\r
  64      * A structure containing the parsed data of a filtered ID, that\r
  65      * is, a basic ID optionally with a filter.\r
  66      *\r
  67      * 'source' and 'target' will always be non-null.  The 'variant'\r
  68      * will be non-null only if a non-empty variant was parsed.\r
  69      *\r
  70      * 'sawSource' is true if there was an explicit source in the\r
  71      * parsed id.  If there was no explicit source, then an implied\r
  72      * source of ANY is returned and 'sawSource' is set to false.\r
  73      * \r
  74      * 'filter' is the parsed filter pattern, or null if there was no\r
  75      * filter.\r
  76      */\r
  77     private static class Specs {\r
  78         public String source; // not null\r
  79         public String target; // not null\r
  80         public String variant; // may be null\r
  81         public String filter; // may be null\r
  82         public boolean sawSource;\r
  83         Specs(String s, String t, String v, boolean sawS, String f) {\r
  84             source = s;\r
  85             target = t;\r
  86             variant = v;\r
  87             sawSource = sawS;\r
  88             filter = f;\r
  89         }\r
  90     }\r
  91 \r
  92     /**\r
  93      * A structure containing the canonicalized data of a filtered ID,\r
  94      * that is, a basic ID optionally with a filter.\r
  95      *\r
  96      * 'canonID' is always non-null.  It may be the empty string "".\r
  97      * It is the id that should be assigned to the created\r
  98      * transliterator.  It _cannot_ be instantiated directly.\r
  99      *\r
 100      * 'basicID' is always non-null and non-empty.  It is always of\r
 101      * the form S-T or S-T/V.  It is designed to be fed to low-level\r
 102      * instantiation code that only understands these two formats.\r
 103      *\r
 104      * 'filter' may be null, if there is none, or non-null and\r
 105      * non-empty.\r
 106      */\r
 107     static class SingleID {\r
 108         public String canonID;\r
 109         public String basicID;\r
 110         public String filter;\r
 111         SingleID(String c, String b, String f) {\r
 112             canonID = c;\r
 113             basicID = b;\r
 114             filter = f;\r
 115         }\r
 116         SingleID(String c, String b) {\r
 117             this(c, b, null);\r
 118         }\r
 119         Transliterator getInstance() {\r
 120             Transliterator t;\r
 121             if (basicID == null || basicID.length() == 0) {\r
 122                 t = Transliterator.getBasicInstance("Any-Null", canonID);\r
 123             } else {\r
 124                 t = Transliterator.getBasicInstance(basicID, canonID);\r
 125             }\r
 126             if (t != null) {\r
 127                 if (filter != null) {\r
 128                     t.setFilter(new UnicodeSet(filter));\r
 129                 }\r
 130             }\r
 131             return t;\r
 132         }\r
 133     }\r
 134 \r
 135     /**\r
 136      * Parse a filter ID, that is, an ID of the general form\r
 137      * "[f1] s1-t1/v1", with the filters optional, and the variants optional.\r
 138      * @param id the id to be parsed\r
 139      * @param pos INPUT-OUTPUT parameter.  On input, the position of\r
 140      * the first character to parse.  On output, the position after\r
 141      * the last character parsed.\r
 142      * @return a SingleID object or null if the parse fails\r
 143      */\r
 144     public static SingleID parseFilterID(String id, int[] pos) {\r
 145 \r
 146         int start = pos[0];\r
 147         Specs specs = parseFilterID(id, pos, true);\r
 148         if (specs == null) {\r
 149             pos[0] = start;\r
 150             return null;\r
 151         }\r
 152 \r
 153         // Assemble return results\r
 154         SingleID single = specsToID(specs, FORWARD);\r
 155         single.filter = specs.filter;\r
 156         return single;\r
 157     }\r
 158 \r
 159     /**\r
 160      * Parse a single ID, that is, an ID of the general form\r
 161      * "[f1] s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element\r
 162      * optional, the filters optional, and the variants optional.\r
 163      * @param id the id to be parsed\r
 164      * @param pos INPUT-OUTPUT parameter.  On input, the position of\r
 165      * the first character to parse.  On output, the position after\r
 166      * the last character parsed.\r
 167      * @param dir the direction.  If the direction is REVERSE then the\r
 168      * SingleID is constructed for the reverse direction.\r
 169      * @return a SingleID object or null\r
 170      */\r
 171     public static SingleID parseSingleID(String id, int[] pos, int dir) {\r
 172 \r
 173         int start = pos[0];\r
 174 \r
 175         // The ID will be of the form A, A(), A(B), or (B), where\r
 176         // A and B are filter IDs.\r
 177         Specs specsA = null;\r
 178         Specs specsB = null;\r
 179         boolean sawParen = false;\r
 180 \r
 181         // On the first pass, look for (B) or ().  If this fails, then\r
 182         // on the second pass, look for A, A(B), or A().\r
 183         for (int pass=1; pass<=2; ++pass) {\r
 184             if (pass == 2) {\r
 185                 specsA = parseFilterID(id, pos, true);\r
 186                 if (specsA == null) {\r
 187                     pos[0] = start;\r
 188                     return null;\r
 189                 }\r
 190             }\r
 191             if (Utility.parseChar(id, pos, OPEN_REV)) {\r
 192                 sawParen = true;\r
 193                 if (!Utility.parseChar(id, pos, CLOSE_REV)) {\r
 194                     specsB = parseFilterID(id, pos, true);\r
 195                     // Must close with a ')'\r
 196                     if (specsB == null || !Utility.parseChar(id, pos, CLOSE_REV)) {\r
 197                         pos[0] = start;\r
 198                         return null;\r
 199                     }\r
 200                 }\r
 201                 break;\r
 202             }\r
 203         }\r
 204 \r
 205         // Assemble return results\r
 206         SingleID single;\r
 207         if (sawParen) {\r
 208             if (dir == FORWARD) {\r
 209                 single = specsToID(specsA, FORWARD);\r
 210                 single.canonID = single.canonID +\r
 211                     OPEN_REV + specsToID(specsB, FORWARD).canonID + CLOSE_REV;\r
 212                 if (specsA != null) {\r
 213                     single.filter = specsA.filter;\r
 214                 }\r
 215             } else {\r
 216                 single = specsToID(specsB, FORWARD);\r
 217                 single.canonID = single.canonID +\r
 218                     OPEN_REV + specsToID(specsA, FORWARD).canonID + CLOSE_REV;\r
 219                 if (specsB != null) {\r
 220                     single.filter = specsB.filter;\r
 221                 }\r
 222             }\r
 223         } else {\r
 224             // assert(specsA != null);\r
 225             if (dir == FORWARD) {\r
 226                 single = specsToID(specsA, FORWARD);\r
 227             } else {\r
 228                 single = specsToSpecialInverse(specsA);\r
 229                 if (single == null) {\r
 230                     single = specsToID(specsA, REVERSE);\r
 231                 }\r
 232             }\r
 233             single.filter = specsA.filter;\r
 234         }\r
 235 \r
 236         return single;\r
 237     }\r
 238 \r
 239     /**\r
 240      * Parse a global filter of the form "[f]" or "([f])", depending\r
 241      * on 'withParens'.\r
 242      * @param id the pattern the parse\r
 243      * @param pos INPUT-OUTPUT parameter.  On input, the position of\r
 244      * the first character to parse.  On output, the position after\r
 245      * the last character parsed.\r
 246      * @param dir the direction.\r
 247      * @param withParens INPUT-OUTPUT parameter.  On entry, if\r
 248      * withParens[0] is 0, then parens are disallowed.  If it is 1,\r
 249      * then parens are requires.  If it is -1, then parens are\r
 250      * optional, and the return result will be set to 0 or 1.\r
 251      * @param canonID OUTPUT parameter.  The pattern for the filter\r
 252      * added to the canonID, either at the end, if dir is FORWARD, or\r
 253      * at the start, if dir is REVERSE.  The pattern will be enclosed\r
 254      * in parentheses if appropriate, and will be suffixed with an\r
 255      * ID_DELIM character.  May be null.\r
 256      * @return a UnicodeSet object or null.  A non-null results\r
 257      * indicates a successful parse, regardless of whether the filter\r
 258      * applies to the given direction.  The caller should discard it\r
 259      * if withParens != (dir == REVERSE).\r
 260      */\r
 261     public static UnicodeSet parseGlobalFilter(String id, int[] pos, int dir,\r
 262                                                int[] withParens,\r
 263                                                StringBuffer canonID) {\r
 264         UnicodeSet filter = null;\r
 265         int start = pos[0];\r
 266 \r
 267         if (withParens[0] == -1) {\r
 268             withParens[0] = Utility.parseChar(id, pos, OPEN_REV) ? 1 : 0;\r
 269         } else if (withParens[0] == 1) {\r
 270             if (!Utility.parseChar(id, pos, OPEN_REV)) {\r
 271                 pos[0] = start;\r
 272                 return null;\r
 273             }\r
 274         }\r
 275         \r
 276         Utility.skipWhitespace(id, pos);\r
 277 \r
 278         if (UnicodeSet.resemblesPattern(id, pos[0])) {\r
 279             ParsePosition ppos = new ParsePosition(pos[0]);\r
 280             try {\r
 281                 filter = new UnicodeSet(id, ppos, null);\r
 282             } catch (IllegalArgumentException e) {\r
 283                 pos[0] = start;\r
 284                 return null;\r
 285             }\r
 286 \r
 287             String pattern = id.substring(pos[0], ppos.getIndex());\r
 288             pos[0] = ppos.getIndex();\r
 289 \r
 290             if (withParens[0] == 1 && !Utility.parseChar(id, pos, CLOSE_REV)) {\r
 291                 pos[0] = start;\r
 292                 return null;\r
 293             }\r
 294 \r
 295             // In the forward direction, append the pattern to the\r
 296             // canonID.  In the reverse, insert it at zero, and invert\r
 297             // the presence of parens ("A" <-> "(A)").\r
 298             if (canonID != null) {\r
 299                 if (dir == FORWARD) {\r
 300                     if (withParens[0] == 1) {\r
 301                         pattern = String.valueOf(OPEN_REV) + pattern + CLOSE_REV;\r
 302                     }\r
 303                     canonID.append(pattern + ID_DELIM);\r
 304                 } else {\r
 305                     if (withParens[0] == 0) {\r
 306                         pattern = String.valueOf(OPEN_REV) + pattern + CLOSE_REV;\r
 307                     }\r
 308                     canonID.insert(0, pattern + ID_DELIM);\r
 309                 }\r
 310             }\r
 311         }\r
 312 \r
 313         return filter;\r
 314     }\r
 315 \r
 316     /**\r
 317      * Parse a compound ID, consisting of an optional forward global\r
 318      * filter, a separator, one or more single IDs delimited by\r
 319      * separators, an an optional reverse global filter.  The\r
 320      * separator is a semicolon.  The global filters are UnicodeSet\r
 321      * patterns.  The reverse global filter must be enclosed in\r
 322      * parentheses.\r
 323      * @param id the pattern the parse\r
 324      * @param dir the direction.\r
 325      * @param canonID OUTPUT parameter that receives the canonical ID,\r
 326      * consisting of canonical IDs for all elements, as returned by\r
 327      * parseSingleID(), separated by semicolons.  Previous contents\r
 328      * are discarded.\r
 329      * @param list OUTPUT parameter that receives a list of SingleID\r
 330      * objects representing the parsed IDs.  Previous contents are\r
 331      * discarded.\r
 332      * @param globalFilter OUTPUT parameter that receives a pointer to\r
 333      * a newly created global filter for this ID in this direction, or\r
 334      * null if there is none.\r
 335      * @return true if the parse succeeds, that is, if the entire\r
 336      * id is consumed without syntax error.\r
 337      */\r
 338     public static boolean parseCompoundID(String id, int dir,\r
 339                                           StringBuffer canonID,\r
 340                                           Vector<SingleID> list,\r
 341                                           UnicodeSet[] globalFilter) {\r
 342         int[] pos = new int[] { 0 };\r
 343         int[] withParens = new int[1];\r
 344         list.removeAllElements();\r
 345         UnicodeSet filter;\r
 346         globalFilter[0] = null;\r
 347         canonID.setLength(0);\r
 348 \r
 349         // Parse leading global filter, if any\r
 350         withParens[0] = 0; // parens disallowed\r
 351         filter = parseGlobalFilter(id, pos, dir, withParens, canonID);\r
 352         if (filter != null) {\r
 353             if (!Utility.parseChar(id, pos, ID_DELIM)) {\r
 354                 // Not a global filter; backup and resume\r
 355                 canonID.setLength(0);\r
 356                 pos[0] = 0;\r
 357             }\r
 358             if (dir == FORWARD) {\r
 359                 globalFilter[0] = filter;\r
 360             }\r
 361         }\r
 362 \r
 363         boolean sawDelimiter = true;\r
 364         for (;;) {\r
 365             SingleID single = parseSingleID(id, pos, dir);\r
 366             if (single == null) {\r
 367                 break;\r
 368             }\r
 369             if (dir == FORWARD) {\r
 370                 list.addElement(single);\r
 371             } else {\r
 372                 list.insertElementAt(single, 0);\r
 373             }\r
 374             if (!Utility.parseChar(id, pos, ID_DELIM)) {\r
 375                 sawDelimiter = false;\r
 376                 break;\r
 377             }\r
 378         }\r
 379 \r
 380         if (list.size() == 0) {\r
 381             return false;\r
 382         }\r
 383 \r
 384         // Construct canonical ID\r
 385         for (int i=0; i<list.size(); ++i) {\r
 386             SingleID single = list.elementAt(i);\r
 387             canonID.append(single.canonID);\r
 388             if (i != (list.size()-1)) {\r
 389                 canonID.append(ID_DELIM);\r
 390             }\r
 391         }\r
 392 \r
 393         // Parse trailing global filter, if any, and only if we saw\r
 394         // a trailing delimiter after the IDs.\r
 395         if (sawDelimiter) {\r
 396             withParens[0] = 1; // parens required\r
 397             filter = parseGlobalFilter(id, pos, dir, withParens, canonID);\r
 398             if (filter != null) {\r
 399                 // Don't require trailing ';', but parse it if present\r
 400                 Utility.parseChar(id, pos, ID_DELIM);\r
 401                 \r
 402                 if (dir == REVERSE) {\r
 403                     globalFilter[0] = filter;\r
 404                 }\r
 405             }\r
 406         }\r
 407 \r
 408         // Trailing unparsed text is a syntax error\r
 409         Utility.skipWhitespace(id, pos[0]);\r
 410         if (pos[0] != id.length()) {\r
 411             return false;\r
 412         }\r
 413 \r
 414         return true;\r
 415     }\r
 416 \r
 417     /**\r
 418      * Returns the list of Transliterator objects for the\r
 419      * given list of SingleID objects.\r
 420      * \r
 421      * @param ids list vector of SingleID objects.\r
 422      * @return Actual transliterators for the list of SingleIDs\r
 423      */\r
 424     static Vector<Transliterator> instantiateList(Vector<SingleID> ids) {\r
 425         Transliterator t;\r
 426         Vector<Transliterator> translits = new Vector<Transliterator>();\r
 427         for (SingleID single : ids) {\r
 428             if (single.basicID.length() == 0) {\r
 429                 continue;\r
 430             }\r
 431             t = single.getInstance();\r
 432             if (t == null) {\r
 433                 throw new IllegalArgumentException("Illegal ID " + single.canonID);\r
 434             }\r
 435             translits.add(t);\r
 436         }\r
 437 \r
 438         // An empty list is equivalent to a Null transliterator.\r
 439         if (translits.size() == 0) {\r
 440             t = Transliterator.getBasicInstance("Any-Null", null);\r
 441             if (t == null) {\r
 442                 // Should never happen\r
 443                 throw new IllegalArgumentException("Internal error; cannot instantiate Any-Null");\r
 444             }\r
 445             translits.add(t);\r
 446         }\r
 447         return translits;\r
 448     }\r
 449 \r
 450     /**\r
 451      * Parse an ID into pieces.  Take IDs of the form T, T/V, S-T,\r
 452      * S-T/V, or S/V-T.  If the source is missing, return a source of\r
 453      * ANY.\r
 454      * @param id the id string, in any of several forms\r
 455      * @return an array of 4 strings: source, target, variant, and\r
 456      * isSourcePresent.  If the source is not present, ANY will be\r
 457      * given as the source, and isSourcePresent will be null.  Otherwise\r
 458      * isSourcePresent will be non-null.  The target may be empty if the\r
 459      * id is not well-formed.  The variant may be empty.\r
 460      */\r
 461     public static String[] IDtoSTV(String id) {\r
 462         String source = ANY;\r
 463         String target = null;\r
 464         String variant = "";\r
 465         \r
 466         int sep = id.indexOf(TARGET_SEP);\r
 467         int var = id.indexOf(VARIANT_SEP);\r
 468         if (var < 0) {\r
 469             var = id.length();\r
 470         }\r
 471         boolean isSourcePresent = false;\r
 472         \r
 473         if (sep < 0) {\r
 474             // Form: T/V or T (or /V)\r
 475             target = id.substring(0, var);\r
 476             variant = id.substring(var);\r
 477         } else if (sep < var) {\r
 478             // Form: S-T/V or S-T (or -T/V or -T)\r
 479             if (sep > 0) {\r
 480                 source = id.substring(0, sep);\r
 481               isSourcePresent = true;\r
 482             }\r
 483             target = id.substring(++sep, var);\r
 484             variant = id.substring(var);\r
 485         } else {\r
 486             // Form: (S/V-T or /V-T)\r
 487             if (var > 0) {\r
 488                 source = id.substring(0, var);\r
 489                 isSourcePresent = true;\r
 490             }\r
 491             variant = id.substring(var, sep++);\r
 492             target = id.substring(sep);\r
 493         }\r
 494 \r
 495         if (variant.length() > 0) {\r
 496             variant = variant.substring(1);\r
 497         }\r
 498         \r
 499         return new String[] { source, target, variant,\r
 500                               isSourcePresent ? "" : null };\r
 501     }\r
 502 \r
 503     /**\r
 504      * Given source, target, and variant strings, concatenate them into a\r
 505      * full ID.  If the source is empty, then "Any" will be used for the\r
 506      * source, so the ID will always be of the form s-t/v or s-t.\r
 507      */\r
 508     public static String STVtoID(String source,\r
 509                                  String target,\r
 510                                  String variant) {\r
 511         StringBuilder id = new StringBuilder(source);\r
 512         if (id.length() == 0) {\r
 513             id.append(ANY);\r
 514         }\r
 515         id.append(TARGET_SEP).append(target);\r
 516         if (variant != null && variant.length() != 0) {\r
 517             id.append(VARIANT_SEP).append(variant);\r
 518         }\r
 519         return id.toString();\r
 520     }\r
 521 \r
 522     /**\r
 523      * Register two targets as being inverses of one another.  For\r
 524      * example, calling registerSpecialInverse("NFC", "NFD", true) causes\r
 525      * Transliterator to form the following inverse relationships:\r
 526      *\r
 527      * <pre>NFC => NFD\r
 528      * Any-NFC => Any-NFD\r
 529      * NFD => NFC\r
 530      * Any-NFD => Any-NFC</pre>\r
 531      *\r
 532      * (Without the special inverse registration, the inverse of NFC\r
 533      * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but\r
 534      * that the presence or absence of "Any-" is preserved.\r
 535      *\r
 536      * <p>The relationship is symmetrical; registering (a, b) is\r
 537      * equivalent to registering (b, a).\r
 538      *\r
 539      * <p>The relevant IDs must still be registered separately as\r
 540      * factories or classes.\r
 541      *\r
 542      * <p>Only the targets are specified.  Special inverses always\r
 543      * have the form Any-Target1 <=> Any-Target2.  The target should\r
 544      * have canonical casing (the casing desired to be produced when\r
 545      * an inverse is formed) and should contain no whitespace or other\r
 546      * extraneous characters.\r
 547      *\r
 548      * @param target the target against which to register the inverse\r
 549      * @param inverseTarget the inverse of target, that is\r
 550      * Any-target.getInverse() => Any-inverseTarget\r
 551      * @param bidirectional if true, register the reverse relation\r
 552      * as well, that is, Any-inverseTarget.getInverse() => Any-target\r
 553      */\r
 554     public static void registerSpecialInverse(String target,\r
 555                                               String inverseTarget,\r
 556                                               boolean bidirectional) {\r
 557         SPECIAL_INVERSES.put(new CaseInsensitiveString(target), inverseTarget);\r
 558         if (bidirectional && !target.equalsIgnoreCase(inverseTarget)) {\r
 559             SPECIAL_INVERSES.put(new CaseInsensitiveString(inverseTarget), target);\r
 560         }\r
 561     }\r
 562 \r
 563     //----------------------------------------------------------------\r
 564     // Private implementation\r
 565     //----------------------------------------------------------------\r
 566 \r
 567     /**\r
 568      * Parse an ID into component pieces.  Take IDs of the form T,\r
 569      * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a\r
 570      * source of ANY.\r
 571      * @param id the id string, in any of several forms\r
 572      * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the\r
 573      * offset of the first character to parse in id.  On output,\r
 574      * pos[0] is the offset after the last parsed character.  If the\r
 575      * parse failed, pos[0] will be unchanged.\r
 576      * @param allowFilter if true, a UnicodeSet pattern is allowed\r
 577      * at any location between specs or delimiters, and is returned\r
 578      * as the fifth string in the array.\r
 579      * @return a Specs object, or null if the parse failed.  If\r
 580      * neither source nor target was seen in the parsed id, then the\r
 581      * parse fails.  If allowFilter is true, then the parsed filter\r
 582      * pattern is returned in the Specs object, otherwise the returned\r
 583      * filter reference is null.  If the parse fails for any reason\r
 584      * null is returned.\r
 585      */\r
 586     private static Specs parseFilterID(String id, int[] pos,\r
 587                                        boolean allowFilter) {\r
 588         String first = null;\r
 589         String source = null;\r
 590         String target = null;\r
 591         String variant = null;\r
 592         String filter = null;\r
 593         char delimiter = 0;\r
 594         int specCount = 0;\r
 595         int start = pos[0];\r
 596 \r
 597         // This loop parses one of the following things with each\r
 598         // pass: a filter, a delimiter character (either '-' or '/'),\r
 599         // or a spec (source, target, or variant).\r
 600         for (;;) {\r
 601             Utility.skipWhitespace(id, pos);\r
 602             if (pos[0] == id.length()) {\r
 603                 break;\r
 604             }\r
 605 \r
 606             // Parse filters\r
 607             if (allowFilter && filter == null &&\r
 608                 UnicodeSet.resemblesPattern(id, pos[0])) {\r
 609 \r
 610                 ParsePosition ppos = new ParsePosition(pos[0]);\r
 611                 // Parse the set to get the position.\r
 612                 new UnicodeSet(id, ppos, null);\r
 613                 filter = id.substring(pos[0], ppos.getIndex());\r
 614                 pos[0] = ppos.getIndex();\r
 615                 continue;\r
 616             }\r
 617 \r
 618             if (delimiter == 0) {\r
 619                 char c = id.charAt(pos[0]);\r
 620                 if ((c == TARGET_SEP && target == null) ||\r
 621                     (c == VARIANT_SEP && variant == null)) {\r
 622                     delimiter = c;\r
 623                     ++pos[0];\r
 624                     continue;\r
 625                 }\r
 626             }\r
 627 \r
 628             // We are about to try to parse a spec with no delimiter\r
 629             // when we can no longer do so (we can only do so at the\r
 630             // start); break.\r
 631             if (delimiter == 0 && specCount > 0) {\r
 632                 break;\r
 633             }\r
 634 \r
 635             String spec = Utility.parseUnicodeIdentifier(id, pos);\r
 636             if (spec == null) {\r
 637                 // Note that if there was a trailing delimiter, we\r
 638                 // consume it.  So Foo-, Foo/, Foo-Bar/, and Foo/Bar-\r
 639                 // are legal.\r
 640                 break;\r
 641             }\r
 642 \r
 643             switch (delimiter) {\r
 644             case 0:\r
 645                 first = spec;\r
 646                 break;\r
 647             case TARGET_SEP:\r
 648                 target = spec;\r
 649                 break;\r
 650             case VARIANT_SEP:\r
 651                 variant = spec;\r
 652                 break;\r
 653             }\r
 654             ++specCount;\r
 655             delimiter = 0;\r
 656         }\r
 657 \r
 658         // A spec with no prior character is either source or target,\r
 659         // depending on whether an explicit "-target" was seen.\r
 660         if (first != null) {\r
 661             if (target == null) {\r
 662                 target = first;\r
 663             } else {\r
 664                 source = first;\r
 665             }\r
 666         }\r
 667 \r
 668         // Must have either source or target\r
 669         if (source == null && target == null) {\r
 670             pos[0] = start;\r
 671             return null;\r
 672         }\r
 673 \r
 674         // Empty source or target defaults to ANY\r
 675         boolean sawSource = true;\r
 676         if (source == null) {\r
 677             source = ANY;\r
 678             sawSource = false;\r
 679         }\r
 680         if (target == null) {\r
 681             target = ANY;\r
 682         }\r
 683 \r
 684         return new Specs(source, target, variant, sawSource, filter);\r
 685     }\r
 686 \r
 687     /**\r
 688      * Givens a Spec object, convert it to a SingleID object.  The\r
 689      * Spec object is a more unprocessed parse result.  The SingleID\r
 690      * object contains information about canonical and basic IDs.\r
 691      * @return a SingleID; never returns null.  Returned object always\r
 692      * has 'filter' field of null.\r
 693      */\r
 694     private static SingleID specsToID(Specs specs, int dir) {\r
 695         String canonID = "";\r
 696         String basicID = "";\r
 697         String basicPrefix = "";\r
 698         if (specs != null) {\r
 699             StringBuilder buf = new StringBuilder();\r
 700             if (dir == FORWARD) {\r
 701                 if (specs.sawSource) {\r
 702                     buf.append(specs.source).append(TARGET_SEP);\r
 703                 } else {\r
 704                     basicPrefix = specs.source + TARGET_SEP;\r
 705                 }\r
 706                 buf.append(specs.target);\r
 707             } else {\r
 708                 buf.append(specs.target).append(TARGET_SEP).append(specs.source);\r
 709             }\r
 710             if (specs.variant != null) {\r
 711                 buf.append(VARIANT_SEP).append(specs.variant);\r
 712             }\r
 713             basicID = basicPrefix + buf.toString();\r
 714             if (specs.filter != null) {\r
 715                 buf.insert(0, specs.filter);\r
 716             }\r
 717             canonID = buf.toString();\r
 718         }\r
 719         return new SingleID(canonID, basicID);\r
 720     }\r
 721 \r
 722     /**\r
 723      * Given a Specs object, return a SingleID representing the\r
 724      * special inverse of that ID.  If there is no special inverse\r
 725      * then return null.\r
 726      * @return a SingleID or null.  Returned object always has\r
 727      * 'filter' field of null.\r
 728      */\r
 729     private static SingleID specsToSpecialInverse(Specs specs) {\r
 730         if (!specs.source.equalsIgnoreCase(ANY)) {\r
 731             return null;\r
 732         }\r
 733         String inverseTarget = SPECIAL_INVERSES.get(new CaseInsensitiveString(specs.target));\r
 734         if (inverseTarget != null) {\r
 735             // If the original ID contained "Any-" then make the\r
 736             // special inverse "Any-Foo"; otherwise make it "Foo".\r
 737             // So "Any-NFC" => "Any-NFD" but "NFC" => "NFD".\r
 738             StringBuilder buf = new StringBuilder();\r
 739             if (specs.filter != null) {\r
 740                 buf.append(specs.filter);\r
 741             }\r
 742             if (specs.sawSource) {\r
 743                 buf.append(ANY).append(TARGET_SEP);\r
 744             }\r
 745             buf.append(inverseTarget);\r
 746 \r
 747             String basicID = ANY + TARGET_SEP + inverseTarget;\r
 748 \r
 749             if (specs.variant != null) {\r
 750                 buf.append(VARIANT_SEP).append(specs.variant);\r
 751                 basicID = basicID + VARIANT_SEP + specs.variant;\r
 752             }\r
 753             return new SingleID(buf.toString(), basicID);\r
 754         }\r
 755         return null;\r
 756     }\r
 757 }\r
 758 \r
 759 //eof\r