jars/icu4j-4_4_2-src/main/classes/core/src/com/ibm/icu/text/ComposedCharIter.java

   1 /*\r
   2  *******************************************************************************\r
   3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
   4  * others. All Rights Reserved.                                                *\r
   5  *******************************************************************************\r
   6  */\r
   7 package com.ibm.icu.text;\r
   8 import com.ibm.icu.impl.Norm2AllModes;\r
   9 import com.ibm.icu.impl.Normalizer2Impl;\r
  10 \r
  11 /**\r
  12  * This class has been deprecated since ICU 2.2.\r
  13  * One problem is that this class is not designed to return supplementary characters.\r
  14  * Use the Normalizer2 and UCharacter classes instead.\r
  15  * <p>\r
  16  * <tt>ComposedCharIter</tt> is an iterator class that returns all\r
  17  * of the precomposed characters defined in the Unicode standard, along\r
  18  * with their decomposed forms.  This is often useful when building\r
  19  * data tables (<i>e.g.</i> collation tables) which need to treat composed\r
  20  * and decomposed characters equivalently.\r
  21  * <p>\r
  22  * For example, imagine that you have built a collation table with ordering\r
  23  * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all\r
  24  * characters used in a particular language.  When you process input text using\r
  25  * this table, the text must first be decomposed so that it matches the form\r
  26  * used in the table.  This can impose a performance penalty that may be\r
  27  * unacceptable in some situations.\r
  28  * <p>\r
  29  * You can avoid this problem by ensuring that the collation table contains\r
  30  * rules for both the decomposed <i>and</i> composed versions of each character.\r
  31  * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the\r
  32  * composed characters in Unicode.  If the decomposition for that character\r
  33  * consists solely of characters that are listed in your ruleset, you can\r
  34  * add a new rule for the composed character that makes it equivalent to\r
  35  * its decomposition sequence.\r
  36  * <p>\r
  37  * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table\r
  38  * of the composed characters in Unicode.  If you want to iterate over the\r
  39  * composed characters in a particular string, use {@link Normalizer} instead.\r
  40  * <p>\r
  41  * When constructing a <tt>ComposedCharIter</tt> there is one\r
  42  * optional feature that you can enable or disable:\r
  43  * <ul>\r
  44  *   <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul\r
  45  *          characters and their corresponding Jamo decompositions.\r
  46  *          This option is off by default (<i>i.e.</i> Hangul processing is enabled)\r
  47  *          since the Unicode standard specifies that Hangul to Jamo \r
  48  *          is a canonical decomposition.\r
  49  * </ul>\r
  50  * <p>\r
  51  * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the\r
  52  * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.\r
  53  * It will be updated as later versions of Unicode are released.\r
  54  * @deprecated ICU 2.2\r
  55  */\r
  56 ///CLOVER:OFF\r
  57 public final class ComposedCharIter {\r
  58     /**\r
  59      * Constant that indicates the iteration has completed.\r
  60      * {@link #next} returns this value when there are no more composed characters\r
  61      * over which to iterate.\r
  62      * @deprecated ICU 2.2\r
  63      */\r
  64     public static final  char DONE = (char) Normalizer.DONE;\r
  65 \r
  66     /**\r
  67      * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return\r
  68      * all Unicode characters with canonical decompositions, including Korean\r
  69      * Hangul characters.\r
  70      * @deprecated ICU 2.2\r
  71      */\r
  72     public ComposedCharIter() {\r
  73         this(false, 0);\r
  74     }\r
  75 \r
  76     /**\r
  77      * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.\r
  78      * <p>\r
  79      * @param compat    <tt>false</tt> for canonical decompositions only;\r
  80      *                  <tt>true</tt> for both canonical and compatibility\r
  81      *                  decompositions.\r
  82      *\r
  83      * @param options   Optional decomposition features. None are supported, so this is ignored.\r
  84      * @deprecated ICU 2.2\r
  85      */\r
  86     public ComposedCharIter(boolean compat, int options) {\r
  87         if(compat) {\r
  88             n2impl = Norm2AllModes.getNFKCInstance().impl;\r
  89         } else {\r
  90             n2impl = Norm2AllModes.getNFCInstance().impl;\r
  91         }\r
  92     }\r
  93 \r
  94     /**\r
  95      * Determines whether there any precomposed Unicode characters not yet returned\r
  96      * by {@link #next}.\r
  97      * @deprecated ICU 2.2\r
  98      */\r
  99     public boolean hasNext() {\r
 100         if (nextChar == Normalizer.DONE)  {\r
 101             findNextChar();\r
 102         }\r
 103         return nextChar != Normalizer.DONE;\r
 104     }\r
 105     \r
 106     /**\r
 107      * Returns the next precomposed Unicode character.\r
 108      * Repeated calls to <tt>next</tt> return all of the precomposed characters defined\r
 109      * by Unicode, in ascending order.  After all precomposed characters have\r
 110      * been returned, {@link #hasNext} will return <tt>false</tt> and further calls\r
 111      * to <tt>next</tt> will return {@link #DONE}.\r
 112      * @deprecated ICU 2.2\r
 113      */\r
 114     public char next() {\r
 115         if (nextChar == Normalizer.DONE)  {\r
 116             findNextChar();\r
 117         }\r
 118         curChar = nextChar;\r
 119         nextChar = Normalizer.DONE;\r
 120         return (char) curChar;\r
 121     }\r
 122     \r
 123     /**\r
 124      * Returns the Unicode decomposition of the current character.\r
 125      * This method returns the decomposition of the precomposed character most\r
 126      * recently returned by {@link #next}.  The resulting decomposition is\r
 127      * affected by the settings of the options passed to the constructor.\r
 128      * @deprecated ICU 2.2\r
 129      */\r
 130     public String decomposition() {\r
 131         // the decomposition buffer contains the decomposition of \r
 132         // current char so just return it\r
 133         if(decompBuf != null) {\r
 134             return decompBuf;\r
 135         } else {\r
 136             return "";\r
 137         }\r
 138     }\r
 139 \r
 140     private void findNextChar() {\r
 141         int c=curChar+1;\r
 142         decompBuf = null;\r
 143         for(;;) {\r
 144             if(c < 0xFFFF) {\r
 145                 decompBuf = n2impl.getDecomposition(c);\r
 146                 if(decompBuf != null) {\r
 147                     // the curChar can be decomposed... so it is a composed char\r
 148                     // cache the result     \r
 149                     break;\r
 150                 }\r
 151                 c++;\r
 152             } else {\r
 153                 c=Normalizer.DONE;\r
 154                 break;\r
 155             }\r
 156         }\r
 157         nextChar=c;  \r
 158     }\r
 159 \r
 160     private final Normalizer2Impl n2impl;\r
 161     private String decompBuf;\r
 162     private int curChar = 0;\r
 163     private int nextChar = Normalizer.DONE;\r
 164 }\r