2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
8 import com.ibm.icu.impl.Norm2AllModes;
9 import com.ibm.icu.impl.Normalizer2Impl;
12 * This class has been deprecated since ICU 2.2.
13 * One problem is that this class is not designed to return supplementary characters.
14 * Use the Normalizer2 and UCharacter classes instead.
16 * <tt>ComposedCharIter</tt> is an iterator class that returns all
17 * of the precomposed characters defined in the Unicode standard, along
18 * with their decomposed forms. This is often useful when building
19 * data tables (<i>e.g.</i> collation tables) which need to treat composed
20 * and decomposed characters equivalently.
22 * For example, imagine that you have built a collation table with ordering
23 * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all
24 * characters used in a particular language. When you process input text using
25 * this table, the text must first be decomposed so that it matches the form
26 * used in the table. This can impose a performance penalty that may be
27 * unacceptable in some situations.
29 * You can avoid this problem by ensuring that the collation table contains
30 * rules for both the decomposed <i>and</i> composed versions of each character.
31 * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
32 * composed characters in Unicode. If the decomposition for that character
33 * consists solely of characters that are listed in your ruleset, you can
34 * add a new rule for the composed character that makes it equivalent to
35 * its decomposition sequence.
37 * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
38 * of the composed characters in Unicode. If you want to iterate over the
39 * composed characters in a particular string, use {@link Normalizer} instead.
41 * When constructing a <tt>ComposedCharIter</tt> there is one
42 * optional feature that you can enable or disable:
44 * <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
45 * characters and their corresponding Jamo decompositions.
46 * This option is off by default (<i>i.e.</i> Hangul processing is enabled)
47 * since the Unicode standard specifies that Hangul to Jamo
48 * is a canonical decomposition.
51 * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
52 * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
53 * It will be updated as later versions of Unicode are released.
57 public final class ComposedCharIter {
59 * Constant that indicates the iteration has completed.
60 * {@link #next} returns this value when there are no more composed characters
61 * over which to iterate.
// Sentinel for the end of iteration: Normalizer.DONE narrowed to char so it fits the char-typed API.
// NOTE(review): the numeric value depends on Normalizer.DONE, which is declared outside this file.
64 public static final char DONE = (char) Normalizer.DONE;
67 * Construct a new <tt>ComposedCharIter</tt>. The iterator will return
68 * all Unicode characters with canonical decompositions, including Korean Hangul characters.
72 public ComposedCharIter() {
77 * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
79 * @param compat <tt>false</tt> for canonical decompositions only;
80 * <tt>true</tt> for both canonical and compatibility decompositions.
83 * @param options Optional decomposition features. None are supported, so this is ignored.
86 public ComposedCharIter(boolean compat, int options) {
// Stores the Normalizer2Impl that later decomposition lookups go through.
// NOTE(review): the condition choosing between the two assignments below is not visible here;
// presumably compat == true picks the NFKC instance and compat == false the NFC instance - confirm.
// The options argument is not referenced by any visible line of this constructor.
88 n2impl = Norm2AllModes.getNFKCInstance().impl;
90 n2impl = Norm2AllModes.getNFCInstance().impl;
95 * Determines whether there are any precomposed Unicode characters not yet returned
99 public boolean hasNext() {
// nextChar == Normalizer.DONE means no lookahead character is cached at this point.
// NOTE(review): the body of this branch is not visible here; presumably it refills nextChar - confirm.
100 if (nextChar == Normalizer.DONE) {
// Reports true exactly when nextChar holds something other than the DONE sentinel.
103 return nextChar != Normalizer.DONE;
107 * Returns the next precomposed Unicode character.
108 * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
109 * by Unicode, in ascending order. After all precomposed characters have
110 * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
111 * to <tt>next</tt> will return {@link #DONE}.
112 * @deprecated ICU 2.2
115 if (nextChar == Normalizer.DONE) {
119 nextChar = Normalizer.DONE;
120 return (char) curChar;
124 * Returns the Unicode decomposition of the current character.
125 * This method returns the decomposition of the precomposed character most
126 * recently returned by {@link #next}. The resulting decomposition is
127 * affected by the settings of the options passed to the constructor.
128 * @deprecated ICU 2.2
130 public String decomposition() {
131 // the decomposition buffer contains the decomposition of
132 // current char so just return it
// decompBuf is filled in findNextChar() from n2impl.getDecomposition(c) and may be null.
// NOTE(review): neither branch of this null check is visible here, so the value returned
// for a null buffer cannot be confirmed from this view.
133 if(decompBuf != null) {
140 private void findNextChar() {
// Asks the normalizer implementation for the decomposition of candidate c and caches it in decompBuf.
// NOTE(review): the declaration of c, any enclosing loop, and the update of nextChar are not
// visible here; presumably c walks upward from curChar - confirm against the full source.
145 decompBuf = n2impl.getDecomposition(c);
// A non-null result is treated as "c has a decomposition".
146 if(decompBuf != null) {
147 // the curChar can be decomposed... so it is a composed char
// Normalization implementation used for decomposition lookups; assigned in the two-argument constructor.
160 private final Normalizer2Impl n2impl;
// Result of the most recent n2impl.getDecomposition() call made by findNextChar(); may be null.
161 private String decompBuf;
// Value that is narrowed to char and handed back to the caller; starts at 0.
162 private int curChar = 0;
// Lookahead slot; Normalizer.DONE marks it as empty, which is also its initial state.
163 private int nextChar = Normalizer.DONE;