2 *******************************************************************************
\r
3 * Copyright (C) 2004-2006, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.text;
\r
10 import com.ibm.icu.impl.Utility;
\r
13 * A post-processor for Chinese text.
\r
15 final class RBNFChinesePostProcessor implements RBNFPostProcessor {
\r
16 private NFRuleSet lastRuleSet;
\r
17 private boolean longForm;
\r
20 private static final String[] rulesetNames = {
\r
21 "%traditional", "%simplified", "%accounting", "%time"
\r
25 * Initialization routine for this instance, called once
\r
26 * immediately after first construction and never again.
\r
28 public void init(RuleBasedNumberFormat formatter, String rules) {
\r
32 * Work routine. Post process the output, which was generated by the
\r
33 * ruleset with the given name.
\r
35 public void process(StringBuffer buf, NFRuleSet ruleSet) {
\r
36 // markers depend on what rule set we are using
\r
38 if (ruleSet != lastRuleSet) {
\r
39 String name = ruleSet.getName();
\r
40 for (int i = 0; i < rulesetNames.length; ++i) {
\r
41 if (rulesetNames[i].equals(name)) {
\r
43 longForm = i == 1 || i == 3;
\r
50 for (int i = Utility.indexOf(buf,"*"); i != -1; i = Utility.indexOf(buf, "*", i)) {
\r
56 final String DIAN = "\u9ede"; // decimal point
\r
58 final String[][] markers = {
\r
59 { "\u842c", "\u5104", "\u5146", "\u3007" }, // marker chars, last char is the 'zero'
\r
60 { "\u4e07", "\u4ebf", "\u5146", "\u3007" },
\r
61 { "\u842c", "\u5104", "\u5146", "\u96f6" }
\r
62 // need markers for time?
\r
65 // remove unwanted lings
\r
66 // a '0' (ling) with * might be removed
\r
67 // mark off 10,000 'chunks', markers are Z, Y, W (zhao, yii, and wan)
\r
68 // already, we avoid two lings in the same chunk -- ling without * wins
\r
69 // now, just need to avoid optional lings in adjacent chunks
\r
70 // process right to left
\r
74 // state            none    opt.           req.
\r
75 // -----            ----    ----           ----
\r
76 // none to right    none    opt.           req.
\r
77 // opt. to right    none    clear, none    clear right, req.
\r
78 // req. to right    none    clear, none    req.
\r
80 // mark chunks with '|' for convenience
\r
82 String[] m = markers[format];
\r
83 for (int i = 0; i < m.length-1; ++i) {
\r
84 int n = Utility.indexOf(buf, m[i]);
\r
86 buf.insert(n+m[i].length(), '|');
\r
91 int x = Utility.indexOf(buf, DIAN);
\r
95 int s = 0; // 0 = none to right, 1 = opt. to right, 2 = req. to right
\r
96 int n = -1; // previous optional ling
\r
97 String ling = markers[format][3];
\r
99 int m = Utility.lastIndexOf(buf, "|", x);
\r
100 int nn = Utility.lastIndexOf(buf, ling, x);
\r
103 ns = (nn > 0 && buf.charAt(nn-1) != '*') ? 2 : 1;
\r
107 // actually much simpler, but leave this verbose for now so it's easier to follow
\r
109 case 0: /* none, none */
\r
110 s = ns; // redundant
\r
113 case 1: /* none, opt. */
\r
115 n = nn; // remember optional ling to right
\r
117 case 2: /* none, req. */
\r
121 case 3: /* opt., none */
\r
125 case 4: /* opt., opt. */
\r
126 buf.delete(nn-1, nn+ling.length()); // delete current optional ling
\r
130 case 5: /* opt., req. */
\r
131 buf.delete(n-1, n+ling.length()); // delete previous optional ling
\r
135 case 6: /* req., none */
\r
139 case 7: /* req., opt. */
\r
140 buf.delete(nn-1, nn+ling.length()); // delete current optional ling
\r
144 case 8: /* req., req. */
\r
149 throw new IllegalStateException();
\r
153 for (int i = buf.length(); --i >= 0;) {
\r
154 char c = buf.charAt(i);
\r
155 if (c == '*' || c == '|') {
\r
156 buf.delete(i, i+1);
\r