/* ******************************************************************************* * Copyright (C) 1996-2010, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.text; import java.util.Vector; import com.ibm.icu.impl.UtilityExtensions; /** * A set of rules for a RuleBasedTransliterator. This set encodes * the transliteration in one direction from one set of characters or short * strings to another. A RuleBasedTransliterator consists of up to * two such sets, one for the forward direction, and one for the reverse. * *

 * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
 * finding a matching rule at a given point in the text. This is accomplished
 * by the <code>findMatch()</code> method.
 *
 *

Copyright © IBM Corporation 1999. All rights reserved.
 *
 * @author Alan Liu
 */
class TransliterationRuleSet {
    /**
     * Rules in the order they were passed to addRule(). This is the
     * list from which freeze() builds the rules[] table.
     *
     * NOTE(review): declared as a raw Vector, yet freeze() assigns
     * ruleVector.elementAt(j) to a TransliterationRule without a cast.
     * The element type parameter looks to have been lost; confirm
     * against the upstream source.
     */
    private Vector ruleVector;

    /**
     * Length of the longest preceding (ante) context among the rules
     * added so far; 0 while the set is empty.
     */
    private int maxContextLength;

    /**
     * Sorted and indexed table of rules. This is created by freeze() from
     * the rules in ruleVector. rules.length >= ruleVector.size(), and the
     * references in rules[] are aliases of the references in ruleVector.
     * A single rule in ruleVector is listed one or more times in rules[].
     * addRule() resets this field to null.
     */
    private TransliterationRule[] rules;

    /**
     * Index table. For text having a first character c, compute x = c&0xFF.
     * Now use rules[index[x]..index[x+1]-1]. This index table is created by
     * freeze().
     */
    private int[] index;

    /**
     * Construct a new empty rule set: an empty rule list and a maximum
     * context length of 0. The rules[] and index tables are not assigned
     * here.
     */
    public TransliterationRuleSet() {
        ruleVector = new Vector();
        maxContextLength = 0;
    }

    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context of any rule
     * added so far, or 0 if no rule has been added.
     */
    public int getMaximumContextLength() {
        return maxContextLength;
    }

    /**
     * Add a rule to this set. Rules are added in order, and order is
     * significant. Adding a rule also clears the rules[] table; the
     * index table is left untouched.
     * @param rule the rule to add
     */
    public void addRule(TransliterationRule rule) {
        ruleVector.addElement(rule);
        int len;
        // Keep maxContextLength equal to the longest ante context seen so far.
        if ((len = rule.getAnteContextLength()) > maxContextLength) {
            maxContextLength = len;
        }
        // The table built by an earlier freeze() no longer matches ruleVector.
        rules = null;
    }

    /**
     * Close this rule set to further additions, check it for masked rules,
     * and index it to optimize performance.
     * @exception IllegalArgumentException if some rules are masked
     */
    public void freeze() {
        /* Construct the rule array and index table. We reorder the
         * rules by sorting them into 256 bins. Each bin contains all
         * rules matching the index value for that bin. A rule
         * matches an index value if string whose first key character
         * has a low byte equal to the index value can match the rule.
         *
         * Each bin contains zero or more rules, in the same order
         * they were found originally. 
However, the total rules in * the bins may exceed the number in the original vector, * since rules that have a variable as their first key * character will generally fall into more than one bin. * * That is, each bin contains all rules that either have that * first index value as their first key character, or have * a set containing the index value as their first character. */ int n = ruleVector.size(); index = new int[257]; // [sic] Vector v = new Vector(2*n); // heuristic; adjust as needed /* Precompute the index values. This saves a LOT of time. */ int[] indexValue = new int[n]; for (int j=0; j= 0) { if (indexValue[j] == x) { v.addElement(ruleVector.elementAt(j)); } } else { // If the indexValue is < 0, then the first key character is // a set, and we must use the more time-consuming // matchesIndexValue check. In practice this happens // rarely, so we seldom tread this code path. TransliterationRule r = ruleVector.elementAt(j); if (r.matchesIndexValue(x)) { v.addElement(r); } } } } index[256] = v.size(); /* Freeze things into an array. */ rules = new TransliterationRule[v.size()]; v.copyInto(rules); StringBuilder errors = null; /* Check for masking. This is MUCH faster than our old check, * which was each rule against each following rule, since we * only have to check for masking within each bin now. It's * 256*O(n2^2) instead of O(n1^2), where n1 is the total rule * count, and n2 is the per-bin rule count. But n2< " + UtilityExtensions.formatInput(text, pos)); } return true; case UnicodeMatcher.U_PARTIAL_MATCH: if (Transliterator.DEBUG) { System.out.println((incremental ? 
"Rule.i: partial match ":"Rule: partial match ") + rules[i].toRule(true) + " => " + UtilityExtensions.formatInput(text, pos)); } return false; default: if (Transliterator.DEBUG) { System.out.println("Rule: no match " + rules[i]); } } } // No match or partial match from any rule pos.start += UTF16.getCharCount(text.char32At(pos.start)); if (Transliterator.DEBUG) { System.out.println((incremental ? "Rule.i: no match => ":"Rule: no match => ") + UtilityExtensions.formatInput(text, pos)); } return true; } /** * Create rule strings that represents this rule set. */ String toRules(boolean escapeUnprintable) { int i; int count = ruleVector.size(); StringBuilder ruleSource = new StringBuilder(); for (i=0; i