* thrown when argument rules have an invalid syntax
*/
CollationParsedRuleBuilder(String rules) throws ParseException {
- m_nfcImpl_.getFCDTrie(); // initialize the optional FCD trie
m_parser_ = new CollationRuleParser(rules);
m_parser_.assembleTokenList();
m_utilColEIter_ = RuleBasedCollator.UCA_
}
m_utilElement_.clear();
- StringBuilder str = new StringBuilder();
// add latin-1 stuff
copyRangeFromUCA(t, 0, 0xFF);
// copy contractions from the UCA - this is felt mostly for cyrillic
char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_;
+ int maxUCAContractionLength = RuleBasedCollator.MAX_UCA_CONTRACTION_LENGTH;
int offset = 0;
while (conts[offset] != 0) {
- // tailoredCE = ucmpe32_get(t.m_mapping, *conts);
- int tailoredCE = t.m_mapping_.getValue(conts[offset]);
+ // A continuation is NUL-terminated and NUL-padded
+ // except if it has the maximum length.
+ int contractionLength = maxUCAContractionLength;
+ while (contractionLength > 0 && conts[offset + contractionLength - 1] == 0) {
+ --contractionLength;
+ }
+ int first = Character.codePointAt(conts, offset);
+ int firstLength = Character.charCount(first);
+ int tailoredCE = t.m_mapping_.getValue(first);
Elements prefixElm = null;
if (tailoredCE != CE_NOT_FOUND_) {
boolean needToAdd = true;
if (isContractionTableElement(tailoredCE)) {
if (isTailored(t.m_contractions_, tailoredCE, conts,
- offset + 1) == true) {
+ offset + firstLength) == true) {
needToAdd = false;
}
}
// The format for pre-context character is
// conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
Elements elm = new Elements();
- elm.m_cPoints_ = m_utilElement_.m_uchars_;
elm.m_CELength_ = 0;
- elm.m_uchars_ = UCharacter.toString(conts[offset]);
- elm.m_prefixChars_ = UCharacter.toString(conts[offset + 2]);
+ elm.m_uchars_ = Character.toString(conts[offset]);
+ elm.m_cPoints_ = m_utilElement_.m_uchars_;
+ elm.m_prefixChars_ = Character.toString(conts[offset + 2]);
elm.m_prefix_ = 0; // TODO(claireho) : confirm!
prefixElm = t.m_prefixLookup_.get(elm);
if ((prefixElm == null)
}
}
if (m_parser_.m_removeSet_ != null
- && m_parser_.m_removeSet_.contains(conts[offset])) {
+ && m_parser_.m_removeSet_.contains(first)) {
needToAdd = false;
}
if (conts[offset + 1] != 0) { // not precontext
m_utilElement_.m_prefix_ = 0;
m_utilElement_.m_prefixChars_ = null;
+ m_utilElement_.m_uchars_ = new String(conts, offset, contractionLength);
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
- str.delete(0, str.length());
- str.append(conts[offset]);
- str.append(conts[offset + 1]);
- if (conts[offset + 2] != 0) {
- str.append(conts[offset + 2]);
- }
- m_utilElement_.m_uchars_ = str.toString();
m_utilElement_.m_CELength_ = 0;
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
} else { // add a pre-context element
int preKeyLen = 0;
- str.delete(0, str.length()); // clean up
- m_utilElement_.m_cPoints_ = UCharacter
- .toString(conts[offset]);
+ m_utilElement_.m_uchars_ = Character.toString(conts[offset]);
+ m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
m_utilElement_.m_CELength_ = 0;
- m_utilElement_.m_uchars_ = UCharacter
- .toString(conts[offset]);
- m_utilElement_.m_prefixChars_ = UCharacter
- .toString(conts[offset + 2]);
+ m_utilElement_.m_prefixChars_ = Character.toString(conts[offset + 2]);
if (prefixElm == null) {
m_utilElement_.m_prefix_ = 0;
} else { // TODO (claireho): confirm!
// count number of keys for pre-context char.
preKeyLen++;
}
- str.append(conts[offset + 2]);
- str.append(conts[offset]);
- m_utilColEIter_.setText(str.toString());
+ m_utilColEIter_.setText(m_utilElement_.m_prefixChars_ + m_utilElement_.m_uchars_);
// Skip the keys for prefix character, then copy the
// rest to el.
while ((preKeyLen-- > 0)
addAnElement(t, m_utilElement_);
}
} else if (m_parser_.m_removeSet_ != null
- && m_parser_.m_removeSet_.contains(conts[offset])) {
- copyRangeFromUCA(t, conts[offset], conts[offset]);
+ && m_parser_.m_removeSet_.contains(first)) {
+ copyRangeFromUCA(t, first, first);
}
- offset += 3;
+ offset += maxUCAContractionLength;
}
// Add completely ignorable elements
MaxJamoExpansionTable() {
m_endExpansionCE_ = new ArrayList<Integer>();
m_isV_ = new ArrayList<Boolean>();
- m_endExpansionCE_.add(new Integer(0));
+ m_endExpansionCE_.add(Integer.valueOf(0));
m_isV_.add(Boolean.FALSE);
m_maxLSize_ = 1;
m_maxVSize_ = 1;
MaxExpansionTable() {
m_endExpansionCE_ = new ArrayList<Integer>();
m_expansionCESize_ = new ArrayList<Byte>();
- m_endExpansionCE_.add(new Integer(0));
- m_expansionCESize_.add(new Byte((byte) 0));
+ m_endExpansionCE_.add(Integer.valueOf(0));
+ m_expansionCESize_.add(Byte.valueOf((byte) 0));
}
MaxExpansionTable(MaxExpansionTable table) {
m_maxExpansions_ = maxet;
// adding an extra initial value for easier manipulation
for (int i = 0; i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i++) {
- maxet.m_endExpansionCE_.add(new Integer(
+ maxet.m_endExpansionCE_.add(Integer.valueOf(
RuleBasedCollator.UCA_.m_expansionEndCE_[i]));
- maxet.m_expansionCESize_.add(new Byte(
+ maxet.m_expansionCESize_.add(Byte.valueOf(
RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i]));
}
m_maxJamoExpansions_ = maxjet;
}
if (!buildCMTabFlag) {
// check combining class
- int fcd = m_nfcImpl_.getFCD16FromSingleLead(m_utilElement_.m_cPoints_.charAt(i)); // TODO: review for handling supplementary characters
+ int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(i)); // TODO: review for handling supplementary characters
if ((fcd & 0xff) == 0) {
// reset flag when current char is not combining mark.
containCombinMarks = false;
* @return the current position of the new element
*/
private static final int addExpansion(List<Integer> expansions, int value) {
- expansions.add(new Integer(value));
+ expansions.add(Integer.valueOf(value));
return expansions.size() - 1;
}
// is smaller
Object currentsize = maxexpansion.m_expansionCESize_.get(result);
if (((Byte) currentsize).byteValue() < expansionsize) {
- maxexpansion.m_expansionCESize_.set(result, new Byte(
- expansionsize));
+ maxexpansion.m_expansionCESize_.set(result, Byte.valueOf(
+ expansionsize));
}
} else {
// we'll need to squeeze the value into the array. initial
// implementation. shifting the subarray down by 1
- maxexpansion.m_endExpansionCE_.add(start + 1, new Integer(endexpansion));
- maxexpansion.m_expansionCESize_.add(start + 1, new Byte(expansionsize));
+ maxexpansion.m_endExpansionCE_.add(start + 1, Integer.valueOf(endexpansion));
+ maxexpansion.m_expansionCESize_.add(start + 1, Byte.valueOf(expansionsize));
}
return maxexpansion.m_endExpansionCE_.size();
}
return maxexpansion.m_endExpansionCE_.size();
}
}
- maxexpansion.m_endExpansionCE_.add(new Integer(endexpansion));
+ maxexpansion.m_endExpansionCE_.add(Integer.valueOf(endexpansion));
maxexpansion.m_isV_.add(isV ? Boolean.TRUE : Boolean.FALSE);
return maxexpansion.m_endExpansionCE_.size();
element = table.m_elements_.size() - 1;
}
- tbl.m_CEs_.add(new Integer(value));
+ tbl.m_CEs_.add(Integer.valueOf(value));
tbl.m_codePoints_.append(codePoint);
return constructSpecialCE(table.m_currentTag_, element);
}
}
}
if (codePoint == tbl.m_codePoints_.charAt(position)) {
- tbl.m_CEs_.set(position, new Integer(newCE));
+ tbl.m_CEs_.set(position, Integer.valueOf(newCE));
return element & 0xFFFFFF;
} else {
return CE_NOT_FOUND_;
element = table.m_elements_.size() - 1;
}
- tbl.m_CEs_.set(offset, new Integer(value));
+ tbl.m_CEs_.set(offset, Integer.valueOf(value));
tbl.m_codePoints_.setCharAt(offset, codePoint);
return constructSpecialCE(table.m_currentTag_, element);
}
offset++;
}
- tbl.m_CEs_.add(offset, new Integer(value));
+ tbl.m_CEs_.add(offset, Integer.valueOf(value));
tbl.m_codePoints_.insert(offset, codePoint);
return constructSpecialCE(table.m_currentTag_, element);
return 0;
}
- tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, new Integer(value));
+ tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, Integer.valueOf(value));
return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF);
}
table.m_offsets_.clear();
int position = 0;
for (int i = 0; i < tsize; i++) {
- table.m_offsets_.add(new Integer(position));
+ table.m_offsets_.add(Integer.valueOf(position));
position += table.m_elements_.get(i).m_CEs_
.size();
}
if (isContractionTableElement(CEPointer.get(offset + j).intValue())) {
int ce = CEPointer.get(offset + j).intValue();
CEPointer.set(offset + j,
- new Integer(constructSpecialCE(getCETag(ce),
+ Integer.valueOf(constructSpecialCE(getCETag(ce),
table.m_offsets_.get(getContractionOffset(ce))
.intValue())));
}
cm = new char[0x10000];
}
for (char c = 0; c < 0xffff; c++) {
- int fcd = m_nfcImpl_.getFCD16FromSingleLead(c); // TODO: review for handling supplementary characters
+ int fcd;
+ if (UTF16.isLeadSurrogate(c)) {
+ fcd = 0;
+ if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(c)) {
+ int supp = Character.toCodePoint(c, (char)0xdc00);
+ int suppLimit = supp + 0x400;
+ while (supp < suppLimit) {
+ fcd |= m_nfcImpl_.getFCD16FromNormData(supp++);
+ }
+ }
+ } else {
+ fcd = m_nfcImpl_.getFCD16(c);
+ }
+ // TODO: review for handling supplementary characters
if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
(UTF16.isLeadSurrogate(c) && fcd != 0)) {
// c is a leading surrogate with some FCD data
unsafeCPSet(t.m_unsafeCP_, c);
- if (buildCMTable && (fcd != 0)) {
+ if (buildCMTable) {
int cc = (fcd & 0xff);
int pos = (cc << 8) + index[cc];
cm[pos] = c;
for (int j = 0; j < m_utilElement_.m_cPoints_.length()
- m_utilElement_.m_cPointsOffset_; j++) {
- int fcd = m_nfcImpl_.getFCD16FromSingleLead(m_utilElement_.m_cPoints_.charAt(j)); // TODO: review for handling supplementary characters
+ int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(j)); // TODO: review for handling supplementary characters
if ((fcd & 0xff) == 0) {
baseChar = m_utilElement_.m_cPoints_.charAt(j);
} else {
}
CombinClassTable cmLookup = t.cmLookup;
int[] index = cmLookup.index;
- int cClass = m_nfcImpl_.getFCD16FromSingleLead(cMark) & 0xff; // TODO: review for handling supplementary characters
+ int cClass = m_nfcImpl_.getFCD16(cMark) & 0xff; // TODO: review for handling supplementary characters
int maxIndex = 0;
char[] precompCh = new char[256];
int[] precompClass = new int[256];
String comp = Normalizer.compose(decompBuf.toString(), false);
if (comp.length() == 1) {
precompCh[precompLen] = comp.charAt(0);
- precompClass[precompLen] = (m_nfcImpl_.getFCD16FromSingleLead(cmLookup.cPoints[i]) & 0xff); // TODO: review for handling supplementary characters
+ precompClass[precompLen] = m_nfcImpl_.getFCD16(cmLookup.cPoints[i]) & 0xff; // TODO: review for handling supplementary characters
precompLen++;
StringBuilder decomp = new StringBuilder();
for (int j = 0; j < m_utilElement_.m_cPoints_.length(); j++) {