/* ********************************************************************** * Copyright (c) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 11/19/2001 aliu Creation. ********************************************************************** */ package com.ibm.icu.text; import com.ibm.icu.lang.UCharacter; /** * A transliterator that converts Unicode escape forms to the * characters they represent. Escape forms have a prefix, a suffix, a * radix, and minimum and maximum digit counts. * *
<p>This class is package private. It registers several standard
* variants with the system which are then accessed via their IDs.
*
* @author Alan Liu
*/
class UnescapeTransliterator extends Transliterator {
/**
* The encoded pattern specification. The pattern consists of
* zero or more forms. Each form consists of a prefix, suffix,
* radix, minimum digit count, and maximum digit count. Each form
* begins with a five character header holding, in order, the
* prefix length, the suffix length, the radix, the minimum digit
* count, and the maximum digit count; these numeric values are
* cast to 16-bit characters and stored in the array. Following
* these five characters, the prefix characters, then the suffix
* characters are stored. Each form thus takes n+5 characters,
* where n is the total length of the prefix and suffix. The end
* of the array is marked by the single character END appearing
* where the next header would start.
*/
private char spec[];
/**
* Special character marking the end of the spec[] array.
*/
private static final char END = 0xFFFF;
/**
* Registers standard variants with the system. Called by
* Transliterator during initialization.
*/
static void register() {
    // Every form below is laid out as
    //     prefixLen, suffixLen, radix, minDigits, maxDigits,
    //     prefix characters..., suffix characters...
    // and each spec array is terminated by END.

    // Unicode: "U+10FFFF" hex, min=4, max=6
    registerVariant("Hex-Any/Unicode", new char[] {
        2, 0, 16, 4, 6, 'U', '+',
        END
    });

    // Java: "\\uFFFF" hex, min=4, max=4
    registerVariant("Hex-Any/Java", new char[] {
        2, 0, 16, 4, 4, '\\', 'u',
        END
    });

    // C: "\\uFFFF" hex, min=4, max=4; "\\U0010FFFF" hex, min=8, max=8
    registerVariant("Hex-Any/C", new char[] {
        2, 0, 16, 4, 4, '\\', 'u',
        2, 0, 16, 8, 8, '\\', 'U',
        END
    });

    // XML: "&#x10FFFF;" hex, min=1, max=6
    registerVariant("Hex-Any/XML", new char[] {
        3, 1, 16, 1, 6, '&', '#', 'x', ';',
        END
    });

    // XML10: "&#1114111;" dec, min=1, max=7 (not really "Hex-Any")
    registerVariant("Hex-Any/XML10", new char[] {
        2, 1, 10, 1, 7, '&', '#', ';',
        END
    });

    // Perl: "\\x{263A}" hex, min=1, max=6
    registerVariant("Hex-Any/Perl", new char[] {
        3, 1, 16, 1, 6, '\\', 'x', '{', '}',
        END
    });

    // All of the above in one spec, tried in this order:
    // Unicode, Java, C, XML, XML10, Perl
    registerVariant("Hex-Any", new char[] {
        2, 0, 16, 4, 6, 'U', '+',            // Unicode
        2, 0, 16, 4, 4, '\\', 'u',           // Java
        2, 0, 16, 8, 8, '\\', 'U',           // C (surrogates)
        3, 1, 16, 1, 6, '&', '#', 'x', ';',  // XML
        2, 1, 10, 1, 7, '&', '#', ';',       // XML10
        3, 1, 16, 1, 6, '\\', 'x', '{', '}', // Perl
        END
    });
}

/**
 * Registers a factory under the given ID that produces
 * UnescapeTransliterator instances built from the given encoded
 * spec.
 *
 * @param id the ID to register; also the ID given to every
 * instance the factory creates
 * @param spec the encoded, END-terminated form specification
 */
private static void registerVariant(final String id, final char[] spec) {
    Transliterator.registerFactory(id, new Transliterator.Factory() {
        public Transliterator getInstance(String ID) {
            // The ID argument is not consulted; the instance always
            // carries the ID it was registered under. Each instance
            // receives its own copy of the spec array.
            return new UnescapeTransliterator(id, spec.clone());
        }
    });
}
/**
* Package private constructor. Takes the encoded spec array.
*/
UnescapeTransliterator(String ID, char[] spec) {
    // Hand the ID to the Transliterator superclass; null is passed
    // as its second constructor argument.
    super(ID, null);
    // The array is stored by reference; no copy is made here.
    this.spec = spec;
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
protected void handleTransliterate(Replaceable text,
Position pos, boolean isIncremental) {
int start = pos.start;
int limit = pos.limit;
int i, j, ipat;
loop:
while (start < limit) {
// Loop over the forms in spec[]. Exit this loop when we
// match one of the specs. Exit the outer loop if a
// partial match is detected and isIncremental is true.
for (j=0, ipat=0; spec[ipat] != END; ++j) {
// Read the header
int prefixLen = spec[ipat++];
int suffixLen = spec[ipat++];
int radix = spec[ipat++];
int minDigits = spec[ipat++];
int maxDigits = spec[ipat++];
// s is a copy of start that is advanced over the
// characters as we parse them.
int s = start;
boolean match = true;
for (i=0; i