2 *******************************************************************************
\r
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
10 * <code>UnicodeFilter</code> defines a protocol for selecting a
\r
11 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
\r
12 * Currently, filters are used in conjunction with classes like {@link
\r
13 * Transliterator} to only process selected characters through a
\r
17 public abstract class UnicodeFilter implements UnicodeMatcher {
\r
20 * Returns <tt>true</tt> for characters that are in the selected
\r
21 * subset. In other words, if a character is <b>to be
\r
22 * filtered out</b> (that is, excluded from the subset), then <tt>contains()</tt> returns
\r
23 * <b><tt>false</tt></b>.
\r
26 public abstract boolean contains(int c); // c is an int, not a char: matches() passes it the result of text.char32At(...)
\r
29 * Default implementation of UnicodeMatcher::matches() for Unicode
\r
30 * filters. Matches a single code point (one or two 16-bit code units) at offset.
\r
33 public int matches(Replaceable text,
\r
36 boolean incremental) {
\r
38 if (offset[0] < limit &&
\r
39 contains(c = text.char32At(offset[0]))) {
\r
40 offset[0] += UTF16.getCharCount(c);
\r
43 if (offset[0] > limit &&
\r
44 contains(c = text.char32At(offset[0]))) {
\r
45 // Backup offset by 1, unless the preceding character is a
\r
46 // surrogate pair -- then backup by 2 (keep offset pointing at
\r
47 // the lead surrogate).
\r
49 if (offset[0] >= 0) {
\r
50 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
\r
54 if (incremental && offset[0] == limit) {
\r
55 return U_PARTIAL_MATCH;
\r
61 * (This should not be here; it is declared to make CheckTags
\r
62 * happy. Java inserts a synthetic constructor and CheckTags
\r
63 * can't tell that it's synthetic.)
\r
65 * TODO Remove this when the JDK properly implements MemberDoc.isSynthetic
\r
67 * @deprecated This API is ICU internal only.
\r
69 protected UnicodeFilter() {} // intentionally empty body; protected, so only subclasses and same-package code can call it
\r