2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
10 * <code>UnicodeFilter</code> defines a protocol for selecting a
\r
11 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
\r
12 * Currently, filters are used in conjunction with classes like
\r
13 * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a>
\r
14 * to only process selected characters through a
\r
18 public abstract class UnicodeFilter implements UnicodeMatcher {
\r
21 * Returns <tt>true</tt> for characters that are in the selected
\r
22 * subset. In other words, if a character is <b>to be
\r
23 * filtered</b>, then <tt>contains()</tt> returns
\r
24 * <b><tt>false</tt></b>.
\r
27 public abstract boolean contains(int c); // abstract: each concrete filter supplies its own membership test for c
\r
30 * Default implementation of UnicodeMatcher::matches() for Unicode
\r
31 * filters. Matches a single code point (one or two 16-bit code units) at offset.
\r
34 public int matches(Replaceable text,
\r
37 boolean incremental) {
// NOTE(review): this listing skips original lines (the numbering jumps
// 34->37, 37->39, 41->44, 48->50, 51->55 and 56->61), so the declarations of
// offset, limit and c, some statements, and the closing braces are not
// visible here -- consult the complete file before changing this method.
\r
// Forward case: offset[0] is below limit. Read the code point at offset[0];
// if contains() accepts it, move offset[0] forward by UTF16.getCharCount(c)
// so that a code point is consumed as a whole.
39 if (offset[0] < limit &&
\r
40 contains(c = text.char32At(offset[0]))) {
\r
41 offset[0] += UTF16.getCharCount(c);
\r
// Reverse case: offset[0] is above limit. The same containment test is
// applied to the code point read at offset[0].
44 if (offset[0] > limit &&
\r
45 contains(c = text.char32At(offset[0]))) {
\r
46 // Backup offset by 1, unless the preceding character is a
\r
47 // surrogate pair -- then backup by 2 (keep offset pointing at
\r
48 // the lead surrogate).
\r
// Only look at the text again while offset[0] is still non-negative.
50 if (offset[0] >= 0) {
\r
// Subtracts (char count - 1): nothing extra for a code point that occupies
// one code unit, additional steps back only when it occupies more.
51 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
\r
// In incremental mode, an offset sitting exactly on limit is reported as a
// partial match.
55 if (incremental && offset[0] == limit) {
\r
56 return U_PARTIAL_MATCH;
\r
61 // TODO Remove this when the JDK properly implements MemberDoc.isSynthetic
\r
63 * (This should not be here; it is declared to make CheckTags
\r
64 * happy. Java inserts a synthetic constructor and CheckTags
\r
65 * can't tell that it's synthetic.)
\r
68 * @deprecated This API is ICU internal only.
\r
70 protected UnicodeFilter() {} // empty body; protected, so reachable only from subclasses and same-package code
\r