2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
10 * <code>UnicodeFilter</code> defines a protocol for selecting a
11 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
12 * Currently, filters are used in conjunction with classes like
13 * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a>
14 * to only process selected characters through a
18 public abstract class UnicodeFilter implements UnicodeMatcher {
21 * Returns <tt>true</tt> for characters that are in the selected
22 * subset. In other words, if a character is <b>to be
23 * filtered</b>, then <tt>contains()</tt> returns
24 * <b><tt>false</tt></b>.
27 public abstract boolean contains(int c);
30 * Default implementation of UnicodeMatcher::matches() for Unicode
31 * filters. Matches a single code point (one or two 16-bit code units) at offset.
34 public int matches(Replaceable text,
37 boolean incremental) {
39 if (offset[0] < limit &&
40 contains(c = text.char32At(offset[0]))) {
41 offset[0] += UTF16.getCharCount(c);
44 if (offset[0] > limit && contains(text.char32At(offset[0]))) {
45 // Back up offset by 1, unless the preceding character is a
46 // surrogate pair -- then back up by 2 (keep offset pointing at
47 // the lead surrogate).
50 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
54 if (incremental && offset[0] == limit) {
55 return U_PARTIAL_MATCH;
60 // TODO Remove this when the JDK properly implements MemberDoc.isSynthetic
62 * (This should not be here; it is declared to make CheckTags
63 * happy. Java inserts a synthetic constructor and CheckTags
64 * can't tell that it's synthetic.)
67 * @deprecated This API is ICU internal only.
69 protected UnicodeFilter() {}