/* ******************************************************************************* * Copyright (C) 1996-2010, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.text; /** * UnicodeFilter defines a protocol for selecting a * subset of the full range (U+0000 to U+FFFF) of Unicode characters. * Currently, filters are used in conjunction with classes like * Transliterator * to only process selected characters through a * transformation. * @stable ICU 2.0 */ public abstract class UnicodeFilter implements UnicodeMatcher { /** * Returns true for characters that are in the selected * subset. In other words, if a character is to be * filtered, then contains() returns * false. * @stable ICU 2.0 */ public abstract boolean contains(int c); /** * Default implementation of UnicodeMatcher::matches() for Unicode * filters. Matches a single 16-bit code unit at offset. * @stable ICU 2.0 */ public int matches(Replaceable text, int[] offset, int limit, boolean incremental) { int c; if (offset[0] < limit && contains(c = text.char32At(offset[0]))) { offset[0] += UTF16.getCharCount(c); return U_MATCH; } if (offset[0] > limit && contains(c = text.char32At(offset[0]))) { // Backup offset by 1, unless the preceding character is a // surrogate pair -- then backup by 2 (keep offset pointing at // the lead surrogate). --offset[0]; if (offset[0] >= 0) { offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1; } return U_MATCH; } if (incremental && offset[0] == limit) { return U_PARTIAL_MATCH; } return U_MISMATCH; } // TODO Remove this when the JDK property implements MemberDoc.isSynthetic /** * (This should not be here; it is declared to make CheckTags * happy. Java inserts a synthetic constructor and CheckTags * can't tell that it's synthetic.) * * @internal * @deprecated This API is ICU internal only. */ protected UnicodeFilter() {} }