2 *******************************************************************************
3 * Copyright (C) 2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.text.CharacterIterator;
10 import java.util.Stack;
/**
 * Base class for LanguageBreakEngine implementations whose applicability is decided by
 * two pieces of state: a set of characters (fSet) and a bitmask of break-iterator
 * kinds (fTypes). Subclasses supply the actual segmentation by implementing
 * divideUpDictionaryRange().
 */
12 abstract class DictionaryBreakEngine implements LanguageBreakEngine {
// Characters this engine recognizes; starts out empty and is consulted through
// fSet.contains(c) in handles() and findBreaks().
// NOTE(review): nothing in this excerpt populates it -- presumably subclasses do; confirm.
13 protected UnicodeSet fSet = new UnicodeSet();
// Bitmask of break kinds: bit n is tested as ((1 << n) & fTypes) in handles() and findBreaks().
14 private final int fTypes;
17 * @param breakTypes A mask of the break iterators that can use this engine.
18 * For example, (1 << KIND_WORD) | (1 << KIND_LINE) could be used by
19 * word iterators and line iterators, but not any other kind.
21 public DictionaryBreakEngine(int breakTypes) {
// fTypes is declared final and is tested as a bitmask by handles() and findBreaks().
// NOTE(review): the statement that initializes it is not visible in this excerpt --
// presumably fTypes = breakTypes; confirm against the full file.
22 // TODO: consider using a java.util.BitSet with nbits <= 32
/**
 * Reports whether this engine applies to character c for the given kind of break.
 * All three conditions must hold: breakType lies in [0, 32), the bit for breakType
 * is set in fTypes, and fSet contains c.
 *
 * The explicit range check matters because Java uses only the low five bits of an
 * int shift distance, so without it (1 << breakType) for an out-of-range breakType
 * would silently alias a valid bit.
 *
 * @param c the character (code point) to test against fSet
 * @param breakType the kind of break being requested; used as a bit index into fTypes
 * @return true only when every condition above is satisfied
 */
26 public boolean handles(int c, int breakType) {
27 return (breakType >= 0 && breakType < 32) && // breakType is in range
28 ((1 << breakType) & fTypes) != 0 && // this type can use us
29 fSet.contains(c); // we recognize the character
/**
 * Works out a range of text around the iterator's current position and hands it to
 * divideUpDictionaryRange(), then repositions the iterator.
 *
 * NOTE(review): this excerpt omits several lines of the method (the body of the
 * break-type guard, the bodies of both loops, the declaration of result, the
 * assignment of rangeEnd and the final return). The comments below describe only
 * the statements that are visible; confirm the rest against the full file.
 *
 * @param text_ the text to examine; wrapped in a UCharacterIterator before use
 * @param startPos lower bound: one scan loop continues only while the index is > startPos,
 *        and rangeStart is clamped to it
 * @param endPos upper bound: the other scan loop continues only while the index is < endPos
 * @param reverse not referenced in the visible lines; presumably selects which of the
 *        two scan loops runs -- confirm
 * @param breakType the kind of break being requested; must lie in [0, 32) and have its
 *        bit set in fTypes to get past the guard
 * @param foundBreaks passed through to divideUpDictionaryRange()
 * @return not visible in this excerpt; presumably the value obtained from
 *         divideUpDictionaryRange() -- confirm
 */
32 public int findBreaks(CharacterIterator text_, int startPos, int endPos,
33 boolean reverse, int breakType, Stack<Integer> foundBreaks) {
// Guard: true when breakType is outside [0, 32) or its bit is not set in fTypes
// (the complement of the break-type test in handles()).
34 if (breakType < 0 || breakType >= 32 ||
35 ((1 << breakType) & fTypes) == 0) {
40 UCharacterIterator text = UCharacterIterator.getInstance(text_);
// Index of the wrapped iterator on entry.
41 int start = text.getIndex();
42 int current, rangeStart, rangeEnd;
43 int c = text.current();
// Scan bounded below by startPos: continues while the index is above startPos and the
// most recently tested character is in fSet.
// NOTE(review): the statement that moves the iterator and refreshes c is not visible.
45 boolean isDict = fSet.contains(c);
46 while ((current = text.getIndex()) > startPos && isDict) {
48 isDict = fSet.contains(c);
// Clamp to startPos if the index fell below it; otherwise start at current, or one
// position after it when the last tested character was not in fSet.
50 rangeStart = (current < startPos) ? startPos :
51 current + (isDict ? 0 : 1);
// Scan bounded above by endPos: continues while the index is below endPos and c is in fSet.
54 while ((current = text.getIndex()) < endPos && fSet.contains(c)) {
// Delegate the segmentation of [rangeStart, rangeEnd] to the subclass, then leave the
// iterator at the index where the scan stopped.
// NOTE(review): whether rangeEnd is inclusive or exclusive is not shown here.
61 result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
62 text.setIndex(current);
/**
 * Subclass hook invoked by findBreaks() once it has determined the range to process.
 *
 * @param text the UCharacterIterator that findBreaks() created over the caller's text
 * @param rangeStart start of the range computed by findBreaks()
 * @param rangeEnd end of the range computed by findBreaks()
 *        (NOTE(review): inclusive vs. exclusive is not established in this excerpt)
 * @param foundBreaks the stack supplied by the caller of findBreaks(); presumably the
 *        implementation records break positions on it -- confirm
 * @return an int that findBreaks() stores in its result variable; presumably the
 *         number of breaks found -- confirm
 */
67 protected abstract int divideUpDictionaryRange(UCharacterIterator text,
68 int rangeStart, int rangeEnd, Stack<Integer> foundBreaks);