2 *******************************************************************************
3 * Copyright (C) 2000-2013, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
9 * Port From: ICU4C v2.1 : collate/StringSearchTest
10 * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp
13 package com.ibm.icu.dev.test.search;
15 import java.text.StringCharacterIterator;
16 import java.util.Locale;
18 import com.ibm.icu.dev.test.TestFmwk;
19 import com.ibm.icu.text.BreakIterator;
20 import com.ibm.icu.text.Collator;
21 import com.ibm.icu.text.RuleBasedCollator;
22 import com.ibm.icu.text.SearchIterator;
23 import com.ibm.icu.text.StringSearch;
24 import com.ibm.icu.util.ULocale;
26 public class SearchTest extends TestFmwk {
/**
 * One search test case: the text to search, the pattern to look for, the keys
 * that select a collator and a break iterator, the collation strength, and the
 * expected match offsets and match sizes (parallel arrays).
 * NOTE(review): the field declarations and some of the assignments are not
 * visible in this view of the file.
 */
29 static class SearchData {
30 SearchData(String text, String pattern, String coll, int strength, String breaker,
31 int[] offset, int[] size) {
33 this.pattern = pattern;
35 this.strength = strength;
36 this.breaker = breaker;
// Collators shared by the tests; assigned in init().
49 RuleBasedCollator m_en_us_;
50 RuleBasedCollator m_fr_fr_;
51 RuleBasedCollator m_de_;
52 RuleBasedCollator m_es_;
// Word and character break iterators shared by the tests; assigned in init().
53 BreakIterator m_en_wordbreaker_;
54 BreakIterator m_en_characterbreaker_;
// Basic search cases at default settings (no collator key, no breaker key).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
56 static SearchData[] BASIC = {
57 new SearchData("xxxxxxxxxxxxxxxxxxxx", "fisher", null, Collator.TERTIARY, null, new int[] {-1}, new int[]{0}),
58 new SearchData("silly spring string", "string", null, Collator.TERTIARY, null, new int[]{13, -1}, new int[]{6}),
59 new SearchData("silly spring string string", "string", null, Collator.TERTIARY, null, new int[]{13, 20, -1}, new int[]{6, 6}),
60 new SearchData("silly string spring string", "string", null, Collator.TERTIARY, null, new int[]{6, 20, -1}, new int[]{6, 6}),
61 new SearchData("string spring string", "string", null, Collator.TERTIARY, null, new int[]{0, 14, -1}, new int[]{6, 6}),
62 new SearchData("Scott Ganyo", "c", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[]{1}),
63 new SearchData("Scott Ganyo", " ", null, Collator.TERTIARY, null, new int[]{5, -1}, new int[]{1}),
64 new SearchData("\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[]{-1}, new int[]{0}),
65 new SearchData("a\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[]{-1}, new int[]{0}),
66 new SearchData("a\u0300\u0325", "\u0300\u0325", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[]{2}),
67 new SearchData("a\u0300b", "\u0300", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[]{1}),
68 new SearchData("\u00c9", "e", null, Collator.PRIMARY, null, new int[]{0, -1}, new int[]{1}),
69 new SearchData(null, null, null, Collator.TERTIARY, null, new int[]{-1}, new int[]{0})
// Cases that name a break iterator ("characterbreaker" or "wordbreaker").
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
72 SearchData BREAKITERATOREXACT[] = {
73 new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "characterbreaker", new int[] {0, 5, -1}, new int[] {3, 3}),
74 new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "wordbreaker", new int[] {5, -1}, new int[] {3}),
75 new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "characterbreaker", new int[] {10, 14, -1}, new int[] {3, 2}),
76 new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "wordbreaker", new int[] {10, -1}, new int[] {3}),
77 new SearchData("Channel, another channel, more channels, and one last Channel", "Channel", "es", Collator.TERTIARY,
78 "wordbreaker", new int[] {0, 54, -1}, new int[] {7, 7}),
80 new SearchData("testing that \u00e9 does not match e", "e", null, Collator.TERTIARY,
81 "characterbreaker", new int[] {1, 17, 30, -1}, new int[] {1, 1, 1}),
82 new SearchData("testing that string ab\u00e9cd does not match e", "e", null, Collator.TERTIARY,
83 "characterbreaker", new int[] {1, 28, 41, -1}, new int[] {1, 1, 1}),
84 new SearchData("\u00c9", "e", "fr", Collator.PRIMARY, "characterbreaker", new int[]{0, -1}, new int[]{1}),
85 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Break-iterator cases; presumably the canonical-mode counterpart of the
// exact-match break-iterator table (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
88 SearchData BREAKITERATORCANONICAL[] = {
89 new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "characterbreaker", new int[] {0, 5, -1}, new int[] {3, 3}),
90 new SearchData("foxy fox", "fox", null, Collator.TERTIARY, "wordbreaker", new int[] {5, -1}, new int[] {3}),
91 new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "characterbreaker", new int[] {10, 14, -1}, new int[] {3, 2}),
92 new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, "wordbreaker", new int[] {10, -1}, new int[] {3}),
93 new SearchData("Channel, another channel, more channels, and one last Channel", "Channel", "es", Collator.TERTIARY, "wordbreaker",
94 new int[] {0, 54, -1}, new int[] {7, 7}),
96 new SearchData("testing that \u00e9 does not match e", "e", null, Collator.TERTIARY,
97 "characterbreaker", new int[] {1, 17, 30, -1}, new int[] {1, 1, 1}),
98 new SearchData("testing that string ab\u00e9cd does not match e", "e", null,
99 Collator.TERTIARY, "characterbreaker", new int[] {1, 28, 41, -1}, new int[] {1, 1, 1}),
100 new SearchData("\u00c9", "e", "fr", Collator.PRIMARY, "characterbreaker", new int[]{0, -1}, new int[]{1}),
101 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Basic cases including combining-mark sequences; presumably run in canonical
// mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
104 SearchData BASICCANONICAL[] = {
105 new SearchData("xxxxxxxxxxxxxxxxxxxx", "fisher", null, Collator.TERTIARY, null, new int[] {-1}, new int [] {0}),
106 new SearchData("silly spring string", "string", null, Collator.TERTIARY, null, new int[] {13, -1}, new int[] {6}),
107 new SearchData("silly spring string string", "string", null, Collator.TERTIARY, null, new int[] {13, 20, -1}, new int[] {6, 6}),
108 new SearchData("silly string spring string", "string", null, Collator.TERTIARY, null, new int[] {6, 20, -1}, new int[] {6, 6}),
109 new SearchData("string spring string", "string", null, Collator.TERTIARY, null, new int[] {0, 14, -1}, new int[] {6, 6}),
110 new SearchData("Scott Ganyo", "c", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
111 new SearchData("Scott Ganyo", " ", null, Collator.TERTIARY, null, new int[] {5, -1}, new int[] {1}),
112 new SearchData("\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int [] {0, -1}, new int[] {2}),
113 new SearchData("a\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int [] {1, -1}, new int[] {2}),
114 new SearchData("a\u0300\u0325", "\u0300\u0325", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[]{2}),
115 new SearchData("a\u0300b", "\u0300", null, Collator.TERTIARY, null, new int[]{1, -1}, new int[] {1}),
116 new SearchData("a\u0300\u0325b", "\u0300b", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {3}),
117 new SearchData("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {5}),
118 new SearchData("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {5}),
119 new SearchData("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, Collator.TERTIARY, null,
120 new int[] {1, 12, -1}, new int[] {5, 3}),
121 new SearchData("\u00c4\u0323", "A\u0323\u0308", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
122 new SearchData("\u0308\u0323", "\u0323\u0308", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
123 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Same text and pattern searched at TERTIARY and at PRIMARY strength; the
// PRIMARY case additionally expects "fpx" to match "fox".
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
126 SearchData COLLATOR[] = {
128 new SearchData("fox fpx", "fox", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {3}),
130 new SearchData("fox fpx", "fox", null, Collator.PRIMARY, null, new int[] {0, 4, -1}, new int[] {3, 3}),
131 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Collation rule text relating o/O and p/P.
// NOTE(review): the code that builds a collator from it is outside this view.
134 String TESTCOLLATORRULE = "& o,O ; p,P";
// Extra rules relating ae/oe/ue to the umlauted vowels; init() appends them to
// the German and Spanish locale rules.
135 String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc";
// Same text and pattern at TERTIARY and PRIMARY strength; presumably the
// canonical-mode counterpart of the collator table (confirm against its test).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
138 SearchData COLLATORCANONICAL[] = {
140 new SearchData("fox fpx", "fox", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {3}),
142 new SearchData("fox fpx", "fox", null, Collator.PRIMARY, null, new int[] {0, 4, -1}, new int[] {3, 3}),
143 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Cases where the text contains precomposed characters (U+00C0, U+01FA, U+0F73)
// and the pattern is a base letter, a combining mark, or a partial sequence.
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
146 SearchData COMPOSITEBOUNDARIES[] = {
147 new SearchData("\u00C0", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
148 new SearchData("A\u00C0C", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
149 new SearchData("\u00C0A", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
150 new SearchData("B\u00C0", "A", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
151 new SearchData("\u00C0B", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
152 new SearchData("\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
153 new SearchData("\u0300\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
154 new SearchData("\u00C0\u0300", "\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
155 /* A + 030A + 0301 */
156 new SearchData("\u01FA", "\u01FA", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
157 new SearchData("\u01FA", "\u030A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
158 new SearchData("\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
159 new SearchData("\u01FA", "\u030AA", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
160 new SearchData("\u01FA", "\u0301", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
161 new SearchData("\u01FA", "A\u0301", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
162 new SearchData("\u01FA", "\u0301A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
163 new SearchData("\u01FA", "\u030A\u0301", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
164 new SearchData("A\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
165 new SearchData("\u01FAA", "\u0301A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
166 new SearchData("\u0F73", "\u0F73", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
167 new SearchData("\u0F73", "\u0F71", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
168 new SearchData("\u0F73", "\u0F72", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
169 new SearchData("\u0F73", "\u0F71\u0F72", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
170 new SearchData("A\u0F73", "A\u0F71", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
171 new SearchData("\u0F73A", "\u0F72A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
172 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Precomposed-character cases with the same inputs as the non-canonical
// composite-boundary table but different expected results for several rows;
// presumably run in canonical mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
175 SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
176 new SearchData("\u00C0", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
177 new SearchData("A\u00C0C", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
178 new SearchData("\u00C0A", "A", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
179 new SearchData("B\u00C0", "A", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
180 new SearchData("\u00C0B", "A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
181 new SearchData("\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
182 new SearchData("\u0300\u00C0", "\u0300", null, Collator.TERTIARY, null, new int[] {0, 1, -1}, new int[] {1, 1}),
183 /* \u0300 blocked by \u0300 */
184 new SearchData("\u00C0\u0300", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
185 /* A + 030A + 0301 */
186 new SearchData("\u01FA", "\u01FA", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
187 new SearchData("\u01FA", "\u030A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
188 new SearchData("\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
189 new SearchData("\u01FA", "\u030AA", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
190 new SearchData("\u01FA", "\u0301", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
192 new SearchData("\u01FA", "A\u0301", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
193 new SearchData("\u01FA", "\u0301A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
194 new SearchData("\u01FA", "\u030A\u0301", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
195 new SearchData("A\u01FA", "A\u030A", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {1}),
196 new SearchData("\u01FAA", "\u0301A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
197 new SearchData("\u0F73", "\u0F73", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
198 new SearchData("\u0F73", "\u0F71", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
199 new SearchData("\u0F73", "\u0F72", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
200 new SearchData("\u0F73", "\u0F71\u0F72", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {1}),
201 new SearchData("A\u0F73", "A\u0F71", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
202 new SearchData("\u0F73A", "\u0F72A", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
203 new SearchData("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A",
204 null, Collator.TERTIARY, null, new int[] {0, 6, 10, 13, -1}, new int[] {1, 3, 2, 1}),
205 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Cases whose text and pattern contain surrogate pairs (supplementary code
// points), including lone surrogates next to a full pair.
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
// NOTE(review): the expected-length argument of several entries is not visible
// in this view of the file.
208 SearchData SUPPLEMENTARY[] = {
209 /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
210 new SearchData("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
211 "\uD800\uDC00", null, Collator.TERTIARY, null,
212 new int[] {4, 13, 22, 26, 29, -1}, new int[] {2, 2, 2, 2, 2}),
213 new SearchData("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null,
214 Collator.TERTIARY, null, new int[] {3, -1},
216 new SearchData("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ",
217 null, Collator.TERTIARY, null, new int[] {3, -1},
219 new SearchData("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-",
220 null, Collator.TERTIARY, null, new int[] {3, -1},
222 new SearchData("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,",
223 null, Collator.TERTIARY, null, new int[] {3, -1},
225 new SearchData("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?",
226 null, Collator.TERTIARY, null, new int[] {3, -1},
228 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Collation rule text defining multi-character sequences (ab, AB, ABC, X plus
// combining marks). NOTE(review): the code that builds a collator from it is
// outside this view.
231 String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315";
// Cases built around multi-character sequences and combining marks; presumably
// searched with a collator built from CONTRACTIONRULE (confirm against its test).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
233 SearchData CONTRACTION[] = {
234 /* common discontiguous */
235 new SearchData("A\u0300\u0315", "\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
236 new SearchData("A\u0300\u0315", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
237 /* contraction prefix */
238 new SearchData("AB\u0315C", "A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
239 new SearchData("AB\u0315C", "AB", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
240 new SearchData("AB\u0315C", "\u0315", null, Collator.TERTIARY, null, new int[] {2, -1}, new int[] {1}),
241 /* discontiguous problem here for backwards iteration.
242 accents not found because discontiguous stores all information */
243 new SearchData("X\u0300\u0319\u0315", "\u0319", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
244 /* ends not with a contraction character */
245 new SearchData("X\u0315\u0300D", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
246 new SearchData("X\u0315\u0300D", "X\u0300\u0315", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {3}),
247 new SearchData("X\u0300\u031A\u0315D", "X\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
248 /* blocked discontiguous */
249 new SearchData("X\u0300\u031A\u0315D", "\u031A\u0315D", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
250 new SearchData("ab", "z", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
251 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Contraction cases with the same inputs as the non-canonical contraction table
// but different expected results for several rows; presumably run in canonical
// mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
254 SearchData CONTRACTIONCANONICAL[] = {
255 /* common discontiguous */
256 new SearchData("A\u0300\u0315", "\u0300", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
257 new SearchData("A\u0300\u0315", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
258 /* contraction prefix */
259 new SearchData("AB\u0315C", "A", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
260 new SearchData("AB\u0315C", "AB", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
261 new SearchData("AB\u0315C", "\u0315", null, Collator.TERTIARY, null, new int[] {2, -1}, new int[] {1}),
262 /* discontiguous problem here for backwards iteration.
263 forwards gives 0, 4 but backwards gives 1, 3 */
264 /* {"X\u0300\u0319\u0315", "\u0319", null, Collator.TERTIARY, null, {0, -1},
267 /* ends not with a contraction character */
268 new SearchData("X\u0315\u0300D", "\u0300\u0315", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
269 new SearchData("X\u0315\u0300D", "X\u0300\u0315", null, Collator.TERTIARY, null,
270 new int[] {0, -1}, new int[] {3}),
271 new SearchData("X\u0300\u031A\u0315D", "X\u0300", null, Collator.TERTIARY, null,
272 new int[] {0, -1}, new int[] {4}),
273 /* blocked discontiguous */
274 new SearchData("X\u0300\u031A\u0315D", "\u031A\u0315D", null, Collator.TERTIARY, null,
275 new int[] {1, -1}, new int[] {4}),
276 new SearchData("ab", "z", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
277 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Plain-text cases with several occurrences of the pattern "bee".
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
280 SearchData MATCH[] = {
281 new SearchData("a busy bee is a very busy beeee", "bee", null, Collator.TERTIARY, null,
282 new int[] {7, 26, -1}, new int[] {3, 3}),
283 /* 012345678901234567890123456789012345678901234567890 */
284 new SearchData("a busy bee is a very busy beeee with no bee life", "bee", null,
285 Collator.TERTIARY, null, new int[] {7, 26, 40, -1}, new int[] {3, 3, 3}),
286 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Collation rule text equating U+0300 with "a".
// NOTE(review): the code that builds a collator from it is outside this view.
289 String IGNORABLERULE = "&a = \u0300";
// Combining-mark case at PRIMARY strength; presumably searched with a collator
// built from IGNORABLERULE (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
291 SearchData IGNORABLE[] = {
292 new SearchData("\u0300\u0315 \u0300\u0315 ", "\u0300", null, Collator.PRIMARY, null,
293 new int[] {0, 3, -1}, new int[] {2, 2}),
294 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Cases with accented Latin and Greek text at SECONDARY and PRIMARY strength,
// mixing precomposed and decomposed forms.
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
297 SearchData DIACTRICMATCH[] = {
298 new SearchData("\u0061\u0061\u00E1", "\u0061\u00E1", null, Collator.SECONDARY, null,
299 new int[] {1, -1}, new int[] {2}),
300 new SearchData("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020",
301 "\u00C2\u0303", null, Collator.PRIMARY, null, new int[] {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, new int[] {2, 1, 1, 1, 3, 2, 1, 3, 2}),
302 new SearchData("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, Collator.PRIMARY, null,
303 new int[] {0, 5, -1}, new int[] {4, 3}),
304 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Combining-mark cases (U+0300, U+0325 in either order); presumably run in
// canonical mode with normalization enabled (confirm against its test).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
// NOTE(review): the expected-length argument of two entries is not visible in
// this view of the file.
307 SearchData NORMCANONICAL[] = {
308 new SearchData("\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
309 new SearchData("\u0300\u0325", "\u0325", null, Collator.TERTIARY, null, new int[] {0, -1}, new int[] {2}),
310 new SearchData("a\u0300\u0325", "\u0325\u0300", null, Collator.TERTIARY, null, new int[] {1, -1},
312 new SearchData("a\u0300\u0325", "\u0300\u0325", null, Collator.TERTIARY, null, new int[] {1, -1},
314 new SearchData("a\u0300\u0325", "\u0325", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
315 new SearchData("a\u0300\u0325", "\u0300", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
316 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Pattern with the two combining marks in the opposite order from the text;
// a match at offset 1 is expected. Presumably run with normalization enabled
// (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
319 SearchData NORMEXACT[] = {
320 new SearchData("a\u0300\u0325", "\u0325\u0300", null, Collator.TERTIARY, null, new int[] {1, -1}, new int[] {2}),
321 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Pattern with the two combining marks in the opposite order from the text;
// no match is expected. Presumably run with normalization disabled (confirm
// against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
324 SearchData NONNORMEXACT[] = {
325 new SearchData("a\u0300\u0325", "\u0325\u0300", null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0}),
326 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// "abab" in "abababab" with overlapping matches expected (offsets 0, 2, 4).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
329 SearchData OVERLAP[] = {
330 new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 2, 4, -1}, new int[] {4, 4, 4}),
331 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// "abab" in "abababab" with only non-overlapping matches expected (offsets 0, 4).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
334 SearchData NONOVERLAP[] = {
335 new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 4, -1}, new int[] {4, 4}),
336 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// "abab" in "abababab" with overlapping matches expected (offsets 0, 2, 4);
// presumably run in canonical mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
339 SearchData OVERLAPCANONICAL[] = {
340 new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 2, 4, -1},
341 new int[] {4, 4, 4}),
342 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// "abab" in "abababab" with only non-overlapping matches expected (offsets 0, 4);
// presumably run in canonical mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
345 SearchData NONOVERLAPCANONICAL[] = {
346 new SearchData("abababab", "abab", null, Collator.TERTIARY, null, new int[] {0, 4, -1}, new int[] {4, 4}),
347 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// One sentence searched for two different patterns at PRIMARY strength;
// presumably run in canonical mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
350 SearchData PATTERNCANONICAL[] = {
351 new SearchData("The quick brown fox jumps over the lazy foxes", "the", null,
352 Collator.PRIMARY, null, new int[] {0, 31, -1}, new int[] {3, 3}),
353 new SearchData("The quick brown fox jumps over the lazy foxes", "fox", null,
354 Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
355 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// One sentence searched for two different patterns at PRIMARY strength.
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
358 SearchData PATTERN[] = {
359 new SearchData("The quick brown fox jumps over the lazy foxes", "the", null,
360 Collator.PRIMARY, null, new int[] {0, 31, -1}, new int[] {3, 3}),
361 new SearchData("The quick brown fox jumps over the lazy foxes", "fox", null,
362 Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
363 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// PRIMARY-strength cases using the "en", "fr", "de" and "es" collator keys,
// one of them with the word breaker.
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
366 SearchData STRENGTH[] = {
367 /*012345678901234567890123456789012345678901234567890123456789*/
368 new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
369 Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
370 new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
371 Collator.PRIMARY, "wordbreaker", new int[] {16, -1}, new int[] {3}),
372 new SearchData("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
373 "peche", "fr", Collator.PRIMARY, null, new int[] {15, 21, 27, 34, -1}, new int[] {5, 5, 5, 5}),
374 new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, null,
375 new int[] {10, 14, -1}, new int[] {3, 2}),
376 new SearchData("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es",
377 Collator.PRIMARY, null, new int[] {2, 19, 33, 56, -1}, new int[] {7, 7, 7, 7}),
378 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// PRIMARY-strength cases using the "en", "fr", "de" and "es" collator keys;
// presumably run in canonical mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
381 SearchData STRENGTHCANONICAL[] = {
382 /*012345678901234567890123456789012345678901234567890123456789 */
383 new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
384 Collator.PRIMARY, null, new int[] {16, 40, -1}, new int[] {3, 3}),
385 new SearchData("The quick brown fox jumps over the lazy foxes", "fox", "en",
386 Collator.PRIMARY, "wordbreaker", new int[] {16, -1}, new int[] {3}),
387 new SearchData("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
388 "peche", "fr", Collator.PRIMARY, null, new int[] {15, 21, 27, 34, -1}, new int[] {5, 5, 5, 5}),
389 new SearchData("This is a toe T\u00F6ne", "toe", "de", Collator.PRIMARY, null,
390 new int[] {10, 14, -1}, new int[] {3, 2}),
391 new SearchData("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es",
392 Collator.PRIMARY, null, new int[]{2, 19, 33, 56, -1}, new int[] {7, 7, 7, 7}),
393 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[]{0})
// Surrogate-pair (supplementary code point) cases; presumably run in canonical
// mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
// NOTE(review): the expected-length argument of several entries is not visible
// in this view of the file.
396 SearchData SUPPLEMENTARYCANONICAL[] = {
397 /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
398 new SearchData("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
399 "\uD800\uDC00", null, Collator.TERTIARY, null, new int[] {4, 13, 22, 26, 29, -1},
400 new int[] {2, 2, 2, 2, 2}),
401 new SearchData("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null,
402 Collator.TERTIARY, null, new int[] {3, -1},
404 new SearchData("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ",
405 null, Collator.TERTIARY, null, new int[] {3, -1},
407 new SearchData("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-",
408 null, Collator.TERTIARY, null, new int[] {3, -1},
410 new SearchData("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,",
411 null, Collator.TERTIARY, null, new int[] {3, -1},
413 new SearchData("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?",
414 null, Collator.TERTIARY, null, new int[] {3, -1},
416 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Cases involving spaces and punctuation inside the text or as the pattern, at
// several strengths; one entry expects zero-length matches at every offset.
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
419 static SearchData VARIABLE[] = {
420 /*012345678901234567890123456789012345678901234567890123456789*/
421 new SearchData("blackbirds black blackbirds blackbird black-bird", "blackbird", null, Collator.TERTIARY, null,
422 new int[] {0, 17, 28, 38, -1}, new int[] {9, 9, 9, 10}),
424 /* to see that it doesn't go into an infinite loop if the start of text
425 is an ignorable character */
426 new SearchData(" on", "go", null, Collator.TERTIARY, null,
427 new int[] {-1}, new int[]{0}),
428 new SearchData("abcdefghijklmnopqrstuvwxyz", " ", null, Collator.PRIMARY, null,
429 new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1},
430 new int[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
432 /* testing tightest match */
433 new SearchData(" abc a bc ab c a bc ab c", "abc", null, Collator.QUATERNARY, null,
434 new int[]{1, -1}, new int[] {3}),
435 /*012345678901234567890123456789012345678901234567890123456789 */
436 new SearchData(" abc a bc ab c a bc ab c", "abc", null, Collator.SECONDARY, null,
437 new int[] {1, 6, 13, 21, 31, -1}, new int[] {3, 4, 4, 5, 5}),
439 /* totally ignorable text */
440 new SearchData(" ---------------", "abc", null, Collator.SECONDARY, null,
441 new int[] {-1}, new int[] {0}),
442 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[] {0})
// Two different texts searched for the same pattern "fox"; presumably run in
// canonical mode (confirm against the test that reads it).
// Entry fields: text, pattern, collator key, strength, breaker key,
// expected match offsets (a negative value ends the list), expected match lengths.
445 static SearchData TEXTCANONICAL[] = {
446 new SearchData("the foxy brown fox", "fox", null, Collator.TERTIARY, null,
447 new int[] {4, 15, -1}, new int[] {3, 3}),
448 new SearchData("the quick brown fox", "fox", null, Collator.TERTIARY, null,
449 new int[] {16, -1}, new int[]{3}),
450 new SearchData(null, null, null, Collator.TERTIARY,null, new int[] {-1}, new int[] {0})
/**
 * Creates the collators and break iterators shared by the tests.
 * The German and Spanish collators are first obtained for their locales and
 * then replaced by collators built from those locale rules with
 * EXTRACOLLATIONRULE appended.
 */
461 protected void init()throws Exception{
// Locale collators, cast because the fields are declared as RuleBasedCollator.
462 m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US);
463 m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE);
464 m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE"));
465 m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES"));
// No locale argument is passed to the break-iterator factories.
466 m_en_wordbreaker_ = BreakIterator.getWordInstance();
467 m_en_characterbreaker_ = BreakIterator.getCharacterInstance();
// Rebuild the German and Spanish collators with the extra rules appended.
468 String rules = m_de_.getRules() + EXTRACOLLATIONRULE;
469 m_de_ = new RuleBasedCollator(rules);
470 rules = m_es_.getRules() + EXTRACOLLATIONRULE;
471 m_es_ = new RuleBasedCollator(rules);
/**
 * Command-line entry point: creates a SearchTest and hands the arguments to
 * its run method.
 */
474 public static void main(String[] args) throws Exception {
475 new SearchTest().run(args);
476 // new SearchTest().TestContraction();
/**
 * Selects a collator from a SearchData collator key. The visible branches
 * distinguish null, "fr", "de" and "es".
 * NOTE(review): the return statements are not visible in this view; presumably
 * each branch returns the matching m_*_ field - confirm against the full file.
 */
479 RuleBasedCollator getCollator(String collator) {
480 if (collator == null) {
482 } if (collator.equals("fr")) {
484 } else if (collator.equals("de")) {
486 } else if (collator.equals("es")) {
/**
 * Selects a break iterator from a SearchData breaker key: "wordbreaker" gives
 * the word break iterator; the last visible return gives the character break
 * iterator.
 * NOTE(review): the body of the null branch is not visible in this view;
 * callers check the result against null, so it presumably returns null.
 */
493 BreakIterator getBreakIterator(String breaker) {
494 if (breaker == null) {
496 } if (breaker.equals("wordbreaker")) {
497 return m_en_wordbreaker_;
499 return m_en_characterbreaker_;
/**
 * Runs one test case with canonical matching switched on.
 * Looks up the collator and break iterator named by the case, sets the
 * collator to the case's strength, builds a StringSearch over the case's text
 * and pattern, enables canonical mode, and checks the matches with
 * assertEqualWithStringSearch. The collator strength is set back to TERTIARY
 * on both visible exit paths because the collators are shared between cases.
 * NOTE(review): the try opener and the return statements are not visible in
 * this view of the file.
 *
 * @param search the test case (text, pattern, keys, expected matches)
 */
503 boolean assertCanonicalEqual(SearchData search) {
504 Collator collator = getCollator(search.collator);
505 BreakIterator breaker = getBreakIterator(search.breaker);
506 StringSearch strsrch;
508 String text = search.text;
509 String pattern = search.pattern;
// Point the shared break iterator at this case's text before searching.
511 if (breaker != null) {
512 breaker.setText(text);
514 collator.setStrength(search.strength);
516 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
517 strsrch.setCanonical(true);
518 } catch (Exception e) {
// A space now separates the fixed text from the exception message.
519 errln("Error opening string search " + e.getMessage());
523 if (!assertEqualWithStringSearch(strsrch, search)) {
524 collator.setStrength(Collator.TERTIARY);
527 collator.setStrength(Collator.TERTIARY);
/**
 * Runs one test case with the StringSearch left at its default attributes.
 * Looks up the collator and break iterator named by the case, sets the
 * collator to the case's strength, builds a StringSearch over the case's text
 * and pattern, and checks the matches with assertEqualWithStringSearch. The
 * collator strength is set back to TERTIARY on both visible exit paths.
 * NOTE(review): the try opener and the return statements are not visible in
 * this view of the file.
 *
 * @param search the test case (text, pattern, keys, expected matches)
 */
531 boolean assertEqual(SearchData search) {
532 Collator collator = getCollator(search.collator);
533 BreakIterator breaker = getBreakIterator(search.breaker);
534 StringSearch strsrch;
536 String text = search.text;
537 String pattern = search.pattern;
// Point the shared break iterator at this case's text before searching.
539 if (breaker != null) {
540 breaker.setText(text);
542 collator.setStrength(search.strength);
544 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
545 } catch (Exception e) {
546 errln("Error opening string search " + e.getMessage());
550 if (!assertEqualWithStringSearch(strsrch, search)) {
551 collator.setStrength(Collator.TERTIARY);
554 collator.setStrength(Collator.TERTIARY);
/**
 * Runs one test case with explicit canonical and overlapping settings.
 * Looks up the collator and break iterator named by the case, sets the
 * collator to the case's strength, builds a StringSearch over the case's text
 * and pattern, applies the two settings, and checks the matches with
 * assertEqualWithStringSearch. The collator strength is set back to TERTIARY
 * on both visible exit paths.
 * NOTE(review): the try opener and the return statements are not visible in
 * this view of the file.
 *
 * @param search the test case (text, pattern, keys, expected matches)
 * @param canonical value passed to StringSearch.setCanonical
 * @param overlap value passed to StringSearch.setOverlapping
 */
558 boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) {
559 Collator collator = getCollator(search.collator);
560 BreakIterator breaker = getBreakIterator(search.breaker);
561 StringSearch strsrch;
563 String text = search.text;
564 String pattern = search.pattern;
// Point the shared break iterator at this case's text before searching.
566 if (breaker != null) {
567 breaker.setText(text);
569 collator.setStrength(search.strength);
571 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
572 strsrch.setCanonical(canonical);
573 strsrch.setOverlapping(overlap);
574 } catch (Exception e) {
575 errln("Error opening string search " + e.getMessage());
579 if (!assertEqualWithStringSearch(strsrch, search)) {
580 collator.setStrength(Collator.TERTIARY);
583 collator.setStrength(Collator.TERTIARY);
/**
 * Compares the matches reported by a StringSearch against the expected
 * offsets and lengths of a test case, first in the forward direction and then
 * in the backward direction. The expected offset list is read until a
 * negative value is reached; after each direction the search is expected to
 * report SearchIterator.DONE with a match length of 0. Mismatches are
 * reported through errln together with the text and pattern.
 * NOTE(review): the declarations of count and matchtext, the calls that
 * advance the search, the count updates and the return statements are not
 * visible in this view of the file.
 *
 * @param strsrch the search object to drive
 * @param search the test case holding the expected offsets and sizes
 */
587 boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) {
589 int matchindex = search.offset[count];
// Before any search step there must be no current match.
592 if (strsrch.getMatchStart() != SearchIterator.DONE ||
593 strsrch.getMatchLength() != 0) {
594 errln("Error with the initialization of match start and length");
596 // start of following matches
597 while (matchindex >= 0) {
598 int matchlength = search.size[count];
600 //int x = strsrch.getMatchStart();
601 if (matchindex != strsrch.getMatchStart() ||
602 matchlength != strsrch.getMatchLength()) {
603 errln("Text: " + search.text);
604 errln("Searching forward for pattern: " + strsrch.getPattern());
605 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
// The matched text must equal the expected slice of the searched text.
610 matchtext = strsrch.getMatchedText();
611 String targetText = search.text;
612 if (matchlength > 0 &&
613 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
614 errln("Error getting following matched text");
617 matchindex = search.offset[count];
// Past the last expected forward match the search must report DONE.
620 if (strsrch.getMatchStart() != SearchIterator.DONE ||
621 strsrch.getMatchLength() != 0) {
622 errln("Text: " + search.text);
623 errln("Searching forward for pattern: " + strsrch.getPattern());
624 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
627 // start of preceding matches
// Step back from the terminating entry to the last real expected match.
628 count = count == 0 ? 0 : count - 1;
629 matchindex = search.offset[count];
630 while (matchindex >= 0) {
631 int matchlength = search.size[count];
633 if (matchindex != strsrch.getMatchStart() ||
634 matchlength != strsrch.getMatchLength()) {
635 errln("Text: " + search.text);
636 errln("Searching backward for pattern: " + strsrch.getPattern());
637 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
641 matchtext = strsrch.getMatchedText();
642 String targetText = search.text;
643 if (matchlength > 0 &&
644 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
// This is the backward pass, so the message names the preceding match.
645 errln("Error getting preceding matched text");
648 matchindex = count > 0 ? search.offset[count - 1] : -1;
// Before the first expected match the search must report DONE again.
652 if (strsrch.getMatchStart() != SearchIterator.DONE ||
653 strsrch.getMatchLength() != 0) {
654 errln("Text: " + search.text);
655 errln("Searching backward for pattern: " + strsrch.getPattern());
656 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
662 public void TestConstructor()
664 String pattern = "pattern";
665 String text = "text";
666 StringCharacterIterator textiter = new StringCharacterIterator(text);
667 Collator defaultcollator = Collator.getInstance();
668 BreakIterator breaker = BreakIterator.getCharacterInstance();
669 breaker.setText(text);
670 StringSearch search = new StringSearch(pattern, text);
671 if (!search.getPattern().equals(pattern)
672 || !search.getTarget().equals(textiter)
673 || !search.getCollator().equals(defaultcollator)
674 /*|| !search.getBreakIterator().equals(breaker)*/) {
675 errln("StringSearch(String, String) error");
677 search = new StringSearch(pattern, textiter, m_fr_fr_);
678 if (!search.getPattern().equals(pattern)
679 || !search.getTarget().equals(textiter)
680 || !search.getCollator().equals(m_fr_fr_)
681 /*|| !search.getBreakIterator().equals(breaker)*/) {
682 errln("StringSearch(String, StringCharacterIterator, "
683 + "RuleBasedCollator) error");
685 Locale de = new Locale("de", "DE");
686 breaker = BreakIterator.getCharacterInstance(de);
687 breaker.setText(text);
688 search = new StringSearch(pattern, textiter, de);
689 if (!search.getPattern().equals(pattern)
690 || !search.getTarget().equals(textiter)
691 || !search.getCollator().equals(Collator.getInstance(de))
692 /*|| !search.getBreakIterator().equals(breaker)*/) {
693 errln("StringSearch(String, StringCharacterIterator, Locale) "
697 search = new StringSearch(pattern, textiter, m_fr_fr_,
699 if (!search.getPattern().equals(pattern)
700 || !search.getTarget().equals(textiter)
701 || !search.getCollator().equals(m_fr_fr_)
702 || !search.getBreakIterator().equals(m_en_wordbreaker_)) {
703 errln("StringSearch(String, StringCharacterIterator, Locale) "
708 public void TestBasic() {
710 while (BASIC[count].text != null) {
711 if (!assertEqual(BASIC[count])) {
712 errln("Error at test number " + count);
718 public void TestBreakIterator() {
720 String text = BREAKITERATOREXACT[0].text;
721 String pattern = BREAKITERATOREXACT[0].pattern;
722 StringSearch strsrch = null;
724 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
725 } catch (Exception e) {
726 errln("Error opening string search");
730 strsrch.setBreakIterator(null);
731 if (strsrch.getBreakIterator() != null) {
732 errln("Error usearch_getBreakIterator returned wrong object");
735 strsrch.setBreakIterator(m_en_characterbreaker_);
736 if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) {
737 errln("Error usearch_getBreakIterator returned wrong object");
740 strsrch.setBreakIterator(m_en_wordbreaker_);
741 if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) {
742 errln("Error usearch_getBreakIterator returned wrong object");
747 // special purposes for tests numbers 0-3
748 SearchData search = BREAKITERATOREXACT[count];
749 RuleBasedCollator collator = getCollator(search.collator);
750 BreakIterator breaker = getBreakIterator(search.breaker);
751 //StringSearch strsrch;
754 pattern = search.pattern;
755 if (breaker != null) {
756 breaker.setText(text);
758 collator.setStrength(search.strength);
759 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
760 if (strsrch.getBreakIterator() != breaker) {
761 errln("Error setting break iterator");
763 if (!assertEqualWithStringSearch(strsrch, search)) {
764 collator.setStrength(Collator.TERTIARY);
766 search = BREAKITERATOREXACT[count + 1];
767 breaker = getBreakIterator(search.breaker);
768 if (breaker != null) {
769 breaker.setText(text);
771 strsrch.setBreakIterator(breaker);
772 if (strsrch.getBreakIterator() != breaker) {
773 errln("Error setting break iterator");
776 if (!assertEqualWithStringSearch(strsrch, search)) {
777 errln("Error at test number " + count);
782 while (BREAKITERATOREXACT[count].text != null) {
783 if (!assertEqual(BREAKITERATOREXACT[count])) {
784 errln("Error at test number " + count);
790 public void TestBreakIteratorCanonical() {
793 // special purposes for tests numbers 0-3
794 SearchData search = BREAKITERATORCANONICAL[count];
796 String text = search.text;
797 String pattern = search.pattern;
798 RuleBasedCollator collator = getCollator(search.collator);
799 collator.setStrength(search.strength);
801 BreakIterator breaker = getBreakIterator(search.breaker);
802 StringSearch strsrch = null;
804 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
805 } catch (Exception e) {
806 errln("Error creating string search data");
809 strsrch.setCanonical(true);
810 if (!strsrch.getBreakIterator().equals(breaker)) {
811 errln("Error setting break iterator");
814 if (!assertEqualWithStringSearch(strsrch, search)) {
815 collator.setStrength(Collator.TERTIARY);
818 search = BREAKITERATOREXACT[count + 1];
819 breaker = getBreakIterator(search.breaker);
820 breaker.setText(strsrch.getTarget());
821 strsrch.setBreakIterator(breaker);
822 if (!strsrch.getBreakIterator().equals(breaker)) {
823 errln("Error setting break iterator");
827 strsrch.setCanonical(true);
828 if (!assertEqualWithStringSearch(strsrch, search)) {
829 errln("Error at test number " + count);
835 while (BREAKITERATORCANONICAL[count].text != null) {
836 if (!assertEqual(BREAKITERATORCANONICAL[count])) {
837 errln("Error at test number " + count);
844 public void TestCanonical() {
846 while (BASICCANONICAL[count].text != null) {
847 if (!assertCanonicalEqual(BASICCANONICAL[count])) {
848 errln("Error at test number " + count);
854 public void TestCollator() {
855 // test collator that thinks "o" and "p" are the same thing
856 String text = COLLATOR[0].text;
857 String pattern = COLLATOR[0].pattern;
858 StringSearch strsrch = null;
860 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
861 } catch (Exception e) {
862 errln("Error opening string search ");
865 if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
868 String rules = TESTCOLLATORRULE;
869 RuleBasedCollator tailored = null;
871 tailored = new RuleBasedCollator(rules);
872 tailored.setStrength(COLLATOR[1].strength);
873 } catch (Exception e) {
874 errln("Error opening rule based collator ");
878 strsrch.setCollator(tailored);
879 if (!strsrch.getCollator().equals(tailored)) {
880 errln("Error setting rule based collator");
883 if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) {
886 strsrch.setCollator(m_en_us_);
888 if (!strsrch.getCollator().equals(m_en_us_)) {
889 errln("Error setting rule based collator");
891 if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
892 errln("Error searching collator test");
896 public void TestCollatorCanonical() {
897 /* test collator that thinks "o" and "p" are the same thing */
898 String text = COLLATORCANONICAL[0].text;
899 String pattern = COLLATORCANONICAL[0].pattern;
901 StringSearch strsrch = null;
903 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
904 strsrch.setCanonical(true);
905 } catch (Exception e) {
906 errln("Error opening string search ");
909 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
913 String rules = TESTCOLLATORRULE;
914 RuleBasedCollator tailored = null;
916 tailored = new RuleBasedCollator(rules);
917 tailored.setStrength(COLLATORCANONICAL[1].strength);
918 tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
919 } catch (Exception e) {
920 errln("Error opening rule based collator ");
923 strsrch.setCollator(tailored);
924 if (!strsrch.getCollator().equals(tailored)) {
925 errln("Error setting rule based collator");
928 strsrch.setCanonical(true);
929 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) {
930 logln("COLLATORCANONICAL[1] failed"); // Error should already be reported.
932 strsrch.setCollator(m_en_us_);
934 if (!strsrch.getCollator().equals(m_en_us_)) {
935 errln("Error setting rule based collator");
937 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
938 logln("COLLATORCANONICAL[0] failed"); // Error should already be reported.
942 public void TestCompositeBoundaries() {
944 while (COMPOSITEBOUNDARIES[count].text != null) {
945 // logln("composite " + count);
946 if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
947 errln("Error at test number " + count);
953 public void TestCompositeBoundariesCanonical() {
955 while (COMPOSITEBOUNDARIESCANONICAL[count].text != null) {
956 // logln("composite " + count);
957 if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
958 errln("Error at test number " + count);
964 public void TestContraction() {
965 String rules = CONTRACTIONRULE;
966 RuleBasedCollator collator = null;
968 collator = new RuleBasedCollator(rules);
969 collator.setStrength(Collator.TERTIARY);
970 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
971 } catch (Exception e) {
972 errln("Error opening collator ");
974 String text = "text";
975 String pattern = "pattern";
976 StringSearch strsrch = null;
978 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
979 } catch (Exception e) {
980 errln("Error opening string search ");
984 while (CONTRACTION[count].text != null) {
985 text = CONTRACTION[count].text;
986 pattern = CONTRACTION[count].pattern;
987 strsrch.setTarget(new StringCharacterIterator(text));
988 strsrch.setPattern(pattern);
989 if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
990 errln("Error at test number " + count);
996 public void TestContractionCanonical() {
997 String rules = CONTRACTIONRULE;
998 RuleBasedCollator collator = null;
1000 collator = new RuleBasedCollator(rules);
1001 collator.setStrength(Collator.TERTIARY);
1002 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1003 } catch (Exception e) {
1004 errln("Error opening collator ");
1006 String text = "text";
1007 String pattern = "pattern";
1008 StringSearch strsrch = null;
1010 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1011 strsrch.setCanonical(true);
1012 } catch (Exception e) {
1013 errln("Error opening string search");
1017 while (CONTRACTIONCANONICAL[count].text != null) {
1018 text = CONTRACTIONCANONICAL[count].text;
1019 pattern = CONTRACTIONCANONICAL[count].pattern;
1020 strsrch.setTarget(new StringCharacterIterator(text));
1021 strsrch.setPattern(pattern);
1022 if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) {
1023 errln("Error at test number " + count);
1029 public void TestGetMatch() {
1030 SearchData search = MATCH[0];
1031 String text = search.text;
1032 String pattern = search.pattern;
1034 StringSearch strsrch = null;
1036 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1037 } catch (Exception e) {
1038 errln("Error opening string search ");
1043 int matchindex = search.offset[count];
1045 while (matchindex >= 0) {
1046 int matchlength = search.size[count];
1048 if (matchindex != strsrch.getMatchStart() ||
1049 matchlength != strsrch.getMatchLength()) {
1050 errln("Text: " + search.text);
1051 errln("Pattern: " + strsrch.getPattern());
1052 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1057 matchtext = strsrch.getMatchedText();
1058 if (matchtext.length() != matchlength){
1059 errln("Error getting match text");
1061 matchindex = search.offset[count];
1064 if (strsrch.getMatchStart() != StringSearch.DONE ||
1065 strsrch.getMatchLength() != 0) {
1066 errln("Error end of match not found");
1068 matchtext = strsrch.getMatchedText();
1069 if (matchtext != null) {
1070 errln("Error getting null matches");
1074 public void TestGetSetAttribute() {
1075 String pattern = "pattern";
1076 String text = "text";
1077 StringSearch strsrch = null;
1079 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1080 } catch (Exception e) {
1081 errln("Error opening search");
1085 if (strsrch.isOverlapping()) {
1086 errln("Error default overlaping should be false");
1088 strsrch.setOverlapping(true);
1089 if (!strsrch.isOverlapping()) {
1090 errln("Error setting overlap true");
1092 strsrch.setOverlapping(false);
1093 if (strsrch.isOverlapping()) {
1094 errln("Error setting overlap false");
1097 strsrch.setCanonical(true);
1098 if (!strsrch.isCanonical()) {
1099 errln("Error setting canonical match true");
1101 strsrch.setCanonical(false);
1102 if (strsrch.isCanonical()) {
1103 errln("Error setting canonical match false");
1108 public void TestGetSetOffset() {
1109 String pattern = "1234567890123456";
1110 String text = "12345678901234567890123456789012";
1111 StringSearch strsrch = null;
1113 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1114 } catch (Exception e) {
1115 errln("Error opening search");
1120 /* testing out of bounds error */
1122 strsrch.setIndex(-1);
1123 errln("Error expecting set offset error");
1124 } catch (IndexOutOfBoundsException e) {
1125 logln("PASS: strsrch.setIndex(-1) failed as expected");
1129 strsrch.setIndex(128);
1130 errln("Error expecting set offset error");
1131 } catch (IndexOutOfBoundsException e) {
1132 logln("PASS: strsrch.setIndex(128) failed as expected");
1136 while (BASIC[index].text != null) {
1137 SearchData search = BASIC[index ++];
1140 pattern = search.pattern;
1141 strsrch.setTarget(new StringCharacterIterator(text));
1142 strsrch.setPattern(pattern);
1143 strsrch.getCollator().setStrength(search.strength);
1147 int matchindex = search.offset[count];
1149 while (matchindex >= 0) {
1150 int matchlength = search.size[count];
1152 if (matchindex != strsrch.getMatchStart() ||
1153 matchlength != strsrch.getMatchLength()) {
1154 errln("Text: " + text);
1155 errln("Pattern: " + strsrch.getPattern());
1156 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1159 matchindex = search.offset[count + 1] == -1 ? -1 :
1160 search.offset[count + 2];
1161 if (search.offset[count + 1] != -1) {
1162 strsrch.setIndex(search.offset[count + 1] + 1);
1163 if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1164 errln("Error setting offset\n");
1172 if (strsrch.getMatchStart() != StringSearch.DONE) {
1173 errln("Text: " + text);
1174 errln("Pattern: " + strsrch.getPattern());
1175 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1179 strsrch.getCollator().setStrength(Collator.TERTIARY);
1182 public void TestGetSetOffsetCanonical() {
1184 String text = "text";
1185 String pattern = "pattern";
1186 StringSearch strsrch = null;
1188 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1189 } catch (Exception e) {
1190 errln("Fail to open StringSearch!");
1193 strsrch.setCanonical(true);
1194 /* testing out of bounds error */
1196 strsrch.setIndex(-1);
1197 errln("Error expecting set offset error");
1198 } catch (IndexOutOfBoundsException e) {
1199 logln("PASS: strsrch.setIndex(-1) failed as expected");
1202 strsrch.setIndex(128);
1203 errln("Error expecting set offset error");
1204 } catch (IndexOutOfBoundsException e) {
1205 logln("PASS: strsrch.setIndex(128) failed as expected");
1209 while (BASICCANONICAL[index].text != null) {
1210 SearchData search = BASICCANONICAL[index ++];
1211 if (BASICCANONICAL[index].text == null) {
1212 // skip the last one
1217 pattern = search.pattern;
1218 strsrch.setTarget(new StringCharacterIterator(text));
1219 strsrch.setPattern(pattern);
1221 int matchindex = search.offset[count];
1222 while (matchindex >= 0) {
1223 int matchlength = search.size[count];
1225 if (matchindex != strsrch.getMatchStart() ||
1226 matchlength != strsrch.getMatchLength()) {
1227 errln("Text: " + text);
1228 errln("Pattern: " + strsrch.getPattern());
1229 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1232 matchindex = search.offset[count + 1] == -1 ? -1 :
1233 search.offset[count + 2];
1234 if (search.offset[count + 1] != -1) {
1235 strsrch.setIndex(search.offset[count + 1] + 1);
1236 if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1237 errln("Error setting offset");
1245 if (strsrch.getMatchStart() != StringSearch.DONE) {
1246 errln("Text: " + text);
1247 errln("Pattern: %s" + strsrch.getPattern());
1248 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1252 strsrch.getCollator().setStrength(Collator.TERTIARY);
1255 public void TestIgnorable() {
1256 String rules = IGNORABLERULE;
1258 RuleBasedCollator collator = null;
1260 collator = new RuleBasedCollator(rules);
1261 collator.setStrength(IGNORABLE[count].strength);
1262 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1263 } catch (Exception e) {
1264 errln("Error opening collator ");
1267 String pattern = "pattern";
1268 String text = "text";
1269 StringSearch strsrch = null;
1271 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1272 } catch (Exception e) {
1273 errln("Error opening string search ");
1277 while (IGNORABLE[count].text != null) {
1278 text = IGNORABLE[count].text;
1279 pattern = IGNORABLE[count].pattern;
1280 strsrch.setTarget(new StringCharacterIterator(text));
1281 strsrch.setPattern(pattern);
1282 if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) {
1283 errln("Error at test number " + count);
1289 public void TestInitialization() {
1293 StringSearch result;
1295 /* simple test on the pattern ce construction */
1296 pattern = temp + temp;
1297 text = temp + temp + temp;
1299 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1300 } catch (Exception e) {
1301 errln("Error opening search ");
1305 /* testing if an extremely large pattern will fail the initialization */
1307 for (int count = 0; count < 512; count ++) {
1311 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1312 logln("pattern:" + result.getPattern());
1313 } catch (Exception e) {
1314 errln("Fail: an extremely large pattern will fail the initialization");
1319 public void TestNormCanonical() {
1320 m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1322 while (NORMCANONICAL[count].text != null) {
1323 if (!assertCanonicalEqual(NORMCANONICAL[count])) {
1324 errln("Error at test number " + count);
1328 m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1331 public void TestNormExact() {
1333 m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1334 while (BASIC[count].text != null) {
1335 if (!assertEqual(BASIC[count])) {
1336 errln("Error at test number " + count);
1341 while (NORMEXACT[count].text != null) {
1342 if (!assertEqual(NORMEXACT[count])) {
1343 errln("Error at test number " + count);
1347 m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1349 while (NONNORMEXACT[count].text != null) {
1350 if (!assertEqual(NONNORMEXACT[count])) {
1351 errln("Error at test number " + count);
1357 public void TestOpenClose() {
1358 StringSearch result;
1359 BreakIterator breakiter = m_en_wordbreaker_;
1360 String pattern = "";
1363 StringCharacterIterator chariter= new StringCharacterIterator(text);
1365 /* testing null arguments */
1367 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1368 errln("Error: null arguments should produce an error");
1369 } catch (Exception e) {
1370 logln("PASS: null arguments failed as expected");
1373 chariter.setText(text);
1375 result = new StringSearch(pattern, chariter, null, null);
1376 errln("Error: null arguments should produce an error");
1377 } catch (Exception e) {
1378 logln("PASS: null arguments failed as expected");
1381 text = String.valueOf(0x1);
1383 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1384 errln("Error: Empty pattern should produce an error");
1385 } catch (Exception e) {
1386 logln("PASS: Empty pattern failed as expected");
1389 chariter.setText(text);
1391 result = new StringSearch(pattern, chariter, null, null);
1392 errln("Error: Empty pattern should produce an error");
1393 } catch (Exception e) {
1394 logln("PASS: Empty pattern failed as expected");
1400 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1401 errln("Error: Empty text should produce an error");
1402 } catch (Exception e) {
1403 logln("PASS: Empty text failed as expected");
1406 chariter.setText(text);
1408 result = new StringSearch(pattern, chariter, null, null);
1409 errln("Error: Empty text should produce an error");
1410 } catch (Exception e) {
1411 logln("PASS: Empty text failed as expected");
1416 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1417 errln("Error: null arguments should produce an error");
1418 } catch (Exception e) {
1419 logln("PASS: null arguments failed as expected");
1422 chariter.setText(text);
1424 result = new StringSearch(pattern, chariter, null, null);
1425 errln("Error: null arguments should produce an error");
1426 } catch (Exception e) {
1427 logln("PASS: null arguments failed as expected");
1431 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1432 } catch (Exception e) {
1433 errln("Error: null break iterator is valid for opening search");
1437 result = new StringSearch(pattern, chariter, m_en_us_, null);
1438 } catch (Exception e) {
1439 errln("Error: null break iterator is valid for opening search");
1443 result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH);
1444 } catch (Exception e) {
1445 errln("Error: null break iterator is valid for opening search");
1449 result = new StringSearch(pattern, chariter, Locale.ENGLISH);
1450 } catch (Exception e) {
1451 errln("Error: null break iterator is valid for opening search");
1455 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter);
1456 } catch (Exception e) {
1457 errln("Error: Break iterator is valid for opening search");
1461 result = new StringSearch(pattern, chariter, m_en_us_, null);
1462 logln("pattern:" + result.getPattern());
1463 } catch (Exception e) {
1464 errln("Error: Break iterator is valid for opening search");
1468 public void TestOverlap() {
1470 while (OVERLAP[count].text != null) {
1471 if (!assertEqualWithAttribute(OVERLAP[count], false, true)) {
1472 errln("Error at overlap test number " + count);
1477 while (NONOVERLAP[count].text != null) {
1478 if (!assertEqual(NONOVERLAP[count])) {
1479 errln("Error at non overlap test number " + count);
1486 SearchData search = (OVERLAP[count]);
1487 String text = search.text;
1488 String pattern = search.pattern;
1490 RuleBasedCollator collator = getCollator(search.collator);
1491 StringSearch strsrch = null;
1493 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1494 } catch (Exception e) {
1495 errln("error open StringSearch");
1499 strsrch.setOverlapping(true);
1500 if (!strsrch.isOverlapping()) {
1501 errln("Error setting overlap option");
1503 if (!assertEqualWithStringSearch(strsrch, search)) {
1507 search = NONOVERLAP[count];
1508 strsrch.setOverlapping(false);
1509 if (strsrch.isOverlapping()) {
1510 errln("Error setting overlap option");
1513 if (!assertEqualWithStringSearch(strsrch, search)) {
1514 errln("Error at test number " + count);
1520 public void TestOverlapCanonical() {
1522 while (OVERLAPCANONICAL[count].text != null) {
1523 if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true,
1525 errln("Error at overlap test number %d" + count);
1530 while (NONOVERLAP[count].text != null) {
1531 if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
1532 errln("Error at non overlap test number %d" + count);
1540 const SearchData *search = &(OVERLAPCANONICAL[count]);
1541 UErrorCode status = U_ZERO_ERROR;*/
1542 SearchData search = OVERLAPCANONICAL[count];
1544 /*u_unescape(search.text, temp, 128);
1546 text.setTo(temp, u_strlen(temp));
1547 u_unescape(search.pattern, temp, 128);
1548 UnicodeString pattern;
1549 pattern.setTo(temp, u_strlen(temp));*/
1550 RuleBasedCollator collator = getCollator(search.collator);
1551 StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null);
1552 strsrch.setCanonical(true);
1553 strsrch.setOverlapping(true);
1554 if (strsrch.isOverlapping() != true) {
1555 errln("Error setting overlap option");
1557 if (!assertEqualWithStringSearch(strsrch, search)) {
1561 search = NONOVERLAPCANONICAL[count];
1562 strsrch.setOverlapping(false);
1563 if (strsrch.isOverlapping() != false) {
1564 errln("Error setting overlap option");
1567 if (!assertEqualWithStringSearch(strsrch, search)) {
1569 errln("Error at test number %d" + count);
1577 public void TestPattern() {
1578 m_en_us_.setStrength(PATTERN[0].strength);
1579 StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null);
1581 /*if (U_FAILURE(status)) {
1582 errln("Error opening string search %s", u_errorName(status));
1583 m_en_us_.setStrength(getECollationStrength(UCOL_TERTIARY));
1584 if (strsrch != NULL) {
1590 if (strsrch.getPattern() != PATTERN[0].pattern) {
1591 errln("Error setting pattern");
1593 if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1594 m_en_us_.setStrength(Collator.TERTIARY);
1595 if (strsrch != null) {
1601 strsrch.setPattern(PATTERN[1].pattern);
1602 if (PATTERN[1].pattern != strsrch.getPattern()) {
1603 errln("Error setting pattern");
1604 m_en_us_.setStrength(Collator.TERTIARY);
1605 if (strsrch != null) {
1612 if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) {
1613 m_en_us_.setStrength(Collator.TERTIARY);
1614 if (strsrch != null) {
1620 strsrch.setPattern(PATTERN[0].pattern);
1621 if (PATTERN[0].pattern != strsrch.getPattern()) {
1622 errln("Error setting pattern");
1623 m_en_us_.setStrength(Collator.TERTIARY);
1624 if (strsrch != null) {
1631 if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1632 m_en_us_.setStrength(Collator.TERTIARY);
1633 if (strsrch != null) {
1638 /* enormous pattern size to see if this crashes */
1639 String pattern = "";
1640 for (int templength = 0; templength != 512; templength ++) {
1644 strsrch.setPattern(pattern);
1645 }catch(Exception e) {
1646 errln("Error setting pattern with size 512");
1649 m_en_us_.setStrength(Collator.TERTIARY);
1650 if (strsrch != null) {
1655 public void TestPatternCanonical() {
1656 //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text);
1657 m_en_us_.setStrength(PATTERNCANONICAL[0].strength);
1658 StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text),
1660 strsrch.setCanonical(true);
1662 if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1663 errln("Error setting pattern");
1665 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1666 m_en_us_.setStrength(Collator.TERTIARY);
1671 strsrch.setPattern(PATTERNCANONICAL[1].pattern);
1672 if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) {
1673 errln("Error setting pattern");
1674 m_en_us_.setStrength(Collator.TERTIARY);
1679 strsrch.setCanonical(true);
1681 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) {
1682 m_en_us_.setStrength(Collator.TERTIARY);
1687 strsrch.setPattern(PATTERNCANONICAL[0].pattern);
1688 if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1689 errln("Error setting pattern");
1690 m_en_us_.setStrength(Collator.TERTIARY);
1696 strsrch.setCanonical(true);
1697 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1698 m_en_us_.setStrength(Collator.TERTIARY);
1704 public void TestReset() {
1705 StringCharacterIterator text = new StringCharacterIterator("fish fish");
1706 String pattern = "s";
1708 StringSearch strsrch = new StringSearch(pattern, text, m_en_us_, null);
1709 strsrch.setOverlapping(true);
1710 strsrch.setCanonical(true);
1711 strsrch.setIndex(9);
1713 if (strsrch.isCanonical() || strsrch.isOverlapping() ||
1714 strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 ||
1715 strsrch.getMatchStart() != SearchIterator.DONE) {
1716 errln("Error resetting string search");
1720 if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) {
1721 errln("Error resetting string search\n");
1725 public void TestSetMatch() {
1727 while (MATCH[count].text != null) {
1728 SearchData search = MATCH[count];
1729 StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text),
1733 while (search.offset[size] != -1) {
1737 if (strsrch.first() != search.offset[0]) {
1738 errln("Error getting first match");
1740 if (strsrch.last() != search.offset[size -1]) {
1741 errln("Error getting last match");
1745 while (index < size) {
1746 if (index + 2 < size) {
1747 if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) {
1748 errln("Error getting following match at index " + (search.offset[index + 2]-1));
1751 if (index + 1 < size) {
1752 if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) {
1753 errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1));
1759 if (strsrch.following(search.text.length()) != SearchIterator.DONE) {
1760 errln("Error expecting out of bounds match");
1762 if (strsrch.preceding(0) != SearchIterator.DONE) {
1763 errln("Error expecting out of bounds match");
1770 public void TestStrength() {
1772 while (STRENGTH[count].text != null) {
1773 if (count == 3) count ++;
1774 if (!assertEqual(STRENGTH[count])) {
1775 errln("Error at test number " + count);
1781 public void TestStrengthCanonical() {
1783 while (STRENGTHCANONICAL[count].text != null) {
1784 if (count == 3) count ++;
1785 if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
1786 errln("Error at test number" + count);
/**
 * Runs every entry of SUPPLEMENTARY (up to the entry whose text is null)
 * through assertEqual and reports the index of any entry that fails.
 */
1792 public void TestSupplementary() {
// Guarded by known issue "8080"; presumably the test bails out when the
// guard is true - verify against the full method body.
1793 if (logKnownIssue("8080", null)) {
1797 while (SUPPLEMENTARY[count].text != null) {
1798 if (!assertEqual(SUPPLEMENTARY[count])) {
1799 errln("Error at test number " + count);
/**
 * Runs every entry of SUPPLEMENTARYCANONICAL (up to the entry whose text is
 * null) through assertCanonicalEqual and reports the index of any failure.
 */
1805 public void TestSupplementaryCanonical() {
// Guarded by known issue "8080"; presumably the test bails out when the
// guard is true - verify against the full method body.
1806 if (logKnownIssue("8080", null)) {
1810 while (SUPPLEMENTARYCANONICAL[count].text != null) {
1811 if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
// NOTE(review): no space between "number" and the index, unlike TestSupplementary.
1812 errln("Error at test number" + count);
/**
 * Checks that the target text of a StringSearch can be read back and replaced:
 * getTarget() must equal the iterator supplied, and the matches found must
 * agree with the expectations for the text currently installed.
 */
1818 public void TestText() {
// Two search cases followed by a null-text terminator entry.
1819 SearchData TEXT[] = {
1820 new SearchData("the foxy brown fox", "fox", null, Collator.TERTIARY, null, new int[] {4, 15, -1}, new int[] {3, 3}),
1821 new SearchData("the quick brown fox", "fox", null, Collator.TERTIARY, null, new int[] {16, -1}, new int[] {3}),
1822 new SearchData(null, null, null, Collator.TERTIARY, null, new int[] {-1}, new int[]{0})
// Start with the first text, using the m_en_us_ collator and a null fourth argument.
1824 StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text);
1825 StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null);
1827 if (!t.equals(strsrch.getTarget())) {
1828 errln("Error setting text");
1830 if (!assertEqualWithStringSearch(strsrch, TEXT[0])) {
1831 errln("Error at assertEqualWithStringSearch");
// Swap in the second text on the same search object and repeat both checks.
1835 t = new StringCharacterIterator(TEXT[1].text);
1836 strsrch.setTarget(t);
1837 if (!t.equals(strsrch.getTarget())) {
1838 errln("Error setting text");
1842 if (!assertEqualWithStringSearch(strsrch, TEXT[1])) {
1843 errln("Error at assertEqualWithStringSearch");
/**
 * Same target get/set checks as TestText, but with canonical matching turned
 * on and using the TEXTCANONICAL table. The target is set to entry 0, then
 * entry 1, then entry 0 again, verifying the target and the matches each time.
 */
1848 public void TestTextCanonical() {
1849 StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text);
1850 StringSearch strsrch = new StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null);
1851 strsrch.setCanonical(true);
1853 if (!t.equals(strsrch.getTarget())) {
1854 errln("Error setting text");
1856 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
// Switch to the second text.
1861 t = new StringCharacterIterator(TEXTCANONICAL[1].text);
1862 strsrch.setTarget(t);
1863 if (!t.equals(strsrch.getTarget())) {
1864 errln("Error setting text");
1869 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) {
// Switch back to the first text to confirm the search can be retargeted again.
1874 t = new StringCharacterIterator(TEXTCANONICAL[0].text);
1875 strsrch.setTarget(t);
1876 if (!t.equals(strsrch.getTarget())) {
1877 errln("Error setting text");
1882 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
1883 errln("Error at assertEqualWithStringSearch");
/**
 * Runs every entry of VARIABLE (up to the entry whose text is null) through
 * assertEqual while alternate handling is set to shifted on m_en_us_.
 */
1889 public void TestVariable() {
// m_en_us_ is a field of the test class, so the setting is switched on here
// and switched back off after the loop.
// NOTE(review): the reset is not visibly protected by try/finally - confirm
// whether an exception inside the loop could leave the collator shifted.
1891 m_en_us_.setAlternateHandlingShifted(true);
1892 while (VARIABLE[count].text != null) {
1893 // logln("variable" + count);
1894 if (!assertEqual(VARIABLE[count])) {
1895 errln("Error at test number " + count);
1899 m_en_us_.setAlternateHandlingShifted(false);
/**
 * Runs every entry of VARIABLE (up to the entry whose text is null) through
 * assertCanonicalEqual while alternate handling is set to shifted on m_en_us_.
 * This reads the same VARIABLE table as TestVariable.
 */
1902 public void TestVariableCanonical() {
// Switched on for the duration of the loop and switched back off afterwards.
1904 m_en_us_.setAlternateHandlingShifted(true);
1905 while (VARIABLE[count].text != null) {
1906 // logln("variable " + count);
1907 if (!assertCanonicalEqual(VARIABLE[count])) {
1908 errln("Error at test number " + count);
1912 m_en_us_.setAlternateHandlingShifted(false);
/**
 * Checks that SearchIterator can be subclassed: a local subclass implements
 * matching with String.indexOf / String.lastIndexOf, and the test then walks
 * the matches forward with next() and backward with previous().
 */
1915 public void TestSubClass()
// Minimal SearchIterator subclass that keeps the target as a plain String.
1917 class TestSearch extends SearchIterator
1922 TestSearch(StringCharacterIterator target, BreakIterator breaker,
1925 super(target, breaker);
1926 this.pattern = pattern;
// Copy the target's characters into a String, then rewind the iterator.
1927 StringBuffer buffer = new StringBuffer();
1928 while (targetText.getIndex() != targetText.getEndIndex()) {
1929 buffer.append(targetText.current());
1932 text = buffer.toString();
1933 targetText.setIndex(targetText.getBeginIndex());
// Forward search: first occurrence of the pattern at or after start.
1935 protected int handleNext(int start)
1937 int match = text.indexOf(pattern, start);
1942 targetText.setIndex(match);
1943 setMatchLength(pattern.length());
// Backward search: last occurrence of the pattern beginning at or before start - 1.
1946 protected int handlePrevious(int start)
1948 int match = text.lastIndexOf(pattern, start - 1);
1950 targetText.setIndex(0);
1953 targetText.setIndex(match);
1954 setMatchLength(pattern.length());
// Reads the current position from the target iterator and range-checks it
// against the copied text.
1958 public int getIndex()
1960 int result = targetText.getIndex();
1961 if (result < 0 || result >= text.length()) {
1968 TestSearch search = new TestSearch(
1969 new StringCharacterIterator("abc abcd abc"),
// Expected match starts in the target text above.
1971 int expected[] = {0, 4, 9};
// Forward pass: each next() must return the next expected offset with the pattern's length.
1972 for (int i = 0; i < expected.length; i ++) {
1973 if (search.next() != expected[i]) {
1974 errln("Error getting next match");
1976 if (search.getMatchLength() != search.pattern.length()) {
1977 errln("Error getting next match length");
1980 if (search.next() != SearchIterator.DONE) {
1981 errln("Error should have reached the end of the iteration");
// Backward pass: previous() must return the expected offsets in reverse order.
1983 for (int i = expected.length - 1; i >= 0; i --) {
1984 if (search.previous() != expected[i]) {
// NOTE(review): this branch checks previous(), yet the two messages below say "next".
1985 errln("Error getting next match");
1987 if (search.getMatchLength() != search.pattern.length()) {
1988 errln("Error getting next match length");
1991 if (search.previous() != SearchIterator.DONE) {
1992 errln("Error should have reached the start of the iteration");
1996 //Test for ticket 5024
/**
 * Runs every entry of DIACTRICMATCH (up to the entry whose text is null)
 * through a single StringSearch instance that is reconfigured per entry with
 * that entry's collator, strength, break iterator, text and pattern.
 */
1997 public void TestDiactricMatch() {
// Placeholder pattern and text, used only to construct the search object;
// both are replaced inside the loop.
1998 String pattern = "pattern";
1999 String text = "text";
2000 StringSearch strsrch = null;
2003 strsrch = new StringSearch(pattern, text);
2004 } catch (Exception e) {
2005 errln("Error opening string search ");
2009 while (DIACTRICMATCH[count].text != null) {
// The strength is set on whatever collator getCollator(...) returned.
// NOTE(review): if that collator is shared with other tests, this changes
// its strength for them as well - confirm.
2010 strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator));
2011 strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength);
2012 strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker));
2014 text = DIACTRICMATCH[count].text;
2015 pattern = DIACTRICMATCH[count].pattern;
2016 strsrch.setTarget(new StringCharacterIterator(text));
2017 strsrch.setPattern(pattern);
2018 if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) {
2019 errln("Error at test number " + count);
/**
 * Searches one Hangul/Latin sample text for several patterns under three
 * locales ("root", "root@collation=search", "ko@collation=search") and compares
 * the offsets returned by next() and by previous() with per-locale expectations.
 * "root" uses the scKoStnd* expectations; the two search-collation locales use
 * the scKoSrch* expectations.
 */
2025 public void TestUsingSearchCollator() {
// Sample text. The numeric markers at the start of each line appear to be the
// offset of that segment within the text (they line up with the expected
// offset arrays below).
2028 /*01*/ "\uAC00 " + // simple LV Hangul
2029 /*03*/ "\uAC01 " + // simple LVT Hangul
2030 /*05*/ "\uAC0F " + // LVTT, last jamo expands for search
2031 /*07*/ "\uAFFF " + // LLVVVTT, every jamo expands for search
2032 /*09*/ "\u1100\u1161\u11A8 " + // 0xAC01 as conjoining jamo
2033 /*13*/ "\u1100\u1161\u1100 " + // 0xAC01 as basic conjoining jamo (per search rules)
2034 /*17*/ "\u3131\u314F\u3131 " + // 0xAC01 as compatibility jamo
2035 /*21*/ "\u1100\u1161\u11B6 " + // 0xAC0F as conjoining jamo; last expands for search
2036 /*25*/ "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search
2037 /*30*/ "\u1101\u1170\u11B6 " + // 0xAFFF as conjoining jamo; all expand for search
2038 /*34*/ "\u00E6 " + // small letter ae, expands
2039 /*36*/ "\u1E4D " + // small letter o with tilde and acute, decomposes
// Patterns: each precomposed syllable is paired with a jamo-sequence spelling.
2042 String scKoPat0 = "\uAC01";
2043 String scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo
2044 String scKoPat2 = "\uAC0F";
2045 String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo
2046 String scKoPat4 = "\uAFFF";
2047 String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo
// Expected match offsets when the collator is a "search" collation.
2049 int[] scKoSrchOff01 = { 3, 9, 13 };
2050 int[] scKoSrchOff23 = { 5, 21, 25 };
2051 int[] scKoSrchOff45 = { 7, 30 };
// Expected match offsets with the standard (root) collation.
2053 int[] scKoStndOff01 = { 3, 9 };
2054 int[] scKoStndOff2 = { 5, 21 };
2055 int[] scKoStndOff3 = { 25 };
2056 int[] scKoStndOff45 = { 7, 30 };
// Pairs a pattern with the offsets at which it is expected to match.
2058 class PatternAndOffsets {
2059 private String pattern;
2060 private int[] offsets;
2061 PatternAndOffsets(String pat, int[] offs) {
2065 public String getPattern() { return pattern; }
2066 public int[] getOffsets() { return offsets; }
2068 final PatternAndOffsets[] scKoSrchPatternsOffsets = {
2069 new PatternAndOffsets( scKoPat0, scKoSrchOff01 ),
2070 new PatternAndOffsets( scKoPat1, scKoSrchOff01 ),
2071 new PatternAndOffsets( scKoPat2, scKoSrchOff23 ),
2072 new PatternAndOffsets( scKoPat3, scKoSrchOff23 ),
2073 new PatternAndOffsets( scKoPat4, scKoSrchOff45 ),
2074 new PatternAndOffsets( scKoPat5, scKoSrchOff45 ),
2076 final PatternAndOffsets[] scKoStndPatternsOffsets = {
2077 new PatternAndOffsets( scKoPat0, scKoStndOff01 ),
2078 new PatternAndOffsets( scKoPat1, scKoStndOff01 ),
2079 new PatternAndOffsets( scKoPat2, scKoStndOff2 ),
2080 new PatternAndOffsets( scKoPat3, scKoStndOff3 ),
2081 new PatternAndOffsets( scKoPat4, scKoStndOff45 ),
2082 new PatternAndOffsets( scKoPat5, scKoStndOff45 ),
// TUSCItem: a locale identifier, the text to search, and the pattern/offset
// expectations that apply for that locale.
2086 private String localeString;
2087 private String text;
2088 private PatternAndOffsets[] patternsAndOffsets;
2089 TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) {
2090 localeString = locStr;
2092 patternsAndOffsets = patsAndOffs;
2094 public String getLocaleString() { return localeString; }
2095 public String getText() { return text; }
2096 public PatternAndOffsets[] getPatternsAndOffsets() { return patternsAndOffsets; }
2098 final TUSCItem[] tuscItems = {
2099 new TUSCItem( "root", scKoText, scKoStndPatternsOffsets ),
2100 new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ),
2101 new TUSCItem( "ko@collation=search", scKoText, scKoSrchPatternsOffsets ),
// Placeholder pattern used only to construct the StringSearch; the real
// pattern is installed with setPattern() inside the loop.
2104 String dummyPat = "a";
2106 for (TUSCItem tuscItem: tuscItems) {
2107 String localeString = tuscItem.getLocaleString();
2108 ULocale uloc = new ULocale(localeString);
2109 RuleBasedCollator col = null;
2111 col = (RuleBasedCollator)Collator.getInstance(uloc);
2112 } catch (Exception e) {
2113 errln("Error: in locale " + localeString + ", err in Collator.getInstance");
// One StringSearch per locale, reused for every pattern of that locale.
2116 StringCharacterIterator ci = new StringCharacterIterator(tuscItem.getText());
2117 StringSearch srch = new StringSearch(dummyPat, ci, col);
2118 for ( PatternAndOffsets patternAndOffsets: tuscItem.getPatternsAndOffsets() ) {
2119 srch.setPattern(patternAndOffsets.getPattern());
2120 int[] offsets = patternAndOffsets.getOffsets();
2121 int ioff, noff = offsets.length;
// Forward pass: compare each next() result with the expected offsets and
// report both surplus and missing matches.
2127 offset = srch.next();
2128 if (offset == SearchIterator.DONE) {
2131 if ( ioff < noff ) {
2132 if ( offset != offsets[ioff] ) {
2133 errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset);
2139 errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected");
2142 if ( ioff < noff ) {
2143 errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected");
// Backward pass: the same comparison using previous().
2149 offset = srch.previous();
2150 if (offset == SearchIterator.DONE) {
2155 if ( offset != offsets[ioff] ) {
2156 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset);
// NOTE(review): the next two messages begin "expected ... returned", unlike
// the matching next() messages above - wording probably unintended.
2161 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected");
2165 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected");