]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetStringSpanTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / lang / UnicodeSetStringSpanTest.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2009-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.lang;\r
8 \r
9 import com.ibm.icu.dev.test.TestFmwk;\r
10 import com.ibm.icu.impl.Utility;\r
11 import com.ibm.icu.text.UTF16;\r
12 import com.ibm.icu.text.UnicodeSet;\r
13 import com.ibm.icu.text.UnicodeSetIterator;\r
14 import com.ibm.icu.text.UnicodeSet.SpanCondition;\r
15 \r
16 /**\r
17  * @test\r
18  * @summary General test of UnicodeSet string span.\r
19  */\r
20 public class UnicodeSetStringSpanTest extends TestFmwk {\r
21 \r
22     public static void main(String[] args) throws Exception {\r
23         new UnicodeSetStringSpanTest().run(args);\r
24     }\r
25 \r
26     // Simple test first, easier to debug.\r
27     public void TestSimpleStringSpan() {\r
28         String pattern = "[a{ab}{bc}]";\r
29         String string = "abc";\r
30         UnicodeSet set = new UnicodeSet(pattern);\r
31         set.complement();\r
32         int pos = set.spanBack(string, 3, SpanCondition.SIMPLE);\r
33         if (pos != 1) {\r
34             errln(String.format("FAIL: UnicodeSet(%s).spanBack(%s) returns the wrong value pos %d (!= 1)",\r
35                     set.toString(), string, pos));\r
36         }\r
37         pos = set.span(string, SpanCondition.SIMPLE);\r
38         if (pos != 3) {\r
39             errln(String.format("FAIL: UnicodeSet(%s).span(%s) returns the wrong value pos %d (!= 3)",\r
40                     set.toString(), string, pos));\r
41         }\r
42         pos = set.span(string, 1, SpanCondition.SIMPLE);\r
43         if (pos != 3) {\r
44             errln(String.format("FAIL: UnicodeSet(%s).span(%s) returns the wrong value pos %d (!= 3)",\r
45                     set.toString(), string, pos));\r
46         }\r
47     }\r
48 \r
49     // test our slow implementation\r
50     public void TestSimpleStringSpanSlow() {\r
51         String pattern = "[a{ab}{bc}]";\r
52         String string = "abc";\r
53         UnicodeSet uset = new UnicodeSet(pattern);\r
54         uset.complement();\r
55         UnicodeSetWithStrings set = new UnicodeSetWithStrings(uset);\r
56 \r
57         int length = containsSpanBackUTF16(set, string, 3, SpanCondition.SIMPLE);\r
58         if (length != 1) {\r
59             errln(String.format("FAIL: UnicodeSet(%s) containsSpanBackUTF16(%s) returns the wrong value length %d (!= 1)",\r
60                     set.toString(), string, length));\r
61         }\r
62         length = containsSpanUTF16(set, string, SpanCondition.SIMPLE);\r
63         if (length != 3) {\r
64             errln(String.format("FAIL: UnicodeSet(%s) containsSpanUTF16(%s) returns the wrong value length %d (!= 3)",\r
65                     set.toString(), string, length));\r
66         }\r
67         length = containsSpanUTF16(set, string.substring(1), SpanCondition.SIMPLE);\r
68         if (length != 2) {\r
69             errln(String.format("FAIL: UnicodeSet(%s) containsSpanUTF16(%s) returns the wrong value length %d (!= 2)",\r
70                     set.toString(), string, length));\r
71         }\r
72     }\r
73 \r
74     // Test select patterns and strings, and test SIMPLE.\r
75     public void TestSimpleStringSpanAndFreeze() {\r
76         String pattern = "[x{xy}{xya}{axy}{ax}]";\r
77         final String string = "xx"\r
78                 + "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya" + "xx"\r
79                 + "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya" + "xx"\r
80                 + "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy" + "aaaa";\r
81 \r
82         UnicodeSet set = new UnicodeSet(pattern);\r
83         if (set.containsAll(string)) {\r
84             errln("FAIL: UnicodeSet(" + pattern + ").containsAll(" + string + ") should be FALSE");\r
85         }\r
86 \r
87         // Remove trailing "aaaa".\r
88         String string16 = string.substring(0, string.length() - 4);\r
89         if (!set.containsAll(string16)) {\r
90             errln("FAIL: UnicodeSet(" + pattern + ").containsAll(" + string + "[:-4]) should be TRUE");\r
91         }\r
92 \r
93         String s16 = "byayaxya";\r
94         if (   set.span(s16.substring(0, 8), SpanCondition.NOT_CONTAINED) != 4\r
95             || set.span(s16.substring(0, 7), SpanCondition.NOT_CONTAINED) != 4\r
96             || set.span(s16.substring(0, 6), SpanCondition.NOT_CONTAINED) != 4\r
97             || set.span(s16.substring(0, 5), SpanCondition.NOT_CONTAINED) != 5\r
98             || set.span(s16.substring(0, 4), SpanCondition.NOT_CONTAINED) != 4\r
99             || set.span(s16.substring(0, 3), SpanCondition.NOT_CONTAINED) != 3) {\r
100             errln("FAIL: UnicodeSet(" + pattern + ").span(while not) returns the wrong value");\r
101         }\r
102 \r
103         pattern = "[a{ab}{abc}{cd}]";\r
104         set.applyPattern(pattern);\r
105         s16 = "acdabcdabccd";\r
106         if (   set.span(s16.substring(0, 12), SpanCondition.CONTAINED) != 12\r
107             || set.span(s16.substring(0, 12), SpanCondition.SIMPLE) != 6\r
108             || set.span(s16.substring(7),     SpanCondition.SIMPLE) != 5) {\r
109             errln("FAIL: UnicodeSet(" + pattern + ").span(while longest match) returns the wrong value");\r
110         }\r
111         set.freeze();\r
112         if (   set.span(s16.substring(0, 12), SpanCondition.CONTAINED) != 12\r
113             || set.span(s16.substring(0, 12), SpanCondition.SIMPLE) != 6\r
114             || set.span(s16.substring(7),     SpanCondition.SIMPLE) != 5) {\r
115             errln("FAIL: UnicodeSet(" + pattern + ").span(while longest match) returns the wrong value");\r
116         }\r
117 \r
118         pattern = "[d{cd}{bcd}{ab}]";\r
119         set = (UnicodeSet)set.cloneAsThawed();\r
120         set.applyPattern(pattern).freeze();\r
121         s16 = "abbcdabcdabd";\r
122         if (   set.spanBack(s16, 12, SpanCondition.CONTAINED) != 0\r
123             || set.spanBack(s16, 12, SpanCondition.SIMPLE) != 6\r
124             || set.spanBack(s16,  5, SpanCondition.SIMPLE) != 0) {\r
125             errln("FAIL: UnicodeSet(" + pattern + ").spanBack(while longest match) returns the wrong value");\r
126         }\r
127     }\r
128 \r
129     // more complex test. --------------------------------------------------------\r
130 \r
131     // Make the strings in a UnicodeSet easily accessible.\r
132     static class UnicodeSetWithStrings {\r
133 \r
134         private UnicodeSet set;\r
135 \r
136         private String strings[];\r
137         private int stringsLength;\r
138         private boolean hasSurrogates;\r
139 \r
140         public UnicodeSetWithStrings(final UnicodeSet normalSet) {\r
141             set = normalSet;\r
142             stringsLength = 0;\r
143             hasSurrogates = false;\r
144             strings = new String[20];\r
145             int size = set.size();\r
146             if (size > 0 && set.charAt(size - 1) < 0) {\r
147                 // If a set's last element is not a code point, then it must contain strings.\r
148                 // Iterate over the set, skip all code point ranges, and cache the strings.\r
149                 UnicodeSetIterator iter = new UnicodeSetIterator(set);\r
150                 while (iter.nextRange() && stringsLength < strings.length) {\r
151                     if (iter.codepoint == UnicodeSetIterator.IS_STRING) {\r
152                         // Store the pointer to the set's string element\r
153                         // which we happen to know is a stable pointer.\r
154                         strings[stringsLength] = iter.getString();\r
155                         ++stringsLength;\r
156                     }\r
157                 }\r
158             }\r
159         }\r
160 \r
161         public final UnicodeSet getSet() {\r
162             return set;\r
163         }\r
164 \r
165         public boolean hasStrings() {\r
166             return (stringsLength > 0);\r
167         }\r
168 \r
169         public boolean hasStringsWithSurrogates() {\r
170             return hasSurrogates;\r
171         }\r
172 \r
173     }\r
174 \r
175     static class UnicodeSetWithStringsIterator {\r
176 \r
177         private UnicodeSetWithStrings fSet;\r
178         private int nextStringIndex;\r
179 \r
180         public UnicodeSetWithStringsIterator(final UnicodeSetWithStrings set) {\r
181             fSet = set;\r
182             nextStringIndex = 0;\r
183         }\r
184 \r
185         public void reset() {\r
186             nextStringIndex = 0;\r
187         }\r
188 \r
189         public final String nextString() {\r
190             if (nextStringIndex < fSet.stringsLength) {\r
191                 return fSet.strings[nextStringIndex++];\r
192             } else {\r
193                 return null;\r
194             }\r
195         }\r
196 \r
197     }\r
198 \r
199     // Compare 16-bit Unicode strings (which may be malformed UTF-16)\r
200     // at code point boundaries.\r
201     // That is, each edge of a match must not be in the middle of a surrogate pair.\r
202     static boolean matches16CPB(final String s, int start, int limit, final String t) {\r
203         limit -= start;\r
204         int length = t.length();\r
205         return t.equals(s.substring(start, start + length))\r
206                 && !(0 < start && UTF16.isLeadSurrogate (s.charAt(start - 1)) &&\r
207                                   UTF16.isTrailSurrogate(s.charAt(start)))\r
208                 && !(length < limit && UTF16.isLeadSurrogate (s.charAt(start + length - 1)) &&\r
209                                        UTF16.isTrailSurrogate(s.charAt(start + length)));\r
210     }\r
211 \r
212     // Implement span() with contains() for comparison.\r
213     static int containsSpanUTF16(final UnicodeSetWithStrings set, final String s,\r
214             SpanCondition spanCondition) {\r
215         final UnicodeSet realSet = set.getSet();\r
216         int length = s.length();\r
217         if (!set.hasStrings()) {\r
218             boolean spanContained = false;\r
219             if (spanCondition != SpanCondition.NOT_CONTAINED) {\r
220                 spanContained = true; // Pin to 0/1 values.\r
221             }\r
222 \r
223             int c;\r
224             int start = 0, prev;\r
225             while ((prev = start) < length) {\r
226                 c = s.codePointAt(start);\r
227                 start = s.offsetByCodePoints(start, 1);\r
228                 if (realSet.contains(c) != spanContained) {\r
229                     break;\r
230                 }\r
231             }\r
232             return prev;\r
233         } else if (spanCondition == SpanCondition.NOT_CONTAINED) {\r
234             UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);\r
235             int c;\r
236             int start, next;\r
237             for (start = next = 0; start < length;) {\r
238                 c = s.codePointAt(next);\r
239                 next = s.offsetByCodePoints(next, 1);\r
240                 if (realSet.contains(c)) {\r
241                     break;\r
242                 }\r
243                 String str;\r
244                 iter.reset();\r
245                 while ((str = iter.nextString()) != null) {\r
246                     if (str.length() <= (length - start) && matches16CPB(s, start, length, str)) {\r
247                         // spanNeedsStrings=true;\r
248                         return start;\r
249                     }\r
250                 }\r
251                 start = next;\r
252             }\r
253             return start;\r
254         } else /* CONTAINED or SIMPLE */{\r
255             UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);\r
256             int c;\r
257             int start, next, maxSpanLimit = 0;\r
258             for (start = next = 0; start < length;) {\r
259                 c = s.codePointAt(next);\r
260                 next = s.offsetByCodePoints(next, 1);\r
261                 if (!realSet.contains(c)) {\r
262                     next = start; // Do not span this single, not-contained code point.\r
263                 }\r
264                 String str;\r
265                 iter.reset();\r
266                 while ((str = iter.nextString()) != null) {\r
267                     if (str.length() <= (length - start) && matches16CPB(s, start, length, str)) {\r
268                         // spanNeedsStrings=true;\r
269                         int matchLimit = start + str.length();\r
270                         if (matchLimit == length) {\r
271                             return length;\r
272                         }\r
273                         if (spanCondition == SpanCondition.CONTAINED) {\r
274                             // Iterate for the shortest match at each position.\r
275                             // Recurse for each but the shortest match.\r
276                             if (next == start) {\r
277                                 next = matchLimit; // First match from start.\r
278                             } else {\r
279                                 if (matchLimit < next) {\r
280                                     // Remember shortest match from start for iteration.\r
281                                     int temp = next;\r
282                                     next = matchLimit;\r
283                                     matchLimit = temp;\r
284                                 }\r
285                                 // Recurse for non-shortest match from start.\r
286                                 int spanLength = containsSpanUTF16(set, s.substring(matchLimit),\r
287                                         SpanCondition.CONTAINED);\r
288                                 if ((matchLimit + spanLength) > maxSpanLimit) {\r
289                                     maxSpanLimit = matchLimit + spanLength;\r
290                                     if (maxSpanLimit == length) {\r
291                                         return length;\r
292                                     }\r
293                                 }\r
294                             }\r
295                         } else /* spanCondition==SIMPLE */{\r
296                             if (matchLimit > next) {\r
297                                 // Remember longest match from start.\r
298                                 next = matchLimit;\r
299                             }\r
300                         }\r
301                     }\r
302                 }\r
303                 if (next == start) {\r
304                     break; // No match from start.\r
305                 }\r
306                 start = next;\r
307             }\r
308             if (start > maxSpanLimit) {\r
309                 return start;\r
310             } else {\r
311                 return maxSpanLimit;\r
312             }\r
313         }\r
314     }\r
315 \r
316     static int containsSpanBackUTF16(final UnicodeSetWithStrings set, final String s, int length,\r
317             SpanCondition spanCondition) {\r
318         if (length == 0) {\r
319             return 0;\r
320         }\r
321         final UnicodeSet realSet = set.getSet();\r
322         if (!set.hasStrings()) {\r
323             boolean spanContained = false;\r
324             if (spanCondition != SpanCondition.NOT_CONTAINED) {\r
325                 spanContained = true; // Pin to 0/1 values.\r
326             }\r
327 \r
328             int c;\r
329             int prev = length;\r
330             do {\r
331                 c = s.codePointBefore(prev);\r
332                 if (realSet.contains(c) != spanContained) {\r
333                     break;\r
334                 }\r
335                 prev = s.offsetByCodePoints(prev, -1);\r
336             } while (prev > 0);\r
337             return prev;\r
338         } else if (spanCondition == SpanCondition.NOT_CONTAINED) {\r
339             UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);\r
340             int c;\r
341             int prev = length, length0 = length;\r
342             do {\r
343                 c = s.codePointBefore(prev);\r
344                 if (realSet.contains(c)) {\r
345                     break;\r
346                 }\r
347                 String str;\r
348                 iter.reset();\r
349                 while ((str = iter.nextString()) != null) {\r
350                     if (str.length() <= prev && matches16CPB(s, prev - str.length(), length0, str)) {\r
351                         // spanNeedsStrings=true;\r
352                         return prev;\r
353                     }\r
354                 }\r
355                 prev = s.offsetByCodePoints(prev, -1);\r
356             } while (prev > 0);\r
357             return prev;\r
358         } else /* SpanCondition.CONTAINED or SIMPLE */{\r
359             UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);\r
360             int c;\r
361             int prev = length, minSpanStart = length, length0 = length;\r
362             do {\r
363                 c = s.codePointBefore(length);\r
364                 length = s.offsetByCodePoints(length, -1);\r
365                 if (!realSet.contains(c)) {\r
366                     length = prev; // Do not span this single, not-contained code point.\r
367                 }\r
368                 String str;\r
369                 iter.reset();\r
370                 while ((str = iter.nextString()) != null) {\r
371                     if (str.length() <= prev && matches16CPB(s, prev - str.length(), length0, str)) {\r
372                         // spanNeedsStrings=true;\r
373                         int matchStart = prev - str.length();\r
374                         if (matchStart == 0) {\r
375                             return 0;\r
376                         }\r
377                         if (spanCondition == SpanCondition.CONTAINED) {\r
378                             // Iterate for the shortest match at each position.\r
379                             // Recurse for each but the shortest match.\r
380                             if (length == prev) {\r
381                                 length = matchStart; // First match from prev.\r
382                             } else {\r
383                                 if (matchStart > length) {\r
384                                     // Remember shortest match from prev for iteration.\r
385                                     int temp = length;\r
386                                     length = matchStart;\r
387                                     matchStart = temp;\r
388                                 }\r
389                                 // Recurse for non-shortest match from prev.\r
390                                 int spanStart = containsSpanBackUTF16(set, s, matchStart,\r
391                                         SpanCondition.CONTAINED);\r
392                                 if (spanStart < minSpanStart) {\r
393                                     minSpanStart = spanStart;\r
394                                     if (minSpanStart == 0) {\r
395                                         return 0;\r
396                                     }\r
397                                 }\r
398                             }\r
399                         } else /* spanCondition==SIMPLE */{\r
400                             if (matchStart < length) {\r
401                                 // Remember longest match from prev.\r
402                                 length = matchStart;\r
403                             }\r
404                         }\r
405                     }\r
406                 }\r
407                 if (length == prev) {\r
408                     break; // No match from prev.\r
409                 }\r
410             } while ((prev = length) > 0);\r
411             if (prev < minSpanStart) {\r
412                 return prev;\r
413             } else {\r
414                 return minSpanStart;\r
415             }\r
416         }\r
417     }\r
418 \r
419     // spans to be performed and compared\r
420     static final int SPAN_UTF16 = 1;\r
421     static final int SPAN_UTF8 = 2;\r
422     static final int SPAN_UTFS = 3;\r
423 \r
424     static final int SPAN_SET = 4;\r
425     static final int SPAN_COMPLEMENT = 8;\r
426     static final int SPAN_POLARITY = 0xc;\r
427 \r
428     static final int SPAN_FWD = 0x10;\r
429     static final int SPAN_BACK = 0x20;\r
430     static final int SPAN_DIRS = 0x30;\r
431 \r
432     static final int SPAN_CONTAINED = 0x100;\r
433     static final int SPAN_SIMPLE = 0x200;\r
434     static final int SPAN_CONDITION = 0x300;\r
435 \r
436     static final int SPAN_ALL = 0x33f;\r
437 \r
438     static SpanCondition invertSpanCondition(SpanCondition spanCondition, SpanCondition contained) {\r
439         return spanCondition == SpanCondition.NOT_CONTAINED ? contained\r
440                 : SpanCondition.NOT_CONTAINED;\r
441     }\r
442 \r
443     /*\r
444      * Count spans on a string with the method according to type and set the span limits. The set may be the complement\r
445      * of the original. When using spanBack() and comparing with span(), use a span condition for the first spanBack()\r
446      * according to the expected number of spans. Sets typeName to an empty string if there is no such type. Returns -1\r
447      * if the span option is filtered out.\r
448      */\r
449     static int getSpans(final UnicodeSetWithStrings set, boolean isComplement, final String s,\r
450             int whichSpans, int type, String[] typeName, int limits[], int limitsCapacity,\r
451             int expectCount) {\r
452         final UnicodeSet realSet = set.getSet();\r
453         int start, count, i;\r
454         SpanCondition spanCondition, firstSpanCondition, contained;\r
455         boolean isForward;\r
456 \r
457         int length = s.length();\r
458         if (type < 0 || 7 < type) {\r
459             typeName[0] = null;\r
460             return 0;\r
461         }\r
462 \r
463         final String typeNames16[] = {\r
464                 "contains",\r
465                 "contains(LM)",\r
466                 "span",\r
467                 "span(LM)",\r
468                 "containsBack",\r
469                 "containsBack(LM)",\r
470                 "spanBack",\r
471                 "spanBack(LM)" };\r
472 \r
473         typeName[0] = typeNames16[type];\r
474 \r
475         // filter span options\r
476         if (type <= 3) {\r
477             // span forward\r
478             if ((whichSpans & SPAN_FWD) == 0) {\r
479                 return -1;\r
480             }\r
481             isForward = true;\r
482         } else {\r
483             // span backward\r
484             if ((whichSpans & SPAN_BACK) == 0) {\r
485                 return -1;\r
486             }\r
487             isForward = false;\r
488         }\r
489         if ((type & 1) == 0) {\r
490             // use SpanCondition.CONTAINED\r
491             if ((whichSpans & SPAN_CONTAINED) == 0) {\r
492                 return -1;\r
493             }\r
494             contained = SpanCondition.CONTAINED;\r
495         } else {\r
496             // use SIMPLE\r
497             if ((whichSpans & SPAN_SIMPLE) == 0) {\r
498                 return -1;\r
499             }\r
500             contained = SpanCondition.SIMPLE;\r
501         }\r
502 \r
503         // Default first span condition for going forward with an uncomplemented set.\r
504         spanCondition = SpanCondition.NOT_CONTAINED;\r
505         if (isComplement) {\r
506             spanCondition = invertSpanCondition(spanCondition, contained);\r
507         }\r
508 \r
509         // First span condition for span(), used to terminate the spanBack() iteration.\r
510         firstSpanCondition = spanCondition;\r
511 \r
512         // spanBack(): Its initial span condition is span()'s last span condition,\r
513         // which is the opposite of span()'s first span condition\r
514         // if we expect an even number of spans.\r
515         // (The loop inverts spanCondition (expectCount-1) times\r
516         // before the expectCount'th span() call.)\r
517         // If we do not compare forward and backward directions, then we do not have an\r
518         // expectCount and just start with firstSpanCondition.\r
519         if (!isForward && (whichSpans & SPAN_FWD) != 0 && (expectCount & 1) == 0) {\r
520             spanCondition = invertSpanCondition(spanCondition, contained);\r
521         }\r
522 \r
523         count = 0;\r
524         switch (type) {\r
525         case 0:\r
526         case 1:\r
527             start = 0;\r
528             for (;;) {\r
529                 start += containsSpanUTF16(set, s.substring(start), spanCondition);\r
530                 if (count < limitsCapacity) {\r
531                     limits[count] = start;\r
532                 }\r
533                 ++count;\r
534                 if (start >= length) {\r
535                     break;\r
536                 }\r
537                 spanCondition = invertSpanCondition(spanCondition, contained);\r
538             }\r
539             break;\r
540         case 2:\r
541         case 3:\r
542             start = 0;\r
543             for (;;) {\r
544                 start = realSet.span(s, start, spanCondition);\r
545                 if (count < limitsCapacity) {\r
546                     limits[count] = start;\r
547                 }\r
548                 ++count;\r
549                 if (start >= length) {\r
550                     break;\r
551                 }\r
552                 spanCondition = invertSpanCondition(spanCondition, contained);\r
553             }\r
554             break;\r
555         case 4:\r
556         case 5:\r
557             for (;;) {\r
558                 ++count;\r
559                 if (count <= limitsCapacity) {\r
560                     limits[limitsCapacity - count] = length;\r
561                 }\r
562                 length = containsSpanBackUTF16(set, s, length, spanCondition);\r
563                 if (length == 0 && spanCondition == firstSpanCondition) {\r
564                     break;\r
565                 }\r
566                 spanCondition = invertSpanCondition(spanCondition, contained);\r
567             }\r
568             if (count < limitsCapacity) {\r
569                 for (i = count; i-- > 0;) {\r
570                     limits[i] = limits[limitsCapacity - count + i];\r
571                 }\r
572             }\r
573             break;\r
574         case 6:\r
575         case 7:\r
576             for (;;) {\r
577                 ++count;\r
578                 if (count <= limitsCapacity) {\r
579                     limits[limitsCapacity - count] = length >= 0 ? length : s.length();\r
580                 }\r
581                 length = realSet.spanBack(s, length, spanCondition);\r
582                 if (length == 0 && spanCondition == firstSpanCondition) {\r
583                     break;\r
584                 }\r
585                 spanCondition = invertSpanCondition(spanCondition, contained);\r
586             }\r
587             if (count < limitsCapacity) {\r
588                 for (i = count; i-- > 0;) {\r
589                     limits[i] = limits[limitsCapacity - count + i];\r
590                 }\r
591             }\r
592             break;\r
593         default:\r
594             typeName = null;\r
595             return -1;\r
596         }\r
597 \r
598         return count;\r
599     }\r
600 \r
601     // sets to be tested; odd index=isComplement\r
602     static final int SLOW = 0;\r
603     static final int SLOW_NOT = 1;\r
604     static final int FAST = 2;\r
605     static final int FAST_NOT = 3;\r
606     static final int SET_COUNT = 4;\r
607 \r
608     static final String setNames[] = { "slow", "slow.not", "fast", "fast.not" };\r
609 \r
610     /*\r
611      * Verify that we get the same results whether we look at text with contains(), span() or spanBack(), using unfrozen\r
612      * or frozen versions of the set, and using the set or its complement (switching the spanConditions accordingly).\r
613      * The latter verifies that set.span(spanCondition) == set.complement().span(!spanCondition).\r
614      * \r
615      * The expectLimits[] are either provided by the caller (with expectCount>=0) or returned to the caller (with an\r
616      * input expectCount<0).\r
617      */\r
618     void verifySpan(final UnicodeSetWithStrings sets[], final String s, int whichSpans,\r
619             int expectLimits[], int expectCount, // TODO\r
620             final String testName, int index) {\r
621         int[] limits = new int[500];\r
622         int limitsCount;\r
623         int i, j;\r
624         String[] typeName = new String[1];\r
625         int type;\r
626 \r
627         for (i = 0; i < SET_COUNT; ++i) {\r
628             if ((i & 1) == 0) {\r
629                 // Even-numbered sets are original, uncomplemented sets.\r
630                 if ((whichSpans & SPAN_SET) == 0) {\r
631                     continue;\r
632                 }\r
633             } else {\r
634                 // Odd-numbered sets are complemented.\r
635                 if ((whichSpans & SPAN_COMPLEMENT) == 0) {\r
636                     continue;\r
637                 }\r
638             }\r
639             for (type = 0;; ++type) {\r
640                 limitsCount = getSpans(sets[i], (0 != (i & 1)), s, whichSpans, type, typeName, limits,\r
641                         limits.length, expectCount);\r
642                 if (typeName[0] == null) {\r
643                     break; // All types tried.\r
644                 }\r
645                 if (limitsCount < 0) {\r
646                     continue; // Span option filtered out.\r
647                 }\r
648                 if (expectCount < 0) {\r
649                     expectCount = limitsCount;\r
650                     if (limitsCount > limits.length) {\r
651                         errln(String.format("FAIL: %s[0x%x].%s.%s span count=%d > %d capacity - too many spans",\r
652                                 testName, index, setNames[i], typeName[0], limitsCount, limits.length));\r
653                         return;\r
654                     }\r
655                     for (j = limitsCount; j-- > 0;) {\r
656                         expectLimits[j] = limits[j];\r
657                     }\r
658                 } else if (limitsCount != expectCount) {\r
659                     errln(String.format("FAIL: %s[0x%x].%s.%s span count=%d != %d", testName, index, setNames[i],\r
660                             typeName[0], limitsCount, expectCount));\r
661                 } else {\r
662                     for (j = 0; j < limitsCount; ++j) {\r
663                         if (limits[j] != expectLimits[j]) {\r
664                             errln(String.format("FAIL: %s[0x%x].%s.%s span count=%d limits[%d]=%d != %d", testName,\r
665                                     index, setNames[i], typeName[0], limitsCount, j, limits[j], expectLimits[j]));\r
666                             break;\r
667                         }\r
668                     }\r
669                 }\r
670             }\r
671         }\r
672 \r
673         // Compare span() with containsAll()/containsNone(),\r
674         // but only if we have expectLimits[] from the uncomplemented set.\r
675         if ((whichSpans & SPAN_SET) != 0) {\r
676             final String s16 = s;\r
677             String string;\r
678             int prev = 0, limit, len;\r
679             for (i = 0; i < expectCount; ++i) {\r
680                 limit = expectLimits[i];\r
681                 len = limit - prev;\r
682                 if (len > 0) {\r
683                     string = s16.substring(prev, prev + len); // read-only alias\r
684                     if (0 != (i & 1)) {\r
685                         if (!sets[SLOW].getSet().containsAll(string)) {\r
686                             errln(String.format("FAIL: %s[0x%x].%s.containsAll(%d..%d)==false contradicts span()",\r
687                                     testName, index, setNames[SLOW], prev, limit));\r
688                             return;\r
689                         }\r
690                         if (!sets[FAST].getSet().containsAll(string)) {\r
691                             errln(String.format("FAIL: %s[0x%x].%s.containsAll(%d..%d)==false contradicts span()",\r
692                                     testName, index, setNames[FAST], prev, limit));\r
693                             return;\r
694                         }\r
695                     } else {\r
696                         if (!sets[SLOW].getSet().containsNone(string)) {\r
697                             errln(String.format("FAIL: %s[0x%x].%s.containsNone(%d..%d)==false contradicts span()",\r
698                                     testName, index, setNames[SLOW], prev, limit));\r
699                             return;\r
700                         }\r
701                         if (!sets[FAST].getSet().containsNone(string)) {\r
702                             errln(String.format("FAIL: %s[0x%x].%s.containsNone(%d..%d)==false contradicts span()",\r
703                                     testName, index, setNames[FAST], prev, limit));\r
704                             return;\r
705                         }\r
706                     }\r
707                 }\r
708                 prev = limit;\r
709             }\r
710         }\r
711     }\r
712 \r
713     // Specifically test either UTF-16 or UTF-8.\r
714     void verifySpan(final UnicodeSetWithStrings sets[], final String s, int whichSpans,\r
715             final String testName, int index) {\r
716         int[] expectLimits = new int[500];\r
717         int expectCount = -1;\r
718         verifySpan(sets, s, whichSpans, expectLimits, expectCount, testName, index);\r
719     }\r
720 \r
721     // Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text,\r
722     // unless either UTF is turned off in whichSpans.\r
723     // Testing UTF-16 and UTF-8 together requires that surrogate code points\r
724     // have the same contains(c) value as U+FFFD.\r
725     void verifySpanBothUTFs(final UnicodeSetWithStrings sets[], final String s16, int whichSpans,\r
726             final String testName, int index) {\r
727         int[] expectLimits = new int[500];\r
728         int expectCount;\r
729 \r
730         expectCount = -1; // Get expectLimits[] from verifySpan().\r
731 \r
732         if ((whichSpans & SPAN_UTF16) != 0) {\r
733             verifySpan(sets, s16, whichSpans, expectLimits, expectCount, testName, index);\r
734         }\r
735     }\r
736 \r
737     static int nextCodePoint(int c) {\r
738         // Skip some large and boring ranges.\r
739         switch (c) {\r
740         case 0x3441:\r
741             return 0x4d7f;\r
742         case 0x5100:\r
743             return 0x9f00;\r
744         case 0xb040:\r
745             return 0xd780;\r
746         case 0xe041:\r
747             return 0xf8fe;\r
748         case 0x10100:\r
749             return 0x20000;\r
750         case 0x20041:\r
751             return 0xe0000;\r
752         case 0xe0101:\r
753             return 0x10fffd;\r
754         default:\r
755             return c + 1;\r
756         }\r
757     }\r
758 \r
759     // Verify that all implementations represent the same set.\r
760     void verifySpanContents(final UnicodeSetWithStrings sets[], int whichSpans, final String testName) {\r
761         StringBuffer s = new StringBuffer();\r
762         int localWhichSpans;\r
763         int c, first;\r
764         for (first = c = 0;; c = nextCodePoint(c)) {\r
765             if (c > 0x10ffff || s.length() > 1024) {\r
766                 localWhichSpans = whichSpans;\r
767                 verifySpanBothUTFs(sets, s.toString(), localWhichSpans, testName, first);\r
768                 if (c > 0x10ffff) {\r
769                     break;\r
770                 }\r
771                 s.delete(0, s.length());\r
772                 first = c;\r
773             }\r
774             UTF16.append(s, c);\r
775         }\r
776     }\r
777 \r
    // Fixed test text mixing several scripts with well-formed surrogate pairs and
    // unpaired surrogates. It is used by TestInterestingStringSpan() (starting at
    // index 14, the first surrogate pair) and by verifySpanUTF16String().
    // NOTE(review): the earlier remark about specifying a length and trying
    // NUL-termination does not correspond to anything in this Java code;
    // presumably it was carried over from a C/C++ version - confirm.
    static final char interestingStringChars[] = { 0x61, 0x62, 0x20, // Latin, space
            0x3b1, 0x3b2, 0x3b3, // Greek
            0xd900, // lead surrogate
            0x3000, 0x30ab, 0x30ad, // wide space, Katakana
            0xdc05, // trail surrogate
            0xa0, 0xac00, 0xd7a3, // nbsp, Hangul
            0xd900, 0xdc05, // unassigned supplementary
            0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary
            0xd7a4, 0xdc05, 0xd900, 0x2028  // unassigned, surrogates in wrong order, LS
    };
    // The same text as a String, built once from the char array above.
    static String interestingString = new String(interestingStringChars);
    // Escaped set pattern (run through Utility.unescape() before use):
    // ID_Continue minus the two Katakana U+30AB and U+30AD, plus two strings that
    // start with the wide space U+3000 followed by those Katakana.
    static final String unicodeSet1 = "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]";
792 \r
793     public void TestInterestingStringSpan() {\r
794         UnicodeSet uset = new UnicodeSet(Utility.unescape(unicodeSet1));\r
795         SpanCondition spanCondition = SpanCondition.NOT_CONTAINED;\r
796         int expect = 2;\r
797         int start = 14;\r
798 \r
799         int c = 0xd840;\r
800         boolean contains = uset.contains(c);\r
801         if (false != contains) {\r
802             errln(String.format("FAIL: UnicodeSet(unicodeSet1).contains(%d) = true (expect false)",\r
803                   c));\r
804         }\r
805 \r
806         UnicodeSetWithStrings set = new UnicodeSetWithStrings(uset);\r
807         int len = containsSpanUTF16(set, interestingString.substring(start), spanCondition);\r
808         if (expect != len) {\r
809             errln(String.format("FAIL: containsSpanUTF16(unicodeSet1, \"%s(%d)\") = %d (expect %d)",\r
810                   interestingString, start, len, expect));\r
811         }\r
812 \r
813         len = uset.span(interestingString, start, spanCondition) - start;\r
814         if (expect != len) {\r
815             errln(String.format("FAIL: UnicodeSet(unicodeSet1).span(\"%s\", %d) = %d (expect %d)",\r
816                   interestingString, start, len, expect));\r
817         }\r
818     }\r
819 \r
820     void verifySpanUTF16String(final UnicodeSetWithStrings sets[], int whichSpans, final String testName) {\r
821         if ((whichSpans & SPAN_UTF16) == 0) {\r
822             return;\r
823         }\r
824         verifySpan(sets, interestingString, (whichSpans & ~SPAN_UTF8), testName, 1);\r
825     }\r
826 \r
827     // Take a set of span options and multiply them so that\r
828     // each portion only has one of the options a, b and c.\r
829     // If b==0, then the set of options is just modified with mask and a.\r
830     // If b!=0 and c==0, then the set of options is just modified with mask, a and b.\r
831     static int addAlternative(int whichSpans[], int whichSpansCount, int mask, int a, int b, int c) {\r
832         int s;\r
833         int i;\r
834 \r
835         for (i = 0; i < whichSpansCount; ++i) {\r
836             s = whichSpans[i] & mask;\r
837             whichSpans[i] = s | a;\r
838             if (b != 0) {\r
839                 whichSpans[whichSpansCount + i] = s | b;\r
840                 if (c != 0) {\r
841                     whichSpans[2 * whichSpansCount + i] = s | c;\r
842                 }\r
843             }\r
844         }\r
845         return b == 0 ? whichSpansCount : c == 0 ? 2 * whichSpansCount : 3 * whichSpansCount;\r
846     }\r
847 \r
    // Set pattern and test text that contain unpaired surrogates.
    // They are not representable in UTF-8, and a leading trail surrogate
    // and a trailing lead surrogate must not match in the middle of a proper surrogate pair.
    // U+20001 == \\uD840\\uDC01
    // U+20400 == \\uD841\\uDC00
    // Both constants are stored in escaped form and are run through
    // Utility.unescape() before use.
    static final String patternWithUnpairedSurrogate =
        "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]";
    static final String stringWithUnpairedSurrogate =
        "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb";

    // Runs of 'a' and 'b'; the names indicate the intended run lengths (63 and 64).
    // They are the building blocks for longPattern and for the long test strings
    // in TestSpan().
    static final String _63_a = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    static final String _64_a = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    static final String _63_b = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    static final String _64_b = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    // Set containing 'a' plus two long strings: four _64_a runs followed by 'b',
    // and 'a' followed by four _64_b runs.
    static final String longPattern =
        "[a{" + _64_a + _64_a + _64_a + _64_a + "b}" + "{a" + _64_b + _64_b + _64_b + _64_b + "}]";
863 \r
864     public void TestStringWithUnpairedSurrogateSpan() {\r
865         String string = Utility.unescape(stringWithUnpairedSurrogate);\r
866         UnicodeSet uset = new UnicodeSet(Utility.unescape(patternWithUnpairedSurrogate));\r
867         SpanCondition spanCondition = SpanCondition.NOT_CONTAINED;\r
868         int start = 17;\r
869         int expect = 5;\r
870 \r
871         UnicodeSetWithStrings set = new UnicodeSetWithStrings(uset);\r
872         int len = containsSpanUTF16(set, string.substring(start), spanCondition);\r
873         if (expect != len) {\r
874             errln(String.format("FAIL: containsSpanUTF16(patternWithUnpairedSurrogate, \"%s(%d)\") = %d (expect %d)",\r
875                   string, start, len, expect));\r
876         }\r
877 \r
878         len = uset.span(string, start, spanCondition) - start;\r
879         if (expect != len) {\r
880             errln(String.format("FAIL: UnicodeSet(patternWithUnpairedSurrogate).span(\"%s\", %d) = %d (expect %d)",\r
881                   string, start, len, expect));\r
882         }\r
883     }\r
884 \r
885     public void TestSpan() {\r
886         // "[...]" is a UnicodeSet pattern.\r
887         // "*" performs tests on all Unicode code points and on a selection of\r
888         // malformed UTF-8/16 strings.\r
889         // "-options" limits the scope of testing for the current set.\r
890         // By default, the test verifies that equivalent boundaries are found\r
891         // for UTF-16 and UTF-8, going forward and backward,\r
892         // alternating NOT_CONTAINED with\r
893         // either CONTAINED or SIMPLE.\r
894         // Single-character options:\r
895         // 8 -- UTF-16 and UTF-8 boundaries may differ.\r
896         // Cause: contains(U+FFFD) is inconsistent with contains(some surrogates),\r
897         // or the set contains strings with unpaired surrogates\r
898         // which do not translate to valid UTF-8.\r
899         // c -- set.span() and set.complement().span() boundaries may differ.\r
900         // Cause: Set strings are not complemented.\r
901         // b -- span() and spanBack() boundaries may differ.\r
902         // Cause: Strings in the set overlap, and spanBack(CONTAINED)\r
903         // and spanBack(SIMPLE) are defined to\r
904         // match with non-overlapping substrings.\r
905         // For example, with a set containing "ab" and "ba",\r
906         // span() of "aba" yields boundaries { 0, 2, 3 }\r
907         // because the initial "ab" matches from 0 to 2,\r
908         // while spanBack() yields boundaries { 0, 1, 3 }\r
909         // because the final "ba" matches from 1 to 3.\r
910         // l -- CONTAINED and SIMPLE boundaries may differ.\r
911         // Cause: Strings in the set overlap, and a longer match may\r
912         // require a sequence including non-longest substrings.\r
913         // For example, with a set containing "ab", "abc" and "cd",\r
914         // span(contained) of "abcd" spans the entire string\r
915         // but span(longest match) only spans the first 3 characters.\r
916         // Each "-options" first resets all options and then applies the specified options.\r
917         // A "-" without options resets the options.\r
918         // The options are also reset for each new set.\r
919         // Other strings will be spanned.\r
920         final String testdata[] = {\r
921                 "[:ID_Continue:]",\r
922                 "*",\r
923                 "[:White_Space:]",\r
924                 "*",\r
925                 "[]",\r
926                 "*",\r
927                 "[\\u0000-\\U0010FFFF]",\r
928                 "*",\r
929                 "[\\u0000\\u0080\\u0800\\U00010000]",\r
930                 "*",\r
931                 "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]",\r
932                 "*",\r
933                 unicodeSet1,\r
934                 "-c",\r
935                 "*",\r
936                 "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]",\r
937                 "-c",\r
938                 "*",\r
939 \r
940                 // Overlapping strings cause overlapping attempts to match.\r
941                 "[x{xy}{xya}{axy}{ax}]",\r
942                 "-cl",\r
943 \r
944                 // More repetitions of "xya" would take too long with the recursive\r
945                 // reference implementation.\r
946                 // containsAll()=false\r
947                 // test_string 0x14\r
948                 "xx" + "xyaxyaxyaxya" + // set.complement().span(longest match) will stop here.\r
949                         "xx" + // set.complement().span(contained) will stop between the two 'x'es.\r
950                         "xyaxyaxyaxya" + "xx" + "xyaxyaxyaxya" + // span() ends here.\r
951                         "aaa",\r
952 \r
953                 // containsAll()=true\r
954                 // test_string 0x15\r
955                 "xx" + "xyaxyaxyaxya" + "xx" + "xyaxyaxyaxya" + "xx" + "xyaxyaxyaxy",\r
956 \r
957                 "-bc",\r
958                 // test_string 0x17\r
959                 "byayaxya", // span() -> { 4, 7, 8 } spanBack() -> { 5, 8 }\r
960                 "-c",\r
961                 "byayaxy", // span() -> { 4, 7 } complement.span() -> { 7 }\r
962                 "byayax", // span() -> { 4, 6 } complement.span() -> { 6 }\r
963                 "-",\r
964                 "byaya", // span() -> { 5 }\r
965                 "byay", // span() -> { 4 }\r
966                 "bya", // span() -> { 3 }\r
967 \r
968                 // span(longest match) will not span the whole string.\r
969                 "[a{ab}{bc}]",\r
970                 "-cl",\r
971                 // test_string 0x21\r
972                 "abc",\r
973 \r
974                 "[a{ab}{abc}{cd}]",\r
975                 "-cl",\r
976                 "acdabcdabccd",\r
977 \r
978                 // spanBack(longest match) will not span the whole string.\r
979                 "[c{ab}{bc}]",\r
980                 "-cl",\r
981                 "abc",\r
982 \r
983                 "[d{cd}{bcd}{ab}]",\r
984                 "-cl",\r
985                 "abbcdabcdabd",\r
986 \r
987                 // Test with non-ASCII set strings - test proper handling of surrogate pairs\r
988                 // and UTF-8 trail bytes.\r
989                 // Copies of above test sets and strings, but transliterated to have\r
990                 // different code points with similar trail units.\r
991                 // Previous: a b c d\r
992                 // Unicode: 042B 30AB 200AB 204AB\r
993                 // UTF-16: 042B 30AB D840 DCAB D841 DCAB\r
994                 // UTF-8: D0 AB E3 82 AB F0 A0 82 AB F0 A0 92 AB\r
995                 "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]",\r
996                 "-cl",\r
997                 "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB",\r
998 \r
999                 "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]",\r
1000                 "-cl",\r
1001                 "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB",\r
1002 \r
1003                 // Stress bookkeeping and recursion.\r
1004                 // The following strings are barely doable with the recursive\r
1005                 // reference implementation.\r
1006                 // The not-contained character at the end prevents an early exit from the span().\r
1007                 "[b{bb}]",\r
1008                 "-c",\r
1009                 // test_string 0x33\r
1010                 "bbbbbbbbbbbbbbbbbbbbbbbb-",\r
1011                 // On complement sets, span() and spanBack() get different results\r
1012                 // because b is not in the complement set and there is an odd number of b's\r
1013                 // in the test string.\r
1014                 "-bc",\r
1015                 "bbbbbbbbbbbbbbbbbbbbbbbbb-",\r
1016 \r
1017                 // Test with set strings with an initial or final code point span\r
1018                 // longer than 254.\r
1019                 longPattern,\r
1020                 "-c",\r
1021                 _64_a + _64_a + _64_a + _63_a + "b",\r
1022                 _64_a + _64_a + _64_a + _64_a + "b",\r
1023                 _64_a + _64_a + _64_a + _64_a + "aaaabbbb",\r
1024                 "a" + _64_b + _64_b + _64_b + _63_b,\r
1025                 "a" + _64_b + _64_b + _64_b + _64_b,\r
1026                 "aaaabbbb" + _64_b + _64_b + _64_b + _64_b,\r
1027 \r
1028                 // Test with strings containing unpaired surrogates.\r
1029                 patternWithUnpairedSurrogate, "-8cl",\r
1030                 stringWithUnpairedSurrogate };\r
1031         int i, j;\r
1032         int whichSpansCount = 1;\r
1033         int[] whichSpans = new int[96];\r
1034         for (i = whichSpans.length; i-- > 0;) {\r
1035             whichSpans[i] = SPAN_ALL;\r
1036         }\r
1037 \r
1038         UnicodeSet[] sets = new UnicodeSet[SET_COUNT];\r
1039         UnicodeSetWithStrings[] sets_with_str = new UnicodeSetWithStrings[SET_COUNT];\r
1040 \r
1041         String testName = null;\r
1042         String testNameLimit;\r
1043 \r
1044         for (i = 0; i < testdata.length; ++i) {\r
1045             final String s = testdata[i];\r
1046             if (s.charAt(0) == '[') {\r
1047                 // Create new test sets from this pattern.\r
1048                 for (j = 0; j < SET_COUNT; ++j) {\r
1049                     sets_with_str[j] = null;\r
1050                     sets[j] = null;\r
1051                 }\r
1052                 sets[SLOW] = new UnicodeSet(Utility.unescape(s));\r
1053                 sets[SLOW_NOT] = new UnicodeSet(sets[SLOW]);\r
1054                 sets[SLOW_NOT].complement();\r
1055                 // Intermediate set: Test cloning of a frozen set.\r
1056                 UnicodeSet fast = new UnicodeSet(sets[SLOW]);\r
1057                 fast.freeze();\r
1058                 sets[FAST] = (UnicodeSet) fast.clone();\r
1059                 fast = null;\r
1060                 UnicodeSet fastNot = new UnicodeSet(sets[SLOW_NOT]);\r
1061                 fastNot.freeze();\r
1062                 sets[FAST_NOT] = (UnicodeSet) fastNot.clone();\r
1063                 fastNot = null;\r
1064 \r
1065                 for (j = 0; j < SET_COUNT; ++j) {\r
1066                     sets_with_str[j] = new UnicodeSetWithStrings(sets[j]);\r
1067                 }\r
1068 \r
1069                 testName = s + ':';\r
1070                 whichSpans[0] = SPAN_ALL;\r
1071                 whichSpansCount = 1;\r
1072             } else if (s.charAt(0) == '-') {\r
1073                 whichSpans[0] = SPAN_ALL;\r
1074                 whichSpansCount = 1;\r
1075 \r
1076                 for (j = 1; j < s.length(); j++) {\r
1077                     switch (s.charAt(j)) {\r
1078                     case 'c':\r
1079                         whichSpansCount = addAlternative(whichSpans, whichSpansCount, ~SPAN_POLARITY, SPAN_SET,\r
1080                                 SPAN_COMPLEMENT, 0);\r
1081                         break;\r
1082                     case 'b':\r
1083                         whichSpansCount = addAlternative(whichSpans, whichSpansCount, ~SPAN_DIRS, SPAN_FWD, SPAN_BACK,\r
1084                                 0);\r
1085                         break;\r
1086                     case 'l':\r
1087                         // test CONTAINED FWD & BACK, and separately\r
1088                         // SIMPLE only FWD, and separately\r
1089                         // SIMPLE only BACK\r
1090                         whichSpansCount = addAlternative(whichSpans, whichSpansCount, ~(SPAN_DIRS | SPAN_CONDITION),\r
1091                                 SPAN_DIRS | SPAN_CONTAINED, SPAN_FWD | SPAN_SIMPLE, SPAN_BACK | SPAN_SIMPLE);\r
1092                         break;\r
1093                     case '8':\r
1094                         whichSpansCount = addAlternative(whichSpans, whichSpansCount, ~SPAN_UTFS, SPAN_UTF16,\r
1095                                 SPAN_UTF8, 0);\r
1096                         break;\r
1097                     default:\r
1098                         errln(String.format("FAIL: unrecognized span set option in \"%s\"", testdata[i]));\r
1099                         break;\r
1100                     }\r
1101                 }\r
1102             } else if (s.equals("*")) {\r
1103                 testNameLimit = "bad_string";\r
1104                 for (j = 0; j < whichSpansCount; ++j) {\r
1105                     if (whichSpansCount > 1) {\r
1106                         testNameLimit += String.format("%%0x%3x", whichSpans[j]);\r
1107                     }\r
1108                     verifySpanUTF16String(sets_with_str, whichSpans[j], testName);\r
1109                 }\r
1110 \r
1111                 testNameLimit = "contents";\r
1112                 for (j = 0; j < whichSpansCount; ++j) {\r
1113                     if (whichSpansCount > 1) {\r
1114                         testNameLimit += String.format("%%0x%3x", whichSpans[j]);\r
1115                     }\r
1116                     verifySpanContents(sets_with_str, whichSpans[j], testName);\r
1117                 }\r
1118             } else {\r
1119                 String string = Utility.unescape(s);\r
1120                 testNameLimit = "test_string";\r
1121                 for (j = 0; j < whichSpansCount; ++j) {\r
1122                     if (whichSpansCount > 1) {\r
1123                         testNameLimit += String.format("%%0x%3x", whichSpans[j]);\r
1124                     }\r
1125                     verifySpanBothUTFs(sets_with_str, string, whichSpans[j], testName, i);\r
1126                 }\r
1127             }\r
1128         }\r
1129     }\r
1130 \r
1131 }\r