jars/icu4j-4_4_2-src/main/classes/core/src/com/ibm/icu/util/StringTokenizer.java

   1 /**\r
   2 *******************************************************************************\r
   3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
   4 * others. All Rights Reserved.                                                *\r
   5 *******************************************************************************\r
   6 */\r
   7 \r
   8 package com.ibm.icu.util;\r
   9 \r
  10 import java.util.Enumeration;\r
  11 import java.util.NoSuchElementException;\r
  12 \r
  13 import com.ibm.icu.text.UTF16;\r
  14 import com.ibm.icu.text.UnicodeSet;\r
  15 \r
  16 /**\r
  17  * <p>The string tokenizer class allows an application to break a string \r
  18  * into tokens by performing code point comparison. \r
  19  * The <code>StringTokenizer</code> methods do not distinguish \r
  20  * among identifiers, numbers, and quoted strings, nor do they recognize \r
  21  * and skip comments.</p>\r
  22  * <p>\r
  23  * The set of delimiters (the codepoints that separate tokens) may be \r
  24  * specified either at creation time or on a per-token basis. \r
  25  * </p>\r
  26  * <p>\r
  27  * An instance of <code>StringTokenizer</code> behaves in one of three ways, \r
  28  * depending on whether it was created with the <code>returnDelims</code> \r
  29  * and <code>coalesceDelims</code>\r
  30  * flags having the value <code>true</code> or <code>false</code>: \r
  31  * <ul>\r
  32  * <li>If returnDelims is <code>false</code>, delimiter code points serve to \r
  33  * separate tokens. A token is a maximal sequence of consecutive \r
  34  * code points that are not delimiters. \r
  35  * <li>If returnDelims is <code>true</code>, delimiter code points are \r
  36  * themselves considered to be tokens. In this case, if coalesceDelims is\r
  37  * <code>true</code>, such tokens will be the maximal sequence of consecutive\r
  38  * code points that <em>are</em> delimiters.  If coalesceDelims is false,\r
  39  * a token will be received for each delimiter code point.\r
  40  * </ul>\r
  41  * <p>A token is thus either one \r
  42  * delimiter code point, a maximal sequence of consecutive code points that\r
  43  * are delimiters, or a maximal sequence of consecutive code \r
  44  * points that are not delimiters.\r
  45  * </p>\r
  46  * <p>\r
  47  * A <tt>StringTokenizer</tt> object internally maintains a current \r
  48  * position within the string to be tokenized. Some operations advance this \r
  49  * current position past the code point processed.\r
  50  * </p>\r
  51  * <p>\r
  52  * A token is returned by taking a substring of the string that was used to \r
  53  * create the <tt>StringTokenizer</tt> object.\r
  54  * </p>\r
  55  * <p>\r
  56  * Example of the use of the default delimiter tokenizer.\r
  57  * <blockquote><pre>\r
  58  * StringTokenizer st = new StringTokenizer("this is a test");\r
  59  * while (st.hasMoreTokens()) {\r
  60  *     println(st.nextToken());\r
  61  *     }\r
  62  * </pre></blockquote>\r
  63  * </p>\r
  64  * <p>\r
  65  * prints the following output:\r
  66  * <blockquote><pre>\r
  67  *     this\r
  68  *     is\r
  69  *     a\r
  70  *     test\r
  71  * </pre></blockquote>\r
  72  * </p>\r
  73  * <p>\r
  74  * Example of the use of the tokenizer with user specified delimiter.\r
  75  * <blockquote><pre>\r
  76  *     StringTokenizer st = new StringTokenizer(\r
  77  *     "this is a test with supplementary characters &#92;ud800&#92;ud800&#92;udc00&#92;udc00",\r
  78  *         " &#92;ud800&#92;udc00");\r
  79  *     while (st.hasMoreTokens()) {\r
  80  *         println(st.nextToken());\r
  81  *     }\r
  82  * </pre></blockquote>\r
  83  * </p>\r
  84  * <p>\r
  85  * prints the following output:\r
  86  * <blockquote><pre>\r
  87  *     this\r
  88  *     is\r
  89  *     a\r
  90  *     test\r
  91  *     with\r
  92  *     supplementary\r
  93  *     characters\r
  94  *     &#92;ud800\r
  95  *     &#92;udc00\r
  96  * </pre></blockquote>\r
  97  * </p>\r
  98  * @author syn wee\r
  99  * @stable ICU 2.4\r
 100  */\r
 101 public final class StringTokenizer implements Enumeration<Object>\r
 102 {\r
 103     // public constructors ---------------------------------------------\r
 104      \r
 105     /**\r
 106      * <p>Constructs a string tokenizer for the specified string. All \r
 107      * characters in the delim argument are the delimiters for separating \r
 108      * tokens.</p> \r
 109      * <p>If the returnDelims flag is false, the delimiter characters are \r
 110      * skipped and only serve as separators between tokens.</p>\r
 111      * <p>If the returnDelims flag is true, then the delimiter characters \r
 112      * are also returned as tokens, one per delimiter.\r
 113      * @param str a string to be parsed.\r
 114      * @param delim the delimiters.\r
 115      * @param returndelims flag indicating whether to return the delimiters \r
 116      *        as tokens.\r
 117      * @exception NullPointerException if str is null\r
 118      * @stable ICU 2.4\r
 119      */\r
 120     public StringTokenizer(String str, UnicodeSet delim, boolean returndelims)\r
 121     {\r
 122         this(str, delim, returndelims, false);\r
 123     }\r
 124 \r
 125     /**\r
 126      * <p>Constructs a string tokenizer for the specified string. All \r
 127      * characters in the delim argument are the delimiters for separating \r
 128      * tokens.</p> \r
 129      * <p>If the returnDelims flag is false, the delimiter characters are \r
 130      * skipped and only serve as separators between tokens.</p>\r
 131      * <p>If the returnDelims flag is true, then the delimiter characters \r
 132      * are also returned as tokens.  If coalescedelims is true, one token\r
 133      * is returned for each run of delimiter characters, otherwise one\r
 134      * token is returned per delimiter.  Since surrogate pairs can be\r
 135      * delimiters, the returned token might be two chars in length.</p>\r
 136      * @param str a string to be parsed.\r
 137      * @param delim the delimiters.\r
 138      * @param returndelims flag indicating whether to return the delimiters \r
 139      *        as tokens.\r
 140      * @param coalescedelims flag indicating whether to return a run of \r
 141      *        delimiters as a single token or as one token per delimiter.  \r
 142      *        This only takes effect if returndelims is true.\r
 143      * @exception NullPointerException if str is null\r
 144      * @internal\r
 145      * @deprecated This API is ICU internal only.\r
 146      */\r
 147     public StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims)\r
 148     {\r
 149         m_source_ = str;\r
 150         m_length_ = str.length();\r
 151         if (delim == null) {\r
 152             m_delimiters_ = EMPTY_DELIMITER_;\r
 153         }\r
 154         else {\r
 155             m_delimiters_ = delim;   \r
 156         }\r
 157         m_returnDelimiters_ = returndelims;\r
 158         m_coalesceDelimiters_ = coalescedelims;\r
 159         m_tokenOffset_ = -1;\r
 160         m_tokenSize_ = -1;\r
 161         if (m_length_ == 0) {\r
 162             // string length 0, no tokens\r
 163             m_nextOffset_ = -1;\r
 164         }\r
 165         else {\r
 166             m_nextOffset_ = 0;\r
 167             if (!returndelims) {\r
 168                 m_nextOffset_ = getNextNonDelimiter(0);\r
 169             }\r
 170         }\r
 171     }\r
 172     \r
 173     /**\r
 174      * <p>Constructs a string tokenizer for the specified string. The \r
 175      * characters in the delim argument are the delimiters for separating \r
 176      * tokens.</p> \r
 177      * <p>Delimiter characters themselves will not be treated as tokens.</p>\r
 178      * @param str a string to be parsed.\r
 179      * @param delim the delimiters.\r
 180      * @exception NullPointerException if str is null\r
 181      * @stable ICU 2.4\r
 182      */\r
 183     public StringTokenizer(String str, UnicodeSet delim)\r
 184     {\r
 185         this(str, delim, false, false);\r
 186     }\r
 187        \r
 188     /**\r
 189      * <p>Constructs a string tokenizer for the specified string. All \r
 190      * characters in the delim argument are the delimiters for separating \r
 191      * tokens.</p> \r
 192      * <p>If the returnDelims flag is false, the delimiter characters are \r
 193      * skipped and only serve as separators between tokens.</p>\r
 194      * <p>If the returnDelims flag is true, then the delimiter characters \r
 195      * are also returned as tokens, one per delimiter.\r
 196      * @param str a string to be parsed.\r
 197      * @param delim the delimiters.\r
 198      * @param returndelims flag indicating whether to return the delimiters \r
 199      *        as tokens.\r
 200      * @exception NullPointerException if str is null\r
 201      * @stable ICU 2.4\r
 202      */\r
 203     public StringTokenizer(String str, String delim, boolean returndelims)\r
 204     {\r
 205         this(str, delim, returndelims, false); // java default behavior\r
 206     }\r
 207 \r
 208     /**\r
 209      * <p>Constructs a string tokenizer for the specified string. All \r
 210      * characters in the delim argument are the delimiters for separating \r
 211      * tokens.</p> \r
 212      * <p>If the returnDelims flag is false, the delimiter characters are \r
 213      * skipped and only serve as separators between tokens.</p>\r
 214      * <p>If the returnDelims flag is true, then the delimiter characters \r
 215      * are also returned as tokens.  If coalescedelims is true, one token\r
 216      * is returned for each run of delimiter characters, otherwise one\r
 217      * token is returned per delimiter.  Since surrogate pairs can be\r
 218      * delimiters, the returned token might be two chars in length.</p>\r
 219      * @param str a string to be parsed.\r
 220      * @param delim the delimiters.\r
 221      * @param returndelims flag indicating whether to return the delimiters \r
 222      *        as tokens.\r
 223      * @param coalescedelims flag indicating whether to return a run of \r
 224      *        delimiters as a single token or as one token per delimiter.  \r
 225      *        This only takes effect if returndelims is true.\r
 226      * @exception NullPointerException if str is null\r
 227      * @internal\r
 228      * @deprecated This API is ICU internal only.\r
 229      */\r
 230     public StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims)\r
 231     {\r
 232         // don't ignore whitespace\r
 233         m_delimiters_ = EMPTY_DELIMITER_;\r
 234         if (delim != null && delim.length() > 0) {\r
 235             m_delimiters_ = new UnicodeSet();\r
 236             m_delimiters_.addAll(delim);\r
 237             checkDelimiters();\r
 238         }\r
 239         m_coalesceDelimiters_ = coalescedelims;\r
 240         m_source_ = str;\r
 241         m_length_ = str.length();\r
 242         m_returnDelimiters_ = returndelims;\r
 243         m_tokenOffset_ = -1;\r
 244         m_tokenSize_ = -1;\r
 245         if (m_length_ == 0) {\r
 246             // string length 0, no tokens\r
 247             m_nextOffset_ = -1;\r
 248         }\r
 249         else {\r
 250             m_nextOffset_ = 0;\r
 251             if (!returndelims) {\r
 252                 m_nextOffset_ = getNextNonDelimiter(0);\r
 253             }\r
 254         }\r
 255     }\r
 256     \r
 257     /**\r
 258      * <p>Constructs a string tokenizer for the specified string. The \r
 259      * characters in the delim argument are the delimiters for separating \r
 260      * tokens.</p> \r
 261      * <p>Delimiter characters themselves will not be treated as tokens.</p>\r
 262      * @param str a string to be parsed.\r
 263      * @param delim the delimiters.\r
 264      * @exception NullPointerException if str is null\r
 265      * @stable ICU 2.4\r
 266      */\r
 267     public StringTokenizer(String str, String delim)\r
 268     {\r
 269         // don't ignore whitespace\r
 270         this(str, delim, false, false);\r
 271     }\r
 272 \r
 273     /**\r
 274      * <p>Constructs a string tokenizer for the specified string. \r
 275      * The tokenizer uses the default delimiter set, which is \r
 276      * " &#92;t&#92;n&#92;r&#92;f": \r
 277      * the space character, the tab character, the newline character, the \r
 278      * carriage-return character, and the form-feed character.</p> \r
 279      * <p>Delimiter characters themselves will not be treated as tokens.</p>\r
 280      * @param str a string to be parsed\r
 281      * @exception NullPointerException if str is null\r
 282      * @stable ICU 2.4\r
 283      */\r
 284     public StringTokenizer(String str) \r
 285     {\r
 286         this(str, DEFAULT_DELIMITERS_, false, false);\r
 287     }\r
 288     \r
 289     // public methods --------------------------------------------------\r
 290     \r
 291     /**\r
 292      * Tests if there are more tokens available from this tokenizer's \r
 293      * string. \r
 294      * If this method returns <tt>true</tt>, then a subsequent call to \r
 295      * <tt>nextToken</tt> with no argument will successfully return a token.\r
 296      * @return <code>true</code> if and only if there is at least one token \r
 297      *         in the string after the current position; <code>false</code> \r
 298      *         otherwise.\r
 299      * @stable ICU 2.4\r
 300      */\r
 301     public boolean hasMoreTokens() \r
 302     {\r
 303         return m_nextOffset_ >= 0;\r
 304     }\r
 305     \r
 306     /**\r
 307      * Returns the next token from this string tokenizer.\r
 308      * @return the next token from this string tokenizer.\r
 309      * @exception NoSuchElementException if there are no more tokens in \r
 310      *            this tokenizer's string.\r
 311      * @stable ICU 2.4\r
 312      */\r
 313     public String nextToken() \r
 314     {\r
 315         if (m_tokenOffset_ < 0) {\r
 316             if (m_nextOffset_ < 0) {\r
 317                 throw new NoSuchElementException("No more tokens in String");   \r
 318             }\r
 319             // pre-calculations of tokens not done\r
 320             if (m_returnDelimiters_) {\r
 321                 int tokenlimit = 0;\r
 322                 int c = UTF16.charAt(m_source_, m_nextOffset_);\r
 323                 boolean contains = delims == null \r
 324                     ? m_delimiters_.contains(c) \r
 325                     : c < delims.length && delims[c];\r
 326                 if (contains) {\r
 327                      if (m_coalesceDelimiters_) {\r
 328                         tokenlimit = getNextNonDelimiter(m_nextOffset_);\r
 329                      } else {\r
 330                         tokenlimit = m_nextOffset_ + UTF16.getCharCount(c);\r
 331                         if (tokenlimit == m_length_) {\r
 332                             tokenlimit = -1;\r
 333                         }\r
 334                      }\r
 335                 }\r
 336                 else {\r
 337                     tokenlimit = getNextDelimiter(m_nextOffset_);\r
 338                 }\r
 339                 String result;\r
 340                 if (tokenlimit < 0) {\r
 341                     result = m_source_.substring(m_nextOffset_);\r
 342                 }\r
 343                 else {\r
 344                     result = m_source_.substring(m_nextOffset_, tokenlimit);\r
 345                 }\r
 346                 m_nextOffset_ = tokenlimit;\r
 347                 return result;\r
 348             }\r
 349             else {\r
 350                 int tokenlimit = getNextDelimiter(m_nextOffset_);\r
 351                 String result;\r
 352                 if (tokenlimit < 0) {\r
 353                     result = m_source_.substring(m_nextOffset_);\r
 354                     m_nextOffset_ = tokenlimit;\r
 355                 }\r
 356                 else {\r
 357                     result = m_source_.substring(m_nextOffset_, tokenlimit);\r
 358                     m_nextOffset_ = getNextNonDelimiter(tokenlimit);\r
 359                 }\r
 360                 \r
 361                 return result;\r
 362             }\r
 363         }\r
 364         // count was called before and we have all the tokens\r
 365         if (m_tokenOffset_ >= m_tokenSize_) {\r
 366             throw new NoSuchElementException("No more tokens in String");\r
 367         }\r
 368         String result;\r
 369         if (m_tokenLimit_[m_tokenOffset_] >= 0) {\r
 370             result = m_source_.substring(m_tokenStart_[m_tokenOffset_],\r
 371                                          m_tokenLimit_[m_tokenOffset_]);\r
 372         }\r
 373         else {\r
 374             result = m_source_.substring(m_tokenStart_[m_tokenOffset_]);\r
 375         }\r
 376         m_tokenOffset_ ++;\r
 377         m_nextOffset_ = -1;\r
 378         if (m_tokenOffset_ < m_tokenSize_) {\r
 379             m_nextOffset_ = m_tokenStart_[m_tokenOffset_];\r
 380         }\r
 381         return result;\r
 382     }\r
 383     \r
 384     /**\r
 385      * Returns the next token in this string tokenizer's string. First, \r
 386      * the set of characters considered to be delimiters by this \r
 387      * <tt>StringTokenizer</tt> object is changed to be the characters in \r
 388      * the string <tt>delim</tt>. Then the next token in the string\r
 389      * after the current position is returned. The current position is \r
 390      * advanced beyond the recognized token.  The new delimiter set \r
 391      * remains the default after this call. \r
 392      * @param delim the new delimiters.\r
 393      * @return the next token, after switching to the new delimiter set.\r
 394      * @exception NoSuchElementException if there are no more tokens in \r
 395      *            this tokenizer's string.\r
 396      * @stable ICU 2.4\r
 397      */\r
 398     public String nextToken(String delim) \r
 399     {\r
 400         m_delimiters_ = EMPTY_DELIMITER_;\r
 401         if (delim != null && delim.length() > 0) {\r
 402             m_delimiters_ = new UnicodeSet();\r
 403             m_delimiters_.addAll(delim);\r
 404         }\r
 405         return nextToken(m_delimiters_);\r
 406     }\r
 407     \r
 408     /**\r
 409      * Returns the next token in this string tokenizer's string. First, \r
 410      * the set of characters considered to be delimiters by this \r
 411      * <tt>StringTokenizer</tt> object is changed to be the characters in \r
 412      * the string <tt>delim</tt>. Then the next token in the string\r
 413      * after the current position is returned. The current position is \r
 414      * advanced beyond the recognized token.  The new delimiter set \r
 415      * remains the default after this call. \r
 416      * @param delim the new delimiters.\r
 417      * @return the next token, after switching to the new delimiter set.\r
 418      * @exception NoSuchElementException if there are no more tokens in \r
 419      *            this tokenizer's string.\r
 420      * @stable ICU 2.4\r
 421      */\r
 422     public String nextToken(UnicodeSet delim) \r
 423     {\r
 424         m_delimiters_ = delim;\r
 425         checkDelimiters();\r
 426         m_tokenOffset_ = -1;\r
 427         m_tokenSize_ = -1;\r
 428         if (!m_returnDelimiters_) {\r
 429             m_nextOffset_ = getNextNonDelimiter(m_nextOffset_);\r
 430         }\r
 431         return nextToken();\r
 432     }\r
 433     \r
 434     /**\r
 435      * Returns the same value as the <code>hasMoreTokens</code> method. \r
 436      * It exists so that this class can implement the \r
 437      * <code>Enumeration</code> interface. \r
 438      * @return <code>true</code> if there are more tokens;\r
 439      *         <code>false</code> otherwise.\r
 440      * @see #hasMoreTokens()\r
 441      * @stable ICU 2.4\r
 442      */\r
 443     public boolean hasMoreElements() \r
 444     {\r
 445         return hasMoreTokens();\r
 446     }\r
 447     \r
 448     /**\r
 449      * Returns the same value as the <code>nextToken</code> method, except \r
 450      * that its declared return value is <code>Object</code> rather than \r
 451      * <code>String</code>. It exists so that this class can implement the \r
 452      * <code>Enumeration</code> interface. \r
 453      * @return the next token in the string.\r
 454      * @exception NoSuchElementException if there are no more tokens in \r
 455      *            this tokenizer's string.\r
 456      * @see #nextToken()\r
 457      * @stable ICU 2.4\r
 458      */\r
 459     public Object nextElement() \r
 460     {\r
 461         return nextToken();\r
 462     }\r
 463     \r
 464     /**\r
 465      * Calculates the number of times that this tokenizer's \r
 466      * <code>nextToken</code> method can be called before it generates an \r
 467      * exception. The current position is not advanced.\r
 468      * @return the number of tokens remaining in the string using the \r
 469      *         current delimiter set.\r
 470      * @see #nextToken()\r
 471      * @stable ICU 2.4\r
 472      */\r
 473     public int countTokens() \r
 474     {\r
 475         int result = 0;\r
 476         if (hasMoreTokens()) {\r
 477             if (m_tokenOffset_ >= 0) {\r
 478                 return m_tokenSize_ - m_tokenOffset_;\r
 479             }\r
 480             if (m_tokenStart_ == null) {\r
 481                 m_tokenStart_ = new int[TOKEN_SIZE_];\r
 482                 m_tokenLimit_ = new int[TOKEN_SIZE_];\r
 483             }\r
 484             do {\r
 485                 if (m_tokenStart_.length == result) {\r
 486                     int temptokenindex[] = m_tokenStart_;\r
 487                     int temptokensize[] = m_tokenLimit_;\r
 488                     int originalsize = temptokenindex.length;\r
 489                     int newsize = originalsize + TOKEN_SIZE_;\r
 490                     m_tokenStart_ = new int[newsize];\r
 491                     m_tokenLimit_ = new int[newsize];\r
 492                     System.arraycopy(temptokenindex, 0, m_tokenStart_, 0, \r
 493                                      originalsize);\r
 494                     System.arraycopy(temptokensize, 0, m_tokenLimit_, 0, \r
 495                                      originalsize);\r
 496                 }\r
 497                 m_tokenStart_[result] = m_nextOffset_;\r
 498                 if (m_returnDelimiters_) {\r
 499                     int c = UTF16.charAt(m_source_, m_nextOffset_);\r
 500                     boolean contains = delims == null \r
 501                         ? m_delimiters_.contains(c) \r
 502                         : c < delims.length && delims[c];\r
 503                     if (contains) {\r
 504                         if (m_coalesceDelimiters_) {\r
 505                             m_tokenLimit_[result] = getNextNonDelimiter(\r
 506                                                                 m_nextOffset_);\r
 507                         } else {\r
 508                             int p = m_nextOffset_ + 1;\r
 509                             if (p == m_length_) {\r
 510                                 p = -1;\r
 511                             }\r
 512                             m_tokenLimit_[result] = p;\r
 513 \r
 514                         }\r
 515                     }\r
 516                     else {\r
 517                         m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);\r
 518                     }\r
 519                     m_nextOffset_ = m_tokenLimit_[result];\r
 520                 }\r
 521                 else {\r
 522                     m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);\r
 523                     m_nextOffset_ = getNextNonDelimiter(m_tokenLimit_[result]);\r
 524                 }\r
 525                 result ++;\r
 526             } while (m_nextOffset_ >= 0);\r
 527             m_tokenOffset_ = 0;\r
 528             m_tokenSize_ = result;\r
 529             m_nextOffset_ = m_tokenStart_[0];\r
 530         }\r
 531         return result;\r
 532     }\r
 533     \r
 534     // private data members -------------------------------------------------\r
 535     \r
 536     /**\r
 537      * Current offset to the token array. If the array token is not set up yet,\r
 538      * this value is a -1\r
 539      */\r
 540     private int m_tokenOffset_;\r
 541     /**\r
 542      * Size of the token array. If the array token is not set up yet,\r
 543      * this value is a -1\r
 544      */\r
 545     private int m_tokenSize_;\r
 546     /**\r
 547      * Array of pre-calculated tokens start indexes in source string terminated \r
 548      * by -1.\r
 549      * This is only set up during countTokens() and only stores the remaining\r
 550      * tokens, not all tokens including parsed ones\r
 551      */\r
 552     private int m_tokenStart_[];\r
 553     /**\r
 554      * Array of pre-calculated tokens limit indexes in source string.\r
 555      * This is only set up during countTokens() and only stores the remaining\r
 556      * tokens, not all tokens including parsed ones\r
 557      */\r
 558     private int m_tokenLimit_[];\r
 559     /**\r
 560      * UnicodeSet containing delimiters\r
 561      */\r
 562     private UnicodeSet m_delimiters_;\r
 563     /**\r
 564      * String to parse for tokens\r
 565      */\r
 566     private String m_source_;\r
 567     /**\r
 568      * Length of m_source_\r
 569      */\r
 570     private int m_length_;\r
 571     /**\r
 572      * Current position in string to parse for tokens\r
 573      */\r
 574     private int m_nextOffset_;\r
 575     /**\r
 576      * Flag indicator if delimiters are to be treated as tokens too\r
 577      */\r
 578     private boolean m_returnDelimiters_;\r
 579 \r
 580     /**\r
 581      * Flag indicating whether to coalesce runs of delimiters into single tokens\r
 582      */\r
 583     private boolean m_coalesceDelimiters_;\r
 584 \r
 585     /**\r
 586      * Default set of delimiters &#92;t&#92;n&#92;r&#92;f\r
 587      */\r
 588     private static final UnicodeSet DEFAULT_DELIMITERS_ \r
 589                                         = new UnicodeSet("[ \t\n\r\f]", false);\r
 590     /**\r
 591      * Array size increments\r
 592      */\r
 593     private static final int TOKEN_SIZE_ = 100;\r
 594     /**\r
 595      * A empty delimiter UnicodeSet, used when user specified null delimiters\r
 596      */\r
 597     private static final UnicodeSet EMPTY_DELIMITER_ = new UnicodeSet();\r
 598     \r
 599     // private methods ------------------------------------------------------\r
 600     \r
 601     /**\r
 602      * Gets the index of the next delimiter after offset\r
 603      * @param offset to the source string\r
 604      * @return offset of the immediate next delimiter, otherwise \r
 605      *         (- source string length - 1) if there\r
 606      *         are no more delimiters after m_nextOffset\r
 607      */\r
 608     private int getNextDelimiter(int offset)\r
 609     {\r
 610         if (offset >= 0) {\r
 611             int result = offset; \r
 612             int c = 0;\r
 613             if (delims == null) {\r
 614                 do {\r
 615                     c = UTF16.charAt(m_source_, result);\r
 616                     if (m_delimiters_.contains(c)) {\r
 617                         break;\r
 618                     }\r
 619                     result ++;\r
 620                 } while (result < m_length_);\r
 621             } else {\r
 622                 do {\r
 623                     c = UTF16.charAt(m_source_, result);\r
 624                     if (c < delims.length && delims[c]) {\r
 625                         break;\r
 626                     }\r
 627                     result ++;\r
 628                 } while (result < m_length_);\r
 629             }                \r
 630             if (result < m_length_) {\r
 631                 return result;\r
 632             }\r
 633         }\r
 634         return -1 - m_length_;\r
 635     }\r
 636     \r
 637     /**\r
 638      * Gets the index of the next non-delimiter after m_nextOffset_\r
 639      * @param offset to the source string\r
 640      * @return offset of the immediate next non-delimiter, otherwise \r
 641      *         (- source string length - 1) if there\r
 642      *         are no more delimiters after m_nextOffset\r
 643      */\r
 644     private int getNextNonDelimiter(int offset)\r
 645     {\r
 646         if (offset >= 0) {\r
 647             int result = offset; \r
 648             int c = 0;\r
 649             if (delims == null) {\r
 650                 do {\r
 651                     c = UTF16.charAt(m_source_, result);\r
 652                     if (!m_delimiters_.contains(c)) {\r
 653                         break;\r
 654                     }\r
 655                     result ++;\r
 656                 } while (result < m_length_);\r
 657             } else {\r
 658                 do {\r
 659                     c = UTF16.charAt(m_source_, result);\r
 660                     if (!(c < delims.length && delims[c])) {\r
 661                         break;\r
 662                     }\r
 663                     result ++;\r
 664                 } while (result < m_length_);\r
 665             }\r
 666             if (result < m_length_) {\r
 667                 return result;\r
 668             }\r
 669         }\r
 670         return -1 - m_length_;\r
 671     }\r
 672 \r
 673     void checkDelimiters() {\r
 674         if (m_delimiters_ == null || m_delimiters_.size() == 0) {\r
 675             delims = new boolean[0];\r
 676         } else {\r
 677             int maxChar = m_delimiters_.getRangeEnd(m_delimiters_.getRangeCount()-1);\r
 678             if (maxChar < 0x7f) {\r
 679                 delims = new boolean[maxChar+1];\r
 680                 for (int i = 0, ch; -1 != (ch = m_delimiters_.charAt(i)); ++i) {\r
 681                     delims[ch] = true;\r
 682                 }\r
 683             } else {\r
 684                 delims = null;\r
 685             }\r
 686         }\r
 687     }\r
 688     private boolean[] delims;\r
 689 }\r