jars/icu4j-4_4_2-src/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java

   1 /*\r
   2 **********************************************************************\r
   3 * Copyright (c) 2004-2008, International Business Machines\r
   4 * Corporation and others.  All Rights Reserved.\r
   5 **********************************************************************\r
   6 * Author: Alan Liu\r
   7 * Created: March 16 2004\r
   8 * Since: ICU 3.0\r
   9 **********************************************************************\r
  10 */\r
  11 package com.ibm.icu.impl.data;\r
  12 \r
  13 import java.io.IOException;\r
  14 \r
  15 import com.ibm.icu.impl.UCharacterProperty;\r
  16 import com.ibm.icu.impl.Utility;\r
  17 import com.ibm.icu.text.UTF16;\r
  18 \r
  19 /**\r
  20  * An iterator class that returns successive string tokens from some\r
  21  * source.  String tokens are, in general, separated by rule white\r
  22  * space in the source test.  Furthermore, they may be delimited by\r
  23  * either single or double quotes (opening and closing quotes must\r
  24  * match).  Escapes are processed using standard ICU unescaping.\r
  25  */\r
  26 public class TokenIterator {\r
  27 \r
  28     private ResourceReader reader;\r
  29     private String line;\r
  30     private StringBuffer buf;\r
  31     private boolean done;\r
  32     private int pos;\r
  33     private int lastpos;\r
  34 \r
  35     /**\r
  36      * Construct an iterator over the tokens returned by the given\r
  37      * ResourceReader, ignoring blank lines and comment lines (first\r
  38      * non-blank character is '#').  Note that trailing comments on a\r
  39      * line, beginning with the first unquoted '#', are recognized.\r
  40      */\r
  41     public TokenIterator(ResourceReader r) {\r
  42         reader = r;\r
  43         line = null;\r
  44         done = false;\r
  45         buf = new StringBuffer();\r
  46         pos = lastpos = -1;\r
  47     }\r
  48 \r
  49     /**\r
  50      * Return the next token from this iterator, or null if the last\r
  51      * token has been returned.\r
  52      */\r
  53     public String next() throws IOException {\r
  54         if (done) {\r
  55             return null;\r
  56         }\r
  57         for (;;) {\r
  58             if (line == null) {\r
  59                 line = reader.readLineSkippingComments();\r
  60                 if (line == null) {\r
  61                     done = true;\r
  62                     return null;\r
  63                 }\r
  64                 pos = 0;\r
  65             }\r
  66             buf.setLength(0);\r
  67             lastpos = pos;\r
  68             pos = nextToken(pos);\r
  69             if (pos < 0) {\r
  70                 line = null;\r
  71                 continue;\r
  72             }\r
  73             return buf.toString();\r
  74         }\r
  75     }\r
  76 \r
  77     /**\r
  78      * Return the one-based line number of the line of the last token returned by\r
  79      * next(). Should only be called\r
  80      * after a call to next(); otherwise the return\r
  81      * value is undefined.\r
  82      */\r
  83     public int getLineNumber() {\r
  84         return reader.getLineNumber();\r
  85     }\r
  86     \r
  87     /**\r
  88      * Return a string description of the position of the last line\r
  89      * returned by readLine() or readLineSkippingComments().\r
  90      */\r
  91     public String describePosition() {\r
  92         return reader.describePosition() + ':' + (lastpos+1);\r
  93     }\r
  94     \r
  95     /**\r
  96      * Read the next token from 'this.line' and append it to\r
  97      * 'this.buf'.  Tokens are separated by rule white space.  Tokens\r
  98      * may also be delimited by double or single quotes.  The closing\r
  99      * quote must match the opening quote.  If a '#' is encountered,\r
 100      * the rest of the line is ignored, unless it is backslash-escaped\r
 101      * or within quotes.\r
 102      * @param position the offset into the string\r
 103      * @return offset to the next character to read from line, or if\r
 104      * the end of the line is reached without scanning a valid token,\r
 105      * -1\r
 106      */\r
 107     private int nextToken(int position) {\r
 108         position = Utility.skipWhitespace(line, position);\r
 109         if (position == line.length()) {\r
 110             return -1;\r
 111         }\r
 112         int startpos = position;\r
 113         char c = line.charAt(position++);\r
 114         char quote = 0;\r
 115         switch (c) {\r
 116         case '"':\r
 117         case '\'':\r
 118             quote = c;\r
 119             break;\r
 120         case '#':\r
 121             return -1;\r
 122         default:\r
 123             buf.append(c);\r
 124             break;\r
 125         }\r
 126         int[] posref = null;\r
 127         while (position < line.length()) {\r
 128             c = line.charAt(position); // 16-bit ok\r
 129             if (c == '\\') {\r
 130                 if (posref == null) {\r
 131                     posref = new int[1];\r
 132                 }\r
 133                 posref[0] = position+1;\r
 134                 int c32 = Utility.unescapeAt(line, posref);\r
 135                 if (c32 < 0) {\r
 136                     throw new RuntimeException("Invalid escape at " +\r
 137                                                reader.describePosition() + ':' +\r
 138                                                position);\r
 139                 }\r
 140                 UTF16.append(buf, c32);\r
 141                 position = posref[0];\r
 142             } else if ((quote != 0 && c == quote) ||\r
 143                        (quote == 0 && UCharacterProperty.isRuleWhiteSpace(c))) {\r
 144                 return ++position;\r
 145             } else if (quote == 0 && c == '#') {\r
 146                 return position; // do NOT increment\r
 147             } else {\r
 148                 buf.append(c);\r
 149                 ++position;\r
 150             }\r
 151         }\r
 152         if (quote != 0) {\r
 153             throw new RuntimeException("Unterminated quote at " +\r
 154                                        reader.describePosition() + ':' +\r
 155                                        startpos);\r
 156         }\r
 157         return position;\r
 158     }\r
 159 }\r