jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java

   1 /*
   2 **********************************************************************
   3 * Copyright (c) 2004-2011, International Business Machines
   4 * Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 * Author: Alan Liu
   7 * Created: March 16 2004
   8 * Since: ICU 3.0
   9 **********************************************************************
  10 */
  11 package com.ibm.icu.impl.data;
  12
  13 import java.io.IOException;
  14
  15 import com.ibm.icu.impl.PatternProps;
  16 import com.ibm.icu.impl.Utility;
  17 import com.ibm.icu.text.UTF16;
  18
  19 /**
  20  * An iterator class that returns successive string tokens from some
  21  * source.  String tokens are, in general, separated by Pattern_White_Space
  22  * in the source test.  Furthermore, they may be delimited by
  23  * either single or double quotes (opening and closing quotes must
  24  * match).  Escapes are processed using standard ICU unescaping.
  25  */
  26 public class TokenIterator {
  27
  28     private ResourceReader reader;
  29     private String line;
  30     private StringBuffer buf;
  31     private boolean done;
  32     private int pos;
  33     private int lastpos;
  34
  35     /**
  36      * Construct an iterator over the tokens returned by the given
  37      * ResourceReader, ignoring blank lines and comment lines (first
  38      * non-blank character is '#').  Note that trailing comments on a
  39      * line, beginning with the first unquoted '#', are recognized.
  40      */
  41     public TokenIterator(ResourceReader r) {
  42         reader = r;
  43         line = null;
  44         done = false;
  45         buf = new StringBuffer();
  46         pos = lastpos = -1;
  47     }
  48
  49     /**
  50      * Return the next token from this iterator, or null if the last
  51      * token has been returned.
  52      */
  53     public String next() throws IOException {
  54         if (done) {
  55             return null;
  56         }
  57         for (;;) {
  58             if (line == null) {
  59                 line = reader.readLineSkippingComments();
  60                 if (line == null) {
  61                     done = true;
  62                     return null;
  63                 }
  64                 pos = 0;
  65             }
  66             buf.setLength(0);
  67             lastpos = pos;
  68             pos = nextToken(pos);
  69             if (pos < 0) {
  70                 line = null;
  71                 continue;
  72             }
  73             return buf.toString();
  74         }
  75     }
  76
  77     /**
  78      * Return the one-based line number of the line of the last token returned by
  79      * next(). Should only be called
  80      * after a call to next(); otherwise the return
  81      * value is undefined.
  82      */
  83     public int getLineNumber() {
  84         return reader.getLineNumber();
  85     }
  86
  87     /**
  88      * Return a string description of the position of the last line
  89      * returned by readLine() or readLineSkippingComments().
  90      */
  91     public String describePosition() {
  92         return reader.describePosition() + ':' + (lastpos+1);
  93     }
  94
  95     /**
  96      * Read the next token from 'this.line' and append it to
  97      * 'this.buf'.  Tokens are separated by Pattern_White_Space.  Tokens
  98      * may also be delimited by double or single quotes.  The closing
  99      * quote must match the opening quote.  If a '#' is encountered,
 100      * the rest of the line is ignored, unless it is backslash-escaped
 101      * or within quotes.
 102      * @param position the offset into the string
 103      * @return offset to the next character to read from line, or if
 104      * the end of the line is reached without scanning a valid token,
 105      * -1
 106      */
 107     private int nextToken(int position) {
 108         position = PatternProps.skipWhiteSpace(line, position);
 109         if (position == line.length()) {
 110             return -1;
 111         }
 112         int startpos = position;
 113         char c = line.charAt(position++);
 114         char quote = 0;
 115         switch (c) {
 116         case '"':
 117         case '\'':
 118             quote = c;
 119             break;
 120         case '#':
 121             return -1;
 122         default:
 123             buf.append(c);
 124             break;
 125         }
 126         int[] posref = null;
 127         while (position < line.length()) {
 128             c = line.charAt(position); // 16-bit ok
 129             if (c == '\\') {
 130                 if (posref == null) {
 131                     posref = new int[1];
 132                 }
 133                 posref[0] = position+1;
 134                 int c32 = Utility.unescapeAt(line, posref);
 135                 if (c32 < 0) {
 136                     throw new RuntimeException("Invalid escape at " +
 137                                                reader.describePosition() + ':' +
 138                                                position);
 139                 }
 140                 UTF16.append(buf, c32);
 141                 position = posref[0];
 142             } else if ((quote != 0 && c == quote) ||
 143                        (quote == 0 && PatternProps.isWhiteSpace(c))) {
 144                 return ++position;
 145             } else if (quote == 0 && c == '#') {
 146                 return position; // do NOT increment
 147             } else {
 148                 buf.append(c);
 149                 ++position;
 150             }
 151         }
 152         if (quote != 0) {
 153             throw new RuntimeException("Unterminated quote at " +
 154                                        reader.describePosition() + ':' +
 155                                        startpos);
 156         }
 157         return position;
 158     }
 159 }