2 **********************************************************************
3 * Copyright (c) 2004-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Created: March 16 2004
9 **********************************************************************
11 package com.ibm.icu.impl.data;
13 import java.io.IOException;
15 import com.ibm.icu.impl.PatternProps;
16 import com.ibm.icu.impl.Utility;
17 import com.ibm.icu.text.UTF16;
20 * An iterator class that returns successive string tokens from some
21 * source. String tokens are, in general, separated by Pattern_White_Space
22 * in the source text. Furthermore, they may be delimited by
23 * either single or double quotes (opening and closing quotes must
24 * match). Escapes are processed using standard ICU unescaping.
26 public class TokenIterator {
28 private ResourceReader reader;
30 private StringBuffer buf;
36 * Construct an iterator over the tokens returned by the given
37 * ResourceReader, ignoring blank lines and comment lines (first
38 * non-blank character is '#'). Note that trailing comments on a
39 * line, beginning with the first unquoted '#', are recognized.
41 public TokenIterator(ResourceReader r) {
45 buf = new StringBuffer();
50 * Return the next token from this iterator, or null if the last
51 * token has been returned.
53 public String next() throws IOException {
59 line = reader.readLineSkippingComments();
73 return buf.toString();
78 * Return the one-based line number of the line of the last token returned by
79 * next(). Should only be called
80 * after a call to next(); otherwise the return
83 public int getLineNumber() {
84 return reader.getLineNumber();
88 * Return a string description of the position of the last line returned by
89 * readLine() or readLineSkippingComments(), followed by ':' and (lastpos+1).
91 public String describePosition() {
92 return reader.describePosition() + ':' + (lastpos+1);
96 * Read the next token from 'this.line' and append it to
97 * 'this.buf'. Tokens are separated by Pattern_White_Space. Tokens
98 * may also be delimited by double or single quotes. The closing
99 * quote must match the opening quote. If a '#' is encountered,
100 * the rest of the line is ignored, unless it is backslash-escaped
102 * @param position the offset into the string
103 * @return offset to the next character to read from line, or if
104 * the end of the line is reached without scanning a valid token,
107 private int nextToken(int position) {
108 position = PatternProps.skipWhiteSpace(line, position);
109 if (position == line.length()) {
112 int startpos = position;
113 char c = line.charAt(position++);
127 while (position < line.length()) {
128 c = line.charAt(position); // 16-bit ok
130 if (posref == null) {
133 posref[0] = position+1;
134 int c32 = Utility.unescapeAt(line, posref);
136 throw new RuntimeException("Invalid escape at " +
137 reader.describePosition() + ':' +
140 UTF16.append(buf, c32);
141 position = posref[0];
142 } else if ((quote != 0 && c == quote) ||
143 (quote == 0 && PatternProps.isWhiteSpace(c))) {
145 } else if (quote == 0 && c == '#') {
146 return position; // do NOT increment
153 throw new RuntimeException("Unterminated quote at " +
154 reader.describePosition() + ':' +