2 **********************************************************************
\r
3 * Copyright (c) 2004-2008, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 **********************************************************************
\r
7 * Created: March 16 2004
\r
9 **********************************************************************
\r
11 package com.ibm.icu.impl.data;
\r
13 import java.io.IOException;
\r
15 import com.ibm.icu.impl.UCharacterProperty;
\r
16 import com.ibm.icu.impl.Utility;
\r
17 import com.ibm.icu.text.UTF16;
\r
20 * An iterator class that returns successive string tokens from some
\r
21 * source. String tokens are, in general, separated by rule white
\r
22 * space in the source text. Furthermore, they may be delimited by
\r
23 * either single or double quotes (opening and closing quotes must
\r
24 * match). Escapes are processed using standard ICU unescaping.
\r
26 public class TokenIterator {
\r
28 private ResourceReader reader;
\r
29 private String line;
\r
30 private StringBuffer buf;
\r
31 private boolean done;
\r
33 private int lastpos;
\r
36 * Construct an iterator over the tokens returned by the given
\r
37 * ResourceReader, ignoring blank lines and comment lines (first
\r
38 * non-blank character is '#'). Note that trailing comments on a
\r
39 * line, beginning with the first unquoted '#', are recognized.
\r
41 public TokenIterator(ResourceReader r) {
\r
45 buf = new StringBuffer();
\r
50 * Return the next token from this iterator, or null if the last
\r
51 * token has been returned.
\r
53 public String next() throws IOException {
\r
59 line = reader.readLineSkippingComments();
\r
68 pos = nextToken(pos);
\r
73 return buf.toString();
\r
78 * Return the one-based line number of the line of the last token returned by
\r
79 * next(). Should only be called
\r
80 * after a call to next(); otherwise the return
\r
81 * value is undefined.
\r
83 public int getLineNumber() {
\r
84 return reader.getLineNumber();
\r
88 * Return a string description of the position of the last line
\r
89 * returned by readLine() or readLineSkippingComments(), then ':' and lastpos+1.
\r
91 public String describePosition() {
\r
92 return reader.describePosition() + ':' + (lastpos+1);
\r
96 * Read the next token from 'this.line' and append it to
\r
97 * 'this.buf'. Tokens are separated by rule white space. Tokens
\r
98 * may also be delimited by double or single quotes. The closing
\r
99 * quote must match the opening quote. If a '#' is encountered,
\r
100 * the rest of the line is ignored, unless it is backslash-escaped
\r
101 * or within quotes.
\r
102 * @param position the offset into the string
\r
103 * @return offset to the next character to read from line, or if
\r
104 * the end of the line is reached without scanning a valid token,
\r
107 private int nextToken(int position) {
\r
108 position = Utility.skipWhitespace(line, position);
\r
109 if (position == line.length()) {
\r
112 int startpos = position;
\r
113 char c = line.charAt(position++);
\r
126 int[] posref = null;
\r
127 while (position < line.length()) {
\r
128 c = line.charAt(position); // 16-bit ok
\r
130 if (posref == null) {
\r
131 posref = new int[1];
\r
133 posref[0] = position+1;
\r
134 int c32 = Utility.unescapeAt(line, posref);
\r
136 throw new RuntimeException("Invalid escape at " +
\r
137 reader.describePosition() + ':' +
\r
140 UTF16.append(buf, c32);
\r
141 position = posref[0];
\r
142 } else if ((quote != 0 && c == quote) ||
\r
143 (quote == 0 && UCharacterProperty.isRuleWhiteSpace(c))) {
\r
145 } else if (quote == 0 && c == '#') {
\r
146 return position; // do NOT increment
\r
153 throw new RuntimeException("Unterminated quote at " +
\r
154 reader.describePosition() + ':' +
\r