1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.List;
21 import junit.framework.TestCase;
23 public class WikiTokenizerTest extends TestCase {
// Exercises WikiTokenizer on [[...]] wiki links, checking the raw token text,
// isWikiLink(), wikiLinkText() and wikiLinkDest().
// Each assertion constructs a fresh WikiTokenizer over wikiText, so every check
// looks at the first token of the input on its own.
25 public void testWikiLink() {
// NOTE(review): the declaration and first assignment of wikiText are not visible
// in this excerpt. The assertions below expect a single link token whose text is
// "abc" and whose destination is null -- presumably the input is "[[abc]]"; confirm.
29 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
30 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
31 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
32 assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Link with one pipe: the part before the pipe is expected as the destination,
// the part after it as the link text.
34 wikiText = "[[abc|def]]";
35 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
36 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
37 assertEquals("def", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
38 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Several pipes plus a nested {{...}} template: the expected link text is the
// last segment, "ghi{{a|=2}}p" (the pipe inside the template does not split it),
// and the expected destination is the first segment. The middle segment "def"
// is not checked by either accessor.
40 wikiText = "[[abc|def|ghi{{a|=2}}p]]";
41 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
42 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
43 assertEquals("ghi{{a|=2}}p", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
44 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Two adjacent links: the first token is expected to stop at the first "]]".
// The last assertion chains nextToken() twice to reach the second link.
46 wikiText = "[[abc]][[def]]";
47 assertEquals("[[abc]]", new WikiTokenizer(wikiText).nextToken().token());
48 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
49 assertEquals("def", new WikiTokenizer(wikiText).nextToken().nextToken().wikiLinkText());
// Checks that a line beginning with "*" comes back from nextToken() as one token
// spanning the whole line, quote markup included.
53 public void testWikiList() {
// NOTE(review): the declaration of wikiText is not visible in this excerpt.
// The quote markup in the literal is unbalanced (two quotes open, three close);
// whether that is deliberate cannot be told from here -- confirm.
56 wikiText = "* This is ''bold''' asdf.";
57 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
// Exercises WikiTokenizer on {{...}} template ("function") tokens, checking
// isFunction(), functionName(), functionPositionArgs() and functionNamedArgs().
// Each assertion constructs a fresh WikiTokenizer and inspects its first token.
60 public void testFunction() {
// NOTE(review): the declaration and first assignment of wikiText are not visible
// in this excerpt. The assertions below expect a function named "abc" with no
// positional and no named arguments -- presumably the input is "{{abc}}"; confirm.
64 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
65 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
66 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
67 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionPositionArgs().size());
68 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// One positional argument after the function name.
70 wikiText = "{{abc|def}}";
71 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
72 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
73 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
74 assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
75 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// A pipe enclosed in a nested [[...]] is expected to stay inside its argument
// rather than start a new one.
77 wikiText = "{{abc|d[[|]]ef|ghi}}";
78 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
79 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
80 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
81 assertEquals(Arrays.asList("d[[|]]ef", "ghi"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
82 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// Named and positional arguments interleaved. The positional list is expected to
// hold only "ghi" and "{{d}}" in source order; arg1..arg3 are expected as named
// arguments. The nested template assigned to arg3 is expected verbatim as its
// value, so its inner "|" and "=" must not be treated as separators.
84 wikiText = "{{abc|arg1=101|ghi|arg2=202|arg3={{n1|n2=7|n3}}|{{d}}}}";
85 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
86 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
87 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
88 assertEquals(Arrays.asList("ghi", "{{d}}"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
89 assertEquals(3, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
90 assertEquals("101", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg1"));
91 assertEquals("202", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg2"));
92 assertEquals("{{n1|n2=7|n3}}", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg3"));
// Exercises returnToLineStart() on a three-line input using a single tokenizer.
// After each call, the following nextToken() calls are expected to replay the
// current line's tokens from its beginning.
97 public void testReturn() {
// NOTE(review): the declaration of wikiText is not visible in this excerpt.
100 wikiText = "hello\n=Heading=\nhello2";
102 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
// First line: rewinding after the text token replays "hello".
104 assertEquals("hello", tokenizer.nextToken().token());
105 tokenizer.returnToLineStart();
106 assertEquals("hello", tokenizer.nextToken().token());
107 assertEquals("\n", tokenizer.nextToken().token());
// Rewinding right after the newline token is still expected to return to the
// start of the first line, not to the start of the next one.
108 tokenizer.returnToLineStart();
109 assertEquals("hello", tokenizer.nextToken().token());
110 assertEquals("\n", tokenizer.nextToken().token());
// Second line: the same two rewinds, with a heading token.
112 assertEquals("=Heading=", tokenizer.nextToken().token());
113 tokenizer.returnToLineStart();
114 assertEquals("=Heading=", tokenizer.nextToken().token());
115 assertEquals("\n", tokenizer.nextToken().token());
116 tokenizer.returnToLineStart();
117 assertEquals("=Heading=", tokenizer.nextToken().token());
118 assertEquals("\n", tokenizer.nextToken().token());
// Last line has no trailing newline: nextToken() is expected to return null at
// end of input, and rewinding afterwards is expected to replay "hello2".
120 assertEquals("hello2", tokenizer.nextToken().token());
121 assertEquals(null, tokenizer.nextToken());
122 tokenizer.returnToLineStart();
123 assertEquals("hello2", tokenizer.nextToken().token());
124 assertEquals(null, tokenizer.nextToken());
// Exercises WikiTokenizer on "=...=" headings, checking the token text,
// isHeading(), headingDepth(), headingWikiText() and the size of the token's
// errors collection.
// Each assertion constructs a fresh WikiTokenizer and inspects its first token.
129 public void testWikiHeading() {
// NOTE(review): the declaration of wikiText and the input assignments for the
// first five cases are not visible in this excerpt; only the expectations are.
// Case 1: token "==" is expected to be a depth-2 heading with empty text and
// exactly one recorded error.
133 assertEquals("==", new WikiTokenizer(wikiText).nextToken().token());
134 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
135 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
136 assertEquals("", new WikiTokenizer(wikiText).nextToken().headingWikiText());
137 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Case 2: token "=a" (no closing "=") is expected to be a depth-1 heading with
// text "a" and one recorded error.
141 assertEquals("=a", new WikiTokenizer(wikiText).nextToken().token());
142 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
143 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
144 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
145 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Case 3: token "=a==" (one "=" opening, two closing) is expected to be a
// depth-1 heading with text "a" and one recorded error.
148 assertEquals("=a==", new WikiTokenizer(wikiText).nextToken().token());
149 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
150 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
151 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
152 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Case 4: the first token is expected to be plain "a" and not a heading.
155 assertEquals("a", new WikiTokenizer(wikiText).nextToken().token());
156 assertFalse(new WikiTokenizer(wikiText).nextToken().isHeading());
// Case 5: token "=a=" is expected to be a depth-1 heading with text "a" and
// no recorded errors.
159 assertEquals("=a=", new WikiTokenizer(wikiText).nextToken().token());
160 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
161 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
162 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
163 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// Case 6: "=" characters inside nested [[...]] and {{...}} are expected to stay
// part of the heading text instead of closing the heading; the whole input is
// one depth-2 heading with no errors.
165 wikiText = "==aa[[|=]] {{|={{=}} }}==";
166 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
167 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
168 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
169 assertEquals("aa[[|=]] {{|={{=}} }}", new WikiTokenizer(wikiText).nextToken().headingWikiText());
170 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// Tokenizes a multi-line wikitext sample with one WikiTokenizer and compares the
// resulting token strings with expectedTokens, first one by one inside the loop
// and then as a complete list.
// NOTE(review): several lines of the input literal and most entries of
// expectedTokens are not visible in this excerpt, nor are the declarations of
// the loop variables `token` and `i`.
175 public void testSimple() {
// Sample input, assembled line by line with explicit "\n" separators.
176 final String wikiText =
178 "Hello =thad| you're <!-- not --> '''pretty''' cool '''''over''''' there." + "\n" +
180 "multi-line" + "\n" +
181 "# comment -->" + "\n" +
184 "{{template_not_in_list}}" + "\n" +
185 "# {{template_in_list}}" + "\n" +
186 "[[wikitext]]:[[wikitext]]" + "\n" + // don't want this to trigger a list
187 ": but this is a list!" + "\n" +
188 "*:* and so is this :::" + "\n" +
189 "here's [[some blah|some]] wikitext." + "\n" +
190 "here's a {{template|this has an = sign|blah=2|blah2=3|" + "\n" +
191 "blah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}} and some more text." + "\n" +
192 "== Header 2 ==" + "\n" +
193 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}" + "\n" +
194 "{{mismatched]]" + "\n" +
195 "[[mismatched}}" + "\n" +
196 "{extraterminated}}" + "\n" +
197 "[extraterminated]]" + "\n" +
198 "=== {{header-template}} ===" + "\n";
// Expected token strings, in the order nextToken() should produce them.
// The visible entries show that a multi-line <!-- ... --> comment and a template
// spanning two lines are each expected as a single token containing "\n".
200 final String[] expectedTokens = new String[] {
222 "<!--\nmulti-line\n# comment -->",
227 "{{template_not_in_list}}",
229 "# {{template_in_list}}",
235 ": but this is a list!",
237 "*:* and so is this :::",
240 "[[some blah|some]]",
244 "{{template|this has an = sign|blah=2|blah2=3|\nblah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}}",
245 " and some more text.",
249 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}",
261 "=== {{header-template}} ===",
265 final List<String> actualTokens = new ArrayList<String>();
267 final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
// Pull tokens until nextToken() returns null, recording each token string.
270 while ((token = wikiTokenizer.nextToken()) != null) {
271 actualTokens.add(token.token());
// Prints each token as a quoted, comma-terminated literal with newlines escaped.
// NOTE(review): presumably kept to ease regenerating expectedTokens; it writes
// to stdout on every test run -- confirm whether it is still wanted.
272 System.out.println("\"" + token.token().replace("\n", "\\n") + "\",");
// Per-token comparison, so a mismatch is reported at the first differing index.
// NOTE(review): if the tokenizer yields more tokens than expectedTokens has
// entries, this indexing throws ArrayIndexOutOfBoundsException instead of
// producing an assertion failure.
273 assertEquals(expectedTokens[i++], token.token());
// Whole-list comparison; unlike the per-token check above, it also fails when
// fewer tokens were produced than expected.
275 assertEquals(Arrays.asList(expectedTokens), actualTokens);