1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.List;
21 import junit.framework.TestCase;
23 public class WikiTokenizerTest extends TestCase {
25 public void testWikiLink() {
29 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
30 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
31 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
32 assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
34 wikiText = "[[abc|def]]";
35 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
36 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
37 assertEquals("def", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
38 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
40 wikiText = "[[abc|def|ghi{{a|=2}}p]]";
41 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
42 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
43 assertEquals("ghi{{a|=2}}p", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
44 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
46 wikiText = "[[abc]][[def]]";
47 assertEquals("[[abc]]", new WikiTokenizer(wikiText).nextToken().token());
48 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
49 assertEquals("def", new WikiTokenizer(wikiText).nextToken().nextToken().wikiLinkText());
53 public void testWikiList() {
56 wikiText = "* This is ''bold''' asdf.";
57 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
60 public void testFunction() {
64 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
65 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
66 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
67 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionPositionArgs().size());
68 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
70 wikiText = "{{abc|def}}";
71 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
72 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
73 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
74 assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
75 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
77 wikiText = "{{abc|d[[|]]ef|ghi}}";
78 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
79 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
80 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
81 assertEquals(Arrays.asList("d[[|]]ef", "ghi"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
82 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
84 wikiText = "{{abc|arg1=101|ghi|arg2=202|arg3={{n1|n2=7|n3}}|{{d}}}}";
85 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
86 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
87 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
88 assertEquals(Arrays.asList("ghi", "{{d}}"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
89 assertEquals(3, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
90 assertEquals("101", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg1"));
91 assertEquals("202", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg2"));
92 assertEquals("{{n1|n2=7|n3}}", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg3"));
94 wikiText = "{{gloss|asdf}\nAsdf\n\n";
95 assertEquals("{{gloss|asdf}", new WikiTokenizer(wikiText).nextToken().token());
97 wikiText = "#*{{quote-book|year=1960|author={{w|P. G. Wodehouse}}\n" +
98 "|title={{w|Jeeves in the Offing}}\n" +
99 "|section=chapter XI\n" +
100 "|passage=“I'm sorely beset, Jeeves. Do you recall telling me once about someone who told somebody he could tell him something which would make him think a bit? Knitted socks and porcu\n" +
101 "pines entered into it, I remember.” “I think you may be referring to the ghost of the father of Hamlet, Prince of Denmark, sir. Addressing his son, he said ‘I could a tale unfold whos\n" +
102 "e lightest word would harrow up thy soul, freeze thy young blood, make thy two eyes, like stars, start from their spheres, thy knotted and combined locks to part and each particular h\n" +
103 "air to stand on end like quills upon the fretful '''porpentine'''.’ ” “That's right. Locks, of course, not socks. Odd that he should have said '''porpentine''' when he meant porc\n" +
104 "upine. Slip of the tongue, no doubt, as so often happens with ghosts.”}}";
105 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
110 public void testReturn() {
113 wikiText = "hello\n=Heading=\nhello2";
115 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
117 assertEquals("hello", tokenizer.nextToken().token());
118 tokenizer.returnToLineStart();
119 assertEquals("hello", tokenizer.nextToken().token());
120 assertEquals("\n", tokenizer.nextToken().token());
121 tokenizer.returnToLineStart();
122 assertEquals("hello", tokenizer.nextToken().token());
123 assertEquals("\n", tokenizer.nextToken().token());
125 assertEquals("=Heading=", tokenizer.nextToken().token());
126 tokenizer.returnToLineStart();
127 assertEquals("=Heading=", tokenizer.nextToken().token());
128 assertEquals("\n", tokenizer.nextToken().token());
129 tokenizer.returnToLineStart();
130 assertEquals("=Heading=", tokenizer.nextToken().token());
131 assertEquals("\n", tokenizer.nextToken().token());
133 assertEquals("hello2", tokenizer.nextToken().token());
134 assertEquals(null, tokenizer.nextToken());
135 tokenizer.returnToLineStart();
136 assertEquals("hello2", tokenizer.nextToken().token());
137 assertEquals(null, tokenizer.nextToken());
// Exercises heading tokens: raw token text, isHeading(), headingDepth(), headingWikiText()
// and the size of the token's errors field.
// NOTE(review): the declaration of wikiText and all but the last of its assignments are not
// visible in this view; the groups below are described from their assertions only. Restore
// the missing lines from the upstream file before relying on this test.
142 public void testWikiHeading() {
// Group 1: first token "==" -- a depth-2 heading with empty heading text and one error.
146 assertEquals("==", new WikiTokenizer(wikiText).nextToken().token());
147 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
148 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
149 assertEquals("", new WikiTokenizer(wikiText).nextToken().headingWikiText());
150 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Group 2: first token "=a" -- a depth-1 heading with text "a" and two errors.
154 assertEquals("=a", new WikiTokenizer(wikiText).nextToken().token());
155 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
156 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
157 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
158 assertEquals(2, new WikiTokenizer(wikiText).nextToken().errors.size());
// Group 3: first token "=a==" -- still depth 1 with text "a", but one error is expected.
161 assertEquals("=a==", new WikiTokenizer(wikiText).nextToken().token());
162 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
163 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
164 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
165 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Group 4: first token "a" -- must not be reported as a heading.
168 assertEquals("a", new WikiTokenizer(wikiText).nextToken().token());
169 assertFalse(new WikiTokenizer(wikiText).nextToken().isHeading());
// Group 5: first token "=a=" -- a depth-1 heading with text "a" and no errors.
172 assertEquals("=a=", new WikiTokenizer(wikiText).nextToken().token());
173 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
174 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
175 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
176 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// Group 6: '=' characters inside nested [[...]] and {{...}} must not end the heading; the
// whole input is one depth-2 heading token with no errors.
178 wikiText = "==aa[[|=]] {{|={{=}} }}==";
179 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
180 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
181 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
182 assertEquals("aa[[|=]] {{|={{=}} }}", new WikiTokenizer(wikiText).nextToken().headingWikiText());
183 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// End-to-end check: tokenizes a mixed wikitext sample (plain text, comments, lists, links,
// templates, headings, mismatched brackets) and compares the produced token stream with
// expectedTokens.
// NOTE(review): several lines of this method are not visible in this view -- parts of the
// wikiText concatenation, most entries and the closing of expectedTokens, and the
// declarations of token and i. Restore them from the upstream file before relying on it.
189 public void testSimple() {
// Input sample, built line by line; each visible piece ends with an explicit "\n".
190 final String wikiText =
192 "Hello =thad| you're <!-- not --> '''pretty''' cool '''''over''''' there." + "\n" +
194 "multi-line" + "\n" +
195 "# comment -->" + "\n" +
198 "{{template_not_in_list}}" + "\n" +
199 "# {{template_in_list}}" + "\n" +
200 "[[wikitext]]:[[wikitext]]" + "\n" + // don't want this to trigger a list
201 ": but this is a list!" + "\n" +
202 "*:* and so is this :::" + "\n" +
203 "here's [[some blah|some]] wikitext." + "\n" +
204 "here's a {{template|this has an = sign|blah=2|blah2=3|" + "\n" +
205 "blah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}} and some more text." + "\n" +
206 "== Header 2 ==" + "\n" +
207 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}" + "\n" +
208 "{{mismatched]]" + "\n" +
209 "[[mismatched}}" + "\n" +
210 "{extraterminated}}" + "\n" +
211 "[extraterminated]]" + "\n" +
212 "=== {{header-template}} ===" + "\n";
// Expected tokens in the order the tokenizer should produce them (only some entries are
// visible here). Note the multi-line comment and the multi-line template are single tokens.
214 final String[] expectedTokens = new String[] {
236 "<!--\nmulti-line\n# comment -->",
241 "{{template_not_in_list}}",
243 "# {{template_in_list}}",
249 ": but this is a list!",
251 "*:* and so is this :::",
254 "[[some blah|some]]",
258 "{{template|this has an = sign|blah=2|blah2=3|\nblah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}}",
259 " and some more text.",
263 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}",
275 "=== {{header-template}} ===",
// Collects every token text so the whole sequence can be compared at the end.
279 final List<String> actualTokens = new ArrayList<String>();
281 final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
// Drain the tokenizer; nextToken() returning null ends the loop.
284 while ((token = wikiTokenizer.nextToken()) != null) {
285 actualTokens.add(token.token());
// Prints each token as a quoted, comma-terminated literal with newlines escaped --
// presumably so the output can be pasted into expectedTokens.
286 System.out.println("\"" + token.token().replace("\n", "\\n") + "\",");
// Per-token comparison by index as tokens are produced.
287 assertEquals(expectedTokens[i++], token.token());
// Final whole-sequence comparison, which also compares the number of tokens.
289 assertEquals(Arrays.asList(expectedTokens), actualTokens);