1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.List;
21 import junit.framework.TestCase;
23 public class WikiTokenizerTest extends TestCase {
/**
 * Exercises wiki-link tokens: the link is returned as a single token, isWikiLink() is true,
 * and wikiLinkText() / wikiLinkDest() expose its parts.
 *
 * NOTE(review): the declaration of wikiText and its first assignment are not visible in this
 * listing; the first group of assertions implies a link whose text is "abc" -- confirm.
 */
25 public void testWikiLink() {
// First input: the whole text is one wiki-link token, its text is "abc" and it has no
// destination (wikiLinkDest() returns null).
29 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
30 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
31 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
32 assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// With one pipe, the part before it is the destination and the part after it is the text.
34 wikiText = "[[abc|def]]";
35 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
36 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
37 assertEquals("def", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
38 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// With several pipes, the destination is still the first segment and the text is the last
// one; the pipe inside the nested {{...}} stays part of the text.
40 wikiText = "[[abc|def|ghi{{a|=2}}p]]";
41 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
42 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
43 assertEquals("ghi{{a|=2}}p", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
44 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Two adjacent links come back as two separate tokens; the chained nextToken() call
// advances to the second link.
46 wikiText = "[[abc]][[def]]";
47 assertEquals("[[abc]]", new WikiTokenizer(wikiText).nextToken().token());
48 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
49 assertEquals("def", new WikiTokenizer(wikiText).nextToken().nextToken().wikiLinkText());
/**
 * Exercises list-item tokens: a line starting with "*" is returned as one token that stops
 * before the line's newline.
 *
 * NOTE(review): the declaration of wikiText is not visible in this listing.
 */
53 public void testWikiList() {
// The mismatched quote markup (''bold''') does not split the list item: the token is the
// whole input.
56 wikiText = "* This is ''bold''' asdf.";
57 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
// The list item ends at the first newline, which is then returned as its own "\n" token.
// The unbalanced "]]" inside the template argument stays part of the list-item token.
59 wikiText = "* {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}\nasdf\n";
60 assertEquals("* {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}", new WikiTokenizer(wikiText).nextToken().token());
61 assertTrue(new WikiTokenizer(wikiText).nextToken().isListItem());
62 assertEquals("\n", new WikiTokenizer(wikiText).nextToken().nextToken().token());
/**
 * Exercises function tokens ({{name|args...}}): isFunction(), functionName(),
 * functionPositionArgs() and functionNamedArgs().
 *
 * NOTE(review): the declaration of wikiText and its first assignment are not visible in this
 * listing; the first group of assertions implies a function named "abc" with no arguments
 * -- confirm.
 */
66 public void testFunction() {
// First input: one function token named "abc" with no positional and no named arguments.
70 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
71 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
72 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
73 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionPositionArgs().size());
74 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// A single positional argument.
76 wikiText = "{{abc|def}}";
77 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
78 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
79 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
80 assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
81 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// A pipe inside a nested [[...]] is not an argument separator.
83 wikiText = "{{abc|d[[|]]ef|ghi}}";
84 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
85 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
86 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
87 assertEquals(Arrays.asList("d[[|]]ef", "ghi"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
88 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// name=value arguments are reported as named arguments, the others stay positional in their
// original order. A nested function used as a value keeps its own "|" and "=" characters.
90 wikiText = "{{abc|arg1=101|ghi|arg2=202|arg3={{n1|n2=7|n3}}|{{d}}}}";
91 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
92 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
93 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
94 assertEquals(Arrays.asList("ghi", "{{d}}"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
95 assertEquals(3, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
96 assertEquals("101", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg1"));
97 assertEquals("202", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg2"));
98 assertEquals("{{n1|n2=7|n3}}", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg3"));
// A function closed with a single "}" is cut off at the end of its first line.
100 wikiText = "{{gloss|asdf}\nAsdf\n\n";
101 assertEquals("{{gloss|asdf}", new WikiTokenizer(wikiText).nextToken().token());
// A properly closed function that spans several lines, preceded by the list markers "#*",
// comes back as one token covering the entire input.
103 wikiText = "#*{{quote-book|year=1960|author={{w|P. G. Wodehouse}}\n" +
104 "|title={{w|Jeeves in the Offing}}\n" +
105 "|section=chapter XI\n" +
106 "|passage=“I'm sorely beset, Jeeves. Do you recall telling me once about someone who told somebody he could tell him something which would make him think a bit? Knitted socks and porcu\n" +
107 "pines entered into it, I remember.” “I think you may be referring to the ghost of the father of Hamlet, Prince of Denmark, sir. Addressing his son, he said ‘I could a tale unfold whos\n" +
108 "e lightest word would harrow up thy soul, freeze thy young blood, make thy two eyes, like stars, start from their spheres, thy knotted and combined locks to part and each particular h\n" +
109 "air to stand on end like quills upon the fretful '''porpentine'''.’ ” “That's right. Locks, of course, not socks. Odd that he should have said '''porpentine''' when he meant porc\n" +
110 "upine. Slip of the tongue, no doubt, as so often happens with ghosts.”}}";
111 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
/**
 * Exercises returnToLineStart(): after the call, nextToken() yields the tokens of the current
 * line again, starting from the beginning of that line.
 *
 * NOTE(review): the declaration of wikiText is not visible in this listing.
 */
116 public void testReturn() {
// Three lines: plain text, a heading, and plain text without a trailing newline.
119 wikiText = "hello\n=Heading=\nhello2";
// A single tokenizer is reused, so every assertion depends on the position left by the
// previous call.
121 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
// Rewinding right after the first token replays that token.
123 assertEquals("hello", tokenizer.nextToken().token());
124 tokenizer.returnToLineStart();
125 assertEquals("hello", tokenizer.nextToken().token());
126 assertEquals("\n", tokenizer.nextToken().token());
// Rewinding after the newline token was consumed still goes back to the start of the
// "hello" line, not to the start of the following line.
127 tokenizer.returnToLineStart();
128 assertEquals("hello", tokenizer.nextToken().token());
129 assertEquals("\n", tokenizer.nextToken().token());
// Same two checks on the heading line.
131 assertEquals("=Heading=", tokenizer.nextToken().token());
132 tokenizer.returnToLineStart();
133 assertEquals("=Heading=", tokenizer.nextToken().token());
134 assertEquals("\n", tokenizer.nextToken().token());
135 tokenizer.returnToLineStart();
136 assertEquals("=Heading=", tokenizer.nextToken().token());
137 assertEquals("\n", tokenizer.nextToken().token());
// Last line: nextToken() returns null at the end of the input, and rewinding from there
// replays the last line.
139 assertEquals("hello2", tokenizer.nextToken().token());
140 assertEquals(null, tokenizer.nextToken());
141 tokenizer.returnToLineStart();
142 assertEquals("hello2", tokenizer.nextToken().token());
143 assertEquals(null, tokenizer.nextToken());
/**
 * Exercises heading tokens: isHeading(), headingDepth(), headingWikiText() and the number of
 * entries in the tokenizer's errors collection for well-formed and malformed headings.
 *
 * NOTE(review): the declaration of wikiText and the assignments for the first five groups are
 * not visible in this listing; the group comments below describe only what the assertions
 * state -- confirm against the inputs.
 */
148 public void testWikiHeading() {
// Token "==": a depth-2 heading with empty text; one error is recorded.
152 assertEquals("==", new WikiTokenizer(wikiText).nextToken().token());
153 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
154 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
155 assertEquals("", new WikiTokenizer(wikiText).nextToken().headingWikiText());
156 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Token "=a": a depth-1 heading with text "a"; two errors are recorded.
160 assertEquals("=a", new WikiTokenizer(wikiText).nextToken().token());
161 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
162 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
163 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
164 assertEquals(2, new WikiTokenizer(wikiText).nextToken().errors.size());
// Token "=a==": depth comes out as 1 with text "a"; one error is recorded.
167 assertEquals("=a==", new WikiTokenizer(wikiText).nextToken().token());
168 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
169 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
170 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
171 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Token "a": not a heading.
174 assertEquals("a", new WikiTokenizer(wikiText).nextToken().token());
175 assertFalse(new WikiTokenizer(wikiText).nextToken().isHeading());
// Token "=a=": a well-formed depth-1 heading; no errors.
178 assertEquals("=a=", new WikiTokenizer(wikiText).nextToken().token());
179 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
180 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
181 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
182 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// "=" and "|" inside nested [[...]] and {{...}} do not end the heading: the whole input is
// one depth-2 heading with no errors.
184 wikiText = "==aa[[|=]] {{|={{=}} }}==";
185 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
186 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
187 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
188 assertEquals("aa[[|=]] {{|={{=}} }}", new WikiTokenizer(wikiText).nextToken().headingWikiText());
189 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
/**
 * End-to-end check: tokenizes a multi-line sample mixing plain text, comments, list items,
 * links, functions, headings and mismatched brackets, and compares the resulting token
 * sequence with expectedTokens.
 *
 * NOTE(review): several lines of this method are not visible in this listing (parts of the
 * input, most entries of expectedTokens, the declarations of token and i, and the closing
 * braces); the comments below cover only the visible statements.
 */
195 public void testSimple() {
// Input under test; each fragment is one source line terminated by "\n".
196 final String wikiText =
198 "Hello =thad| you're <!-- not --> '''pretty''' cool '''''over''''' there." + "\n" +
200 "multi-line" + "\n" +
201 "# comment -->" + "\n" +
204 "{{template_not_in_list}}" + "\n" +
205 "# {{template_in_list}}" + "\n" +
206 "[[wikitext]]:[[wikitext]]" + "\n" + // don't want this to trigger a list
207 ": but this is a list!" + "\n" +
208 "*:* and so is this :::" + "\n" +
209 "here's [[some blah|some]] wikitext." + "\n" +
210 "here's a {{template|this has an = sign|blah=2|blah2=3|" + "\n" +
211 "blah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}} and some more text." + "\n" +
212 "== Header 2 ==" + "\n" +
213 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}" + "\n" +
214 "{{mismatched]]" + "\n" +
215 "[[mismatched}}" + "\n" +
216 "{extraterminated}}" + "\n" +
217 "[extraterminated]]" + "\n" +
218 "=== {{header-template}} ===" + "\n";
// Tokens expected from the input above, in order. The visible entries show that a comment
// and a function may each span several lines inside a single token.
220 final String[] expectedTokens = new String[] {
242 "<!--\nmulti-line\n# comment -->",
247 "{{template_not_in_list}}",
249 "# {{template_in_list}}",
255 ": but this is a list!",
257 "*:* and so is this :::",
260 "[[some blah|some]]",
264 "{{template|this has an = sign|blah=2|blah2=3|\nblah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}}",
265 " and some more text.",
269 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}",
281 "=== {{header-template}} ===",
285 final List<String> actualTokens = new ArrayList<String>();
287 final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
// Drain the tokenizer until nextToken() returns null, recording every token and checking it
// against the expected token at the same position as soon as it is produced.
290 while ((token = wikiTokenizer.nextToken()) != null) {
291 actualTokens.add(token.token());
// Prints each token as a quoted literal with newlines escaped and a trailing comma --
// presumably so the output can be pasted into expectedTokens; confirm.
292 System.out.println("\"" + token.token().replace("\n", "\\n") + "\",");
293 assertEquals(expectedTokens[i++], token.token());
// Whole-sequence comparison; unlike the per-token check it also fails when fewer tokens than
// expected were produced.
295 assertEquals(Arrays.asList(expectedTokens), actualTokens);