1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.List;
21 import junit.framework.TestCase;
/**
 * JUnit 3 style tests (the class extends TestCase) that run WikiTokenizer over small wikitext
 * snippets: wiki links, list items, template ("function") calls, headings, HTML-like elements,
 * and rewinding with returnToLineStart().
 */
23 public class WikiTokenizerTest extends TestCase {
/**
 * Checks how wiki links are tokenized: the whole link is a single token, isWikiLink() is true
 * for it, and wikiLinkText() / wikiLinkDest() expose the display text and the destination.
 *
 * NOTE(review): the declaration and first assignment of wikiText, and the method's closing
 * brace, are not visible in this view (the embedded line numbers jump from 25 to 29) --
 * confirm against the full file.
 */
25 public void testWikiLink() {
// First input is not visible here; the assertions expect link text "abc" and a null destination.
29 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
30 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
31 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
32 assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// With one pipe, the part before it is the destination and the part after it is the text.
34 wikiText = "[[abc|def]]";
35 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
36 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
37 assertEquals("def", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
38 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// With several pipes, the text is the last segment; the pipe inside the nested {{...}} does
// not split it, and the destination is still the first segment.
40 wikiText = "[[abc|def|ghi{{a|=2}}p]]";
41 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
42 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
43 assertEquals("ghi{{a|=2}}p", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
44 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Two adjacent links come back as two tokens; chaining nextToken() advances to the second.
46 wikiText = "[[abc]][[def]]";
47 assertEquals("[[abc]]", new WikiTokenizer(wikiText).nextToken().token());
48 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
49 assertEquals("def", new WikiTokenizer(wikiText).nextToken().nextToken().wikiLinkText());
/**
 * Checks tokenization of list-item lines (lines starting with a list marker such as "* ").
 *
 * NOTE(review): the declaration of wikiText, the continuation of the last input literal, and
 * the closing brace are not visible in this view -- confirm against the full file.
 */
53 public void testWikiList() {
// The whole list line, including its inline quote markup, is returned as one token.
56 wikiText = "* This is ''bold''' asdf.";
57 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
// The list-item token stops before the line break; the "\n" is the next token. The unbalanced
// "]]" inside the {{IPA|...}} template does not cut the item short.
59 wikiText = "* {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}\nasdf\n";
60 assertEquals("* {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}", new WikiTokenizer(wikiText).nextToken().token());
61 assertTrue(new WikiTokenizer(wikiText).nextToken().isListItem());
62 assertEquals("\n", new WikiTokenizer(wikiText).nextToken().nextToken().token());
// The input contains U+2028 (LINE SEPARATOR) inside a link; the expected token has "\n" in that
// position and still covers the whole link. NOTE(review): presumably the tokenizer normalizes
// U+2028 to a newline -- the rest of the input literal is not visible here, so confirm.
65 wikiText = "* [[asdf|\u2028" +
67 assertEquals("* [[asdf|\n" +
68 "asdf]]", new WikiTokenizer(wikiText).nextToken().token());
69 assertTrue(new WikiTokenizer(wikiText).nextToken().isListItem());
/**
 * Checks tokenization of template calls written as {{name|arg|key=value}}: the call is a single
 * token, isFunction() is true, and functionName(), functionPositionArgs() and
 * functionNamedArgs() expose its parts.
 *
 * NOTE(review): the declaration and first assignment of wikiText, and the closing brace, are
 * not visible in this view (the embedded line numbers jump from 73 to 77) -- confirm against
 * the full file.
 */
73 public void testFunction() {
// First input is not visible here; the assertions expect name "abc" and no arguments at all.
77 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
78 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
79 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
80 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionPositionArgs().size());
81 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// A single argument without "=" is positional.
83 wikiText = "{{abc|def}}";
84 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
85 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
86 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
87 assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
88 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// A pipe inside a nested [[...]] must not split the enclosing argument.
90 wikiText = "{{abc|d[[|]]ef|ghi}}";
91 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
92 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
93 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
94 assertEquals(Arrays.asList("d[[|]]ef", "ghi"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
95 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// Mixed named and positional arguments: key=value pairs are looked up by key, the rest keep
// their order, and a nested template used as a value or argument is kept verbatim (its own
// "|" and "=" do not affect the outer call).
97 wikiText = "{{abc|arg1=101|ghi|arg2=202|arg3={{n1|n2=7|n3}}|{{d}}}}";
98 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
99 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
100 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
101 assertEquals(Arrays.asList("ghi", "{{d}}"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
102 assertEquals(3, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
103 assertEquals("101", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg1"));
104 assertEquals("202", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg2"));
105 assertEquals("{{n1|n2=7|n3}}", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg3"));
// A template missing one closing brace: the expected token is only the first line, so the
// unterminated call does not swallow the following text.
107 wikiText = "{{gloss|asdf}\nAsdf\n\n";
108 assertEquals("{{gloss|asdf}", new WikiTokenizer(wikiText).nextToken().token());
// A properly closed template whose arguments span several lines (here after a "#*" prefix)
// is expected to come back as one token equal to the entire input.
110 wikiText = "#*{{quote-book|year=1960|author={{w|P. G. Wodehouse}}\n" +
111 "|title={{w|Jeeves in the Offing}}\n" +
112 "|section=chapter XI\n" +
113 "|passage=“I'm sorely beset, Jeeves. Do you recall telling me once about someone who told somebody he could tell him something which would make him think a bit? Knitted socks and porcu\n" +
114 "pines entered into it, I remember.” “I think you may be referring to the ghost of the father of Hamlet, Prince of Denmark, sir. Addressing his son, he said ‘I could a tale unfold whos\n" +
115 "e lightest word would harrow up thy soul, freeze thy young blood, make thy two eyes, like stars, start from their spheres, thy knotted and combined locks to part and each particular h\n" +
116 "air to stand on end like quills upon the fretful '''porpentine'''.’ ” “That's right. Locks, of course, not socks. Odd that he should have said '''porpentine''' when he meant porc\n" +
117 "upine. Slip of the tongue, no doubt, as so often happens with ghosts.”}}";
118 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
/**
 * Checks returnToLineStart(): after calling it, the next nextToken() yields the first token of
 * the line the tokenizer was on, so that line can be read again. A single tokenizer instance is
 * used throughout, so the order of the statements below matters.
 *
 * NOTE(review): the declaration of wikiText and the closing brace are not visible in this
 * view -- confirm against the full file.
 */
123 public void testReturn() {
126 wikiText = "hello\n=Heading=\nhello2";
128 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
// Line 1: rewinding after the text token, and again after the newline token, both return to
// "hello" -- the "\n" token counts as part of the line it ends.
130 assertEquals("hello", tokenizer.nextToken().token());
131 tokenizer.returnToLineStart();
132 assertEquals("hello", tokenizer.nextToken().token());
133 assertEquals("\n", tokenizer.nextToken().token());
134 tokenizer.returnToLineStart();
135 assertEquals("hello", tokenizer.nextToken().token());
136 assertEquals("\n", tokenizer.nextToken().token());
// Line 2: the same two rewinds, this time on a heading line.
138 assertEquals("=Heading=", tokenizer.nextToken().token());
139 tokenizer.returnToLineStart();
140 assertEquals("=Heading=", tokenizer.nextToken().token());
141 assertEquals("\n", tokenizer.nextToken().token());
142 tokenizer.returnToLineStart();
143 assertEquals("=Heading=", tokenizer.nextToken().token());
144 assertEquals("\n", tokenizer.nextToken().token());
// Line 3 (no trailing newline): nextToken() returns null at end of input, and rewinding after
// that still goes back to the start of the last line.
146 assertEquals("hello2", tokenizer.nextToken().token());
147 assertEquals(null, tokenizer.nextToken());
148 tokenizer.returnToLineStart();
149 assertEquals("hello2", tokenizer.nextToken().token());
150 assertEquals(null, tokenizer.nextToken());
/**
 * Checks heading tokenization via isHeading(), headingDepth(), headingWikiText() and the size
 * of the tokenizer's errors collection, for well-formed and malformed headings.
 *
 * NOTE(review): the declaration of wikiText and the assignments for the first five scenarios
 * are not visible in this view (the embedded line numbers skip them), and neither is the
 * closing brace. The per-scenario comments below describe only what the assertions expect;
 * confirm the actual inputs against the full file.
 */
155 public void testWikiHeading() {
// Scenario 1 (input not visible): token "==", depth 2, empty heading text, one error.
159 assertEquals("==", new WikiTokenizer(wikiText).nextToken().token());
160 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
161 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
162 assertEquals("", new WikiTokenizer(wikiText).nextToken().headingWikiText());
163 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Scenario 2 (input not visible): token "=a", depth 1, heading text "a", two errors.
167 assertEquals("=a", new WikiTokenizer(wikiText).nextToken().token());
168 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
169 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
170 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
171 assertEquals(2, new WikiTokenizer(wikiText).nextToken().errors.size());
// Scenario 3 (input not visible): token "=a==" has more closing than opening markers; the
// expected depth is 1 (the opening count), heading text "a", one error.
174 assertEquals("=a==", new WikiTokenizer(wikiText).nextToken().token());
175 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
176 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
177 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
178 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Scenario 4 (input not visible): the first token is plain "a" and is not a heading.
181 assertEquals("a", new WikiTokenizer(wikiText).nextToken().token());
182 assertFalse(new WikiTokenizer(wikiText).nextToken().isHeading());
// Scenario 5 (input not visible): the well-formed token "=a=" gives depth 1, text "a", no errors.
185 assertEquals("=a=", new WikiTokenizer(wikiText).nextToken().token());
186 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
187 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
188 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
189 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// Scenario 6: "=" characters inside nested [[...]] and {{...}} must not end the heading; the
// heading text is everything between the outer "==" markers and no error is recorded.
191 wikiText = "==aa[[|=]] {{|={{=}} }}==";
192 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
193 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
194 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
195 assertEquals("aa[[|=]] {{|={{=}} }}", new WikiTokenizer(wikiText).nextToken().headingWikiText());
196 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
/**
 * End-to-end check: tokenizes a multi-line document that mixes plain text, HTML comments, list
 * items, links, templates, headings and mismatched brackets, then compares the resulting token
 * strings with expectedTokens, both one at a time and as a complete list.
 *
 * NOTE(review): this view is missing several lines of the method -- parts of the wikiText
 * literal, most short entries of expectedTokens and the end of that array, the declarations of
 * token and i, and the closing braces. Confirm against the full file before editing.
 */
202 public void testSimple() {
// Input document, one concatenated piece per source line.
203 final String wikiText =
205 "Hello =thad| you're <!-- not --> '''pretty''' cool '''''over''''' there." + "\n" +
207 "multi-line" + "\n" +
208 "# comment -->" + "\n" +
211 "{{template_not_in_list}}" + "\n" +
212 "# {{template_in_list}}" + "\n" +
213 "[[wikitext]]:[[wikitext]]" + "\n" + // don't want this to trigger a list
214 ": but this is a list!" + "\n" +
215 "*:* and so is this :::" + "\n" +
216 "here's [[some blah|some]] wikitext." + "\n" +
217 "here's a {{template|this has an = sign|blah=2|blah2=3|" + "\n" +
218 "blah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}} and some more text." + "\n" +
219 "== Header 2 ==" + "\n" +
220 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}" + "\n" +
221 "{{mismatched]]" + "\n" +
222 "[[mismatched}}" + "\n" +
223 "{extraterminated}}" + "\n" +
224 "[extraterminated]]" + "\n" +
225 "=== {{header-template}} ===" + "\n";
// Expected token strings in order. Among the visible entries: a multi-line HTML comment is one
// token, list lines are one token each, and a template spanning two lines is one token.
227 final String[] expectedTokens = new String[] {
249 "<!--\nmulti-line\n# comment -->",
254 "{{template_not_in_list}}",
256 "# {{template_in_list}}",
262 ": but this is a list!",
264 "*:* and so is this :::",
267 "[[some blah|some]]",
271 "{{template|this has an = sign|blah=2|blah2=3|\nblah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}}",
272 " and some more text.",
276 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}",
288 "=== {{header-template}} ===",
// Collects every token string that is produced, for the whole-list comparison at the end.
292 final List<String> actualTokens = new ArrayList<String>();
294 final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
// Pull tokens until nextToken() returns null (end of input).
297 while ((token = wikiTokenizer.nextToken()) != null) {
298 actualTokens.add(token.token());
// Prints each token quoted, with newlines escaped and a trailing comma -- the same shape as an
// expectedTokens entry (presumably so the output can be pasted back into the array).
299 System.out.println("\"" + token.token().replace("\n", "\\n") + "\",");
// Per-token check, so a mismatch is reported at the first token that differs.
300 assertEquals(expectedTokens[i++], token.token());
// Whole-list check; unlike the per-token check it also fails when tokens are missing at the end.
302 assertEquals(Arrays.asList(expectedTokens), actualTokens);
/**
 * Checks handling of HTML-like elements: a pre or math element, from its opening tag to its
 * closing tag, is returned as a single token.
 *
 * NOTE(review): tokenizer is declared three times below, so each scenario presumably sits in
 * its own brace-delimited scope; those brace lines, the declaration(s) of wikiText and the
 * closing brace are not visible in this view -- confirm against the full file.
 */
305 public void testHtml() {
// Elements embedded in plain text: the surrounding text and each element alternate as tokens,
// with whitespace preserved.
309 wikiText = " zz <pre> asdf </pre> ZZ <math> 1234 </math> XX ";
310 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
311 assertEquals(" zz ", tokenizer.nextToken().token());
312 assertEquals("<pre> asdf </pre>", tokenizer.nextToken().token());
313 assertEquals(" ZZ ", tokenizer.nextToken().token());
314 assertEquals("<math> 1234 </math>", tokenizer.nextToken().token());
315 assertEquals(" XX ", tokenizer.nextToken().token());
// An element right after a leading newline: it is the second token (nextToken() is called twice).
318 wikiText = "\n<math> 1234 </math>";
319 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
320 assertEquals("<math> 1234 </math>", tokenizer.nextToken().nextToken().token());
// A math element inside a "#" list line: the whole line, element included, is one token.
324 wikiText = "# z'' is the '''free''' variable in \"<math>\\forall x\\exists y:xy=z</math>\".''";
325 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
326 assertEquals(wikiText, tokenizer.nextToken().token());