1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.List;
21 import junit.framework.TestCase;
23 public class WikiTokenizerTest extends TestCase {
25 public void testWikiLink() {
// Exercises [[...]] wiki-link tokenization. Each assertion builds a fresh
// WikiTokenizer, so every nextToken() call below starts from the beginning
// of wikiText.
// NOTE(review): original lines 26-28 are missing from this dump — presumably
// `String wikiText = "[[abc]]";` given the first four assertions; confirm
// against version control.
29 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
30 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
31 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
// No '|' in the link, so there is no separate destination.
32 assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// [[dest|text]]: text after the pipe is the display text, before it the dest.
34 wikiText = "[[abc|def]]";
35 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
36 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
37 assertEquals("def", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
38 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Multiple pipes with an embedded template: the LAST segment is the link
// text, and nested {{...}} inside it must not terminate the link.
40 wikiText = "[[abc|def|ghi{{a|=2}}p]]";
41 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
42 assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
43 assertEquals("ghi{{a|=2}}p", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
44 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
// Two adjacent links tokenize separately; chained nextToken() advances to
// the second one.
46 wikiText = "[[abc]][[def]]";
47 assertEquals("[[abc]]", new WikiTokenizer(wikiText).nextToken().token());
48 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
49 assertEquals("def", new WikiTokenizer(wikiText).nextToken().nextToken().wikiLinkText());
53 public void testWikiList() {
// Exercises '*'-prefixed list-item tokenization.
// NOTE(review): the declaration of `wikiText` (original lines 54-55) is
// missing from this dump.
// Unbalanced ''/''' markup inside a list item is still consumed as one token.
56 wikiText = "* This is ''bold''' asdf.";
57 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
// A list item ends at the newline; the newline itself is a separate token.
59 wikiText = "* {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}\nasdf\n";
60 assertEquals("* {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}", new WikiTokenizer(wikiText).nextToken().token());
61 assertTrue(new WikiTokenizer(wikiText).nextToken().isListItem());
62 assertEquals("\n", new WikiTokenizer(wikiText).nextToken().nextToken().token());
// NOTE(review): \u2028 (Unicode LINE SEPARATOR) inside the link; the
// expectation below uses "\n", so the tokenizer presumably normalizes
// U+2028 to '\n' — confirm in WikiTokenizer. The string continuation
// (original line 66, likely `"asdf]]";`) is missing from this dump.
65 wikiText = "* [[asdf|\u2028" +
67 assertEquals("* [[asdf|\n" +
68 "asdf]]", new WikiTokenizer(wikiText).nextToken().token());
69 assertTrue(new WikiTokenizer(wikiText).nextToken().isListItem());
73 public void testFunction() {
// Exercises {{...}} template ("function") tokenization: name, positional
// args, and named (key=value) args.
// An unterminated "{{" at end of input must not crash the tokenizer; the
// dangling function token has an empty name.
77 WikiTokenizer wt = new WikiTokenizer("'''Προστατευόμενη Ονομασία Προέλευσης''', \"Protected Designation of Origin\" {{");
78 while (wt.nextToken() != null) {
79 if (wt.isFunction()) {
80 assertEquals("", wt.functionName());
// NOTE(review): original lines 81-85 (loop-closing braces and likely
// `wikiText = "{{abc}}";`) are missing from this dump.
// Bare template: name only, no args of either kind.
86 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
87 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
88 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
89 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionPositionArgs().size());
90 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// One positional arg, no named args.
92 wikiText = "{{abc|def}}";
93 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
94 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
95 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
96 assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
97 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// A '|' nested inside [[...]] must NOT split the positional arg.
99 wikiText = "{{abc|d[[|]]ef|ghi}}";
100 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
101 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
102 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
103 assertEquals(Arrays.asList("d[[|]]ef", "ghi"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
104 assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
// Mixed positional and named args, including a nested template both as a
// named-arg value (arg3) and as a positional arg ({{d}}).
106 wikiText = "{{abc|arg1=101|ghi|arg2=202|arg3={{n1|n2=7|n3}}|{{d}}}}";
107 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
108 assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
109 assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
110 assertEquals(Arrays.asList("ghi", "{{d}}"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
111 assertEquals(3, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
112 assertEquals("101", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg1"));
113 assertEquals("202", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg2"));
// Nested template value is preserved verbatim, not flattened.
114 assertEquals("{{n1|n2=7|n3}}", new WikiTokenizer(wikiText).nextToken().functionNamedArgs().get("arg3"));
// Malformed template (single closing brace): token ends at the lone '}'.
116 wikiText = "{{gloss|asdf}\nAsdf\n\n";
117 assertEquals("{{gloss|asdf}", new WikiTokenizer(wikiText).nextToken().token());
// A large multi-line quote-book template with nested {{w|...}} templates
// must come back as a single token spanning all the newlines.
119 wikiText = "#*{{quote-book|year=1960|author={{w|P. G. Wodehouse}}\n" +
120 "|title={{w|Jeeves in the Offing}}\n" +
121 "|section=chapter XI\n" +
122 "|passage=“I'm sorely beset, Jeeves. Do you recall telling me once about someone who told somebody he could tell him something which would make him think a bit? Knitted socks and porcu\n" +
123 "pines entered into it, I remember.” “I think you may be referring to the ghost of the father of Hamlet, Prince of Denmark, sir. Addressing his son, he said ‘I could a tale unfold whos\n" +
124 "e lightest word would harrow up thy soul, freeze thy young blood, make thy two eyes, like stars, start from their spheres, thy knotted and combined locks to part and each particular h\n" +
125 "air to stand on end like quills upon the fretful '''porpentine'''.’ ” “That's right. Locks, of course, not socks. Odd that he should have said '''porpentine''' when he meant porc\n" +
126 "upine. Slip of the tongue, no doubt, as so often happens with ghosts.”}}";
127 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
132 public void testReturn() {
// Exercises returnToLineStart(): after rewinding, the tokenizer must replay
// the current line's tokens from the start of that line, then continue
// normally past it.
135 wikiText = "hello\n=Heading=\nhello2";
137 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
139 assertEquals("hello", tokenizer.nextToken().token());
140 tokenizer.returnToLineStart();
// Rewind replays "hello" ...
141 assertEquals("hello", tokenizer.nextToken().token());
142 assertEquals("\n", tokenizer.nextToken().token());
143 tokenizer.returnToLineStart();
// ... and rewinding after consuming the newline still returns to the start
// of the SAME line ("hello"), i.e. the newline belongs to that line.
144 assertEquals("hello", tokenizer.nextToken().token());
145 assertEquals("\n", tokenizer.nextToken().token());
// Same replay behavior on a heading line.
147 assertEquals("=Heading=", tokenizer.nextToken().token());
148 tokenizer.returnToLineStart();
149 assertEquals("=Heading=", tokenizer.nextToken().token());
150 assertEquals("\n", tokenizer.nextToken().token());
151 tokenizer.returnToLineStart();
152 assertEquals("=Heading=", tokenizer.nextToken().token());
153 assertEquals("\n", tokenizer.nextToken().token());
// Final line has no trailing newline; after exhaustion a rewind replays it
// and exhaustion (null) is reached again.
155 assertEquals("hello2", tokenizer.nextToken().token());
156 assertEquals(null, tokenizer.nextToken());
157 tokenizer.returnToLineStart();
158 assertEquals("hello2", tokenizer.nextToken().token());
159 assertEquals(null, tokenizer.nextToken());
164 public void testWikiHeading() {
// Exercises =Heading= tokenization: headingDepth() counts the '='s,
// headingWikiText() is the inner text, and `errors` records malformed
// markup.
// NOTE(review): the `wikiText` assignments preceding each assertion group
// (original lines 165-167, 173-175, 181-182, 188-189, 192-193) are missing
// from this dump; infer the inputs from the expected tokens below.
// Empty heading "==": depth 2, empty text, one error reported.
168 assertEquals("==", new WikiTokenizer(wikiText).nextToken().token());
169 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
170 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
171 assertEquals("", new WikiTokenizer(wikiText).nextToken().headingWikiText());
172 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Unterminated heading "=a": still a heading, but two errors.
176 assertEquals("=a", new WikiTokenizer(wikiText).nextToken().token());
177 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
178 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
179 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
180 assertEquals(2, new WikiTokenizer(wikiText).nextToken().errors.size());
// Mismatched delimiters "=a==": depth taken from the opening run (1),
// one error for the extra closing '='.
183 assertEquals("=a==", new WikiTokenizer(wikiText).nextToken().token());
184 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
185 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
186 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
187 assertEquals(1, new WikiTokenizer(wikiText).nextToken().errors.size());
// Plain text is not a heading (presumably "=a=" not at line start).
190 assertEquals("a", new WikiTokenizer(wikiText).nextToken().token());
191 assertFalse(new WikiTokenizer(wikiText).nextToken().isHeading());
// Well-formed "=a=": no errors.
194 assertEquals("=a=", new WikiTokenizer(wikiText).nextToken().token());
195 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
196 assertEquals(1, new WikiTokenizer(wikiText).nextToken().headingDepth());
197 assertEquals("a", new WikiTokenizer(wikiText).nextToken().headingWikiText());
198 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
// '=' characters nested inside [[...]] and {{...}} must not terminate the
// heading; the whole line is one well-formed depth-2 heading.
200 wikiText = "==aa[[|=]] {{|={{=}} }}==";
201 assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
202 assertTrue(new WikiTokenizer(wikiText).nextToken().isHeading());
203 assertEquals(2, new WikiTokenizer(wikiText).nextToken().headingDepth());
204 assertEquals("aa[[|=]] {{|={{=}} }}", new WikiTokenizer(wikiText).nextToken().headingWikiText());
205 assertEquals(0, new WikiTokenizer(wikiText).nextToken().errors.size());
211 public void testSimple() {
// End-to-end smoke test: tokenizes a kitchen-sink document (comments,
// markup, lists, links, templates, headings, mismatched/extra-terminated
// brackets) and compares the full token stream against expectedTokens.
212 final String wikiText =
214 "Hello =thad| you're <!-- not --> '''pretty''' cool '''''over''''' there." + "\n" +
216 "multi-line" + "\n" +
217 "# comment -->" + "\n" +
220 "{{template_not_in_list}}" + "\n" +
221 "# {{template_in_list}}" + "\n" +
222 "[[wikitext]]:[[wikitext]]" + "\n" + // don't want this to trigger a list
223 ": but this is a list!" + "\n" +
224 "*:* and so is this :::" + "\n" +
225 "here's [[some blah|some]] wikitext." + "\n" +
226 "here's a {{template|this has an = sign|blah=2|blah2=3|" + "\n" +
227 "blah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}} and some more text." + "\n" +
228 "== Header 2 ==" + "\n" +
229 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}" + "\n" +
230 "{{mismatched]]" + "\n" +
231 "[[mismatched}}" + "\n" +
232 "{extraterminated}}" + "\n" +
233 "[extraterminated]]" + "\n" +
234 "=== {{header-template}} ===" + "\n";
// NOTE(review): most entries of this array (and parts of wikiText above)
// are missing from this dump — the embedded original line numbers jump
// (236→258→263...). Only the surviving entries are shown here.
236 final String[] expectedTokens = new String[] {
258 "<!--\nmulti-line\n# comment -->",
263 "{{template_not_in_list}}",
265 "# {{template_in_list}}",
271 ": but this is a list!",
273 "*:* and so is this :::",
276 "[[some blah|some]]",
280 "{{template|this has an = sign|blah=2|blah2=3|\nblah3=3,[[asdf]|[asdf asdf]|[asdf asdf asdf]],blah4=4}}",
281 " and some more text.",
285 "{{some-func|blah={{nested-func|n2}}|blah2=asdf}}",
297 "=== {{header-template}} ===",
// Drain the tokenizer, checking each token in order against expectedTokens
// (the println echoes the stream to ease updating the array by hand).
301 final List<String> actualTokens = new ArrayList<String>();
303 final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
306 while ((token = wikiTokenizer.nextToken()) != null) {
307 actualTokens.add(token.token());
308 System.out.println("\"" + token.token().replace("\n", "\\n") + "\",");
309 assertEquals(expectedTokens[i++], token.token());
// Redundant with the per-token assert, but also verifies total token count.
311 assertEquals(Arrays.asList(expectedTokens), actualTokens);
314 public void testHtml() {
// Exercises verbatim HTML-ish regions: <pre> and <math> elements must be
// returned as single tokens with their contents untouched.
// NOTE(review): this method continues past the end of this dump, and the
// three `final WikiTokenizer tokenizer` declarations below (L-dump 319,
// 328, 334) only compile if the missing lines put each case in its own
// { } scope — confirm against the full file.
318 wikiText = " zz <pre> asdf </pre> ZZ <math> 1234 </math> XX ";
319 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
// Surrounding plain-text runs and the tagged regions alternate as tokens.
320 assertEquals(" zz ", tokenizer.nextToken().token());
321 assertEquals("<pre> asdf </pre>", tokenizer.nextToken().token());
322 assertEquals(" ZZ ", tokenizer.nextToken().token());
323 assertEquals("<math> 1234 </math>", tokenizer.nextToken().token());
324 assertEquals(" XX ", tokenizer.nextToken().token());
// A leading newline is its own token; the <math> region follows it.
327 wikiText = "\n<math> 1234 </math>";
328 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
329 assertEquals("<math> 1234 </math>", tokenizer.nextToken().nextToken().token());
// Markup characters inside <math> must not break list-item tokenization:
// the whole line is one token.
333 wikiText = "# z'' is the '''free''' variable in \"<math>\\forall x\\exists y:xy=z</math>\".''";
334 final WikiTokenizer tokenizer = new WikiTokenizer(wikiText);
335 assertEquals(wikiText, tokenizer.nextToken().token());