transliteration unit test.
//isoToWikiName.clear();
boolean go = false;
for (final String foreignIso : isoToWikiName.keySet()) {
- if (foreignIso.equals("JA")) {
+ if (foreignIso.equals("SK")) {
go = true;
}
if (!go) {
final Language zh = Language.lookup("zh");
final Transliterator transliterator = Transliterator.createFromRules("", zh.getDefaultNormalizerRules(), Transliterator.FORWARD);
- assertEquals("xie xie", transliterator.transliterate("謝謝"));
- assertEquals("xie xie", transliterator.transliterate("谢谢"));
+ assertEquals("xiexie", transliterator.transliterate("謝謝"));
+ assertEquals("xiexie", transliterator.transliterate("谢谢"));
- assertEquals("dian nao", transliterator.transliterate("電腦"));
- assertEquals("dian nao", transliterator.transliterate("电脑"));
- assertEquals("ji suan ji", transliterator.transliterate("計算機"));
- assertEquals("ji suan ji", transliterator.transliterate("计算机"));
+ assertEquals("diannao", transliterator.transliterate("電腦"));
+ assertEquals("diannao", transliterator.transliterate("电脑"));
+ assertEquals("jisuanji", transliterator.transliterate("計算機"));
+ assertEquals("jisuanji", transliterator.transliterate("计算机"));
- assertEquals("cheng jiu", transliterator.transliterate("成就"));
+ assertEquals("chengjiu", transliterator.transliterate("成就"));
}
assert isWikiLink();
// "[[.."
if (lastUnescapedPipePos != -1) {
- return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
+ return trimNewlines(wikiText.substring(lastUnescapedPipePos + 1, end - 2));
}
assert start + 2 < wikiText.length() && end >= 2: wikiText;
- return wikiText.substring(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, end - 2));
}
/**
 * Returns the destination part of the current wiki link: the text between
 * the opening "[[" (skipped via start + 2) and firstUnescapedPipePos.
 *
 * Callers must ensure the current token is a wiki link; this is checked
 * with an assertion on isWikiLink().
 *
 * @return the substring of wikiText from start + 2 up to, but not
 *         including, firstUnescapedPipePos; in the updated (+) line the
 *         substring is additionally passed through trimNewlines
 *         (presumably strips surrounding newlines -- helper not visible
 *         here, confirm). Returns null when firstUnescapedPipePos is -1.
 */
public String wikiLinkDest() {
assert isWikiLink();
// "[[.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos));
}
// firstUnescapedPipePos == -1 (apparently: no pipe in the link), so there
// is no separate destination to report.
return null;
}
"Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
"Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
"Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" +
+ "\\{\\{abbreviation\\}\\}|" +
// These are @deprecated:
"Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
"Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
} else if (headerName.equals("Translations")) {
if (pos == null) {
- LOG.warning("Translations without POS: " + title);
+ LOG.info("Translations without POS (but using anyway): " + title);
}
doTranslations(wikiTokenizer, pos);
} else if (headerName.equals("Pronunciation")) {
for (int i = 0; i < listSection.nextPrefixes.size(); ++i) {
final String nextPrefix = listSection.nextPrefixes.get(i);
final String nextLine = listSection.nextLines.get(i);
+
+ // TODO: This splitting is not sensitive to wiki code.
int dash = nextLine.indexOf("—");
int mdashLen = 7;
if (dash == -1) {
appendAndIndexWikiCallback.reset(builder, indexedEntry);
appendAndIndexWikiCallback.entryTypeName = EntryTypeName.WIKTIONARY_EXAMPLE;
appendAndIndexWikiCallback.entryTypeNameSticks = true;
- appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE);
+ try {
+ // TODO: this is a hack needed because we don't safely split on the dash.
+ appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE);
+ } catch (AssertionError e) {
+ return "--";
+ }
final String result = trim(builder.toString());
return result.length() > 0 ? result : "--";
}
displayText = ListUtil.get(args, 1, null);
}
- appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
+ if (displayText != null) {
+ appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
+ } else {
+ LOG.warning("no display text: " + wikiTokenizer.token());
+ }
final String tr = namedArgs.remove("tr");
if (tr != null) {
For next release:
-refactor wiki parsing.
"form of" to bottom
handle examples like "asdf (asdf)"
random word jump
multiword find.
dictionary update.
-{{Arab}}
???Italian verbs
pronunciation
fare {{it-verb}} {{transitive}} :: To do
-
-
-**** Wiktionary:
-
-in wiktionary
- futurismo :: futurism () (noun)
-
done:
{infl}
! Check analytics
! Upload dictionaries
font size
+refactor wiki parsing.
+{{Arab}}
\ No newline at end of file