transliteration unit test.
//isoToWikiName.clear();
boolean go = false;
for (final String foreignIso : isoToWikiName.keySet()) {
//isoToWikiName.clear();
boolean go = false;
for (final String foreignIso : isoToWikiName.keySet()) {
- if (foreignIso.equals("JA")) {
+ if (foreignIso.equals("SK")) {
final Language zh = Language.lookup("zh");
final Transliterator transliterator = Transliterator.createFromRules("", zh.getDefaultNormalizerRules(), Transliterator.FORWARD);
final Language zh = Language.lookup("zh");
final Transliterator transliterator = Transliterator.createFromRules("", zh.getDefaultNormalizerRules(), Transliterator.FORWARD);
- assertEquals("xie xie", transliterator.transliterate("謝謝"));
- assertEquals("xie xie", transliterator.transliterate("谢谢"));
+ assertEquals("xiexie", transliterator.transliterate("謝謝"));
+ assertEquals("xiexie", transliterator.transliterate("谢谢"));
- assertEquals("dian nao", transliterator.transliterate("電腦"));
- assertEquals("dian nao", transliterator.transliterate("电脑"));
- assertEquals("ji suan ji", transliterator.transliterate("計算機"));
- assertEquals("ji suan ji", transliterator.transliterate("计算机"));
+ assertEquals("diannao", transliterator.transliterate("電腦"));
+ assertEquals("diannao", transliterator.transliterate("电脑"));
+ assertEquals("jisuanji", transliterator.transliterate("計算機"));
+ assertEquals("jisuanji", transliterator.transliterate("计算机"));
- assertEquals("cheng jiu", transliterator.transliterate("成就"));
+ assertEquals("chengjiu", transliterator.transliterate("成就"));
assert isWikiLink();
// "[[.."
if (lastUnescapedPipePos != -1) {
assert isWikiLink();
// "[[.."
if (lastUnescapedPipePos != -1) {
- return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
+ return trimNewlines(wikiText.substring(lastUnescapedPipePos + 1, end - 2));
}
assert start + 2 < wikiText.length() && end >= 2: wikiText;
}
assert start + 2 < wikiText.length() && end >= 2: wikiText;
- return wikiText.substring(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, end - 2));
}
public String wikiLinkDest() {
assert isWikiLink();
// "[[.."
if (firstUnescapedPipePos != -1) {
}
public String wikiLinkDest() {
assert isWikiLink();
// "[[.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos));
"Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
"Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
"Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" +
"Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
"Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
"Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" +
+ "\\{\\{abbreviation\\}\\}|" +
// These are @deprecated:
"Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
"Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
// These are @deprecated:
"Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
"Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
} else if (headerName.equals("Translations")) {
if (pos == null) {
} else if (headerName.equals("Translations")) {
if (pos == null) {
- LOG.warning("Translations without POS: " + title);
+ LOG.info("Translations without POS (but using anyway): " + title);
}
doTranslations(wikiTokenizer, pos);
} else if (headerName.equals("Pronunciation")) {
}
doTranslations(wikiTokenizer, pos);
} else if (headerName.equals("Pronunciation")) {
for (int i = 0; i < listSection.nextPrefixes.size(); ++i) {
final String nextPrefix = listSection.nextPrefixes.get(i);
final String nextLine = listSection.nextLines.get(i);
for (int i = 0; i < listSection.nextPrefixes.size(); ++i) {
final String nextPrefix = listSection.nextPrefixes.get(i);
final String nextLine = listSection.nextLines.get(i);
+
+ // TODO: This splitting is not sensitive to wiki code.
int dash = nextLine.indexOf("&mdash;");
int mdashLen = 7;
if (dash == -1) {
int dash = nextLine.indexOf("&mdash;");
int mdashLen = 7;
if (dash == -1) {
appendAndIndexWikiCallback.reset(builder, indexedEntry);
appendAndIndexWikiCallback.entryTypeName = EntryTypeName.WIKTIONARY_EXAMPLE;
appendAndIndexWikiCallback.entryTypeNameSticks = true;
appendAndIndexWikiCallback.reset(builder, indexedEntry);
appendAndIndexWikiCallback.entryTypeName = EntryTypeName.WIKTIONARY_EXAMPLE;
appendAndIndexWikiCallback.entryTypeNameSticks = true;
- appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE);
+ try {
+ // TODO: this is a hack: because we don't safely split on the dash, dispatch can hit an assertion on the example text; fall back to "--" when it does.
+ appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE);
+ } catch (AssertionError e) {
+ return "--";
+ }
final String result = trim(builder.toString());
return result.length() > 0 ? result : "--";
}
final String result = trim(builder.toString());
return result.length() > 0 ? result : "--";
}
displayText = ListUtil.get(args, 1, null);
}
displayText = ListUtil.get(args, 1, null);
}
- appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
+ if (displayText != null) {
+ appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
+ } else {
+ LOG.warning("no display text: " + wikiTokenizer.token());
+ }
final String tr = namedArgs.remove("tr");
if (tr != null) {
final String tr = namedArgs.remove("tr");
if (tr != null) {
"form of" to bottom
handle examples like "asdf (asdf)"
random word jump
multiword find.
dictionary update.
"form of" to bottom
handle examples like "asdf (asdf)"
random word jump
multiword find.
dictionary update.
???italian verbs
pronunciation
???italian verbs
pronunciation
fare {{it-verb}} {{transitive}} :: To do
fare {{it-verb}} {{transitive}} :: To do
-
-
-**** Wiktionary:
-
-in wiktionary
- futurismo :: futurism () (noun)
-
! Check analytics
! Upload dics
font size
! Check analytics
! Upload dics
font size
+refactor wiki parsing.
+{{Arab}}
\ No newline at end of file
\ No newline at end of file