sub-levels in translations.
handle word-info in English.
Italian verbs... (show conjugation, pulled from a linked place.)
+add unit test for: Errors: [Unmatched {{ error: * {{a|US}} {{IPA|[ˈfɔɹ.wɝd]]}}
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
// German handled in file.
isoToDedication.put("EL", "Greek dictionary dedicated to Noah Egge.");
isoToDedication.put("IT", "Italian dictionary dedicated to Carolina Tropini, my favorite stardust in the whole universe! Ti amo!");
- isoToDedication.put("JA", "Japanese dictionary dedicated to Akane Watanabe.");
isoToDedication.put("KO", "Korean dictionary dedicated to Ande Elwood--fall fashion und Fernsehturms!");
isoToDedication.put("PT", "Portuguese dictionary dedicated to Carlos Melo, one Tough Mudder.");
isoToDedication.put("RO", "Romanian dictionary dedicated to Radu Teodorescu.");
isoToStoplist.put("FR", "fr.txt");
final Map<String,String> isoToRegex = new LinkedHashMap<String, String>();
- isoToRegex.put("ZH", ".*Chinese.*|.*Mandarin.*|.*Cantonese.*");
+ isoToRegex.put("ZH", "Chinese|Mandarin|Cantonese");
- boolean go = false;
- isoToWikiName.clear();
+ isoToWikiName.keySet().retainAll(Arrays.asList("UK", "HR", "FI"));
+
+ boolean go = true;
+// isoToWikiName.clear();
for (final String foreignIso : isoToWikiName.keySet()) {
- if (foreignIso.equals("GA")) {
+ if (foreignIso.equals("SV")) {
go = true;
}
if (!go) {
isoToDedication.put(foreignIso, "");
}
if (!isoToRegex.containsKey(foreignIso)) {
- isoToRegex.put(foreignIso, ".*" + isoToWikiName.get(foreignIso) + ".*");
+ isoToRegex.put(foreignIso, isoToWikiName.get(foreignIso));
}
DictionaryBuilder.main(new String[] {
wiktionaryTestWithLangToEn("wiktionary.zh_en.quickdic", "ZH", "empty.txt",
"EN.data", "enwiktionary.english", "Chinese|Mandarin|Cantonese", "zh");
}
-
// German
public void testWiktionary_DE_DE() throws Exception {
"Noun|Verb|Adjective|Adverb|Pronoun|Conjunction|Interjection|" +
"Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
"Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
- "Ligature|Idiom|Phrase|" +
+ "Ligature|Idiom|Phrase|{{initialism}}|" +
// These are @deprecated:
"Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
"Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
// TODO: would also be nice...
} else if (functionName.startsWith("picdic")) {
} else if (functionName.startsWith("checktrans")) {
+ done = true;
} else if (functionName.startsWith("ttbc")) {
wikiTokenizer.nextLine();
// TODO: would be great to handle ttbc
// This line could produce an output...
if (line.contains("ich hoan dich gear")) {
- System.out.println();
+ //System.out.println();
}
// First strip the language and check whether it matches.
pairEntry.pairs.add(pair);
}
} else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")) {
- if (lastForeign != null) {
+ if (lastForeign != null && pairEntry.pairs.size() > 0) {
pairEntry.pairs.remove(pairEntry.pairs.size() - 1);
final Pair pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, otherIndexBuilder, indexedEntry), swap);
if (pair.lang1 != "--" && pair.lang1 != "--") {
pairEntry.pairs.add(pair);
}
+ lastForeign = null;
} else {
- LOG.warning("English example with no foreign: " + title + ", " + nextLine);
+ LOG.warning("TODO: English example with no foreign: " + title + ", " + nextLine);
+ // TODO: add something.
}
} else if (nextPrefix.equals("#*")) {
// Can't really index these.
if (lastUnescapedPipePos != -1) {
return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
}
+ assert start + 2 < wikiText.length() && end >= 2: wikiText;
return wikiText.substring(start + 2, end - 2);
}