-
-
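- // Now build the EN-<foreignIso> enwiktionary dictionaries, one per key of isoToWikiName.
- // NOTE(review): the 'go' flag below skips every language iterated before "SL" -- confirm this is still intended.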
-
-// isoToWikiName.keySet().retainAll(Arrays.asList("UK", "HR", "FI"));
- //isoToWikiName.clear();
- boolean go = false;
- for (final String foreignIso : isoToWikiName.keySet()) {
- if (foreignIso.equals("SL")) {
- go = true;
- }
- if (!go) {
- continue;
- }
-
- final String dictFile = String.format("%s/EN-%s_enwiktionary.quickdic", OUTPUTS, foreignIso);
- System.out.println("building dictFile: " + dictFile);
-
- if (!isoToStoplist.containsKey(foreignIso)) {
- isoToStoplist.put(foreignIso, "empty.txt");
- }
- if (!isoToDedication.containsKey(foreignIso)) {
- isoToDedication.put(foreignIso, "");
- }
- if (!isoToRegex.containsKey(foreignIso)) {
- isoToRegex.put(foreignIso, isoToWikiName.get(foreignIso));
- }
-
- DictionaryBuilder.main(new String[] {
- String.format("--dictOut=%s", dictFile),
- String.format("--lang1=EN"),
- String.format("--lang2=%s", foreignIso),
- String.format("--lang1Stoplist=%s", STOPLISTS + isoToStoplist.get("EN")),
- String.format("--lang2Stoplist=%s", STOPLISTS + isoToStoplist.get(foreignIso)),
- String.format("--dictInfo=(EN)Wikitionary-based EN-%s dictionary.\n\n%s", foreignIso, isoToDedication.get(foreignIso)),
-
- "--input2=" + INPUTS + "wikiSplit/en/" + foreignIso + ".data",
- "--input2Name=enwiktionary." + foreignIso,
- "--input2Format=enwiktionary",
- "--input2WiktionaryType=EnForeign",
- "--input2LangPattern=" + isoToRegex.get(foreignIso),
- "--input2LangCodePattern=" + foreignIso.toLowerCase(),
- "--input2EnIndex=1",
-
- "--input3=" + INPUTS + "wikiSplit/en/EN.data",
- "--input3Name=enwiktionary.english",
- "--input3Format=enwiktionary",
- "--input3WiktionaryType=EnToTranslation",
- "--input3LangPattern=" + isoToRegex.get(foreignIso),
- "--input3LangCodePattern=" + foreignIso.toLowerCase(),
- "--input3EnIndex=1",
-
- });
-
- } // foreignIso
-
- // Special case: German-English, with DE as lang1 and the Chemnitz word list (input4) added to the enwiktionary inputs.
-
- final String dictFile = String.format("%s/DE-EN_chemnitz_enwiktionary.quickdic", OUTPUTS);
- DictionaryBuilder.main(new String[] {
- "--dictOut=" + dictFile,
- "--lang1=DE",
- "--lang2=EN",
- String.format("--lang1Stoplist=%s", STOPLISTS + "de.txt"),
- String.format("--lang2Stoplist=%s", STOPLISTS + "en.txt"),
- "--dictInfo=@" + INPUTS + "de-en_chemnitz_enwiktionary.info",
-
- "--input4=" + INPUTS + "de-en_chemnitz.txt",
- "--input4Name=chemnitz",
- "--input4Charset=UTF8",
- "--input4Format=chemnitz",
-
- "--input2=" + INPUTS + "wikiSplit/en/DE.data",
- "--input2Name=enwiktionary.DE",
- "--input2Format=enwiktionary",
- "--input2WiktionaryType=EnForeign",
- "--input2LangPattern=German",
- "--input2LangCodePattern=de",
- "--input2EnIndex=2",
-
- "--input3=" + INPUTS + "wikiSplit/en/EN.data",
- "--input3Name=enwiktionary.english",
- "--input3Format=enwiktionary",
- "--input3WiktionaryType=EnToTranslation",
- "--input3LangPattern=German",
- "--input3LangCodePattern=de",
- "--input3EnIndex=2",
- });
-
- }