X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FDictionaryBuilderMain.java;h=57e76cc7d435acc8999535fe097cc68661be2380;hb=58f90bc7be44db5f61d02527ced3cac01863b076;hp=b2980409b039d85079c0a0b833752a97cd1ed248;hpb=0cde0a508bc65074c94b408e9f74b01aca9b8b29;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java index b298040..57e76cc 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java @@ -37,17 +37,17 @@ public class DictionaryBuilderMain extends TestCase { // Build the non EN ones. static final String[][] nonEnPairs = new String[][] { - /* {"EN"}, {"DE"}, {"IT"}, - {"FR"}, + // This one takes a really long time, and the result is too big for code.google.com + //{"FR"}, // The 3 I use most: {"IT", "EN" }, {"DE", "EN" }, {"DE", "IT" }, - + {"AR", "DE" }, {"AR", "ES" }, {"AR", "FR" }, @@ -56,7 +56,7 @@ public class DictionaryBuilderMain extends TestCase { {"AR", "JA" }, {"AR", "RU" }, {"AR", "TR" }, // Turkish - {"AR", "ZH" }, + {"AR", "cmn" }, {"DE", "AR" }, {"DE", "FR" }, @@ -75,7 +75,7 @@ public class DictionaryBuilderMain extends TestCase { {"DE", "RU" }, {"DE", "SV" }, // Swedish {"DE", "TR" }, // Turkish - {"DE", "ZH" }, + {"DE", "cmn" }, {"DE", "TA" }, // Tamil {"ES", "RU" }, // Spanish-Russian @@ -90,7 +90,7 @@ public class DictionaryBuilderMain extends TestCase { {"FR", "NL" }, // Dutch {"FR", "RU" }, {"FR", "TR" }, // Turkish - {"FR", "ZH" }, + {"FR", "cmn" }, {"FR", "EL" }, {"IT", "DE" }, @@ -106,20 +106,19 @@ public class DictionaryBuilderMain extends TestCase { {"IT", "RU" }, {"IT", "SV" }, {"IT", "TR" }, // Turkish - {"IT", "ZH" }, + {"IT", "cmn" }, - {"JA", "ZH" }, + {"JA", "cmn" }, {"JA", "AR" }, {"JA", "KO" }, - {"ZH", "AR" }, - {"ZH", "DE" }, - {"ZH", "ES" }, - {"ZH", "FR" }, - {"ZH", "IT" }, - {"ZH", "KO" }, + {"cmn", "AR" }, + {"cmn", "DE" }, + {"cmn", "ES" }, + {"cmn", "FR" }, + {"cmn", "IT" }, + {"cmn", "KO" }, - {"NO", "SV" }, {"NO", "FI" }, {"FI", "SV" }, @@ -135,30 +134,28 @@ public class DictionaryBuilderMain extends TestCase { {"FA", "SV" }, // Persian, Swedish, by request. {"NL", "PL" }, // Dutch, Polish, by request. - */ - }; static final Map isoToDedication = new LinkedHashMap(); static { - isoToDedication.put("AF", "Afrikaans dictionary dedicated to Heiko and Mariëtte Horn."); - isoToDedication.put("HR", "Croatian dictionary dedicated to Ines Viskic and Miro Kresonja."); - isoToDedication.put("NL", "Dutch dictionary dedicated to Mike LeBeau."); - // German handled in file. - isoToDedication.put("EL", "Greek dictionary dedicated to Noah Egge."); - isoToDedication.put("IT", "Italian dictionary dedicated to Carolina Tropini, my favorite stardust in the whole universe! Ti amo!"); - isoToDedication.put("KO", "Korean dictionary dedicated to Ande Elwood--fall fashion und Fernsehturms!"); - isoToDedication.put("PT", "Portuguese dictionary dedicated to Carlos Melo, one Tough Mudder."); - isoToDedication.put("RO", "Romanian dictionary dedicated to Radu Teodorescu."); - isoToDedication.put("RU", "Russian dictionary dedicated to Maxim Aronin--best friend always!."); - isoToDedication.put("SR", "Serbian dictionary dedicated to Filip Crnogorac--thanks for the honey."); - isoToDedication.put("ES", "Spanish dictionary made especially for Carolina Tropini! <3 XoXoXXXXX!"); - isoToDedication.put("SV", "Swedish dictionary dedicated to Kajsa Palmblad--björn kramar!"); + isoToDedication.put("AF", "Wiktionary-based Afrikaans dictionary dedicated to Heiko and Mariëtte Horn."); + isoToDedication.put("HR", "Wiktionary-based Croatian dictionary dedicated to Ines Viskic and Miro Kresonja."); + isoToDedication.put("NL", "Wiktionary-based Dutch dictionary dedicated to Mike LeBeau."); + isoToDedication.put("DE", "@data/inputs/de-en_dedication.txt"); + isoToDedication.put("EL", "Wiktionary-based Greek dictionary dedicated to Noah Egge."); + isoToDedication.put("IT", "Wiktionary-based Italian dictionary dedicated to Carolina Tropini, my favorite stardust in the whole universe! Ti amo!"); + isoToDedication.put("KO", "Wiktionary-based Korean dictionary dedicated to Ande Elwood--fall fashion und Fernsehturms!"); + isoToDedication.put("PT", "Wiktionary-based Portuguese dictionary dedicated to Carlos Melo, one Tough Mudder."); + isoToDedication.put("RO", "Wiktionary-based Romanian dictionary dedicated to Radu Teodorescu."); + isoToDedication.put("RU", "Wiktionary-based Russian dictionary dedicated to Maxim Aronin--best friend always!."); + isoToDedication.put("SR", "Wiktionary-based Serbian dictionary dedicated to Filip Crnogorac--thanks for the honey!"); + isoToDedication.put("ES", "Wiktionary-based Spanish dictionary made especially for Carolina Tropini! <3 XoXoXXXXX!"); + isoToDedication.put("SV", "Wiktionary-based Swedish dictionary dedicated to Kajsa Palmblad--björn kramar!"); } - private static String getDedication(String iso) { - return isoToDedication.containsKey(iso) ? "\n\n" + isoToDedication.get(iso) : ""; + private static String getEnDictionaryInfo(String iso) { + return isoToDedication.containsKey(iso) ? isoToDedication.get(iso) : String.format("Wiktionary-based %s dictionary.", iso); } static final Map isoToStoplist = new LinkedHashMap(); @@ -208,7 +205,9 @@ public class DictionaryBuilderMain extends TestCase { System.err.println("Can't read file: " + wikiSplitFile); } - if (lang1.equals("EN")) { + if (lang1.equals("EN") && !lang1.equals("EN")) { + // Add a parser that tries to use the definitions. This is + // not very pretty yet. result.add(String.format("--input%d=%s/wikiSplit/en/%s.data", i, INPUTS, lang1)); result.add(String.format("--input%dName=ENWiktionary.%s", i, lang1)) ; result.add(String.format("--input%dFormat=enwiktionary", i)); @@ -241,7 +240,7 @@ public class DictionaryBuilderMain extends TestCase { final String foreignIso = getOtherLang(pair, wikitionaryLang); final String wikiSplitFile = String.format("%s/wikiSplit/%s/%s.data", INPUTS, wikitionaryLang.toLowerCase(), foreignIso); if (!new File(wikiSplitFile).canRead()) { - System.err.println("Can't read file: " + wikiSplitFile); + System.err.println("WARNING: Can't read file: " + wikiSplitFile); continue; } result.add(String.format("--input%d=%s", i, wikiSplitFile)); @@ -257,17 +256,13 @@ public class DictionaryBuilderMain extends TestCase { // Deal with the pairs where one is English. if (Arrays.asList(pair).contains("EN")) { final String foreignIso = getOtherLang(pair, "EN"); - String foreignRegex = WiktionaryLangs.isoCodeToEnWikiName.get(foreignIso); - if (foreignIso.equals("ZH")) { - // HACK: The missing "e" prevents a full match, causing "Cantonese" to be appended to the entries. - foreignRegex = "Chinese|Mandarin|Cantones"; - } result.add(String.format("--lang1=%s", lang1)); result.add(String.format("--lang2=%s", lang2)); - result.add(String.format("--dictInfo=(EN)Wikitionary-based EN-%s dictionary.%s", foreignIso, getDedication(foreignIso))); + result.add(String.format("--dictInfo=%s", getEnDictionaryInfo(foreignIso))); + // Foreign section. result.add(String.format("--input%d=%s/wikiSplit/en/%s.data", i, INPUTS, foreignIso)); result.add(String.format("--input%dName=ENWiktionary.%s", i, foreignIso)) ; result.add(String.format("--input%dFormat=enwiktionary", i)); @@ -277,6 +272,7 @@ public class DictionaryBuilderMain extends TestCase { result.add(String.format("--input%dEnIndex=%d", i, Arrays.asList(pair).indexOf("EN") + 1)); ++i; + // Translation section. result.add(String.format("--input%d=%swikiSplit/en/EN.data", i, INPUTS)); result.add(String.format("--input%dName=enwiktionary.english", i)); result.add(String.format("--input%dFormat=enwiktionary", i)); @@ -306,6 +302,8 @@ public class DictionaryBuilderMain extends TestCase { result.add(String.format("--input%dLangPattern1=%s", i, lang1)); result.add(String.format("--input%dLangPattern2=%s", i, lang2)); ++i; + + // TODO: Could use FR translation section here too. } return result; @@ -318,11 +316,14 @@ public class DictionaryBuilderMain extends TestCase { allPairs.addAll(Arrays.asList(nonEnPairs)); // Add all the EN-XX pairs. for (final String isoCode : WiktionaryLangs.isoCodeToEnWikiName.keySet()) { - allPairs.add(new String[] {"EN", isoCode}); + if (!isoCode.equals("EN")) { + allPairs.add(new String[] {"EN", isoCode}); + } } final Set> done = new LinkedHashSet>(); + boolean go = true; for (final String[] pair : allPairs) { Arrays.sort(pair); final List pairList = Arrays.asList(pair); @@ -331,8 +332,14 @@ public class DictionaryBuilderMain extends TestCase { } done.add(pairList); - if (!pairList.contains("EN") && !pairList.contains("EL")) { - //continue; + if (pairList.contains("EN") && pairList.contains("DE")) { + go = true; + } else { + go = false; + } + + if (!go) { + continue; } DictionaryBuilder.main(getMainArgs(pair).toArray(new String[0]));