]> gitweb.fperrin.net Git - DictionaryPC.git/blob - src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java
Rename enwiktionary package to wiktionary.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WiktionaryLangs.java
1 package com.hughes.android.dictionary.parser.wiktionary;
2
3 import java.util.LinkedHashMap;
4 import java.util.Map;
5 import java.util.regex.Pattern;
6
/**
 * Static lookup tables that associate upper-case language codes with the way
 * each language is identified inside a particular Wiktionary edition.
 *
 * <p>Values are written as regular-expression fragments: plain names for the
 * English and German editions (the {@code ZH} entry uses {@code |}
 * alternation), and quoted or hand-escaped section-marker templates for the
 * French and Italian editions.
 *
 * <p>All maps are {@link LinkedHashMap}s, so iteration follows the insertion
 * order below. The maps are mutable and populated once in static initializers.
 */
public class WiktionaryLangs {

  /**
   * Language code to the language name used by the English Wiktionary.
   * This same instance is registered under {@code "en"} in
   * {@link #wikiCodeToIsoCodeToWikiName}.
   */
  public static final Map<String,String> isoCodeToWikiName = new LinkedHashMap<String,String>();
  static {
    isoCodeToWikiName.put("AF", "Afrikaans");
    isoCodeToWikiName.put("SQ", "Albanian");
    isoCodeToWikiName.put("AR", "Arabic");
    isoCodeToWikiName.put("HY", "Armenian");
    isoCodeToWikiName.put("BE", "Belarusian");
    isoCodeToWikiName.put("BN", "Bengali");
    isoCodeToWikiName.put("BS", "Bosnian");
    isoCodeToWikiName.put("BG", "Bulgarian");
    isoCodeToWikiName.put("CA", "Catalan");
    isoCodeToWikiName.put("HR", "Croatian");
    isoCodeToWikiName.put("CS", "Czech");
    // Several section names are folded into one code via regex alternation.
    isoCodeToWikiName.put("ZH", "Chinese|Mandarin|Cantonese");
    isoCodeToWikiName.put("DA", "Danish");
    isoCodeToWikiName.put("NL", "Dutch");
    isoCodeToWikiName.put("EN", "English");
    isoCodeToWikiName.put("EO", "Esperanto");
    isoCodeToWikiName.put("ET", "Estonian");
    isoCodeToWikiName.put("FI", "Finnish");
    isoCodeToWikiName.put("FR", "French");
    isoCodeToWikiName.put("DE", "German");
    isoCodeToWikiName.put("EL", "Greek");
    // NOTE(review): the only lower-case, three-letter key in this table.
    isoCodeToWikiName.put("haw", "Hawaiian");
    isoCodeToWikiName.put("HE", "Hebrew");
    isoCodeToWikiName.put("HI", "Hindi");
    isoCodeToWikiName.put("HU", "Hungarian");
    isoCodeToWikiName.put("IS", "Icelandic");
    isoCodeToWikiName.put("ID", "Indonesian");
    // NOTE(review): ISO 639-1 "ga" is Irish; confirm "Gaelic" is the intended section name.
    isoCodeToWikiName.put("GA", "Gaelic");
    isoCodeToWikiName.put("IT", "Italian");
    isoCodeToWikiName.put("LA", "Latin");
    isoCodeToWikiName.put("LV", "Latvian");
    isoCodeToWikiName.put("LT", "Lithuanian");
    isoCodeToWikiName.put("JA", "Japanese");
    isoCodeToWikiName.put("KO", "Korean");
    isoCodeToWikiName.put("KU", "Kurdish");
    isoCodeToWikiName.put("MS", "Malay");
    isoCodeToWikiName.put("MI", "Maori");
    isoCodeToWikiName.put("MN", "Mongolian");
    isoCodeToWikiName.put("NE", "Nepali");
    isoCodeToWikiName.put("NO", "Norwegian");
    isoCodeToWikiName.put("FA", "Persian");
    isoCodeToWikiName.put("PL", "Polish");
    isoCodeToWikiName.put("PT", "Portuguese");
    isoCodeToWikiName.put("PA", "Punjabi");
    isoCodeToWikiName.put("RO", "Romanian");
    isoCodeToWikiName.put("RU", "Russian");
    isoCodeToWikiName.put("SA", "Sanskrit");
    isoCodeToWikiName.put("SR", "Serbian");
    isoCodeToWikiName.put("SK", "Slovak");
    isoCodeToWikiName.put("SO", "Somali");
    isoCodeToWikiName.put("ES", "Spanish");
    isoCodeToWikiName.put("SW", "Swahili");
    isoCodeToWikiName.put("SV", "Swedish");
    isoCodeToWikiName.put("TL", "Tagalog");
    isoCodeToWikiName.put("TG", "Tajik");
    isoCodeToWikiName.put("TH", "Thai");
    isoCodeToWikiName.put("BO", "Tibetan");
    isoCodeToWikiName.put("TR", "Turkish");
    isoCodeToWikiName.put("UK", "Ukrainian");
    isoCodeToWikiName.put("UR", "Urdu");
    isoCodeToWikiName.put("VI", "Vietnamese");
    // NOTE(review): ISO 639-1 for Welsh is "cy", not "ci". Key kept as-is because
    // callers outside this file may look it up; confirm and correct together.
    isoCodeToWikiName.put("CI", "Welsh");
    isoCodeToWikiName.put("YI", "Yiddish");
    isoCodeToWikiName.put("ZU", "Zulu");

    isoCodeToWikiName.put("AZ", "Azeri");
    isoCodeToWikiName.put("EU", "Basque");
    isoCodeToWikiName.put("BR", "Breton");
    // NOTE(review): ISO 639-1 for Burmese is "my"; "mr" is Marathi. Key kept as-is
    // because callers outside this file may look it up; confirm and correct together.
    isoCodeToWikiName.put("MR", "Burmese");
    isoCodeToWikiName.put("FO", "Faroese");
    isoCodeToWikiName.put("GL", "Galician");
    isoCodeToWikiName.put("KA", "Georgian");
    isoCodeToWikiName.put("HT", "Haitian Creole");
    isoCodeToWikiName.put("LB", "Luxembourgish");
    isoCodeToWikiName.put("MK", "Macedonian");
  }

  /**
   * Wiktionary edition code ({@code "en"}, {@code "de"}, {@code "fr"},
   * {@code "it"}) to that edition's table of language code to
   * language-identifying pattern.
   */
  public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
  static {
    // en: shares the English-name table defined above.
    wikiCodeToIsoCodeToWikiName.put("en", isoCodeToWikiName);

    // de: German language names.
    // egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
    final Map<String,String> deNames = new LinkedHashMap<String, String>();
    wikiCodeToIsoCodeToWikiName.put("de", deNames);
    deNames.put("DE", "Deutsch");
    deNames.put("EN", "Englisch");
    deNames.put("IT", "Italienisch");
    deNames.put("PL", "Polnisch");
    deNames.put("FR", "Französisch");
    deNames.put("EO", "Esperanto");
    deNames.put("CA", "Katalanisch");
    deNames.put("LA", "Lateinisch");
    deNames.put("CS", "Tschechisch");
    deNames.put("HU", "Ungarisch");
    deNames.put("SV", "Schwedisch");
    deNames.put("ES", "Spanisch");

    // fr: literal {{=xx=}} markers, quoted so the braces are not regex syntax.
    // egrep -o '\{\{=[a-zA-Z]+=\}\}' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
    // Markers previously listed here but left unmapped: sl, mg, hsb.
    final Map<String,String> frMarkers = new LinkedHashMap<String, String>();
    wikiCodeToIsoCodeToWikiName.put("fr", frMarkers);
    frMarkers.put("FR", Pattern.quote("{{=fr=}}"));
    frMarkers.put("RU", Pattern.quote("{{=ru=}}"));
    frMarkers.put("BG", Pattern.quote("{{=bg=}}"));  // Bulgarian
    frMarkers.put("EN", Pattern.quote("{{=en=}}"));
    frMarkers.put("LA", Pattern.quote("{{=la=}}"));
    frMarkers.put("IT", Pattern.quote("{{=it=}}"));
    frMarkers.put("EO", Pattern.quote("{{=eo=}}"));
    frMarkers.put("CS", Pattern.quote("{{=cs=}}"));  // Czech
    frMarkers.put("NL", Pattern.quote("{{=nl=}}"));  // Dutch
    frMarkers.put("ZH", Pattern.quote("{{=zh=}}"));
    frMarkers.put("JA", Pattern.quote("{{=ja=}}"));
    frMarkers.put("DE", Pattern.quote("{{=de=}}"));
    frMarkers.put("IS", Pattern.quote("{{=is=}}"));  // Icelandic
    frMarkers.put("ES", Pattern.quote("{{=es=}}"));
    frMarkers.put("UK", Pattern.quote("{{=uk=}}"));

    // it: {{-xx-}} markers.
    // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
    final Map<String,String> itMarkers = new LinkedHashMap<String, String>();
    wikiCodeToIsoCodeToWikiName.put("it", itMarkers);
    // Hand-escaped regex: the scn, nap, cal and lmo markers are accepted as IT too.
    itMarkers.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}");
    itMarkers.put("EN", Pattern.quote("{{-en-}}"));
    itMarkers.put("FR", Pattern.quote("{{-fr-}}"));
    itMarkers.put("DE", Pattern.quote("{{-de-}}"));
    itMarkers.put("ES", Pattern.quote("{{-es-}}"));
    itMarkers.put("JA", Pattern.quote("{{-ja-}}"));
    itMarkers.put("PL", Pattern.quote("{{-pl-}}"));
    itMarkers.put("NL", Pattern.quote("{{-nl-}}"));
    itMarkers.put("LV", Pattern.quote("{{-lv-}}"));
    // Latin gets its own key; it was previously stored under "LV", which
    // silently replaced the Latvian marker and left Latin unregistered.
    itMarkers.put("LA", Pattern.quote("{{-la-}}"));
    itMarkers.put("HU", Pattern.quote("{{-hu-}}"));
    // The grc marker is filed under the EL code.
    itMarkers.put("EL", Pattern.quote("{{-grc-}}"));
    itMarkers.put("SV", Pattern.quote("{{-sv-}}"));
  }

}