1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.wiktionary;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Static lookup tables that associate language codes with the section names or
 * section templates a given Wiktionary edition uses for that language.
 *
 * <p>The values look like regular-expression fragments: some contain
 * {@code |} alternations, some are produced by {@link Pattern#quote(String)},
 * and one is a hand-written pattern. How they are matched is decided by
 * callers that are not part of this class.
 *
 * <p>Both maps are public and mutable; insertion order is preserved because
 * they are {@link LinkedHashMap}s.
 */
public class WiktionaryLangs {

  /**
   * Language code to English-Wiktionary language name(s). This same map
   * instance is registered under the wiki code {@code "en"} in
   * {@link #wikiCodeToIsoCodeToWikiName}.
   */
  public static final Map<String,String> isoCodeToWikiName = new LinkedHashMap<String,String>();
  static {
    isoCodeToWikiName.put("AF", "Afrikaans");
    isoCodeToWikiName.put("SQ", "Albanian");
    isoCodeToWikiName.put("AR", "Arabic");
    isoCodeToWikiName.put("HY", "Armenian");
    isoCodeToWikiName.put("BE", "Belarusian");
    isoCodeToWikiName.put("BN", "Bengali");
    isoCodeToWikiName.put("BS", "Bosnian");
    isoCodeToWikiName.put("BG", "Bulgarian");
    isoCodeToWikiName.put("CA", "Catalan");
    isoCodeToWikiName.put("HR", "Croatian");
    isoCodeToWikiName.put("CS", "Czech");
    isoCodeToWikiName.put("ZH", "Chinese|Mandarin|Cantonese");
    isoCodeToWikiName.put("DA", "Danish");
    isoCodeToWikiName.put("NL", "Dutch");
    isoCodeToWikiName.put("EN", "English");
    isoCodeToWikiName.put("EO", "Esperanto");
    isoCodeToWikiName.put("ET", "Estonian");
    isoCodeToWikiName.put("FI", "Finnish");
    isoCodeToWikiName.put("FR", "French");
    isoCodeToWikiName.put("DE", "German");
    isoCodeToWikiName.put("EL", "Greek");
    // NOTE(review): the only key that is lower-case and three letters long;
    // left unchanged because callers may look it up by this exact spelling.
    isoCodeToWikiName.put("haw", "Hawaiian");
    isoCodeToWikiName.put("HE", "Hebrew");
    isoCodeToWikiName.put("HI", "Hindi");
    isoCodeToWikiName.put("HU", "Hungarian");
    isoCodeToWikiName.put("IS", "Icelandic");
    isoCodeToWikiName.put("ID", "Indonesian");
    isoCodeToWikiName.put("GA", "Irish");
    isoCodeToWikiName.put("GD", "Gaelic");
    isoCodeToWikiName.put("IT", "Italian");
    isoCodeToWikiName.put("LA", "Latin");
    isoCodeToWikiName.put("LV", "Latvian");
    isoCodeToWikiName.put("LT", "Lithuanian");
    isoCodeToWikiName.put("JA", "Japanese");
    isoCodeToWikiName.put("KO", "Korean");
    isoCodeToWikiName.put("KU", "Kurdish");
    isoCodeToWikiName.put("LO", "Lao");
    isoCodeToWikiName.put("MS", "Malay");
    isoCodeToWikiName.put("ML", "Malayalam");
    isoCodeToWikiName.put("MI", "Maori");
    isoCodeToWikiName.put("MN", "Mongolian");
    isoCodeToWikiName.put("NE", "Nepali");
    isoCodeToWikiName.put("NO", "Norwegian");
    isoCodeToWikiName.put("FA", "Persian");
    isoCodeToWikiName.put("PL", "Polish");
    isoCodeToWikiName.put("PT", "Portuguese");
    isoCodeToWikiName.put("PA", "Punjabi");
    isoCodeToWikiName.put("RO", "Romanian");
    isoCodeToWikiName.put("RU", "Russian");
    isoCodeToWikiName.put("SA", "Sanskrit");
    isoCodeToWikiName.put("SR", "Serbian");
    isoCodeToWikiName.put("SK", "Slovak");
    isoCodeToWikiName.put("SL", "Slovene|Slovenian");
    isoCodeToWikiName.put("SO", "Somali");
    isoCodeToWikiName.put("ES", "Spanish");
    isoCodeToWikiName.put("SW", "Swahili");
    isoCodeToWikiName.put("SV", "Swedish");
    isoCodeToWikiName.put("TL", "Tagalog");
    isoCodeToWikiName.put("TG", "Tajik");
    isoCodeToWikiName.put("TH", "Thai");
    isoCodeToWikiName.put("BO", "Tibetan");
    isoCodeToWikiName.put("TR", "Turkish");
    isoCodeToWikiName.put("UK", "Ukrainian");
    isoCodeToWikiName.put("UR", "Urdu");
    isoCodeToWikiName.put("VI", "Vietnamese");
    // ISO 639-1 for Welsh is "cy" (this entry was previously keyed "CI").
    isoCodeToWikiName.put("CY", "Welsh");
    isoCodeToWikiName.put("YI", "Yiddish");
    isoCodeToWikiName.put("ZU", "Zulu");

    isoCodeToWikiName.put("AZ", "Azeri");
    isoCodeToWikiName.put("EU", "Basque");
    isoCodeToWikiName.put("BR", "Breton");
    // ISO 639-1 for Burmese is "my"; "mr" (the previous key) is Marathi.
    isoCodeToWikiName.put("MY", "Burmese");
    isoCodeToWikiName.put("FO", "Faroese");
    isoCodeToWikiName.put("GL", "Galician");
    isoCodeToWikiName.put("KA", "Georgian");
    isoCodeToWikiName.put("HT", "Haitian Creole");
    isoCodeToWikiName.put("LB", "Luxembourgish");
    isoCodeToWikiName.put("MK", "Macedonian");
  }

  /**
   * Wiktionary edition code ({@code "en"}, {@code "de"}, {@code "fr"},
   * {@code "it"}) to that edition's language-code table.
   */
  public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
  static {
    // The English table is the shared static map itself, not a copy.
    wikiCodeToIsoCodeToWikiName.put("en", isoCodeToWikiName);

    // One distinctly named local per edition, so nothing shadows the static
    // field isoCodeToWikiName used above.

    // egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
    final Map<String,String> deNames = new LinkedHashMap<String, String>();
    wikiCodeToIsoCodeToWikiName.put("de", deNames);
    deNames.put("DE", "Deutsch");
    deNames.put("EN", "Englisch");
    deNames.put("IT", "Italienisch");
    deNames.put("PL", "Polnisch");
    deNames.put("FR", "Französisch");
    deNames.put("EO", "Esperanto");
    deNames.put("CA", "Katalanisch");
    deNames.put("LA", "Lateinisch");
    deNames.put("CS", "Tschechisch");
    deNames.put("HU", "Ungarisch");
    deNames.put("SV", "Schwedisch");
    deNames.put("ES", "Spanisch");

    // egrep -o '\{\{=[a-zA-Z]+=\}\}' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
    final Map<String,String> frNames = new LinkedHashMap<String, String>();
    wikiCodeToIsoCodeToWikiName.put("fr", frNames);
    frNames.put("FR", Pattern.quote("{{=fr=}}"));
    frNames.put("RU", Pattern.quote("{{=ru=}}"));
    frNames.put("BG", Pattern.quote("{{=bg=}}"));  // Bulgarian
    frNames.put("EN", Pattern.quote("{{=en=}}"));
    //frNames.put("", Pattern.quote("{{=sl=}}"));
    frNames.put("LA", Pattern.quote("{{=la=}}"));
    frNames.put("IT", Pattern.quote("{{=it=}}"));
    frNames.put("EO", Pattern.quote("{{=eo=}}"));
    frNames.put("CS", Pattern.quote("{{=cs=}}"));  // Czech
    frNames.put("NL", Pattern.quote("{{=nl=}}"));  // Dutch
    //frNames.put("", Pattern.quote("{{=mg=}}"));
    //frNames.put("", Pattern.quote("{{=hsb=}}"));
    frNames.put("ZH", Pattern.quote("{{=zh=}}"));
    frNames.put("JA", Pattern.quote("{{=ja=}}"));
    frNames.put("DE", Pattern.quote("{{=de=}}"));
    frNames.put("IS", Pattern.quote("{{=is=}}"));  // Icelandic
    frNames.put("ES", Pattern.quote("{{=es=}}"));
    frNames.put("UK", Pattern.quote("{{=uk=}}"));

    // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
    final Map<String,String> itNames = new LinkedHashMap<String, String>();
    wikiCodeToIsoCodeToWikiName.put("it", itNames);
    // Hand-written pattern (not Pattern.quote) so that several template codes
    // all map to "IT".
    itNames.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}");  // scn, nap, cal, lmo
    itNames.put("EN", Pattern.quote("{{-en-}}"));
    itNames.put("FR", Pattern.quote("{{-fr-}}"));
    itNames.put("DE", Pattern.quote("{{-de-}}"));
    itNames.put("ES", Pattern.quote("{{-es-}}"));
    itNames.put("JA", Pattern.quote("{{-ja-}}"));
    itNames.put("PL", Pattern.quote("{{-pl-}}"));
    itNames.put("NL", Pattern.quote("{{-nl-}}"));
    itNames.put("LV", Pattern.quote("{{-lv-}}"));
    itNames.put("LA", Pattern.quote("{{-la-}}"));
    itNames.put("HU", Pattern.quote("{{-hu-}}"));
    // NOTE(review): "grc" is the ISO 639 code for Ancient Greek while the key
    // is "EL" (Modern Greek) - confirm this pairing is intended.
    itNames.put("EL", Pattern.quote("{{-grc-}}"));
    itNames.put("SV", Pattern.quote("{{-sv-}}"));
  }
}