package com.hughes.android.dictionary;\r
\r
+import java.text.Collator;\r
import java.util.Comparator;\r
import java.util.LinkedHashMap;\r
+import java.util.Locale;\r
import java.util.Map;\r
\r
-import com.hughes.util.StringUtil;\r
-\r
-public abstract class Language {\r
+public class Language {\r
\r
final String symbol;\r
- final Comparator<String> tokenComparator;\r
+ final Locale locale; /* locale passed to Collator.getInstance for both collators in the constructor */\r
+\r
+ final Collator sortCollator; /* collator configured with IDENTICAL strength; used by sortComparator */\r
+ final Comparator<String> sortComparator; /* compares textNorm()-normalized strings via sortCollator */\r
\r
- public Language(final String symbol) {\r
+ final Collator findCollator; /* collator configured with SECONDARY strength; used by findComparator */\r
+ final Comparator<String> findComparator; /* compares textNorm()-normalized strings via findCollator */\r
+\r
+ public Language(final String symbol, final Locale locale) { /* stores symbol and locale, then builds the sort and find collators and their comparators */\r
this.symbol = symbol;\r
- this.tokenComparator = new Comparator<String>() {\r
+ this.locale = locale;\r
+\r
+ this.sortCollator = Collator.getInstance(locale);\r
+ this.sortCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); /* canonically equivalent Unicode forms are decomposed before comparing */\r
+ this.sortCollator.setStrength(Collator.IDENTICAL); /* strongest setting: all differences are treated as significant */\r
+ this.sortComparator = new Comparator<String>() {\r
public int compare(final String s1, final String s2) {\r
- final String norm1 = normalizeTokenForSort(s1);\r
- final String norm2 = normalizeTokenForSort(s2);\r
- final int c = norm1.compareTo(norm2);\r
- if (c != 0) {\r
- return c;\r
- }\r
- return StringUtil.flipCase(StringUtil.reverse(s1)).compareTo(StringUtil.flipCase(StringUtil.reverse(s2)));\r
- }};\r
+ return sortCollator.compare(textNorm(s1), textNorm(s2)); /* textNorm is resolved at call time, so an overriding instance's normalization applies */\r
+ }\r
+ };\r
+\r
+ this.findCollator = Collator.getInstance(locale);\r
+ this.findCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
+ this.findCollator.setStrength(Collator.SECONDARY); /* weaker than the sort collator: tertiary differences (typically letter case, depending on the locale rules) are ignored */\r
+ this.findComparator = new Comparator<String>() {\r
+ public int compare(final String s1, final String s2) {\r
+ return findCollator.compare(textNorm(s1), textNorm(s2));\r
+ }\r
+ };\r
+\r
}\r
- \r
+\r
+ public String textNorm(final String s) { /* normalization applied to both operands before collation; the default returns s unchanged and individual Language instances may override it */\r
+ return s;\r
+ }\r
+\r
@Override\r
public String toString() { /* a Language is printed as its symbol */\r
return symbol;\r
}\r
\r
- abstract String normalizeTokenForSort(final String token);\r
+ // ----------------------------------------------------------------\r
\r
+ public static final Language EN = new Language("EN", Locale.ENGLISH); /* English: Locale.ENGLISH collation with the default (identity) textNorm */\r
\r
- // ----------------------------------------------------------------\r
- \r
- static final String normalizeTokenForSort(final String token, final boolean vowelETranslation) {\r
- final StringBuilder result = new StringBuilder();\r
- for (int i = 0; i < token.length(); ++i) {\r
- Character c = token.charAt(i);\r
- c = Character.toLowerCase(c);\r
- // only check for lowercase 'e' in subsequent position means don't treat acronyms as umlauted: SAE.\r
- if (vowelETranslation && (c == 'a' || c == 'o' || c == 'u') && i + 1 < token.length() && token.charAt(i + 1) == 'e') {\r
- if (c == 'a') {\r
- result.append('ä');\r
- } else if (c == 'o') {\r
- result.append('ö');\r
- } else if (c == 'u') {\r
- result.append('ü');\r
+ public static final Language DE = new Language("DE", Locale.GERMAN) { /* German: Locale.GERMAN collation plus a textNorm that rewrites vowel+e digraphs as umlauts */\r
+ @Override\r
+ public String textNorm(String token) { /* returns token with ae, oe, ue, Ae, Oe, Ue replaced by the matching umlaut; returns the same String when no such pair occurs */\r
+ boolean sub = false; /* becomes true once a lowercase 'e' is found directly after a, o or u (either case) */\r
+ for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(\r
+ 'e', ePos + 1)) { /* the search starts at index 1 so that ePos - 1 is always a valid index */\r
+ final char pre = Character.toLowerCase(token.charAt(ePos - 1));\r
+ if (pre == 'a' || pre == 'o' || pre == 'u') {\r
+ sub = true;\r
+ break;\r
}\r
- ++i;\r
- } else if (c >= 'a' && c <= 'z' || c >= '0' && c <= '9') {\r
- result.append(c);\r
- } else if (c == 'ß') {\r
- result.append("ss");\r
- } else if (c == 'ä') {\r
- result.append(c);\r
- } else if (c == 'ö') {\r
- result.append(c);\r
- } else if (c == 'ü') {\r
- result.append(c);\r
}\r
- }\r
- return result.toString();\r
- }\r
+ if (!sub) {\r
+ return token; /* no candidate pair found: skip the six replacement passes */\r
+ }\r
+ token = token.replaceAll("ae", "ä"); /* NOTE(review): the patterns are plain literals, so String.replace should give the same result without regex processing; confirm before changing */\r
+ token = token.replaceAll("oe", "ö");\r
+ token = token.replaceAll("ue", "ü");\r
\r
- public static final Language EN = new Language("EN") {\r
- @Override\r
- public String normalizeTokenForSort(final String token) {\r
- return Language.normalizeTokenForSort(token, false);\r
- }\r
- };\r
- \r
- public static final Language DE = new Language("DE") {\r
- @Override\r
- String normalizeTokenForSort(final String token) {\r
- return Language.normalizeTokenForSort(token, true);\r
+ token = token.replaceAll("Ae", "Ä"); /* capitalized first letter only; an uppercase E (as in AE) is never matched */\r
+ token = token.replaceAll("Oe", "Ö");\r
+ token = token.replaceAll("Ue", "Ü");\r
+ return token;\r
}\r
};\r
\r
symbolToLangauge.put(EN.symbol, EN);\r
symbolToLangauge.put(DE.symbol, DE);\r
}\r
- \r
+\r
static Language lookup(final String symbol) { /* returns the Language registered under symbol in symbolToLangauge; presumably null for an unknown symbol if that is a java.util.Map (declaration not visible here, confirm) */\r
return symbolToLangauge.get(symbol);\r
}\r
\r
-\r
}\r