package com.hughes.android.dictionary.engine;\r
\r
-import java.util.Comparator;\r
import java.util.LinkedHashMap;\r
import java.util.Locale;\r
import java.util.Map;\r
\r
// Language code and locale of this Language; both are assigned in the constructor.
final String symbol;\r
final Locale locale;\r
-\r
- Collator sortCollator;\r
- final Comparator<String> sortComparator;\r
-\r
- private Collator findCollator;\r
- final Comparator<String> findComparator;\r
+ \r
// Diff note: the '-' lines above drop the separate sort/find collators and their
// comparators; the '+' line below introduces one collator field in their place.
// NOTE(review): no import for Collator is visible in this excerpt - confirm which
// Collator class is in scope.
+ final Collator collator;\r
\r
// Builds a Language for the given locale: symbol is taken from locale.getLanguage(),
// and the new instance is registered in symbolToLangauge under the lower-cased symbol.
// Diff note: the '+' lines create the collator eagerly for the locale and set its
// strength to IDENTICAL (no setDecomposition call is made here). The '-' lines remove
// the two anonymous Comparators, which compared textNorm(s, false) of both arguments
// through getSortCollator() / getFindCollator() respectively.
public Language(final Locale locale) {\r
this.symbol = locale.getLanguage();\r
this.locale = locale;\r
+ this.collator = Collator.getInstance(locale);\r
+ this.collator.setStrength(Collator.IDENTICAL);\r
\r
- this.sortComparator = new Comparator<String>() {\r
- public int compare(final String s1, final String s2) {\r
- return getSortCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
- }\r
- };\r
-\r
- this.findComparator = new Comparator<String>() {\r
- public int compare(final String s1, final String s2) {\r
- return getFindCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
- }\r
- };\r
- \r
// NOTE(review): toLowerCase() without a Locale argument uses the JVM default locale.
// NOTE(review): 'this' is stored in the map before the constructor has returned;
// symbolToLangauge is declared outside this excerpt (spelling kept as referenced).
symbolToLangauge.put(symbol.toLowerCase(), this);\r
}\r
\r
// Diff note: removed method. It returned s lower-cased when toLower was true and
// returned s unchanged otherwise.
- public String textNorm(final String s, final boolean toLower) {\r
- return toLower ? s.toLowerCase() : s;\r
- }\r
-\r
// String form of this Language.
// NOTE(review): the body contains two consecutive return statements and neither line
// carries a '-' or '+' marker. As written only the first one can execute, and a Java
// compiler rejects the second as an unreachable statement. This looks like a replaced
// line whose diff markers were lost - confirm which return is the intended one.
@Override\r
public String toString() {\r
return locale.toString();\r
return symbol;\r
}\r
\r
// Diff note: the '-' lines remove getFindCollator(), a synchronized accessor that
// created its Collator on first call (canonical decomposition, SECONDARY strength).
// The '+' lines replace it with getCollator(), which simply returns the collator
// field. The closing brace at the end is shared by the removed and the added method.
- public synchronized Collator getFindCollator() {\r
- if (findCollator == null) {\r
- findCollator = Collator.getInstance(locale);\r
- findCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
- findCollator.setStrength(Collator.SECONDARY);\r
- }\r
- return findCollator;\r
+ public Collator getCollator() {\r
+ return collator;\r
}\r
-\r
// Diff note: the '-' lines remove getSortCollator(), a synchronized accessor that
// created its Collator on first call (canonical decomposition, IDENTICAL strength).
// The '+' lines add getDefaultNormalizerRules(), which returns a fixed rule string.
// The closing brace at the end is shared by the removed and the added method.
// NOTE(review): the string looks like transliterator rule syntax (to Latin, lower-case,
// decompose, remove nonspacing marks, recompose) - confirm against the code that
// consumes it; that consumer is not visible in this excerpt.
- public synchronized Collator getSortCollator() {\r
- if (sortCollator == null) {\r
- sortCollator = Collator.getInstance(locale);\r
- sortCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
- sortCollator.setStrength(Collator.IDENTICAL);\r
- }\r
- return sortCollator;\r
+ \r
+ public String getDefaultNormalizerRules() {\r
+ return ":: Any-Latin; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";\r
}\r
-\r
// ----------------------------------------------------------------\r
\r
// Shared Language instance for Locale.ENGLISH; uses the base class behaviour unchanged.
public static final Language en = new Language(Locale.ENGLISH);\r
\r
// Shared Language instance for Locale.GERMAN, declared as an anonymous subclass so it
// can override one method of Language.
// Diff note: the '-' lines remove the textNorm override, which optionally lower-cased
// the token and, only when some 'e' was preceded by a/o/u, rewrote ae/oe/ue (in the
// lower, capitalised and upper-case spellings) to the matching umlaut. The '+' lines
// add a getDefaultNormalizerRules override that returns a German-specific rule string.
public static final Language de = new Language(Locale.GERMAN) {\r
@Override\r
- public String textNorm(String token, final boolean toLower) {\r
- if (toLower) {\r
- token = token.toLowerCase();\r
- }\r
- boolean sub = false;\r
- // This is meant to be fast: occurrences of ae, oe, ue are probably rare.\r
- for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(\r
- 'e', ePos + 1)) {\r
- final char pre = Character.toLowerCase(token.charAt(ePos - 1));\r
- if (pre == 'a' || pre == 'o' || pre == 'u') {\r
- sub = true;\r
- break;\r
- }\r
- }\r
- if (!sub) {\r
- return token;\r
- }\r
- \r
- token = token.replaceAll("ae", "ä");\r
- token = token.replaceAll("oe", "ö");\r
- token = token.replaceAll("ue", "ü");\r
-\r
- token = token.replaceAll("Ae", "Ä");\r
- token = token.replaceAll("Oe", "Ö");\r
- token = token.replaceAll("Ue", "Ü");\r
-\r
- token = token.replaceAll("AE", "Ä");\r
- token = token.replaceAll("OE", "Ö");\r
- token = token.replaceAll("UE", "Ü");\r
- \r
- return token; \r
+ public String getDefaultNormalizerRules() {\r
// NOTE(review): unlike the removed textNorm, these rules always lower-case, list only
// the lower-case ae/oe/ue spellings, and additionally map sharp s to 'ss'. Presumably
// the leading ':: Lower;' makes the upper-case spellings unnecessary - confirm against
// the rule engine that consumes this string.
+ return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";\r
}\r
};\r
\r