- public String textNorm(String token, final boolean toLower) {\r
- if (toLower) {\r
- token = token.toLowerCase();\r
- }\r
- boolean sub = false;\r
- // This is meant to be fast: occurrences of ae, oe, ue are probably rare.\r
- for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(\r
- 'e', ePos + 1)) {\r
- final char pre = Character.toLowerCase(token.charAt(ePos - 1));\r
- if (pre == 'a' || pre == 'o' || pre == 'u') {\r
- sub = true;\r
- break;\r
- }\r
- }\r
- if (!sub) {\r
- return token;\r
- }\r
- \r
- token = token.replaceAll("ae", "ä");\r
- token = token.replaceAll("oe", "ö");\r
- token = token.replaceAll("ue", "ü");\r
-\r
- token = token.replaceAll("Ae", "Ä");\r
- token = token.replaceAll("Oe", "Ö");\r
- token = token.replaceAll("Ue", "Ü");\r
-\r
- token = token.replaceAll("AE", "Ä");\r
- token = token.replaceAll("OE", "Ö");\r
- token = token.replaceAll("UE", "Ü");\r
- \r
- return token; \r