\r
public Set<String> getIndexableTokens(final byte lang) {\r
final Set<String> result = new LinkedHashSet<String>();\r
- String text = Arrays.asList(getAllText(lang)).toString();\r
+ String text = " ";\r
+ for (final String subentry : getAllText(lang)) {\r
+ text += subentry + " ";\r
+ }\r
\r
text = text.replaceAll("fig\\.", " ");\r
text = text.replaceAll("\\{[^\\}]+}", " ");\r
text = text.replaceAll("\"-", "-");\r
text = text.replaceAll("-\"", "-");\r
- text = text.replaceAll("[\":/\\()<>\\[\\],;?!.]", " ");\r
+ text = text.replaceAll("[\"/\\()<>\\[\\],;?!.]", " ");\r
+ text = text.replaceAll("[:] ", " ");\r
+ text = text.replaceAll(" [:]", " ");\r
result.addAll(Arrays.asList(WHITESPACE.split(text)));\r
\r
text = text.replaceAll("[-]", " ");\r