import com.ibm.icu.text.Collator;
-final public class CollatorWrapper {
-static public Collator getInstance() {
+public final class CollatorWrapper {
+public static Collator getInstance() {
return Collator.getInstance();
}
-static public Collator getInstanceStrengthIdentical(Locale l) {
+public static Collator getInstanceStrengthIdentical(Locale l) {
Collator c = Collator.getInstance(l);
c.setStrength(Collator.IDENTICAL);
return c;
package com.hughes.android.dictionary;
-final public class DictionaryApplication {
- final static public boolean USE_COLLATOR = true;
+public final class DictionaryApplication {
+ public static final boolean USE_COLLATOR = true;
}
package com.hughes.android.dictionary;
-final public class FeatureConfig {
- final static public boolean enableWrite = true;
+public final class FeatureConfig {
+ public static final boolean enableWrite = true;
}
// dictionaryInfoOut.println("# LANG_1\t%LANG_2\tFILENAME\tVERSION_CODE\tFILESIZE\tNUM_MAIN_WORDS_1\tNUM_MAIN_WORDS_2\tNUM_ALL_WORDS_1\tNUM_ALL_WORDS_2");
final File[] files = dictDir.listFiles();
- final List<String> dictNames = new ArrayList<String>();
+ final List<String> dictNames = new ArrayList<>();
Arrays.sort(files);
for (final File dictFile : files) {
if (!dictFile.getName().endsWith("quickdic")) {
// Find the stats.
System.out.println("Stats...");
- final List<String> indexNames = new ArrayList<String>();
+ final List<String> indexNames = new ArrayList<>();
for (final IndexInfo indexInfo : dictionaryInfo.indexInfos) {
indexNames.add(indexInfo.shortName);
}
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
-import java.nio.channels.FileChannel;
public class ConvertToV6 {
public static void main(final String[] args) throws IOException {
public class DictionaryBuilder {
public final Dictionary dictionary;
- public final List<IndexBuilder> indexBuilders = new ArrayList<IndexBuilder>();
+ public final List<IndexBuilder> indexBuilders = new ArrayList<>();
public DictionaryBuilder(final String dictInfoString, final Language lang0, final Language lang1, final String normalizerRules1, final String normalizerRules2, final Set<String> lang1Stoplist, final Set<String> lang2Stoplist) {
dictionary = new Dictionary(dictInfoString);
}
}
- public static void main(final String[] args) throws IOException, ParserConfigurationException, SAXException {
+ public static void main(final String[] args) throws IOException {
System.out.println("Running with arguments:");
for (final String arg : args) {
System.out.println(arg);
lang2 = null;
}
- final Set<String> lang1Stoplist = new LinkedHashSet<String>();
- final Set<String> lang2Stoplist = new LinkedHashSet<String>();
+ final Set<String> lang1Stoplist = new LinkedHashSet<>();
+ final Set<String> lang2Stoplist = new LinkedHashSet<>();
final String lang1StoplistFile = keyValueArgs.remove("lang1Stoplist");
final String lang2StoplistFile = keyValueArgs.remove("lang2Stoplist");
if (lang1StoplistFile != null) {
final int pageLimit = Integer.parseInt(pageLimitString);
final EntrySource entrySource = new EntrySource(dictionaryBuilder.dictionary.sources.size(), inputName, 0);
- System.out.println("");
+ System.out.println();
String inputFormat = keyValueArgs.remove(prefix + "Format");
if ("tab_separated".equals(inputFormat)) {
static final String OUTPUTS = "data/outputs/";
// Build the non EN ones.
- static final String[][] nonEnPairs = new String[][] {
+ static final String[][] nonEnPairs = {
{"EN"},
{"DE"},
{"IT"},
- static final Map<String,String> isoToDedication = new LinkedHashMap<String, String>();
+ static final Map<String,String> isoToDedication = new LinkedHashMap<>();
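+ // Dedication text per ISO language code; unknown codes fall back to a generic Wiktionary-based description.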
static {
isoToDedication.put("AF", "Wiktionary-based Afrikaans dictionary dedicated to Heiko and Mariëtte Horn.");
isoToDedication.put("HR", "Wiktionary-based Croatian dictionary dedicated to Ines Viskic and Miro Kresonja.");
return isoToDedication.containsKey(iso) ? isoToDedication.get(iso) : String.format("Wiktionary-based %s dictionary.", iso);
}
- static final Map<String,String> isoToStoplist = new LinkedHashMap<String, String>();
+ static final Map<String,String> isoToStoplist = new LinkedHashMap<>();
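+ // Stoplist file per ISO language code; getStoplist falls back to empty.txt.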
static {
isoToStoplist.put("DE", "de.txt");
isoToStoplist.put("EN", "en.txt");
isoToStoplist.put("FR", "fr.txt");
}
private static String getStoplist(String iso) {
- return isoToStoplist.containsKey(iso) ? isoToStoplist.get(iso) : "empty.txt";
+ return isoToStoplist.getOrDefault(iso, "empty.txt");
}
static String getOtherLang(final String[] pair, final String first) {
}
static List<String> getMainArgs(final String[] pair) {
- final List<String> result = new ArrayList<String>();
+ final List<String> result = new ArrayList<>();
int i = 1;
public static void main(final String[] args) throws Exception {
- final List<String[]> allPairs = new ArrayList<String[]>();
-
- allPairs.addAll(Arrays.asList(nonEnPairs));
+ final List<String[]> allPairs = new ArrayList<>(Arrays.asList(nonEnPairs));
// Add all the EN-XX pairs.
for (final String isoCode : WiktionaryLangs.isoCodeToEnWikiName.keySet()) {
if (!isoCode.equals("EN")) {
}
- final Set<List<String>> done = new LinkedHashSet<List<String>>();
+ final Set<List<String>> done = new LinkedHashSet<>();
boolean go = true;
for (final String[] pair : allPairs) {
Arrays.sort(pair);
}
done.add(pairList);
- if (pairList.contains("EN") && pairList.contains("DE")) {
- go = true;
- } else {
- go = false;
- }
+ go = pairList.contains("EN") && pairList.contains("DE");
if (!go) {
continue;
package com.hughes.android.dictionary.engine;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
}
private void checkGolden(final String dictName, final File dictFile)
- throws IOException, FileNotFoundException {
+ throws IOException {
// Check it once:
assertFilesEqual(GOLDENS + dictName + ".text", dictFile.getPath() + ".text");
assertTrue(rows.toString(), rows.size() > 0);
assertTrue(rows.get(0).toString().startsWith("come mai@"));
assertTrue(rows.get(0) instanceof TokenRow);
- assertTrue(!((TokenRow)rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
+ assertFalse(((TokenRow) rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
}
{
assertTrue(rows.toString(), rows.size() > 0);
assertTrue(rows.get(0).toString().startsWith("buon giorno@"));
assertTrue(rows.get(0) instanceof TokenRow);
- assertTrue(!((TokenRow)rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
+ assertFalse(((TokenRow) rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
}
{
// Check that search in lowercase works.
assertSearchResult("Alibi", "Alibi", deIndex.findInsertionPoint("alib", new AtomicBoolean(false)));
- System.out.println(deIndex.findInsertionPoint("alib", new AtomicBoolean(false)).toString());
+ System.out.println(deIndex.findInsertionPoint("alib", new AtomicBoolean(false)));
raf.close();
}
import java.util.List;
import java.util.zip.GZIPOutputStream;
-import com.hughes.android.dictionary.engine.Dictionary;
-
public class DictionaryV6Writer {
private final Dictionary d;
outb.writeBoolean(hasNormalizedForm);
if (hasNormalizedForm) outb.writeUTF(e.normalizedToken());
writev6HtmlIndices(outb, dataPos + outb.size(),
- prunedRowIdx == null ? e.htmlEntries : Collections.<HtmlEntry>emptyList());
+ prunedRowIdx == null ? e.htmlEntries : Collections.emptyList());
}
dataPos += outb.size();
outb.flush();
IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language, final String normalizerRules, final Set<String> stoplist, final boolean swapPairEntries) {
this.dictionaryBuilder = dictionaryBuilder;
index = new Index(dictionaryBuilder.dictionary, shortName, longName, language, normalizerRules, swapPairEntries, stoplist);
- tokenToData = new TreeMap<String, TokenData>(index.getSortComparator());
+ tokenToData = new TreeMap<>(index.getSortComparator());
this.stoplist = stoplist;
}
public void build() {
- final Set<IndexedEntry> tokenIndexedEntries = new HashSet<IndexedEntry>();
+ final Set<IndexedEntry> tokenIndexedEntries = new HashSet<>();
final List<RowBase> rows = index.rows;
index.mainTokenCount = 0;
for (final TokenData tokenData : tokenToData.values()) {
}
}
- final List<IndexEntry> entriesSortedByNumRows = new ArrayList<IndexEntry>(index.sortedIndexEntries);
- Collections.sort(entriesSortedByNumRows, new Comparator<IndexEntry>() {
- @Override
- public int compare(IndexEntry object1, IndexEntry object2) {
- return object2.numRows - object1.numRows;
- }
- });
+ final List<IndexEntry> entriesSortedByNumRows = new ArrayList<>(index.sortedIndexEntries);
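+ // Sort by descending numRows so the most common tokens are reported first below.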
+ entriesSortedByNumRows.sort((object1, object2) -> object2.numRows - object1.numRows);
System.out.println("Most common tokens:");
for (int i = 0; i < 50 && i < entriesSortedByNumRows.size(); ++i) {
System.out.println(" " + entriesSortedByNumRows.get(i));
public static class TokenData {
final String token;
- final Map<EntryTypeName, List<IndexedEntry>> typeToEntries = new EnumMap<EntryTypeName, List<IndexedEntry>>(EntryTypeName.class);
+ final Map<EntryTypeName, List<IndexedEntry>> typeToEntries = new EnumMap<>(EntryTypeName.class);
public boolean hasMainEntry = false;
- public List<HtmlEntry> htmlEntries = new ArrayList<HtmlEntry>();
+ public final List<HtmlEntry> htmlEntries = new ArrayList<>();
TokenData(final String token) {
assert token.equals(token.trim());
tokenData.hasMainEntry = true;
}
if (entries == null) {
- entries = new ArrayList<IndexedEntry>();
+ entries = new ArrayList<>();
tokenData.typeToEntries.put(entryTypeName, entries);
}
return entries;
assertEquals("hulle", normalizer.transform("Hulle"));
- final List<String> sorted = new ArrayList<String>(words);
+ final List<String> sorted = new ArrayList<>(words);
// Collections.shuffle(shuffled, new Random(0));
- Collections.sort(sorted, comparator);
- System.out.println(sorted.toString());
+ sorted.sort(comparator);
+ System.out.println(sorted);
for (int i = 0; i < words.size(); ++i) {
System.out.println(words.get(i) + "\t" + sorted.get(i));
assertEquals(words.get(i), sorted.get(i));
"preppy",
"preprocess");
- final List<String> sorted = new ArrayList<String>(words);
+ final List<String> sorted = new ArrayList<>(words);
final NormalizeComparator comparator = new NormalizeComparator(normalizer, Language.en.getCollator(), 7);
- Collections.sort(sorted, comparator);
+ sorted.sort(comparator);
for (int i = 0; i < words.size(); ++i) {
if (i > 0) {
assertTrue(comparator.compare(words.get(i-1), words.get(i)) < 0);
public void testEnWiktionaryNames() {
- final Set<String> enLangs = new LinkedHashSet<String>(WiktionaryLangs.isoCodeToEnWikiName.keySet());
- final List<String> names = new ArrayList<String>();
+ final Set<String> enLangs = new LinkedHashSet<>(WiktionaryLangs.isoCodeToEnWikiName.keySet());
+ final List<String> names = new ArrayList<>();
for (final String code : WiktionaryLangs.isoCodeToEnWikiName.keySet()) {
names.add(WiktionaryLangs.isoCodeToEnWikiName.get(code));
enLangs.add(code.toLowerCase());
try {
pipe = new PipedOutputStream(this);
} catch (IOException e) {}
- new Thread(new Runnable() {
- public void run() {
- try {
- int read;
- final byte buffer[] = new byte[BLOCK_SIZE];
- while ((read = in.read(buffer)) > 0)
- {
- pipe.write(buffer, 0, read);
- pipe.flush();
- }
- } catch (IOException e) {}
- try {
- pipe.close();
- } catch (IOException e) {}
- }
+ new Thread(() -> {
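+ // Background thread: copy everything from the source stream into the pipe.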
+ try {
+ int read;
+ final byte[] buffer = new byte[BLOCK_SIZE];
+ while ((read = in.read(buffer)) > 0)
+ {
+ pipe.write(buffer, 0, read);
+ pipe.flush();
+ }
+ } catch (IOException e) {}
+ try {
+ pipe.close();
+ } catch (IOException e) {}
}).start();
}
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
static final Matcher headingStart = Pattern.compile("^(=+)[^=].*$", Pattern.MULTILINE).matcher("");
static final Matcher startSpanish = Pattern.compile("\\{\\{ES(\\|[^{}=]*)?}}").matcher("");
- final Map<String,List<Selector>> pathToSelectors = new LinkedHashMap<String, List<Selector>>();
+ final Map<String,List<Selector>> pathToSelectors = new LinkedHashMap<>();
List<Selector> currentSelectors = null;
StringBuilder titleBuilder;
List<Selector> selectors;
for (final String code : WiktionaryLangs.wikiCodeToIsoCodeToWikiName.keySet()) {
//if (!code.equals("fr")) {continue;}
- selectors = new ArrayList<WiktionarySplitter.Selector>();
+ selectors = new ArrayList<>();
pathToSelectors.put(String.format("data/inputs/%swiktionary-pages-articles.xml", code), selectors);
for (final Map.Entry<String, String> entry : WiktionaryLangs.wikiCodeToIsoCodeToWikiName.get(code).entrySet()) {
final String dir = String.format("data/inputs/wikiSplit/%s", code);
parser.parse(new BufferedInputStream(in), this);
}
} catch (Exception e) {
- System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString() + " of file " + pathToSelectorsEntry.getKey());
+ System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder + " of file " + pathToSelectorsEntry.getKey());
throw e;
}
String lastPageTitle = null;
int pageCount = 0;
- Matcher endPatterns[] = new Matcher[100];
+ final Matcher[] endPatterns = new Matcher[100];
private Matcher getEndPattern(int depth) {
if (endPatterns[depth] == null)
// For Translingual entries just store the text for later
// use in the per-language sections
- if (heading.indexOf("Translingual") != -1) {
+ if (heading.contains("Translingual")) {
// Find end.
final int depth = headingStart.group(1).length();
final Matcher endMatcher = getEndPattern(depth).reset(text);
sectionText.charAt(dummy_end + 1) == '\n') ++dummy_end;
sectionText = sectionText.substring(dummy_end);
}
- if (heading.indexOf("Japanese") == -1) sectionText += translingual;
+ if (!heading.contains("Japanese")) sectionText += translingual;
final Section section = new Section(title, heading, sectionText);
try {
selector.out.writeUTF(section.title);
selector.out.writeUTF(section.heading);
- final byte[] bytes = section.text.getBytes("UTF8");
+ final byte[] bytes = section.text.getBytes(StandardCharsets.UTF_8);
selector.out.writeInt(bytes.length);
selector.out.write(bytes);
} catch (IOException e) {
}
@Override
- public void characters(char[] ch, int start, int length) throws SAXException {
+ public void characters(char[] ch, int start, int length) {
if (currentBuilder != null) {
currentBuilder.append(ch, start, length);
}
}
@Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
+ public void endElement(String uri, String localName, String qName) {
currentBuilder = null;
if ("page".equals(qName)) {
endPage();
try {
pipe = new PipedInputStream(this, size);
buffer = new byte[BLOCK_SIZE];
- writeThread = new Thread(new Runnable() {
- public void run() {
- int read;
- try {
- while ((read = pipe.read(buffer)) > 0)
- {
- out.write(buffer, 0, read);
- out.flush();
- }
- } catch (IOException e) {
- System.out.println("Error writing to file " + e);
+ writeThread = new Thread(() -> {
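+ // Background thread: drain the pipe into the underlying output stream, then close it.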
+ int read;
+ try {
+ while ((read = pipe.read(buffer)) > 0)
+ {
+ out.write(buffer, 0, read);
+ out.flush();
}
- try {
- out.close();
- } catch (IOException e) {}
+ } catch (IOException e) {
+ System.out.println("Error writing to file " + e);
}
+ try {
+ out.close();
+ } catch (IOException e) {}
});
writeThread.start();
} catch (IOException e) {}
Thread writeThread;
OutputStream out;
PipedInputStream pipe;
- byte buffer[];
+ byte[] buffer;
}
return field;
}
- public static final Set<String> tokenize(final String text, final Pattern pattern) {
+ public static Set<String> tokenize(final String text, final Pattern pattern) {
final String[] split = pattern.split(text);
- final Set<String> result = new LinkedHashSet<String>(Arrays.asList(split));
+ final Set<String> result = new LinkedHashSet<>(Arrays.asList(split));
result.remove("");
return result;
}
package com.hughes.android.dictionary.parser;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class WikiTokenizer {
- public static interface Callback {
+ public interface Callback {
void onPlainText(final String text);
void onMarkup(WikiTokenizer wikiTokenizer);
void onWikiLink(WikiTokenizer wikiTokenizer);
int end = 0;
int start = -1;
- final List<String> errors = new ArrayList<String>();
- final List<String> tokenStack = new ArrayList<String>();
+ final List<String> errors = new ArrayList<>();
+ final List<String> tokenStack = new ArrayList<>();
private String headingWikiText;
private int lastUnescapedPipePos;
private int lastUnescapedEqualsPos;
- private final List<String> positionArgs = new ArrayList<String>();
- private final Map<String,String> namedArgs = new LinkedHashMap<String,String>();
+ private final List<String> positionArgs = new ArrayList<>();
+ private final Map<String,String> namedArgs = new LinkedHashMap<>();
public WikiTokenizer(final String wikiText) {
return token;
}
- final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "<!--" };
+ static final String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "<!--" };
private int escapedFindEnd(final int start, final String toFind) {
assert tokenStack.isEmpty();
int end = start;
int firstNewline = -1;
int[] nextMatch = new int[patterns.length];
- for (int i = 0; i < nextMatch.length; ++i) {
- nextMatch[i] = -2;
- }
+ Arrays.fill(nextMatch, -2);
int singleBrackets = 0;
while (end < wikiText.length()) {
// Manual replacement for matcher.find(end),
lastUnescapedPipePos = matchStart;
}
- static final String trimNewlines(String s) {
+ static String trimNewlines(String s) {
while (s.startsWith("\n")) {
s = s.substring(1);
}
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import junit.framework.TestCase;
assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
- assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
+ assertNull(new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
wikiText = "[[abc|def]]";
assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
- assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
+ assertEquals(Collections.singletonList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
wikiText = "{{abc|d[[|]]ef|ghi}}";
assertEquals("\n", tokenizer.nextToken().token());
assertEquals("hello2", tokenizer.nextToken().token());
- assertEquals(null, tokenizer.nextToken());
+ assertNull(tokenizer.nextToken());
tokenizer.returnToLineStart();
assertEquals("hello2", tokenizer.nextToken().token());
- assertEquals(null, tokenizer.nextToken());
+ assertNull(tokenizer.nextToken());
}
"[extraterminated]]" + "\n" +
"=== {{header-template}} ===" + "\n";
- final String[] expectedTokens = new String[] {
+ final String[] expectedTokens = {
"Hi",
"\n",
"Hello ",
"\n",
};
- final List<String> actualTokens = new ArrayList<String>();
+ final List<String> actualTokens = new ArrayList<>();
final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
WikiTokenizer token;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
private static final Pattern SUPERSCRIPT = Pattern.compile("<sup>[0-9]*</sup>");
- final SortedMap<String, AtomicInteger> counters = new TreeMap<String, AtomicInteger>();
- final Set<String> pairsAdded = new LinkedHashSet<String>();
+ final SortedMap<String, AtomicInteger> counters = new TreeMap<>();
+ final Set<String> pairsAdded = new LinkedHashSet<>();
public EntrySource entrySource;
public String title;
final int bytesLength = dis.readInt();
final byte[] bytes = new byte[bytesLength];
dis.readFully(bytes);
- final String text = new String(bytes, "UTF8");
+ final String text = new String(bytes, StandardCharsets.UTF_8);
parseSection(heading, replaceSuperscript(text));
StringBuilder builder;
IndexedEntry indexedEntry;
IndexBuilder indexBuilder;
- final Map<String,FunctionCallback<T>> functionCallbacks = new LinkedHashMap<String, FunctionCallback<T>>();
+ final Map<String,FunctionCallback<T>> functionCallbacks = new LinkedHashMap<>();
boolean entryTypeNameSticks = false;
EntryTypeName entryTypeName = null;
- final Map<String,AtomicInteger> langCodeToTCount = new LinkedHashMap<String, AtomicInteger>();
+ final Map<String,AtomicInteger> langCodeToTCount = new LinkedHashMap<>();
- final NameAndArgs<T> nameAndArgs = new NameAndArgs<T>();
+ final NameAndArgs<T> nameAndArgs = new NameAndArgs<>();
public AppendAndIndexWikiCallback(final T parser) {
this.parser = parser;
if (name != null) {
appendAndIndexWikiCallback.dispatch(name, null);
}
- for (int i = 0; i < args.size(); ++i) {
- if (args.get(i).length() > 0) {
+ for (String arg : args) {
+ if (arg.length() > 0) {
appendAndIndexWikiCallback.builder.append("|");
- appendAndIndexWikiCallback.dispatch(args.get(i), null, null);
+ appendAndIndexWikiCallback.dispatch(arg, null, null);
}
}
appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
return true;
}
}
- static NameAndArgs<AbstractWiktionaryParser> NAME_AND_ARGS = new NameAndArgs<AbstractWiktionaryParser>();
+ static NameAndArgs<AbstractWiktionaryParser> NAME_AND_ARGS = new NameAndArgs<>();
static void appendNamedArgs(final Map<String, String> namedArgs,
final AppendAndIndexWikiCallback<?> appendAndIndexWikiCallback) {
class DeFunctionCallbacks {
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
- FunctionCallback<T> callback = new MakeHeadingFromName<T>("====");
+ FunctionCallback<T> callback = new MakeHeadingFromName<>("====");
callbacks.put("Aussprache", callback);
callbacks.put("Worttrennung", callback);
callbacks.put("Bedeutungen", callback);
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
static final class MakeHeadingFromName<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
static final class ListSection {
final String firstPrefix;
final String firstLine;
- final List<String> nextPrefixes = new ArrayList<String>();
- final List<String> nextLines = new ArrayList<String>();
+ final List<String> nextPrefixes = new ArrayList<>();
+ final List<String> nextLines = new ArrayList<>();
public ListSection(String firstPrefix, String firstLine) {
this.firstPrefix = firstPrefix;
}
final StringBuilder foreignBuilder = new StringBuilder();
- final List<EnForeignParser.ListSection> listSections = new ArrayList<EnForeignParser.ListSection>();
+ final List<EnForeignParser.ListSection> listSections = new ArrayList<>();
appendAndIndexWikiCallback.reset(foreignBuilder, null);
this.state = State.ENGLISH_DEF_OF_FOREIGN; // TODO: this is wrong, need new category....
class EnFunctionCallbacks {
- static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<String, FunctionCallback<EnParser>>();
+ static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<>();
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
- FunctionCallback<T> callback = new Gender<T>();
+ FunctionCallback<T> callback = new Gender<>();
callbacks.put("m", callback);
callbacks.put("f", callback);
callbacks.put("n", callback);
callbacks.put("p", callback);
callbacks.put("g", callback);
- callbacks.put("etyl", new etyl<T>());
- callbacks.put("term", new term<T>());
-
- callback = new EncodingCallback<T>();
- Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
- "IPA", "IPAchar", // Not really encodings, but it works.
- "zh-ts", "zh-tsp",
- "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
- "fa-Arab", "Khmr", "Cyrl", "ug-Arab", "ko-inline",
- "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
- "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
+ callbacks.put("etyl", new etyl<>());
+ callbacks.put("term", new term<>());
+
+ callback = new EncodingCallback<>();
+ Set<String> encodings = new LinkedHashSet<>(Arrays.asList(
+ "IPA", "IPAchar", // Not really encodings, but it works.
+ "zh-ts", "zh-tsp",
+ "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
+ "fa-Arab", "Khmr", "Cyrl", "ug-Arab", "ko-inline",
+ "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
+ "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
for (final String encoding : encodings) {
callbacks.put(encoding, callback);
}
- callback = new Ignore<T>();
+ callback = new Ignore<>();
callbacks.put("trreq", callback);
callbacks.put("t-image", callback);
callbacks.put("defn", callback);
callbacks.put("der-mid3", callback);
callbacks.put("der-bottom", callback);
- callback = new AppendName<T>();
+ callback = new AppendName<>();
callbacks.put("...", callback);
- callbacks.put("qualifier", new QualifierCallback<T>());
- callbacks.put("italbrac", new italbrac<T>());
- callbacks.put("gloss", new gloss<T>());
- callbacks.put("not used", new not_used<T>());
- callbacks.put("wikipedia", new wikipedia<T>());
+ callbacks.put("qualifier", new QualifierCallback<>());
+ callbacks.put("italbrac", new italbrac<>());
+ callbacks.put("gloss", new gloss<>());
+ callbacks.put("not used", new not_used<>());
+ callbacks.put("wikipedia", new wikipedia<>());
- final it_conj<T> it_conj_cb = new it_conj<T>();
+ final it_conj<T> it_conj_cb = new it_conj<>();
callbacks.put("it-conj", it_conj_cb);
- callbacks.put("it-conj-are", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-arsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-care", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-carsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-ciare", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-ciarsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iare", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iarsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iare-b", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iarsi-b", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-ire", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-irsi", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-ire-b", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-irsi-b", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-cire", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-cirsi", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-ire", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-ere", new it_conj_ere<T>(it_conj_cb));
- callbacks.put("it-conj-ersi", new it_conj_ere<T>(it_conj_cb));
- callbacks.put("it-conj-urre", new it_conj_urre<T>(it_conj_cb));
- callbacks.put("it-conj-ursi", new it_conj_urre<T>(it_conj_cb));
- callbacks.put("it-conj-fare", new it_conj_fare<T>(it_conj_cb));
+ callbacks.put("it-conj-are", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-arsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-care", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-carsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-ciare", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-ciarsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iare", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iarsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iare-b", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iarsi-b", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-ire", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-irsi", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-ire-b", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-irsi-b", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-cire", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-cirsi", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-ire", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-ere", new it_conj_ere<>(it_conj_cb));
+ callbacks.put("it-conj-ersi", new it_conj_ere<>(it_conj_cb));
+ callbacks.put("it-conj-urre", new it_conj_urre<>(it_conj_cb));
+ callbacks.put("it-conj-ursi", new it_conj_urre<>(it_conj_cb));
+ callbacks.put("it-conj-fare", new it_conj_fare<>(it_conj_cb));
//"{{it-conj-fare|putre|avere}}\n" +
static {
addGenericCallbacks(DEFAULT);
- FunctionCallback<EnParser> callback = new TranslationCallback<EnParser>();
+ FunctionCallback<EnParser> callback = new TranslationCallback<>();
DEFAULT.put("t", callback);
DEFAULT.put("t+", callback);
DEFAULT.put("t-", callback);
DEFAULT.put("head", callback);
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
// ------------------------------------------------------------------
namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
if (args.size() < 2) {
if (!name.equals("ttbc")) {
- EnParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title + ", " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title + ", " + wikiTokenizer.token());
}
return false;
}
// Catch-all for anything else...
if (!namedArgs.isEmpty()) {
appendAndIndexWikiCallback.builder.append(" {");
- EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
+ AbstractWiktionaryParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
appendAndIndexWikiCallback.builder.append("}");
}
final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
namedArgs.remove("lang");
if (!namedArgs.isEmpty()) {
- EnParser.LOG.warning("weird qualifier: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("weird qualifier: " + wikiTokenizer.token());
return false;
}
appendAndIndexWikiCallback.builder.append("(");
final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
namedArgs.remove("lang");
if (!namedArgs.isEmpty()) {
- EnParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
return false;
}
if (args.size() == 0) {
}
appendAndIndexWikiCallback.builder.append("{");
appendAndIndexWikiCallback.builder.append(name);
- for (int i = 0; i < args.size(); ++i) {
- appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
+ for (String arg : args) {
+ appendAndIndexWikiCallback.builder.append("|").append(arg);
}
appendAndIndexWikiCallback.builder.append("}");
return true;
if (displayText != null) {
appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
} else {
- EnParser.LOG.warning("no display text: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("no display text: " + wikiTokenizer.token());
}
final String tr = namedArgs.remove("tr");
namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
if (!namedArgs.isEmpty()) {
appendAndIndexWikiCallback.builder.append(" {").append(name);
- EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
+ AbstractWiktionaryParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
appendAndIndexWikiCallback.builder.append("}");
}
formName = ListUtil.remove(args, 0, null);
}
if (formName == null) {
- EnParser.LOG.warning("Missing form name: " + parser.title);
+ AbstractWiktionaryParser.LOG.warning("Missing form name: " + parser.title);
formName = "form of";
}
String baseForm = ListUtil.get(args, 1, "");
parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
} else {
// null baseForm happens in Danish.
- EnParser.LOG.warning("Null baseform: " + parser.title);
+ AbstractWiktionaryParser.LOG.warning("Null baseform: " + parser.title);
}
return true;
}
if (args.size() > 1 || !namedArgs.isEmpty()) {
// Unindexed!
return false;
- } else if (args.size() == 1) {
- return false;
- } else {
- return true;
- }
+ } else return args.size() != 1;
}
}
return false;
}
String langName = WiktionaryLangs.getEnglishName(langCode);
- if (langName != null) {
- appendAndIndexWikiCallback.dispatch(langName, null);
- } else {
- appendAndIndexWikiCallback.dispatch("lang:" + langCode, null);
- }
+ appendAndIndexWikiCallback.dispatch(langName == null ? "lang:" + langCode : langName, null);
return true;
}
}
if (!StringUtil.isNullOrEmpty(literally)) {
literally = String.format("literally %s", literally);
}
- final List<String> inParens = new ArrayList<String>(Arrays.asList(tr, pos, gloss, literally));
+ final List<String> inParens = new ArrayList<>(Arrays.asList(tr, pos, gloss, literally));
cleanList(inParens);
appendCommaSeparatedList(appendAndIndexWikiCallback, inParens);
}
parser.wordForms.add(singular);
if (!namedArgs.isEmpty() || args.size() > 4) {
- EnParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
}
return true;
}
}
static {
- DEFAULT.put("it-proper noun", new it_proper_noun<EnParser>());
+ DEFAULT.put("it-proper noun", new it_proper_noun<>());
}
static final class it_proper_noun<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
@Override
}
}
- static final Map<String,String> it_indicativePronouns = new LinkedHashMap<String, String>();
+ static final Map<String,String> it_indicativePronouns = new LinkedHashMap<>();
static {
it_indicativePronouns.put("1s", "io");
it_indicativePronouns.put("2s", "tu");
it_indicativePronouns.put("3p", "essi/esse");
}
- static final Map<String,String> it_subjunctivePronouns = new LinkedHashMap<String, String>();
+ static final Map<String,String> it_subjunctivePronouns = new LinkedHashMap<>();
static {
it_subjunctivePronouns.put("1s", "che io");
it_subjunctivePronouns.put("2s", "che tu");
it_subjunctivePronouns.put("3p", "che essi/esse");
}
- static final Map<String,String> it_imperativePronouns = new LinkedHashMap<String, String>();
+ static final Map<String,String> it_imperativePronouns = new LinkedHashMap<>();
static {
it_imperativePronouns.put("1s", "-");
it_imperativePronouns.put("2s", "tu");
final List<String> prefixes = (inf != null && inf.endsWith("si")) ? it_reflexive_pronouns : it_empty;
String style = " style=\"background:#c0cfe4\"";
- outputDataRow(appendAndIndexWikiCallback, style, "indicativo", style, "th", "", new LinkedHashMap<String, String>(it_indicativePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "indicativo", style, "th", "", new LinkedHashMap<>(it_indicativePronouns), it_empty, false);
outputDataRow(appendAndIndexWikiCallback, style, "presente", "", "td", "pres", namedArgs, prefixes, true);
outputDataRow(appendAndIndexWikiCallback, style, "imperfetto", "", "td", "imperf", namedArgs, prefixes, true);
outputDataRow(appendAndIndexWikiCallback, style, "passato remoto", "", "td", "prem", namedArgs, prefixes, true);
outputDataRow(appendAndIndexWikiCallback, style, "futuro", "", "td", "fut", namedArgs, prefixes, true);
style = " style=\"background:#c0d8e4\"";
- outputDataRow(appendAndIndexWikiCallback, style, "condizionale", style, "th", "", new LinkedHashMap<String, String>(it_indicativePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "condizionale", style, "th", "", new LinkedHashMap<>(it_indicativePronouns), it_empty, false);
outputDataRow(appendAndIndexWikiCallback, style, "presente", "", "td", "cond", namedArgs, prefixes, true);
style = " style=\"background:#c0e4c0\"";
- outputDataRow(appendAndIndexWikiCallback, style, "congiuntivo", style, "th", "", new LinkedHashMap<String, String>(it_subjunctivePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "congiuntivo", style, "th", "", new LinkedHashMap<>(it_subjunctivePronouns), it_empty, false);
namedArgs.put("sub3s2", namedArgs.remove("sub3s"));
namedArgs.put("sub1s", namedArgs.get("sub123s"));
namedArgs.put("sub2s", namedArgs.get("sub123s"));
outputDataRow(appendAndIndexWikiCallback, style, "imperfetto", "", "td", "impsub", namedArgs, prefixes, true);
style = " style=\"background:#e4d4c0\"";
- outputDataRow(appendAndIndexWikiCallback, style, "imperativo", style, "th", "", new LinkedHashMap<String, String>(it_imperativePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "imperativo", style, "th", "", new LinkedHashMap<>(it_imperativePronouns), it_empty, false);
outputDataRow(appendAndIndexWikiCallback, style, "", "", "td", "imp", namedArgs, it_empty, false); // these are attached to the stem.
builder.append("</table>\n");
for (final String number : it_number_s_p) {
for (final String person : it_person_1_2_3) {
// Output <td> or <th>
- builder.append("<").append(type2).append("").append(col2Style).append(">");
+ builder.append("<").append(type2).append(col2Style).append(">");
final String keyBase = String.format("%s%s%s", moodName, person, number);
appendAndIndexWikiCallback.dispatch(prefixes.get(i++), null);
outputKeyVariations(appendAndIndexWikiCallback, builder, keyBase, namedArgs, isForm);
"Particle|Interjection|Pronominal adverb|" +
"Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable");
- static final Set<String> USELESS_WIKI_ARGS = new LinkedHashSet<String>(
- Arrays.asList(
- "lang",
- "sc",
- "sort",
- "cat",
- "cat2",
- "xs",
- "nodot"));
+ static final Set<String> USELESS_WIKI_ARGS = new LinkedHashSet<>(
+ Arrays.asList(
+ "lang",
+ "sc",
+ "sort",
+ "cat",
+ "cat2",
+ "xs",
+ "nodot"));
static boolean isIgnorableTitle(final String title) {
return title.startsWith("Wiktionary:") ||
State state = null;
public boolean entryIsFormOfSomething = false;
- final Collection<String> wordForms = new ArrayList<String>();
+ final Collection<String> wordForms = new ArrayList<>();
boolean titleAppended = false;
final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback = new AppendAndIndexCallback(this);
{
appendAndIndexWikiCallback.functionCallbacks.putAll(EnFunctionCallbacks.DEFAULT);
- for (final String key : new ArrayList<String>(appendAndIndexWikiCallback.functionCallbacks.keySet())) {
+ for (final String key : new ArrayList<>(appendAndIndexWikiCallback.functionCallbacks.keySet())) {
// Don't handle the it-conj functions here.
if (key.startsWith("it-conj")) {
appendAndIndexWikiCallback.functionCallbacks.remove(key);
final Pair pair = new Pair(trim(englishText.toString()), trim(foreignText.toString()), swap);
pairEntry.pairs.add(pair);
if (!pairsAdded.add(pair.toString())) {
- LOG.warning("Duplicate pair: " + pair.toString());
+ LOG.warning("Duplicate pair: " + pair);
incrementCount("WARNING: Duplicate pair" );
}
}
PairEntry pairEntry = null;
IndexedEntry indexedEntry = null;
StringBuilder[] builders = null;
- HashSet<PairEntry.Pair> allPairs = new HashSet<>();
+ final HashSet<PairEntry.Pair> allPairs = new HashSet<>();
public static final String NAME = "EnTranslationToTranslation";
- final Set<String> Ts = new LinkedHashSet<String>(Arrays.asList("t", "t+",
+ final Set<String> Ts = new LinkedHashSet<>(Arrays.asList("t", "t+",
"t-", "tø", "apdx-t", "ttbc"));
public EnTranslationToTranslationParser(final List<IndexBuilder> indexBuilders,
}
}
- final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<EnTranslationToTranslationParser>();
+ final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<>();
- final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<EnTranslationToTranslationParser>(
- this);
+ final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<>(
+ this);
{
for (final String t : Ts) {
appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback);
class FrFunctionCallbacks {
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
- callbacks.put("-étym-", new Redispatch<T>("\n==== Étymologie ====\n"));
- callbacks.put("-pron-", new Redispatch<T>("\n==== Prononciation ====\n"));
- callbacks.put("-voir-", new Redispatch<T>("\n==== Voir aussi ====\n"));
- callbacks.put("-drv-", new Redispatch<T>("\n==== Dérivés ====\n"));
- callbacks.put("-syn-", new Redispatch<T>("\n==== Synonymes ====\n"));
+ callbacks.put("-étym-", new Redispatch<>("\n==== Étymologie ====\n"));
+ callbacks.put("-pron-", new Redispatch<>("\n==== Prononciation ====\n"));
+ callbacks.put("-voir-", new Redispatch<>("\n==== Voir aussi ====\n"));
+ callbacks.put("-drv-", new Redispatch<>("\n==== Dérivés ====\n"));
+ callbacks.put("-syn-", new Redispatch<>("\n==== Synonymes ====\n"));
- callbacks.put("-apr-", new Redispatch<T>("\n==== Apparentés étymologiques ====\n"));
- callbacks.put("-hyper-", new Redispatch<T>("\n==== Hyperonymes ====\n"));
- callbacks.put("-hypo-", new Redispatch<T>("\n==== Hyponymes ====\n"));
- callbacks.put("-réf-", new Redispatch<T>("\n==== Références ====\n"));
- callbacks.put("-homo-", new Redispatch<T>("\n==== Homophones ====\n"));
- callbacks.put("-anagr-", new Redispatch<T>("\n==== Anagrammes ====\n"));
- callbacks.put("-voc-", new Redispatch<T>("\n==== Vocabulaire apparenté par le sens ====\n"));
- callbacks.put("-exp-", new Redispatch<T>("\n==== Expressions ====\n"));
- callbacks.put("-note-", new Redispatch<T>("\n==== Note ====\n"));
+ callbacks.put("-apr-", new Redispatch<>("\n==== Apparentés étymologiques ====\n"));
+ callbacks.put("-hyper-", new Redispatch<>("\n==== Hyperonymes ====\n"));
+ callbacks.put("-hypo-", new Redispatch<>("\n==== Hyponymes ====\n"));
+ callbacks.put("-réf-", new Redispatch<>("\n==== Références ====\n"));
+ callbacks.put("-homo-", new Redispatch<>("\n==== Homophones ====\n"));
+ callbacks.put("-anagr-", new Redispatch<>("\n==== Anagrammes ====\n"));
+ callbacks.put("-voc-", new Redispatch<>("\n==== Vocabulaire apparenté par le sens ====\n"));
+ callbacks.put("-exp-", new Redispatch<>("\n==== Expressions ====\n"));
+ callbacks.put("-note-", new Redispatch<>("\n==== Note ====\n"));
- callbacks.put("-trad-", new ItFunctionCallbacks.SkipSection<T>());
+ callbacks.put("-trad-", new ItFunctionCallbacks.SkipSection<>());
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
static final class MakeHeadingFromName<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(
Map<String, FunctionCallback<T>> callbacks) {
- callbacks.put("-hyph-", new Redispatch<T>("\n==== Sillabazione ====\n"));
- callbacks.put("-pron-", new Redispatch<T>("\n==== Pronuncia ====\n"));
- callbacks.put("-etim-", new Redispatch<T>("\n==== Etimologia / Derivazione ====\n"));
- callbacks.put("-syn-", new Redispatch<T>("\n==== Sinonimi ====\n"));
- callbacks.put("-ant-", new Redispatch<T>("\n==== Antonimi/Contrari ====\n"));
- callbacks.put("-drv-", new Redispatch<T>("\n==== Parole derivate ====\n"));
- callbacks.put("-prov-", new Redispatch<T>("\n==== Proverbi e modi di dire ====\n"));
- callbacks.put("-ref-", new Redispatch<T>("\n==== Note / Riferimenti ====\n"));
- callbacks.put("-rel-", new Redispatch<T>("\n==== Termini correlati ====\n"));
- callbacks.put("-var-", new Redispatch<T>("\n==== Varianti ====\n"));
+ callbacks.put("-hyph-", new Redispatch<>("\n==== Sillabazione ====\n"));
+ callbacks.put("-pron-", new Redispatch<>("\n==== Pronuncia ====\n"));
+ callbacks.put("-etim-", new Redispatch<>("\n==== Etimologia / Derivazione ====\n"));
+ callbacks.put("-syn-", new Redispatch<>("\n==== Sinonimi ====\n"));
+ callbacks.put("-ant-", new Redispatch<>("\n==== Antonimi/Contrari ====\n"));
+ callbacks.put("-drv-", new Redispatch<>("\n==== Parole derivate ====\n"));
+ callbacks.put("-prov-", new Redispatch<>("\n==== Proverbi e modi di dire ====\n"));
+ callbacks.put("-ref-", new Redispatch<>("\n==== Note / Riferimenti ====\n"));
+ callbacks.put("-rel-", new Redispatch<>("\n==== Termini correlati ====\n"));
+ callbacks.put("-var-", new Redispatch<>("\n==== Varianti ====\n"));
- callbacks.put("-trans1-", new SkipSection<T>());
- callbacks.put("-trans2-", new SkipSection<T>());
- callbacks.put("-ref-", new SkipSection<T>());
+ callbacks.put("-trans1-", new SkipSection<>());
+ callbacks.put("-trans2-", new SkipSection<>());
+ callbacks.put("-ref-", new SkipSection<>());
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
static final class Redispatch<T extends AbstractWiktionaryParser> implements
FunctionCallback<T> {
void addFunctionCallbacks(
Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
}
- static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
+ static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<>();
static {
final Pattern enSkipSections = Pattern.compile(".*(Translations|Anagrams|References).*");
isoToLangConfig.put("EN", new LangConfig() {
if (sectionName.equalsIgnoreCase("Antonyms")) {
return EntryTypeName.ANTONYM_MULTI;
}
- if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) {
- // We need to put it in the other index, too (probably)
- return null;
- }
- if (sectionName.equalsIgnoreCase("Derived Terms")) {
- return null;
- }
+ // We probably need to put part-of-speech sections into the other index, too:
+ // EnParser.partOfSpeechHeader.matcher(sectionName).matches()
+
+ // May need special handling:
+ // sectionName.equalsIgnoreCase("Derived Terms")
return null;
}
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Category:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Category:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Categoría:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Categoría:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Categoria:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Categoria:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Kategorie:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Kategorie:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Categoria:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Categoria:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Catégorie:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Catégorie:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
if (!StringUtil.isNullOrEmpty(linkDest)) {
builder.append(String.format("<a href=\"%s\">", HtmlEntry.formatQuickdicUrl("", linkDest)));
super.onWikiLink(wikiTokenizer);
- builder.append(String.format("</a>"));
+ builder.append("</a>");
} else {
super.onWikiLink(wikiTokenizer);
}
builder.append(String.format("</h%d>\n", depth));
}
- final List<Character> listPrefixStack = new ArrayList<Character>();
+ final List<Character> listPrefixStack = new ArrayList<>();
@Override
public void onListItem(WikiTokenizer wikiTokenizer) {
public class WiktionaryLangs {
- public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<String,String>();
+ public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<>();
static {
isoCodeToEnWikiName.put("AF", "Afrikaans");
isoCodeToEnWikiName.put("SQ", "Albanian");
isoCodeToEnWikiName.put("HT", "Haitian Creole");
isoCodeToEnWikiName.put("LB", "Luxembourgish");
isoCodeToEnWikiName.put("MK", "Macedonian");
- isoCodeToEnWikiName.put("GV", "Manx");
isoCodeToEnWikiName.put("scn", "Sicilian");
isoCodeToEnWikiName.put("cu", "Old Church Slavonic");
isoCodeToEnWikiName.put("rom", "Romani");
//assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
}
- public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
+ public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<>();
static {
Map<String,String> isoCodeToWikiName;
wikiCodeToIsoCodeToWikiName.put("en", isoCodeToEnWikiName);
// egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName);
isoCodeToWikiName.put("nds", "Niederdeutsch");
isoCodeToWikiName.put("DE", "Deutsch");
isoCodeToWikiName.put("RO", "Rumänisch");
// egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}"));
isoCodeToWikiName.put("PT", Pattern.quote("{{langue|pt}}"));
// egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("it", isoCodeToWikiName);
isoCodeToWikiName.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}"); // scn, nap, cal, lmo
isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}"));
// egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
isoCodeToWikiName.put("AR", Pattern.quote("{{lengua|ar}}"));
isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}"));
isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}"));
// Pattern seems to match Italian one
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName);
isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}"));
isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
int equalsIndex;
if (arg.startsWith("--") && (equalsIndex = arg.indexOf("=")) >= 0) {
final String key = arg.substring(2, equalsIndex);
- final String value = arg.substring(equalsIndex + 1, arg.length());
+ final String value = arg.substring(equalsIndex + 1);
dest.put(key, value);
}
}
@SuppressWarnings("WeakerAccess")
public final class EnumUtil {
- public static final <T extends Enum<T>> T min(final T e1, final T e2) {
+ public static <T extends Enum<T>> T min(final T e1, final T e2) {
if (e1 == null) {
return e2;
}