Add french-swedish dictionary support.
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
- <classpathentry combineaccessrules="false" kind="src" path="/Dictionary"/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry including="com/hughes/android/dictionary/DictionaryInfo.java|com/hughes/android/dictionary/engine/" kind="src" path="Dictionary/src"/>
+ <classpathentry kind="src" path="Dictionary/Util/src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
+ <attributes>
+ <attribute name="module" value="true"/>
+ <attribute name="limit-modules" value="java.xml,java.logging"/>
+ </attributes>
+ </classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
- <classpathentry kind="lib" path="jars/xerces-2_11_0/xercesImpl.jar"/>
- <classpathentry kind="lib" path="jars/commons-lang3-3.1.jar"/>
+ <classpathentry kind="lib" path="/usr/share/java/icu4j-49.1.jar"/>
+ <classpathentry kind="lib" path="/usr/share/java/commons-text.jar"/>
+ <classpathentry kind="lib" path="/usr/share/java/commons-compress.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>
-dictInputs
-dictOutputs/
+data/inputs/
+data/outputs/
bin
wikiSplit
wikiSplit_2011
wikiSplit_201111
.project
.settings/
-*.class
# Run after downloading (data/downloadInputs.sh) to generate
# per-language data files from enwiktionary.
-ICU4J=/usr/share/java/icu4j-49.1.jar
-test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar
-XERCES=/usr/share/java/xercesImpl.jar
-test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
-COMMONS_COMPRESS=/usr/share/java/commons-compress.jar
-JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
-test -x "$JAVA" || JAVA=java
-"$JAVA" -Xmx4096m -Xverify:none -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@"
+RUNNER=./DictionaryPC
+if ! test -x "$RUNNER" ; then
+ ICU4J=/usr/share/java/icu4j-49.1.jar
+ test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar
+ COMMONS_COMPRESS=/usr/share/java/commons-compress.jar
+ JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
+ test -x "$JAVA" || JAVA=java
+ RUNNER="$JAVA -Xmx4096m -Xverify:none -classpath bin/:$ICU4J:$COMMONS_COMPRESS com.hughes.android.dictionary.engine.Runner"
+fi
+$RUNNER WiktionarySplitter "$@"
test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar
JUNIT=/usr/share/java/junit.jar
test -r "$JUNIT" || JUNIT=/usr/share/junit/lib/junit.jar
-XERCES=/usr/share/java/xercesImpl.jar
-test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
-COMMONS=/usr/share/java/commons-lang3.jar
-test -r "$COMMONS" || COMMONS=/usr/share/commons-lang-3.3/lib/commons-lang.jar
+COMMONS=/usr/share/java/commons-text.jar
COMMONS_COMPRESS=/usr/share/java/commons-compress.jar
if [ ! -x ../Dictionary ] ; then
echo "You need to clone the Dictionary repository (including subprojects) into .."
echo "Junit needs to be installed"
exit 1;
fi
-if [ ! -r "$XERCES" ] ; then
- echo "Xerces needs to be installed"
- exit 1;
-fi
if [ ! -r "$COMMONS" ] ; then
echo "commons-lang needs to be installed"
exit 1;
echo "commons-compress needs to be installed"
exit 1;
fi
-javac -g ../Dictionary/Util/src/com/hughes/util/*.java ../Dictionary/Util/src/com/hughes/util/raf/*.java ../Dictionary/src/com/hughes/android/dictionary/DictionaryInfo.java ../Dictionary/src/com/hughes/android/dictionary/engine/*.java ../Dictionary/src/com/hughes/android/dictionary/C.java src/com/hughes/util/*.java src/com/hughes/android/dictionary/*.java src/com/hughes/android/dictionary/*/*.java src/com/hughes/android/dictionary/*/*/*.java -classpath "$ICU4J:$JUNIT:$XERCES:$COMMONS:$COMMONS_COMPRESS"
+mkdir -p bin
+# -encoding is just a work around for user that still run systems
+# with non-UTF8 locales
+# Limit to Java 11 for compatibility with native-image
+javac --source 11 --target 11 --limit-modules java.xml,java.logging -Xlint:all -encoding UTF-8 -g -d bin/ ../Dictionary/Util/src/com/hughes/util/*.java ../Dictionary/Util/src/com/hughes/util/raf/*.java ../Dictionary/src/com/hughes/android/dictionary/DictionaryInfo.java ../Dictionary/src/com/hughes/android/dictionary/engine/*.java ../Dictionary/src/com/hughes/android/dictionary/C.java src/com/hughes/util/*.java src/com/hughes/android/dictionary/*.java src/com/hughes/android/dictionary/*/*.java src/com/hughes/android/dictionary/*/*/*.java -classpath "$ICU4J:$JUNIT:$COMMONS:$COMMONS_COMPRESS"
--- /dev/null
+RUNNER=./DictionaryPC
+if ! test -x "$RUNNER" ; then
+ JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
+ test -x "$JAVA" || JAVA=java
+ RUNNER="$JAVA -classpath bin/ com.hughes.android.dictionary.engine.Runner"
+fi
+$RUNNER ConvertToV6 "$@"
--- /dev/null
+set -e
+rm -rf data/outputsv6
+mkdir data/outputsv6
+for i in data/outputs/*.quickdic ; do
+ o=data/outputsv6/$(basename "$i")
+ ./convert_to_v6.sh "$i" "$o"
+ 7z a -mx=9 "$o".v006.zip "$o"
+ rm "$o"
+ # skipHtml makes no sense for single-language dictionaries
+ if echo "$o" | grep -q '-' ; then
+ if ./convert_to_v6.sh "$i" "$o" skipHtmlOpt ; then
+ 7z a -mx=9 "$o".small.v006.zip "$o"
+ rm "$o"
+ elif [ $? -ne 3 ] ; then
+ # Check for magic 3 indicating "no HTML entries in dictionary"
+ echo "Converting dictionary failed!"
+ exit 1
+ fi
+ fi
+done
+++ /dev/null
-#!/usr/bin/env python
-#
-# Copyright 2006, 2007 Google Inc. All Rights Reserved.
-# Author: danderson@google.com (David Anderson)
-#
-# Script for uploading files to a Google Code project.
-#
-# This is intended to be both a useful script for people who want to
-# streamline project uploads and a reference implementation for
-# uploading files to Google Code projects.
-#
-# To upload a file to Google Code, you need to provide a path to the
-# file on your local machine, a small summary of what the file is, a
-# project name, and a valid account that is a member or owner of that
-# project. You can optionally provide a list of labels that apply to
-# the file. The file will be uploaded under the same name that it has
-# in your local filesystem (that is, the "basename" or last path
-# component). Run the script with '--help' to get the exact syntax
-# and available options.
-#
-# Note that the upload script requests that you enter your
-# googlecode.com password. This is NOT your Gmail account password!
-# This is the password you use on googlecode.com for committing to
-# Subversion and uploading files. You can find your password by going
-# to http://code.google.com/hosting/settings when logged in with your
-# Gmail account. If you have already committed to your project's
-# Subversion repository, the script will automatically retrieve your
-# credentials from there (unless disabled, see the output of '--help'
-# for details).
-#
-# If you are looking at this script as a reference for implementing
-# your own Google Code file uploader, then you should take a look at
-# the upload() function, which is the meat of the uploader. You
-# basically need to build a multipart/form-data POST request with the
-# right fields and send it to https://PROJECT.googlecode.com/files .
-# Authenticate the request using HTTP Basic authentication, as is
-# shown below.
-#
-# Licensed under the terms of the Apache Software License 2.0:
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Questions, comments, feature requests and patches are most welcome.
-# Please direct all of these to the Google Code users group:
-# http://groups.google.com/group/google-code-hosting
-
-"""Google Code file uploader script.
-"""
-
-__author__ = 'danderson@google.com (David Anderson)'
-
-import httplib
-import os.path
-import optparse
-import getpass
-import base64
-import sys
-
-
-def upload(file, project_name, user_name, password, summary, labels=None):
- """Upload a file to a Google Code project's file server.
-
- Args:
- file: The local path to the file.
- project_name: The name of your project on Google Code.
- user_name: Your Google account name.
- password: The googlecode.com password for your account.
- Note that this is NOT your global Google Account password!
- summary: A small description for the file.
- labels: an optional list of label strings with which to tag the file.
-
- Returns: a tuple:
- http_status: 201 if the upload succeeded, something else if an
- error occured.
- http_reason: The human-readable string associated with http_status
- file_url: If the upload succeeded, the URL of the file on Google
- Code, None otherwise.
- """
- # The login is the user part of user@gmail.com. If the login provided
- # is in the full user@domain form, strip it down.
- if user_name.endswith('@gmail.com'):
- user_name = user_name[:user_name.index('@gmail.com')]
-
- form_fields = [('summary', summary)]
- if labels is not None:
- form_fields.extend([('label', l.strip()) for l in labels])
-
- content_type, body = encode_upload_request(form_fields, file)
-
- upload_host = '%s.googlecode.com' % project_name
- upload_uri = '/files'
- auth_token = base64.b64encode('%s:%s'% (user_name, password))
- headers = {
- 'Authorization': 'Basic %s' % auth_token,
- 'User-Agent': 'Googlecode.com uploader v0.9.4',
- 'Content-Type': content_type,
- }
-
- server = httplib.HTTPSConnection(upload_host)
- server.request('POST', upload_uri, body, headers)
- resp = server.getresponse()
- server.close()
-
- if resp.status == 201:
- location = resp.getheader('Location', None)
- else:
- location = None
- return resp.status, resp.reason, location
-
-
-def encode_upload_request(fields, file_path):
- """Encode the given fields and file into a multipart form body.
-
- fields is a sequence of (name, value) pairs. file is the path of
- the file to upload. The file will be uploaded to Google Code with
- the same file name.
-
- Returns: (content_type, body) ready for httplib.HTTP instance
- """
- BOUNDARY = '----------Googlecode_boundary_reindeer_flotilla'
- CRLF = '\r\n'
-
- body = []
-
- # Add the metadata about the upload first
- for key, value in fields:
- body.extend(
- ['--' + BOUNDARY,
- 'Content-Disposition: form-data; name="%s"' % key,
- '',
- value,
- ])
-
- # Now add the file itself
- file_name = os.path.basename(file_path)
- f = open(file_path, 'rb')
- file_content = f.read()
- f.close()
-
- body.extend(
- ['--' + BOUNDARY,
- 'Content-Disposition: form-data; name="filename"; filename="%s"'
- % file_name,
- # The upload server determines the mime-type, no need to set it.
- 'Content-Type: application/octet-stream',
- '',
- file_content,
- ])
-
- # Finalize the form body
- body.extend(['--' + BOUNDARY + '--', ''])
-
- return 'multipart/form-data; boundary=%s' % BOUNDARY, CRLF.join(body)
-
-
-def upload_find_auth(file_path, project_name, summary, labels=None,
- user_name=None, password=None, tries=3):
- """Find credentials and upload a file to a Google Code project's file server.
-
- file_path, project_name, summary, and labels are passed as-is to upload.
-
- Args:
- file_path: The local path to the file.
- project_name: The name of your project on Google Code.
- summary: A small description for the file.
- labels: an optional list of label strings with which to tag the file.
- config_dir: Path to Subversion configuration directory, 'none', or None.
- user_name: Your Google account name.
- tries: How many attempts to make.
- """
-
- while tries > 0:
- if user_name is None:
- # Read username if not specified or loaded from svn config, or on
- # subsequent tries.
- sys.stdout.write('Please enter your googlecode.com username: ')
- sys.stdout.flush()
- user_name = sys.stdin.readline().rstrip()
- if password is None:
- # Read password if not loaded from svn config, or on subsequent tries.
- print 'Please enter your googlecode.com password.'
- print '** Note that this is NOT your Gmail account password! **'
- print 'It is the password you use to access Subversion repositories,'
- print 'and can be found here: http://code.google.com/hosting/settings'
- password = getpass.getpass()
-
- status, reason, url = upload(file_path, project_name, user_name, password,
- summary, labels)
- # Returns 403 Forbidden instead of 401 Unauthorized for bad
- # credentials as of 2007-07-17.
- if status in [httplib.FORBIDDEN, httplib.UNAUTHORIZED]:
- # Rest for another try.
- user_name = password = None
- tries = tries - 1
- else:
- # We're done.
- break
-
- return status, reason, url
-
-
-def main():
- parser = optparse.OptionParser(usage='googlecode-upload.py -s SUMMARY '
- '-p PROJECT [options] FILE')
- parser.add_option('-s', '--summary', dest='summary',
- help='Short description of the file')
- parser.add_option('-p', '--project', dest='project',
- help='Google Code project name')
- parser.add_option('-u', '--user', dest='user',
- help='Your Google Code username')
- parser.add_option('-w', '--password', dest='password',
- help='Your Google Code password')
- parser.add_option('-l', '--labels', dest='labels',
- help='An optional list of comma-separated labels to attach '
- 'to the file')
-
- options, args = parser.parse_args()
-
- if not options.summary:
- parser.error('File summary is missing.')
- elif not options.project:
- parser.error('Project name is missing.')
- elif len(args) < 1:
- parser.error('File to upload not provided.')
- elif len(args) > 1:
- parser.error('Only one file may be specified.')
-
- file_path = args[0]
-
- if options.labels:
- labels = options.labels.split(',')
- else:
- labels = None
-
- status, reason, url = upload_find_auth(file_path, options.project,
- options.summary, labels,
- options.user, options.password)
- if url:
- print 'The file was uploaded successfully.'
- print 'URL: %s' % url
- return 0
- else:
- print 'An error occurred. Your file was not uploaded.'
- print 'Google Code upload server said: %s (%s)' % (reason, status)
- return 1
-
-
-if __name__ == '__main__':
- sys.exit(main())
--- /dev/null
+[
+ {
+ "name": "com.ibm.icu.text.CollatorServiceShim",
+ "methods": [
+ { "name": "<init>", "parameterTypes": [] }
+ ]
+ }
+]
--- /dev/null
+REM --allow-incomplete-classpath due to missing XZ implementation\r
+%GRAALVM_HOME%/bin/native-image --allow-incomplete-classpath --no-server -H:Name="DictionaryPC" com.hughes.android.dictionary.engine.Runner --no-fallback -cp bin/;commons-compress.jar;commons-text.jar;commons-lang3.jar;icu4j-49.1.jar -H:IncludeResources="com/ibm/icu/.*" -H:ReflectionConfigurationFiles=native-image-reflection.json\r
--- /dev/null
+"$GRAALVM_HOME"/bin/native-image --no-server -H:Name="DictionaryPC" com.hughes.android.dictionary.engine.Runner --no-fallback -cp bin/:/usr/share/java/commons-compress.jar:/usr/share/java/commons-text.jar:/usr/share/java/commons-lang3.jar:/usr/share/java/icu4j-49.1.jar -H:IncludeResources="com/ibm/icu/.*" -H:ReflectionConfigurationFiles=native-image-reflection.json
-# -agentlib:hprof=heap=sites,depth=20
-ICU4J=/usr/share/java/icu4j-49.1.jar
-test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar
-XERCES=/usr/share/java/xercesImpl.jar
-test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
-COMMONS=/usr/share/java/commons-lang3.jar
-test -r "$COMMONS" || COMMONS=/usr/share/commons-lang-3.3/lib/commons-lang.jar
-COMMONS_COMPRESS=/usr/share/java/commons-compress.jar
-JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
-test -x "$JAVA" || JAVA=java
-"$JAVA" -Djava.util.logging.config.file="logging.properties" -Xmx4096m -classpath src:../Dictionary/Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.DictionaryBuilder "$@"
+RUNNER=./DictionaryPC
+if ! test -x "$RUNNER" ; then
+ # -agentlib:hprof=heap=sites,depth=20
+ ICU4J=/usr/share/java/icu4j-49.1.jar
+ test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar
+ COMMONS_LANG3=/usr/share/java/commons-lang3.jar
+ test -r "$COMMONS_LANG3" || COMMONS_LANG3=/usr/share/commons-lang-3.3/lib/commons-lang.jar
+ COMMONS_TEXT=/usr/share/java/commons-text.jar
+ COMMONS_COMPRESS=/usr/share/java/commons-compress.jar
+ JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
+ test -x "$JAVA" || JAVA=java
+ RUNNER="$JAVA -Djava.util.logging.config.file=logging.properties -Xmx4096m -classpath bin/:$ICU4J:$COMMONS_LANG3:$COMMONS_TEXT:$COMMONS_COMPRESS com.hughes.android.dictionary.engine.Runner"
+fi
+$RUNNER DictionaryBuilder "$@"
import com.ibm.icu.text.Collator;
-final public class CollatorWrapper {
-static public Collator getInstance() {
+public final class CollatorWrapper {
+public static Collator getInstance() {
return Collator.getInstance();
}
-static public Collator getInstanceStrengthIdentical(Locale l) {
+public static Collator getInstanceStrengthIdentical(Locale l) {
Collator c = Collator.getInstance(l);
c.setStrength(Collator.IDENTICAL);
return c;
-// Copyright 2011 Google Inc. All Rights Reserved.\r
-//\r
-// Licensed under the Apache License, Version 2.0 (the "License");\r
-// you may not use this file except in compliance with the License.\r
-// You may obtain a copy of the License at\r
-//\r
-// http://www.apache.org/licenses/LICENSE-2.0\r
-//\r
-// Unless required by applicable law or agreed to in writing, software\r
-// distributed under the License is distributed on an "AS IS" BASIS,\r
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
-// See the License for the specific language governing permissions and\r
-// limitations under the License.\r
-\r
-package com.hughes.android.dictionary;\r
-\r
-import java.text.SimpleDateFormat;\r
-import java.util.Date;\r
-\r
-public class DateFormatTest {\r
-\r
- /**\r
- * @param args\r
- */\r
- public static void main(String[] args) {\r
- System.out.println(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss").format(new Date()));\r
- }\r
-\r
-}\r
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+public class DateFormatTest {
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ System.out.println(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss").format(new Date()));
+ }
+
+}
package com.hughes.android.dictionary;
-final public class DictionaryApplication {
- final static public boolean USE_COLLATOR = true;
+public final class DictionaryApplication {
+ public static final boolean USE_COLLATOR = true;
}
package com.hughes.android.dictionary;
-final public class FeatureConfig {
- final static public boolean enableWrite = true;
+public final class FeatureConfig {
+ public static final boolean enableWrite = true;
}
-// Copyright 2011 Google Inc. All Rights Reserved.\r
-//\r
-// Licensed under the Apache License, Version 2.0 (the "License");\r
-// you may not use this file except in compliance with the License.\r
-// You may obtain a copy of the License at\r
-//\r
-// http://www.apache.org/licenses/LICENSE-2.0\r
-//\r
-// Unless required by applicable law or agreed to in writing, software\r
-// distributed under the License is distributed on an "AS IS" BASIS,\r
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
-// See the License for the specific language governing permissions and\r
-// limitations under the License.\r
-\r
-package com.hughes.android.dictionary;\r
-\r
-import java.io.File;\r
-import java.io.IOException;\r
-import java.util.Comparator;\r
-\r
-import com.hughes.android.dictionary.engine.Language;\r
-import java.text.Collator;\r
-\r
-public class SerializeCollatorTest {\r
-\r
- /**\r
- * @param args\r
- * @throws IOException\r
- */\r
- public static void main(String[] args) throws IOException {\r
- File temp = File.createTempFile("temp", null);\r
- final Comparator c = Language.de.getCollator();\r
- //FileUtil.writeObject(c, temp);\r
- }\r
-\r
-}\r
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Comparator;
+
+import com.hughes.android.dictionary.engine.Language;
+
+public class SerializeCollatorTest {
+
+ /**
+ * @param args
+ * @throws IOException
+ */
+ public static void main(String[] args) throws IOException {
+ File temp = File.createTempFile("temp", null);
+ final Comparator<Object> c = Language.de.getCollator();
+ //FileUtil.writeObject(c, temp);
+ }
+
+}
package com.hughes.android.dictionary.engine;
-import com.hughes.android.dictionary.DictionaryInfo;
-import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
-import com.hughes.util.CollectionUtil;
-
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collections;
import java.util.List;
+import com.hughes.android.dictionary.DictionaryInfo;
+import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
+import com.hughes.util.CollectionUtil;
+
public class CheckDictionariesMain {
- static final String BASE_URL = "http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/";
+ static final String BASE_URL = "https://github.com/rdoeffinger/Dictionary/releases/download/v0.3-dictionaries/";
static final String VERSION_CODE_OLD = "v006";
static final String VERSION_CODE = "v007";
// dictionaryInfoOut.println("# LANG_1\t%LANG_2\tFILENAME\tVERSION_CODE\tFILESIZE\tNUM_MAIN_WORDS_1\tNUM_MAIN_WORDS_2\tNUM_ALL_WORDS_1\tNUM_ALL_WORDS_2");
final File[] files = dictDir.listFiles();
- final List<String> dictNames = new ArrayList<String>();
+ final List<String> dictNames = new ArrayList<>();
Arrays.sort(files);
for (final File dictFile : files) {
if (!dictFile.getName().endsWith("quickdic")) {
// Find the stats.
System.out.println("Stats...");
- final List<String> indexNames = new ArrayList<String>();
+ final List<String> indexNames = new ArrayList<>();
for (final IndexInfo indexInfo : dictionaryInfo.indexInfos) {
indexNames.add(indexInfo.shortName);
}
--- /dev/null
+// Copyright 2020 Reimar Döffinger. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary.engine;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+public class ConvertToV6 {
+ public static void main(final String[] args) throws IOException {
+ if (args.length != 2 && args.length != 3) {
+ System.out.println("Usage: ConvertToV6 <input.v007> <output.v006> [skipHtml]");
+ System.out.println("If the option third argument is given as 'skipHtml'");
+ System.out.println("the v6 dictionary will be without all HTML entries to reduce its size");
+ return;
+ }
+ boolean skipHtml = false;
+ boolean skipHtmlOpt = false;
+ if (args.length == 3) {
+ if (!args[2].equals("skipHtml") && !args[2].equals("skipHtmlOpt")) {
+ System.out.println("Unknown extra argument '" + args[2] + "'");
+ return;
+ }
+ skipHtml = true;
+ skipHtmlOpt = args[2].equals("skipHtmlOpt");
+ }
+ final String inname = args[0];
+ final String outname = args[1];
+ FileInputStream in;
+ try {
+ in = new FileInputStream(inname);
+ } catch (FileNotFoundException e) {
+ System.out.println("Could not open input file '" + inname + "'");
+ System.out.println(e);
+ return;
+ }
+ final Dictionary dictionary = new Dictionary(in.getChannel());
+ if (dictionary.dictFileVersion <= 6) {
+ System.out.println("Input dictionary is already v6 or older!");
+ return;
+ }
+ if (skipHtmlOpt && dictionary.htmlEntries.size() == 0) {
+ System.exit(3);
+ }
+ RandomAccessFile out;
+ try {
+ out = new RandomAccessFile(outname, "rw");
+ } catch (FileNotFoundException e) {
+ System.out.println("Could not open output file '" + outname + "'");
+ System.out.println(e);
+ return;
+ }
+ if (out.length() > 0) {
+ System.out.println("Output file '" + outname + "' already exists, aborting!");
+ return;
+ }
+ new DictionaryV6Writer(dictionary).writev6(out, skipHtml);
+ out.close();
+ in.close();
+ }
+}
public class DictionaryBuilder {
public final Dictionary dictionary;
- public final List<IndexBuilder> indexBuilders = new ArrayList<IndexBuilder>();
+ public final List<IndexBuilder> indexBuilders = new ArrayList<>();
public DictionaryBuilder(final String dictInfoString, final Language lang0, final Language lang1, final String normalizerRules1, final String normalizerRules2, final Set<String> lang1Stoplist, final Set<String> lang2Stoplist) {
dictionary = new Dictionary(dictInfoString);
}
}
- public static void main(final String[] args) throws IOException, ParserConfigurationException, SAXException {
+ public static void main(final String[] args) throws IOException {
System.out.println("Running with arguments:");
for (final String arg : args) {
System.out.println(arg);
lang2 = null;
}
- final Set<String> lang1Stoplist = new LinkedHashSet<String>();
- final Set<String> lang2Stoplist = new LinkedHashSet<String>();
+ final Set<String> lang1Stoplist = new LinkedHashSet<>();
+ final Set<String> lang2Stoplist = new LinkedHashSet<>();
final String lang1StoplistFile = keyValueArgs.remove("lang1Stoplist");
final String lang2StoplistFile = keyValueArgs.remove("lang2Stoplist");
if (lang1StoplistFile != null) {
final int pageLimit = Integer.parseInt(pageLimitString);
final EntrySource entrySource = new EntrySource(dictionaryBuilder.dictionary.sources.size(), inputName, 0);
- System.out.println("");
+ System.out.println();
String inputFormat = keyValueArgs.remove(prefix + "Format");
if ("tab_separated".equals(inputFormat)) {
package com.hughes.android.dictionary.engine;
-import com.hughes.android.dictionary.parser.wiktionary.EnTranslationToTranslationParser;
-import com.hughes.android.dictionary.parser.wiktionary.WholeSectionToHtmlParser;
-import com.hughes.android.dictionary.parser.wiktionary.WiktionaryLangs;
-
-import junit.framework.TestCase;
-
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
+import com.hughes.android.dictionary.parser.wiktionary.EnTranslationToTranslationParser;
+import com.hughes.android.dictionary.parser.wiktionary.WholeSectionToHtmlParser;
+import com.hughes.android.dictionary.parser.wiktionary.WiktionaryLangs;
+
+import junit.framework.TestCase;
+
public class DictionaryBuilderMain extends TestCase {
static final String INPUTS = "data/inputs/";
static final String OUTPUTS = "data/outputs/";
// Build the non EN ones.
- static final String[][] nonEnPairs = new String[][] {
+ static final String[][] nonEnPairs = {
{"EN"},
{"DE"},
{"IT"},
- static final Map<String,String> isoToDedication = new LinkedHashMap<String, String>();
+ static final Map<String,String> isoToDedication = new LinkedHashMap<>();
static {
isoToDedication.put("AF", "Wiktionary-based Afrikaans dictionary dedicated to Heiko and Mariëtte Horn.");
isoToDedication.put("HR", "Wiktionary-based Croatian dictionary dedicated to Ines Viskic and Miro Kresonja.");
return isoToDedication.containsKey(iso) ? isoToDedication.get(iso) : String.format("Wiktionary-based %s dictionary.", iso);
}
- static final Map<String,String> isoToStoplist = new LinkedHashMap<String, String>();
+ static final Map<String,String> isoToStoplist = new LinkedHashMap<>();
static {
isoToStoplist.put("DE", "de.txt");
isoToStoplist.put("EN", "en.txt");
isoToStoplist.put("FR", "fr.txt");
}
private static String getStoplist(String iso) {
- return isoToStoplist.containsKey(iso) ? isoToStoplist.get(iso) : "empty.txt";
+ return isoToStoplist.getOrDefault(iso, "empty.txt");
}
static String getOtherLang(final String[] pair, final String first) {
}
static List<String> getMainArgs(final String[] pair) {
- final List<String> result = new ArrayList<String>();
+ final List<String> result = new ArrayList<>();
int i = 1;
public static void main(final String[] args) throws Exception {
- final List<String[]> allPairs = new ArrayList<String[]>();
-
- allPairs.addAll(Arrays.asList(nonEnPairs));
+ final List<String[]> allPairs = new ArrayList<>(Arrays.asList(nonEnPairs));
// Add all the EN-XX pairs.
for (final String isoCode : WiktionaryLangs.isoCodeToEnWikiName.keySet()) {
if (!isoCode.equals("EN")) {
}
- final Set<List<String>> done = new LinkedHashSet<List<String>>();
+ final Set<List<String>> done = new LinkedHashSet<>();
boolean go = true;
for (final String[] pair : allPairs) {
Arrays.sort(pair);
}
done.add(pairList);
- if (pairList.contains("EN") && pairList.contains("DE")) {
- go = true;
- } else {
- go = false;
- }
+ go = pairList.contains("EN") && pairList.contains("DE");
if (!go) {
continue;
package com.hughes.android.dictionary.engine;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
}
private void checkGolden(final String dictName, final File dictFile)
- throws IOException, FileNotFoundException {
+ throws IOException {
// Check it once:
assertFilesEqual(GOLDENS + dictName + ".text", dictFile.getPath() + ".text");
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
-import junit.framework.TestCase;
-
import com.hughes.android.dictionary.engine.Index.IndexEntry;
import com.hughes.util.CollectionUtil;
+import junit.framework.TestCase;
+
public class DictionaryTest extends TestCase {
assertTrue(rows.toString(), rows.size() > 0);
assertTrue(rows.get(0).toString().startsWith("come mai@"));
assertTrue(rows.get(0) instanceof TokenRow);
- assertTrue(!((TokenRow)rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
+ assertFalse(((TokenRow) rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
}
{
assertTrue(rows.toString(), rows.size() > 0);
assertTrue(rows.get(0).toString().startsWith("buon giorno@"));
assertTrue(rows.get(0) instanceof TokenRow);
- assertTrue(!((TokenRow)rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
+ assertFalse(((TokenRow) rows.get(0)).getIndexEntry().htmlEntries.isEmpty());
}
{
// Check that search in lowercase works.
assertSearchResult("Alibi", "Alibi", deIndex.findInsertionPoint("alib", new AtomicBoolean(false)));
- System.out.println(deIndex.findInsertionPoint("alib", new AtomicBoolean(false)).toString());
+ System.out.println(deIndex.findInsertionPoint("alib", new AtomicBoolean(false)));
raf.close();
}
--- /dev/null
+// Copyright 2020 Reimar Döffinger. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary.engine;
+
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.io.RandomAccessFile;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+import java.util.zip.GZIPOutputStream;
+
+public class DictionaryV6Writer {
+ private final Dictionary d;
+
+ public DictionaryV6Writer(Dictionary dictionary) {
+ d = dictionary;
+ }
+
+ private void writev6Sources(RandomAccessFile out) throws IOException {
+ ByteArrayOutputStream toc = new ByteArrayOutputStream();
+ DataOutputStream tocout = new DataOutputStream(toc);
+
+ out.writeInt(d.sources.size());
+ long tocPos = out.getFilePointer();
+ out.seek(tocPos + d.sources.size() * 8 + 8);
+ for (EntrySource s : d.sources) {
+ long dataPos = out.getFilePointer();
+ tocout.writeLong(dataPos);
+
+ out.writeUTF(s.getName());
+ out.writeInt(s.getNumEntries());
+ }
+ long dataPos = out.getFilePointer();
+ tocout.writeLong(dataPos);
+ tocout.close();
+
+ out.seek(tocPos);
+ out.write(toc.toByteArray());
+ out.seek(dataPos);
+ }
+
+ private void writev6PairEntries(RandomAccessFile out) throws IOException {
+ ByteArrayOutputStream toc = new ByteArrayOutputStream();
+ DataOutputStream tocout = new DataOutputStream(toc);
+
+ long tocPos = out.getFilePointer();
+ long dataPos = tocPos + 4 + d.pairEntries.size() * 8 + 8;
+
+ out.seek(dataPos);
+ DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
+
+ tocout.writeInt(d.pairEntries.size());
+ for (PairEntry pe : d.pairEntries) {
+ tocout.writeLong(dataPos + outb.size());
+
+ outb.writeShort(pe.entrySource.index());
+ outb.writeInt(pe.pairs.size());
+ for (PairEntry.Pair p : pe.pairs) {
+ outb.writeUTF(p.lang1);
+ outb.writeUTF(p.lang2);
+ }
+ }
+ dataPos += outb.size();
+ outb.flush();
+ tocout.writeLong(dataPos);
+ tocout.close();
+
+ out.seek(tocPos);
+ out.write(toc.toByteArray());
+ out.seek(dataPos);
+ }
+
+ private void writev6TextEntries(RandomAccessFile out) throws IOException {
+ ByteArrayOutputStream toc = new ByteArrayOutputStream();
+ DataOutputStream tocout = new DataOutputStream(toc);
+
+ out.writeInt(d.textEntries.size());
+ long tocPos = out.getFilePointer();
+ out.seek(tocPos + d.textEntries.size() * 8 + 8);
+ for (TextEntry t : d.textEntries) {
+ long dataPos = out.getFilePointer();
+ tocout.writeLong(dataPos);
+
+ out.writeShort(t.entrySource.index());
+ out.writeUTF(t.text);
+ }
+ long dataPos = out.getFilePointer();
+ tocout.writeLong(dataPos);
+ tocout.close();
+
+ out.seek(tocPos);
+ out.write(toc.toByteArray());
+ out.seek(dataPos);
+ }
+
+ private void writev6EmptyList(RandomAccessFile out) throws IOException {
+ out.writeInt(0);
+ out.writeLong(out.getFilePointer() + 8);
+ }
+
+ private void writev6HtmlEntries(RandomAccessFile out) throws IOException {
+ ByteArrayOutputStream toc = new ByteArrayOutputStream();
+ DataOutputStream tocout = new DataOutputStream(toc);
+
+ long tocPos = out.getFilePointer();
+ long dataPos = tocPos + 4 + d.htmlEntries.size() * 8 + 8;
+
+ out.seek(dataPos);
+ DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
+
+ tocout.writeInt(d.htmlEntries.size());
+ for (HtmlEntry h : d.htmlEntries) {
+ tocout.writeLong(dataPos + outb.size());
+
+ outb.writeShort(h.entrySource.index());
+ outb.writeUTF(h.title);
+ byte[] data = h.getHtml().getBytes(StandardCharsets.UTF_8);
+ outb.writeInt(data.length);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ GZIPOutputStream gzout = new GZIPOutputStream(baos);
+ gzout.write(data);
+ gzout.close();
+ outb.writeInt(baos.size());
+ outb.write(baos.toByteArray());
+ }
+ dataPos += outb.size();
+ outb.flush();
+ tocout.writeLong(dataPos);
+ tocout.close();
+
+ out.seek(tocPos);
+ out.write(toc.toByteArray());
+ out.seek(dataPos);
+ }
+
+ private void writev6HtmlIndices(DataOutputStream out, long pos, List<HtmlEntry> entries) throws IOException {
+ long dataPos = pos + 4 + entries.size() * 8 + 8;
+
+ out.writeInt(entries.size());
+
+ // TOC is trivial, so optimize writing it
+ for (int i = 0; i < entries.size(); i++) {
+ out.writeLong(dataPos);
+ dataPos += 4;
+ }
+ out.writeLong(dataPos);
+
+ for (HtmlEntry e : entries) {
+ out.writeInt(e.index());
+ }
+ }
+
+ private void writev6IndexEntries(RandomAccessFile out, List<Index.IndexEntry> entries, int[] prunedRowIdx) throws IOException {
+ ByteArrayOutputStream toc = new ByteArrayOutputStream();
+ DataOutputStream tocout = new DataOutputStream(toc);
+
+ long tocPos = out.getFilePointer();
+ long dataPos = tocPos + 4 + entries.size() * 8 + 8;
+
+ out.seek(dataPos);
+ DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
+
+ tocout.writeInt(entries.size());
+ for (Index.IndexEntry e : entries) {
+ tocout.writeLong(dataPos + outb.size());
+
+ outb.writeUTF(e.token);
+
+ int startRow = e.startRow;
+ int numRows = e.numRows;
+ if (prunedRowIdx != null) {
+ // note: the start row will always be a TokenRow
+ // and thus never be pruned
+ int newNumRows = 1;
+ for (int i = 1; i < numRows; i++) {
+ if (prunedRowIdx[startRow + i] >= 0) newNumRows++;
+ }
+ startRow = prunedRowIdx[startRow];
+ numRows = newNumRows;
+ }
+
+ outb.writeInt(startRow);
+ outb.writeInt(numRows);
+ final boolean hasNormalizedForm = !e.token.equals(e.normalizedToken());
+ outb.writeBoolean(hasNormalizedForm);
+ if (hasNormalizedForm) outb.writeUTF(e.normalizedToken());
+ writev6HtmlIndices(outb, dataPos + outb.size(),
+ prunedRowIdx == null ? e.htmlEntries : Collections.emptyList());
+ }
+ dataPos += outb.size();
+ outb.flush();
+ tocout.writeLong(dataPos);
+ tocout.close();
+
+ out.seek(tocPos);
+ out.write(toc.toByteArray());
+ out.seek(dataPos);
+ }
+
+ private void writev6Index(RandomAccessFile out, boolean skipHtml) throws IOException {
+ ByteArrayOutputStream toc = new ByteArrayOutputStream();
+ DataOutputStream tocout = new DataOutputStream(toc);
+
+ out.writeInt(d.indices.size());
+ long tocPos = out.getFilePointer();
+ out.seek(tocPos + d.indices.size() * 8 + 8);
+ for (Index idx : d.indices) {
+ // create pruned index for skipHtml feature
+ int[] prunedRowIdx = null;
+ int prunedSize = 0;
+ if (skipHtml) {
+ prunedRowIdx = new int[idx.rows.size()];
+ for (int i = 0; i < idx.rows.size(); i++) {
+ final RowBase r = idx.rows.get(i);
+ // prune Html entries
+ boolean pruned = r instanceof HtmlEntry.Row;
+ prunedRowIdx[i] = pruned ? -1 : prunedSize;
+ if (!pruned) prunedSize++;
+ }
+ }
+
+ long dataPos = out.getFilePointer();
+ tocout.writeLong(dataPos);
+
+ out.writeUTF(idx.shortName);
+ out.writeUTF(idx.longName);
+ out.writeUTF(idx.sortLanguage.getIsoCode());
+ out.writeUTF(idx.normalizerRules);
+ out.writeBoolean(idx.swapPairEntries);
+ out.writeInt(idx.mainTokenCount);
+ writev6IndexEntries(out, idx.sortedIndexEntries, prunedRowIdx);
+
+ // write stoplist, serializing the whole Set *shudder*
+ // Actually just emulate ObjectOutputStream serialization
+ final byte[] hashSetSerialized = {
+ (byte)0xac, (byte)0xed, // magic
+ 0x00, 0x05, // version
+ 0x73, // object
+ 0x72, // class
+ // "java.util.HashSet"
+ 0x00, 0x11, 0x6a, 0x61, 0x76, 0x61, 0x2e, 0x75, 0x74, 0x69,
+ 0x6c, 0x2e, 0x48, 0x61, 0x73, 0x68, 0x53, 0x65, 0x74,
+ // serialization ID
+ (byte)0xba, 0x44, (byte)0x85, (byte)0x95, (byte)0x96, (byte)0xb8, (byte)0xb7, 0x34,
+ 0x03, // flags: serialized, custom serialization function
+ 0x00, 0x00, // fields count
+ 0x78, // blockdata end
+ 0x70, // null (superclass)
+ 0x77, 0x0c // blockdata short, 0xc bytes
+ };
+ int stoplistlen = hashSetSerialized.length;
+ stoplistlen += 12; // block data: capacity (int), load factor (float), size (int)
+ for (String s : idx.stoplist) {
+ stoplistlen += 3 + s.length();
+ }
+ stoplistlen++;
+
+ DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
+ outb.writeInt(stoplistlen);
+ outb.write(hashSetSerialized);
+ outb.writeInt(idx.stoplist.size()); // capacity
+ outb.writeFloat(0.75f); // load factor
+ outb.writeInt(idx.stoplist.size()); // size
+ for (String s : idx.stoplist) {
+ outb.writeByte(0x74); // String type
+ outb.writeUTF(s);
+ }
+ outb.writeByte(0x78); // blockdata end
+
+ outb.writeInt(skipHtml ? prunedSize : idx.rows.size());
+ outb.writeInt(5);
+ for (RowBase r : idx.rows) {
+ int type = 0;
+ if (r instanceof PairEntry.Row) {
+ type = 0;
+ } else if (r instanceof TokenRow) {
+ final TokenRow tokenRow = (TokenRow)r;
+ type = tokenRow.hasMainEntry ? 1 : 3;
+ } else if (r instanceof TextEntry.Row) {
+ type = 2;
+ } else if (r instanceof HtmlEntry.Row) {
+ type = 4;
+ if (skipHtml) continue;
+ } else {
+ throw new RuntimeException("Row type not supported for v6");
+ }
+ outb.writeByte(type);
+ outb.writeInt(r.referenceIndex);
+ }
+ outb.flush();
+ }
+ long dataPos = out.getFilePointer();
+ tocout.writeLong(dataPos);
+ tocout.close();
+
+ out.seek(tocPos);
+ out.write(toc.toByteArray());
+ out.seek(dataPos);
+ }
+
+ public void writev6(RandomAccessFile raf, boolean skipHtml) throws IOException {
+ raf.writeInt(6);
+ raf.writeLong(d.creationMillis);
+ raf.writeUTF(d.dictInfo);
+ System.out.println("sources start: " + raf.getFilePointer());
+ writev6Sources(raf);
+ System.out.println("pair start: " + raf.getFilePointer());
+ writev6PairEntries(raf);
+ System.out.println("text start: " + raf.getFilePointer());
+ writev6TextEntries(raf);
+ System.out.println("html index start: " + raf.getFilePointer());
+ if (skipHtml) writev6EmptyList(raf);
+ else writev6HtmlEntries(raf);
+ System.out.println("indices start: " + raf.getFilePointer());
+ writev6Index(raf, skipHtml);
+ System.out.println("end: " + raf.getFilePointer());
+ raf.writeUTF("END OF DICTIONARY");
+ }
+}
package com.hughes.android.dictionary.engine;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.EnumMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.*;
import com.hughes.android.dictionary.engine.Index.IndexEntry;
import com.hughes.android.dictionary.parser.DictFileParser;
public final Index index;
final Set<String> stoplist;
- final SortedMap<String, TokenData> tokenToData;
+ final Map<String, TokenData> fastTokenToData;
+ final SortedMap<FastCompareString, TokenData> tokenToData;
IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language, final String normalizerRules, final Set<String> stoplist, final boolean swapPairEntries) {
this.dictionaryBuilder = dictionaryBuilder;
index = new Index(dictionaryBuilder.dictionary, shortName, longName, language, normalizerRules, swapPairEntries, stoplist);
- tokenToData = new TreeMap<String, TokenData>(index.getSortComparator());
+ tokenToData = new TreeMap<>(new FastNormalizeComparator(index.getSortComparator()));
+ fastTokenToData = new HashMap<>();
this.stoplist = stoplist;
}
public void build() {
- final Set<IndexedEntry> tokenIndexedEntries = new HashSet<IndexedEntry>();
+ final Set<IndexedEntry> tokenIndexedEntries = new HashSet<>();
final List<RowBase> rows = index.rows;
index.mainTokenCount = 0;
for (final TokenData tokenData : tokenToData.values()) {
}
}
- final List<IndexEntry> entriesSortedByNumRows = new ArrayList<IndexEntry>(index.sortedIndexEntries);
- Collections.sort(entriesSortedByNumRows, new Comparator<IndexEntry>() {
- @Override
- public int compare(IndexEntry object1, IndexEntry object2) {
- return object2.numRows - object1.numRows;
- }
- });
+ final List<IndexEntry> entriesSortedByNumRows = new ArrayList<>(index.sortedIndexEntries);
+ entriesSortedByNumRows.sort((object1, object2) -> object2.numRows - object1.numRows);
System.out.println("Most common tokens:");
for (int i = 0; i < 50 && i < entriesSortedByNumRows.size(); ++i) {
System.out.println(" " + entriesSortedByNumRows.get(i));
public static class TokenData {
final String token;
- final Map<EntryTypeName, List<IndexedEntry>> typeToEntries = new EnumMap<EntryTypeName, List<IndexedEntry>>(EntryTypeName.class);
+ final Map<EntryTypeName, List<IndexedEntry>> typeToEntries = new EnumMap<>(EntryTypeName.class);
public boolean hasMainEntry = false;
- public List<HtmlEntry> htmlEntries = new ArrayList<HtmlEntry>();
+ public final List<HtmlEntry> htmlEntries = new ArrayList<>();
TokenData(final String token) {
assert token.equals(token.trim());
}
public TokenData getOrCreateTokenData(final String token) {
- TokenData tokenData = tokenToData.get(token);
- if (tokenData == null) {
- tokenData = new TokenData(token);
- tokenToData.put(token, tokenData);
+ TokenData tokenData = fastTokenToData.get(token);
+ if (tokenData != null) return tokenData;
+ tokenData = new TokenData(token);
+ final FastCompareString c = new FastCompareString(token);
+ if (tokenToData.put(c, tokenData) != null) {
+ // The parallel HashMap assumes that the TreeMap Comparator
+ // is compatible with the equals it uses to compare.
+ throw new RuntimeException("TokenData TreeMap and HashMap out of sync, Comparator may be broken?");
}
+ fastTokenToData.put(token, tokenData);
return tokenData;
}
tokenData.hasMainEntry = true;
}
if (entries == null) {
- entries = new ArrayList<IndexedEntry>();
+ entries = new ArrayList<>();
tokenData.typeToEntries.put(entryTypeName, entries);
}
return entries;
import java.util.List;
import java.util.Set;
-import junit.framework.TestCase;
-
import com.hughes.android.dictionary.parser.DictFileParser;
import com.hughes.android.dictionary.parser.wiktionary.WiktionaryLangs;
import com.ibm.icu.text.Transliterator;
+import junit.framework.TestCase;
+
public class LanguageTest extends TestCase {
public void testGermanSort() {
assertEquals("hulle", normalizer.transform("Hulle"));
- final List<String> sorted = new ArrayList<String>(words);
+ final List<String> sorted = new ArrayList<>(words);
// Collections.shuffle(shuffled, new Random(0));
- Collections.sort(sorted, comparator);
- System.out.println(sorted.toString());
+ sorted.sort(comparator);
+ System.out.println(sorted);
for (int i = 0; i < words.size(); ++i) {
System.out.println(words.get(i) + "\t" + sorted.get(i));
assertEquals(words.get(i), sorted.get(i));
"preppy",
"preprocess");
- final List<String> sorted = new ArrayList<String>(words);
+ final List<String> sorted = new ArrayList<>(words);
final NormalizeComparator comparator = new NormalizeComparator(normalizer, Language.en.getCollator(), 7);
- Collections.sort(sorted, comparator);
+ sorted.sort(comparator);
for (int i = 0; i < words.size(); ++i) {
if (i > 0) {
assertTrue(comparator.compare(words.get(i-1), words.get(i)) < 0);
public void testEnWiktionaryNames() {
- final Set<String> enLangs = new LinkedHashSet<String>(WiktionaryLangs.isoCodeToEnWikiName.keySet());
- final List<String> names = new ArrayList<String>();
+ final Set<String> enLangs = new LinkedHashSet<>(WiktionaryLangs.isoCodeToEnWikiName.keySet());
+ final List<String> names = new ArrayList<>();
for (final String code : WiktionaryLangs.isoCodeToEnWikiName.keySet()) {
names.add(WiktionaryLangs.isoCodeToEnWikiName.get(code));
enLangs.add(code.toLowerCase());
package com.hughes.android.dictionary.engine;
-import java.io.InputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
public ReadAheadBuffer(InputStream in, int size) {
super(size);
assert size >= 2 * BLOCK_SIZE;
- this.in = in;
try {
pipe = new PipedOutputStream(this);
- buffer = new byte[BLOCK_SIZE];
- new Thread(new Runnable() {
- public void run() {
- int read;
- try {
- while ((read = in.read(buffer)) > 0)
- {
- pipe.write(buffer, 0, read);
- pipe.flush();
- }
- } catch (IOException e) {}
- try {
- pipe.close();
- } catch (IOException e) {}
- }
- }).start();
} catch (IOException e) {}
+ new Thread(() -> {
+ try {
+ int read;
+ final byte[] buffer = new byte[BLOCK_SIZE];
+ while ((read = in.read(buffer)) > 0)
+ {
+ pipe.write(buffer, 0, read);
+ pipe.flush();
+ }
+ } catch (IOException e) {}
+ try {
+ pipe.close();
+ } catch (IOException e) {}
+ }).start();
}
- InputStream in;
PipedOutputStream pipe;
- byte buffer[];
}
--- /dev/null
+// Copyright 2020 Reimar Döffinger. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary.engine;
+
+import java.util.Arrays;
+
+public class Runner {
+ public static void main(final String[] args) throws Exception {
+ if (args.length == 0) {
+ System.out.println("Specify WiktionarySplitter, DictionaryBuilder or ConvertToV6 as first argument");
+ return;
+ }
+ String[] newargs = Arrays.copyOfRange(args, 1, args.length);
+ if (args[0].equals("WiktionarySplitter")) {
+ WiktionarySplitter.main(newargs);
+ } else if (args[0].equals("DictionaryBuilder")) {
+ DictionaryBuilder.main(newargs);
+ } else if (args[0].equals("ConvertToV6")) {
+ ConvertToV6.main(newargs);
+ } else if (args[0].equals("CheckDictionariesMain")) {
+ CheckDictionariesMain.main(newargs);
+ } else {
+ System.out.println("Unknown command '" + args[0] + "'. Use one of WiktionarySplitter, DictionaryBuilder, ConvertToV6 or CheckDictionariesMain instead.");
+ }
+ }
+}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
-import java.io.InputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
-import org.apache.xerces.jaxp.SAXParserFactoryImpl;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import com.hughes.android.dictionary.parser.wiktionary.WiktionaryLangs;
-public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
+public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler implements Runnable {
// The matches the whole line, otherwise regexes don't work well on French:
// {{=uk=}}
// Spanish has no initial headings, tried to also detect {{ES as such
// with "^(\\{\\{ES|(=+)[^=]).*$" but that broke English.
- static final Pattern headingStart = Pattern.compile("^(=+)[^=].*$", Pattern.MULTILINE);
+ static final Pattern headingStartPattern = Pattern.compile("^(=+)[^=].*$", Pattern.MULTILINE);
+ static final Pattern startSpanish = Pattern.compile("\\{\\{ES(\\|[^{}=]*)?}}");
- final Map<String,List<Selector>> pathToSelectors = new LinkedHashMap<String, List<Selector>>();
+ final Map.Entry<String, List<Selector>> pathToSelectorsEntry;
List<Selector> currentSelectors = null;
StringBuilder titleBuilder;
StringBuilder currentBuilder = null;
public static void main(final String[] args) throws Exception {
- final WiktionarySplitter wiktionarySplitter = new WiktionarySplitter();
- wiktionarySplitter.go();
+ boolean parallel = args.length > 0 && args[0].equals("parallel");
+ final ExecutorService e = Executors.newCachedThreadPool();
+ final Map<String,List<Selector>> pathToSelectors = createSelectorsMap();
+ for (final Map.Entry<String, List<Selector>> pathToSelectorsEntry : pathToSelectors.entrySet()) {
+ final WiktionarySplitter wiktionarySplitter = new WiktionarySplitter(pathToSelectorsEntry);
+ if (parallel) {
+ e.submit(wiktionarySplitter);
+ } else wiktionarySplitter.go();
+ }
+ e.shutdown();
}
- private WiktionarySplitter() {
+ private WiktionarySplitter(final Map.Entry<String, List<Selector>> pathToSelectorsEntry) {
+ this.pathToSelectorsEntry = pathToSelectorsEntry;
+ }
+
+ private static Map<String,List<Selector>> createSelectorsMap() {
+ final Map<String,List<Selector>> pathToSelectors = new LinkedHashMap<>();
List<Selector> selectors;
for (final String code : WiktionaryLangs.wikiCodeToIsoCodeToWikiName.keySet()) {
//if (!code.equals("fr")) {continue;}
- selectors = new ArrayList<WiktionarySplitter.Selector>();
+ selectors = new ArrayList<>();
pathToSelectors.put(String.format("data/inputs/%swiktionary-pages-articles.xml", code), selectors);
for (final Map.Entry<String, String> entry : WiktionaryLangs.wikiCodeToIsoCodeToWikiName.get(code).entrySet()) {
final String dir = String.format("data/inputs/wikiSplit/%s", code);
selectors.add(new Selector(String.format("%s/%s.data", dir, entry.getKey()), entry.getValue()));
}
}
+ return pathToSelectors;
+ }
+
+ @Override
+ public void run() {
+ try {
+ go();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
}
private void go() throws Exception {
- final SAXParser parser = SAXParserFactoryImpl.newInstance().newSAXParser();
+ final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
// Configure things.
- for (final Map.Entry<String, List<Selector>> pathToSelectorsEntry : pathToSelectors.entrySet()) {
currentSelectors = pathToSelectorsEntry.getValue();
OutputStream tmp = new FileOutputStream(selector.outFilename + ".gz");
tmp = new BufferedOutputStream(tmp);
tmp = new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP, tmp);
- tmp = new WriteBuffer(tmp, 20 * 1024 * 1024);
+ tmp = new WriteBuffer(tmp, 1024 * 1024);
selector.out = new DataOutputStream(tmp);
}
parser.parse(new BufferedInputStream(in), this);
}
} catch (Exception e) {
- System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString() + " of file " + pathToSelectorsEntry.getKey());
+ System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder + " of file " + pathToSelectorsEntry.getKey());
throw e;
}
for (final Selector selector : currentSelectors) {
selector.out.close();
}
-
- }
}
String lastPageTitle = null;
int pageCount = 0;
- Pattern endPatterns[] = new Pattern[100];
+ final Matcher[] endPatterns = new Matcher[100];
- private Pattern getEndPattern(int depth) {
+ private Matcher getEndPattern(int depth) {
if (endPatterns[depth] == null)
- endPatterns[depth] = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+ endPatterns[depth] = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE).matcher("");
return endPatterns[depth];
}
title.startsWith("Template:") ||
title.startsWith("Summary:") ||
title.startsWith("Module:") ||
+ title.startsWith("Reconstruction:") ||
// DE
title.startsWith("Datei:") ||
title.startsWith("Verzeichnis:") ||
title.startsWith("Kategorie:") ||
title.startsWith("Hilfe:") ||
title.startsWith("Reim:") ||
+ title.startsWith("Modul:") ||
// FR:
title.startsWith("Annexe:") ||
title.startsWith("Catégori:") ||
title.startsWith("Aide:") ||
title.startsWith("Fichier:") ||
title.startsWith("Wiktionnaire:") ||
+ title.startsWith("Translations:Wiktionnaire:") ||
+ title.startsWith("Translations:Projet:") ||
title.startsWith("Catégorie:") ||
title.startsWith("Portail:") ||
title.startsWith("utiliusateur:") ||
title.startsWith("Kategorio:") ||
+ title.startsWith("Tutoriel:") ||
// IT
title.startsWith("Wikizionario:") ||
title.startsWith("Appendice:") ||
title.startsWith("Categoria:") ||
title.startsWith("Aiuto:") ||
title.startsWith("Portail:") ||
+ title.startsWith("Modulo:") ||
// ES
title.startsWith("Apéndice:") ||
title.startsWith("Archivo:") ||
title.startsWith("Predefinição:") ||
title.startsWith("Vocabulário:") ||
title.startsWith("Wikcionário:") ||
+ title.startsWith("Módulo:") ||
// sentinel
false
) return;
- if (!title.startsWith("Sign gloss:")) {
+ // leave the Flexion: pages in for now and do not warn about them
+ if (!title.startsWith("Sign gloss:") && !title.startsWith("Flexion:")) {
System.err.println("title with colon: " + title);
}
}
String text = textBuilder.toString();
// Workaround for Spanish wiktionary {{ES}} and {{ES|word}} patterns
- text = text.replaceAll("\\{\\{ES(\\|[^{}=]*)?}}", "== {{lengua|es}} ==");
+ text = startSpanish.matcher(text).replaceAll("== {{lengua|es}} ==");
String translingual = "";
int start = 0;
- final Matcher startMatcher = headingStart.matcher(text);
+ Matcher headingStart = headingStartPattern.matcher(text);
while (start < text.length()) {
// Find start.
- if (!startMatcher.find(start)) {
+ if (!headingStart.find(start)) {
return;
}
- start = startMatcher.end();
+ start = headingStart.end();
- final String heading = startMatcher.group();
+ final String heading = headingStart.group();
// For Translingual entries just store the text for later
// use in the per-language sections
- if (heading.indexOf("Translingual") != -1) {
+ if (heading.contains("Translingual")) {
// Find end.
- final int depth = startMatcher.group(1).length();
- final Pattern endPattern = getEndPattern(depth);
+ final int depth = headingStart.group(1).length();
+ final Matcher endMatcher = getEndPattern(depth).reset(text);
- final Matcher endMatcher = endPattern.matcher(text);
if (endMatcher.find(start)) {
int end = endMatcher.start();
translingual = text.substring(start, end);
}
for (final Selector selector : currentSelectors) {
- if (selector.pattern.matcher(heading).find()) {
+ if (selector.pattern.reset(heading).find()) {
// Find end.
- final int depth = startMatcher.group(1).length();
- final Pattern endPattern = getEndPattern(depth);
+ final int depth = headingStart.group(1).length();
+ final Matcher endMatcher = getEndPattern(depth).reset(text);
- final Matcher endMatcher = endPattern.matcher(text);
final int end;
if (endMatcher.find(start)) {
end = endMatcher.start();
sectionText.charAt(dummy_end + 1) == '\n') ++dummy_end;
sectionText = sectionText.substring(dummy_end);
}
- if (heading.indexOf("Japanese") == -1) sectionText += translingual;
+ if (!heading.contains("Japanese")) sectionText += translingual;
final Section section = new Section(title, heading, sectionText);
try {
selector.out.writeUTF(section.title);
selector.out.writeUTF(section.heading);
- final byte[] bytes = section.text.getBytes("UTF8");
+ final byte[] bytes = section.text.getBytes(StandardCharsets.UTF_8);
selector.out.writeInt(bytes.length);
selector.out.write(bytes);
} catch (IOException e) {
static class Selector {
final String outFilename;
- final Pattern pattern;
+ final Matcher pattern;
DataOutputStream out;
public Selector(final String filename, final String pattern) {
this.outFilename = filename;
- this.pattern = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
+ this.pattern = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).matcher("");
}
}
}
@Override
- public void characters(char[] ch, int start, int length) throws SAXException {
+ public void characters(char[] ch, int start, int length) {
if (currentBuilder != null) {
currentBuilder.append(ch, start, length);
}
}
@Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
+ public void endElement(String uri, String localName, String qName) {
currentBuilder = null;
if ("page".equals(qName)) {
endPage();
public void parse(final File file) throws ParserConfigurationException,
SAXException, IOException {
- final SAXParser parser = SAXParserFactoryImpl.newInstance().newSAXParser();
+ final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
parser.parse(file, this);
}
package com.hughes.android.dictionary.engine;
-import java.io.OutputStream;
import java.io.IOException;
+import java.io.OutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
public class WriteBuffer extends PipedOutputStream {
- static int BLOCK_SIZE = 1024 * 1024;
+ static int BLOCK_SIZE = 256 * 1024;
public WriteBuffer(OutputStream out, int size) {
assert size >= 2 * BLOCK_SIZE;
this.out = out;
try {
pipe = new PipedInputStream(this, size);
buffer = new byte[BLOCK_SIZE];
- writeThread = new Thread(new Runnable() {
- public void run() {
- int read;
- try {
- while ((read = pipe.read(buffer)) > 0)
- {
- out.write(buffer, 0, read);
- out.flush();
- }
- } catch (IOException e) {
- System.out.println("Error writing to file " + e);
+ writeThread = new Thread(() -> {
+ int read;
+ try {
+ while ((read = pipe.read(buffer)) > 0)
+ {
+ out.write(buffer, 0, read);
+ out.flush();
}
- try {
- out.close();
- } catch (IOException e) {}
+ } catch (IOException e) {
+ System.out.println("Error writing to file " + e);
}
+ try {
+ out.close();
+ } catch (IOException e) {}
});
writeThread.start();
} catch (IOException e) {}
Thread writeThread;
OutputStream out;
PipedInputStream pipe;
- byte buffer[];
+ byte[] buffer;
}
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
-import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import com.hughes.android.dictionary.engine.DictionaryBuilder;
import com.hughes.android.dictionary.engine.EntrySource;
-import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.engine.EntryTypeName;
import com.hughes.android.dictionary.engine.IndexBuilder;
+import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.engine.Language;
import com.hughes.android.dictionary.engine.PairEntry;
-import com.hughes.android.dictionary.engine.PairEntry.Pair;
public class DictFileParser implements Parser {
if (subfields[1][i].length() == 0) {
subfields[1][i] = "__";
}
- pairEntry.pairs.add(new Pair(subfields[0][i], subfields[1][i]));
+ pairEntry.pairs.add(new PairEntry.Pair(subfields[0][i], subfields[1][i]));
}
final IndexedEntry entryData = new IndexedEntry(pairEntry);
entryData.isValid = true;
return field;
}
- public static final Set<String> tokenize(final String text, final Pattern pattern) {
+ public static Set<String> tokenize(final String text, final Pattern pattern) {
final String[] split = pattern.split(text);
- final Set<String> result = new LinkedHashSet<String>(Arrays.asList(split));
+ final Set<String> result = new LinkedHashSet<>(Arrays.asList(split));
result.remove("");
return result;
}
package com.hughes.android.dictionary.parser;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class WikiTokenizer {
- public static interface Callback {
+ public interface Callback {
void onPlainText(final String text);
void onMarkup(WikiTokenizer wikiTokenizer);
void onWikiLink(WikiTokenizer wikiTokenizer);
}
//private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE);
- private static final Pattern wikiTokenEvent = Pattern.compile("(" +
+ private static final Pattern wikiTokenEvent = Pattern.compile(
"\\{\\{|\\}\\}|" +
"\\[\\[|\\]\\]|" +
"\\||" + // Need the | because we might have to find unescaped pipes
"<pre>|" +
"<math>|" +
"<ref>|" +
- "$)", Pattern.MULTILINE);
+ "\n", Pattern.MULTILINE);
private static final String listChars = "*#:;";
int end = 0;
int start = -1;
- final List<String> errors = new ArrayList<String>();
- final List<String> tokenStack = new ArrayList<String>();
+ final List<String> errors = new ArrayList<>();
+ final List<TokenDelim> tokenStack = new ArrayList<>();
private String headingWikiText;
private int lastUnescapedPipePos;
private int lastUnescapedEqualsPos;
- private final List<String> positionArgs = new ArrayList<String>();
- private final Map<String,String> namedArgs = new LinkedHashMap<String,String>();
+ private final List<String> positionArgs = new ArrayList<>();
+ private final Map<String,String> namedArgs = new LinkedHashMap<>();
public WikiTokenizer(final String wikiText) {
public WikiTokenizer(String wikiText, final boolean isNewline) {
wikiText = wikiText.replace('\u2028', '\n');
+ wikiText = wikiText.replace('\u2029', '\n');
wikiText = wikiText.replace('\u0085', '\n');
this.wikiText = wikiText;
this.matcher = wikiTokenEvent.matcher(wikiText);
namedArgs.clear();
}
- private static final Pattern POSSIBLE_WIKI_TEXT = Pattern.compile(
+ private static final Matcher POSSIBLE_WIKI_TEXT = Pattern.compile(
"\\{\\{|" +
"\\[\\[|" +
"<!--|" +
"<pre>|" +
"<math>|" +
"<ref>|" +
- "[\n]"
- );
+ "\n"
+ ).matcher("");
public static void dispatch(final String wikiText, final boolean isNewline, final Callback callback) {
- // Optimization...
- if (!POSSIBLE_WIKI_TEXT.matcher(wikiText).find()) {
+ // Statistical background, from EN-DE dictionary generation:
+ // out of 12083000 calls, 9697686 can be skipped via the test
+ // for ', \n and ((c - 0x3b) & 0xff9f) < 2 (which covers among others
+ // <, { and [).
+ // This increased to 10006466 checking for <, { and [ specifically,
+ // and is minimally faster overall.
+ // A even more precise one using regex and checking for {{, [[, <!--, '',
+ // <pre>, <math>, <ref> and \n increased that to 10032846.
+ // Regex thus seems far too costly for a measly increase from 80%/82% to 83% rejection rate
+ // However completely removing it changes output (likely a bug), so leave it in for now
+ // but at least run it only on the 18% not caught by the faster logic.
+ // Original runtime: 1m29.708s
+ // Optimized: 1m19.170s
+ // Regex removed: 1m20.314s (not statistically significant)
+ boolean matched = false;
+ for (int i = 0; i < wikiText.length(); i++) {
+ int c = wikiText.charAt(i);
+ if (c == '\'' || c == '\n' || c == '<' || c == '[' || c == '{') {
+ matched = true;
+ break;
+ }
+ }
+ if (!matched || !POSSIBLE_WIKI_TEXT.reset(wikiText).find()) {
callback.onPlainText(wikiText);
} else {
final WikiTokenizer tokenizer = new WikiTokenizer(wikiText, isNewline);
}
// Eat a newline if we're looking at one:
- final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028';
+ final boolean atNewline = wikiText.charAt(end) == '\n';
if (atNewline) {
justReturnedNewline = true;
++end;
// Skip non-=...
if (end < len) {
final int nextNewline = safeIndexOf(wikiText, end, "\n", "\n");
- final int closingEquals = escapedFindEnd(end, "=");
+ final int closingEquals = escapedFindEnd(end, TokenDelim.EQUALS);
if (wikiText.charAt(closingEquals - 1) == '=') {
end = closingEquals - 1;
} else {
if (listChars.indexOf(firstChar) != -1) {
while (++end < len && listChars.indexOf(wikiText.charAt(end)) != -1) {}
listPrefixEnd = end;
- end = escapedFindEnd(start, "\n");
+ end = escapedFindEnd(start, TokenDelim.NEWLINE);
return this;
}
}
}
if (wikiText.startsWith("[[", start)) {
- end = escapedFindEnd(start + 2, "]]");
+ end = escapedFindEnd(start + 2, TokenDelim.DBRACKET_CLOSE);
isWikiLink = errors.isEmpty();
return this;
}
if (wikiText.startsWith("{{", start)) {
- end = escapedFindEnd(start + 2, "}}");
+ end = escapedFindEnd(start + 2, TokenDelim.BRACE_CLOSE);
isFunction = errors.isEmpty();
return this;
}
}
- if (this.matcher.find(start)) {
- end = this.matcher.start(1);
+ while (end < wikiText.length()) {
+ int c = wikiText.charAt(end);
+ if (c == '\n' || c == '\'' || ((c - 0x1b) & 0xff9f) < 3) {
+ matcher.region(end, wikiText.length());
+ if (matcher.lookingAt()) break;
+ }
+ end++;
+ }
+ if (end != wikiText.length()) {
isPlainText = true;
if (end == start) {
- errors.add("Empty group: " + this.matcher.group());
+ // stumbled over a new type of newline?
+ // Or matcher is out of sync with checks above
+ errors.add("Empty group: " + this.matcher.group() + " char: " + (int)wikiText.charAt(end));
assert false;
+ // Note: all newlines should be normalize to \n before calling this function
+ throw new RuntimeException("matcher not in sync with code, or new type of newline, errors :" + errors);
}
return this;
}
- end = wikiText.length();
+ isPlainText = true;
return this;
} finally {
return token;
}
- final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "<!--" };
- private int escapedFindEnd(final int start, final String toFind) {
+ enum TokenDelim { NEWLINE, BRACE_OPEN, BRACE_CLOSE, DBRACKET_OPEN, DBRACKET_CLOSE, BRACKET_OPEN, BRACKET_CLOSE, PIPE, EQUALS, COMMENT }
+
+ private int tokenDelimLen(TokenDelim d) {
+ switch (d) {
+ case NEWLINE:
+ case BRACKET_OPEN:
+ case BRACKET_CLOSE:
+ case PIPE:
+ case EQUALS:
+ return 1;
+ case BRACE_OPEN:
+ case BRACE_CLOSE:
+ case DBRACKET_OPEN:
+ case DBRACKET_CLOSE:
+ return 2;
+ case COMMENT:
+ return 4;
+ default:
+ throw new RuntimeException();
+ }
+ }
+
+ static final String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "<!--" };
+ private int escapedFindEnd(final int start, final TokenDelim toFind) {
assert tokenStack.isEmpty();
- final boolean insideFunction = toFind.equals("}}");
+ final boolean insideFunction = toFind == TokenDelim.BRACE_CLOSE;
int end = start;
int firstNewline = -1;
- int[] nextMatch = new int[patterns.length];
- for (int i = 0; i < nextMatch.length; ++i) {
- nextMatch[i] = -2;
- }
int singleBrackets = 0;
while (end < wikiText.length()) {
// Manual replacement for matcher.find(end),
// because Java regexp is a ridiculously slow implementation.
// Initialize to always match the end.
- int matchIdx = 0;
- for (int i = 0; i < nextMatch.length; ++i) {
- if (nextMatch[i] <= end) {
- nextMatch[i] = wikiText.indexOf(patterns[i], end);
- if (nextMatch[i] == -1) nextMatch[i] = i > 0 ? 0x7fffffff : wikiText.length();
- }
- if (nextMatch[i] < nextMatch[matchIdx]) {
- matchIdx = i;
- }
+ TokenDelim match = TokenDelim.NEWLINE;
+ int matchStart = end;
+ for (; matchStart < wikiText.length(); matchStart++) {
+ int i = matchStart;
+ int c = wikiText.charAt(i);
+ if (c == '\n') break;
+ if (c == '{' && wikiText.startsWith("{{", i)) { match = TokenDelim.BRACE_OPEN; break; }
+ if (c == '}' && wikiText.startsWith("}}", i)) { match = TokenDelim.BRACE_CLOSE; break; }
+ if (c == '[') { match = wikiText.startsWith("[[", i) ? TokenDelim.DBRACKET_OPEN : TokenDelim.BRACKET_OPEN ; break; }
+ if (c == ']') { match = wikiText.startsWith("]]", i) ? TokenDelim.DBRACKET_CLOSE : TokenDelim.BRACKET_CLOSE ; break; }
+ if (c == '|') { match = TokenDelim.PIPE; break; }
+ if (c == '=') { match = TokenDelim.EQUALS; break; }
+ if (c == '<' && wikiText.startsWith("<!--", i)) { match = TokenDelim.COMMENT; break; }
}
- int matchStart = nextMatch[matchIdx];
- String matchText = patterns[matchIdx];
- int matchEnd = matchStart + matchText.length();
- if (matchIdx == 0) {
- matchText = "";
- matchEnd = matchStart;
+ int matchEnd = matchStart + (match == TokenDelim.NEWLINE ? 0 : tokenDelimLen(match));
+ if (match != TokenDelim.NEWLINE && tokenStack.isEmpty() && match == toFind) {
+ // The normal return....
+ if (insideFunction) {
+ addFunctionArg(insideFunction, matchStart);
+ }
+ return matchEnd;
}
-
- assert matchEnd > end || matchText.length() == 0: "Group=" + matchText;
- if (matchText.length() == 0) {
+ switch (match) {
+ case NEWLINE:
assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n' : wikiText + ", " + matchStart;
if (firstNewline == -1) {
firstNewline = matchEnd;
}
- if (tokenStack.isEmpty() && toFind.equals("\n")) {
+ if (tokenStack.isEmpty() && toFind == TokenDelim.NEWLINE) {
return matchStart;
}
++end;
- } else if (tokenStack.isEmpty() && matchText.equals(toFind)) {
- // The normal return....
- if (insideFunction) {
- addFunctionArg(insideFunction, matchStart);
- }
- return matchEnd;
- } else if (matchText.equals("[")) {
+ break;
+ case BRACKET_OPEN:
singleBrackets++;
- } else if (matchText.equals("]")) {
+ break;
+ case BRACKET_CLOSE:
if (singleBrackets > 0) singleBrackets--;
- } else if (matchText.equals("[[") || matchText.equals("{{")) {
- tokenStack.add(matchText);
- } else if (matchText.equals("]]") || matchText.equals("}}")) {
- if (tokenStack.size() > 0) {
- final String removed = tokenStack.remove(tokenStack.size() - 1);
- if (removed.equals("{{") && !matchText.equals("}}")) {
+ break;
+ case DBRACKET_OPEN:
+ case BRACE_OPEN:
+ tokenStack.add(match);
+ break;
+ case DBRACKET_CLOSE:
+ case BRACE_CLOSE:
+ if (!tokenStack.isEmpty()) {
+ final TokenDelim removed = tokenStack.remove(tokenStack.size() - 1);
+ if (removed == TokenDelim.BRACE_OPEN && match != TokenDelim.BRACE_CLOSE) {
if (singleBrackets >= 2) { // assume this is really two closing single ]
singleBrackets -= 2;
tokenStack.add(removed);
errors.add("Unmatched {{ error: " + wikiText.substring(start, matchEnd));
return safeIndexOf(wikiText, start, "\n", "\n");
}
- } else if (removed.equals("[[") && !matchText.equals("]]")) {
+ } else if (removed == TokenDelim.DBRACKET_OPEN && match != TokenDelim.DBRACKET_CLOSE) {
errors.add("Unmatched [[ error: " + wikiText.substring(start, matchEnd));
return safeIndexOf(wikiText, start, "\n", "\n");
}
} else {
- errors.add("Pop too many " + matchText + " error: " + wikiText.substring(start, matchEnd).replace("\n", "\\\\n"));
+ errors.add("Pop too many " + wikiText.substring(matchStart, matchEnd) + " error: " + wikiText.substring(start, matchEnd).replace("\n", "\\\\n"));
// If we were looking for a newline
return safeIndexOf(wikiText, start, "\n", "\n");
}
- } else if (matchText.equals("|")) {
+ break;
+ case PIPE:
if (tokenStack.isEmpty()) {
addFunctionArg(insideFunction, matchStart);
}
- } else if (matchText.equals("=")) {
+ break;
+ case EQUALS:
if (tokenStack.isEmpty()) {
lastUnescapedEqualsPos = matchStart;
}
// Do nothing. These can match spuriously, and if it's not the thing
// we're looking for, keep on going.
- } else if (matchText.equals("<!--")) {
+ break;
+ case COMMENT:
end = wikiText.indexOf("-->", matchStart);
if (end == -1) {
errors.add("Unmatched <!-- error: " + wikiText.substring(start));
return safeIndexOf(wikiText, start, "\n", "\n");
}
- } else if (matchText.equals("''") || (matchText.startsWith("<") && matchText.endsWith(">"))) {
- // Don't care.
- } else {
- assert false : "Match text='" + matchText + "'";
- throw new IllegalStateException();
+ break;
+ default:
+ throw new RuntimeException();
}
// Inside the while loop. Just go forward.
end = Math.max(end, matchEnd);
}
- if (toFind.equals("\n") && tokenStack.isEmpty()) {
+ if (toFind == TokenDelim.NEWLINE && tokenStack.isEmpty()) {
// We were looking for the end, we got it.
return end;
}
- errors.add("Couldn't find: " + (toFind.equals("\n") ? "newline" : toFind) + ", "+ wikiText.substring(start));
+ errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
if (firstNewline != -1) {
return firstNewline;
}
lastUnescapedPipePos = matchStart;
}
- static final String trimNewlines(String s) {
+ static String trimNewlines(String s) {
while (s.startsWith("\n")) {
s = s.substring(1);
}
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import junit.framework.TestCase;
assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
assertTrue(new WikiTokenizer(wikiText).nextToken().isWikiLink());
assertEquals("abc", new WikiTokenizer(wikiText).nextToken().wikiLinkText());
- assertEquals(null, new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
+ assertNull(new WikiTokenizer(wikiText).nextToken().wikiLinkDest());
wikiText = "[[abc|def]]";
assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
assertEquals(wikiText, new WikiTokenizer(wikiText).nextToken().token());
assertTrue(new WikiTokenizer(wikiText).nextToken().isFunction());
assertEquals("abc", new WikiTokenizer(wikiText).nextToken().functionName());
- assertEquals(Arrays.asList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
+ assertEquals(Collections.singletonList("def"), new WikiTokenizer(wikiText).nextToken().functionPositionArgs());
assertEquals(0, new WikiTokenizer(wikiText).nextToken().functionNamedArgs().size());
wikiText = "{{abc|d[[|]]ef|ghi}}";
assertEquals("\n", tokenizer.nextToken().token());
assertEquals("hello2", tokenizer.nextToken().token());
- assertEquals(null, tokenizer.nextToken());
+ assertNull(tokenizer.nextToken());
tokenizer.returnToLineStart();
assertEquals("hello2", tokenizer.nextToken().token());
- assertEquals(null, tokenizer.nextToken());
+ assertNull(tokenizer.nextToken());
}
"[extraterminated]]" + "\n" +
"=== {{header-template}} ===" + "\n";
- final String[] expectedTokens = new String[] {
+ final String[] expectedTokens = {
"Hi",
"\n",
"Hello ",
"\n",
};
- final List<String> actualTokens = new ArrayList<String>();
+ final List<String> actualTokens = new ArrayList<>();
final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
WikiTokenizer token;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
-import java.io.InputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
import com.hughes.android.dictionary.engine.EntrySource;
import com.hughes.android.dictionary.engine.EntryTypeName;
static final Logger LOG = Logger.getLogger("WiktionaryParser");
- final SortedMap<String, AtomicInteger> counters = new TreeMap<String, AtomicInteger>();
- final Set<String> pairsAdded = new LinkedHashSet<String>();
+ private static final Pattern SUPERSCRIPT = Pattern.compile("<sup>[0-9]*</sup>");
+
+ final SortedMap<String, AtomicInteger> counters = new TreeMap<>();
+ final Set<String> pairsAdded = new LinkedHashSet<>();
public EntrySource entrySource;
public String title;
abstract void removeUselessArgs(final Map<String, String> namedArgs);
+ private static String replaceSuperscript(String in) {
+ Matcher matcher;
+ while ((matcher = SUPERSCRIPT.matcher(in)).find()) {
+ String replace = "";
+ String orig = matcher.group();
+ for (int i = 5; i < orig.length() - 6; i++)
+ {
+ char c = 0;
+ switch (orig.charAt(i)) {
+ case '0': c = '\u2070'; break;
+ case '1': c = '\u00b9'; break;
+ case '2': c = '\u00b2'; break;
+ case '3': c = '\u00b3'; break;
+ case '4': c = '\u2074'; break;
+ case '5': c = '\u2075'; break;
+ case '6': c = '\u2076'; break;
+ case '7': c = '\u2077'; break;
+ case '8': c = '\u2078'; break;
+ case '9': c = '\u2079'; break;
+ }
+ if (c == 0) throw new RuntimeException();
+ replace += c;
+ }
+ in = matcher.replaceFirst(replace);
+ }
+ return in;
+ }
+
@Override
public void parse(final File file, final EntrySource entrySource, final int pageLimit) throws IOException {
this.entrySource = entrySource;
File input = new File(file.getPath() + ".bz2");
if (!input.exists()) input = new File(file.getPath() + ".gz");
if (!input.exists()) input = new File(file.getPath() + ".xz");
- DataInputStream dis;
+ DataInputStream dis;
if (!input.exists()) {
// Fallback to uncompressed file
dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
final int bytesLength = dis.readInt();
final byte[] bytes = new byte[bytesLength];
dis.readFully(bytes);
- final String text = new String(bytes, "UTF8");
+ final String text = new String(bytes, StandardCharsets.UTF_8);
- parseSection(heading, text);
+ parseSection(heading, replaceSuperscript(text));
++pageCount;
if (pageCount % 1000 == 0) {
StringBuilder builder;
IndexedEntry indexedEntry;
IndexBuilder indexBuilder;
- final Map<String,FunctionCallback<T>> functionCallbacks = new LinkedHashMap<String, FunctionCallback<T>>();
+ final Map<String,FunctionCallback<T>> functionCallbacks = new LinkedHashMap<>();
boolean entryTypeNameSticks = false;
EntryTypeName entryTypeName = null;
- final Map<String,AtomicInteger> langCodeToTCount = new LinkedHashMap<String, AtomicInteger>();
+ final Map<String,AtomicInteger> langCodeToTCount = new LinkedHashMap<>();
- final NameAndArgs<T> nameAndArgs = new NameAndArgs<T>();
+ final NameAndArgs<T> nameAndArgs = new NameAndArgs<>();
public AppendAndIndexWikiCallback(final T parser) {
this.parser = parser;
if (name != null) {
appendAndIndexWikiCallback.dispatch(name, null);
}
- for (int i = 0; i < args.size(); ++i) {
- if (args.get(i).length() > 0) {
+ for (String arg : args) {
+ if (arg.length() > 0) {
appendAndIndexWikiCallback.builder.append("|");
- appendAndIndexWikiCallback.dispatch(args.get(i), null, null);
+ appendAndIndexWikiCallback.dispatch(arg, null, null);
}
}
appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
return true;
}
}
- static NameAndArgs<AbstractWiktionaryParser> NAME_AND_ARGS = new NameAndArgs<AbstractWiktionaryParser>();
+ static NameAndArgs<AbstractWiktionaryParser> NAME_AND_ARGS = new NameAndArgs<>();
static void appendNamedArgs(final Map<String, String> namedArgs,
final AppendAndIndexWikiCallback<?> appendAndIndexWikiCallback) {
package com.hughes.android.dictionary.parser.wiktionary;
+import java.util.List;
+import java.util.Map;
+
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
-import java.util.List;
-import java.util.Map;
-
class DeFunctionCallbacks {
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
- FunctionCallback<T> callback = new MakeHeadingFromName<T>("====");
+ FunctionCallback<T> callback = new MakeHeadingFromName<>("====");
callbacks.put("Aussprache", callback);
callbacks.put("Worttrennung", callback);
callbacks.put("Bedeutungen", callback);
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
static final class MakeHeadingFromName<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.engine.PairEntry;
-import com.hughes.android.dictionary.engine.PairEntry.Pair;
import com.hughes.android.dictionary.parser.WikiTokenizer;
public final class EnForeignParser extends EnParser {
static final class ListSection {
final String firstPrefix;
final String firstLine;
- final List<String> nextPrefixes = new ArrayList<String>();
- final List<String> nextLines = new ArrayList<String>();
+ final List<String> nextPrefixes = new ArrayList<>();
+ final List<String> nextLines = new ArrayList<>();
public ListSection(String firstPrefix, String firstLine) {
this.firstPrefix = firstPrefix;
}
final StringBuilder foreignBuilder = new StringBuilder();
- final List<EnForeignParser.ListSection> listSections = new ArrayList<EnForeignParser.ListSection>();
+ final List<EnForeignParser.ListSection> listSections = new ArrayList<>();
appendAndIndexWikiCallback.reset(foreignBuilder, null);
this.state = State.ENGLISH_DEF_OF_FOREIGN; // TODO: this is wrong, need new category....
final String english = trim(englishBuilder.toString());
if (english.length() > 0) {
- final Pair pair = new Pair(english, trim(foreignText), this.swap);
+ final PairEntry.Pair pair = new PairEntry.Pair(english, trim(foreignText), this.swap);
pairEntry.pairs.add(pair);
foreignIndexBuilder.addEntryWithString(indexedEntry, title, entryIsFormOfSomething ? EntryTypeName.WIKTIONARY_IS_FORM_OF_SOMETHING_ELSE : EntryTypeName.WIKTIONARY_TITLE_MULTI);
for (final String form : forms) {
if ((nextPrefix.equals("#:") || nextPrefix.equals("##:")) && dash != -1) {
final String foreignEx = nextLine.substring(0, dash);
final String englishEx = nextLine.substring(dash + mdashLen);
- final Pair pair = new Pair(formatAndIndexExampleString(englishEx, enIndexBuilder, indexedEntry), formatAndIndexExampleString(foreignEx, foreignIndexBuilder, indexedEntry), swap);
+ final PairEntry.Pair pair = new PairEntry.Pair(formatAndIndexExampleString(englishEx, enIndexBuilder, indexedEntry), formatAndIndexExampleString(foreignEx, foreignIndexBuilder, indexedEntry), swap);
if (pair.lang1 != "--" && pair.lang1 != "--") {
pairEntry.pairs.add(pair);
}
lastForeign = null;
// TODO: make #* and #*: work
} else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")/* || nextPrefix.equals("#*")*/) {
- final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+ final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
lastForeign = nextLine;
if (pair.lang1 != "--" && pair.lang1 != "--") {
pairEntry.pairs.add(pair);
}
}
pairEntry.pairs.remove(pairEntry.pairs.size() - 1);
- final Pair pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap);
+ final PairEntry.Pair pair = new PairEntry.Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap);
if (pair.lang1 != "--" || pair.lang2 != "--") {
pairEntry.pairs.add(pair);
}
lastForeign = null;
} else {
LOG.warning("TODO: English example with no foreign: " + title + ", " + nextLine);
- final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+ final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
if (pair.lang1 != "--" || pair.lang2 != "--") {
pairEntry.pairs.add(pair);
}
}
} else if (nextPrefix.equals("#*")) {
// Can't really index these.
- final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+ final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
lastForeign = nextLine;
if (pair.lang1 != "--" || pair.lang2 != "--") {
pairEntry.pairs.add(pair);
}
} else if (nextPrefix.equals("#::*") || nextPrefix.equals("##") || nextPrefix.equals("#*:") || nextPrefix.equals("#:*") || true) {
- final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+ final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
if (pair.lang1 != "--" || pair.lang2 != "--") {
pairEntry.pairs.add(pair);
}
package com.hughes.android.dictionary.parser.wiktionary;
-import com.hughes.android.dictionary.engine.EntryTypeName;
-import com.hughes.android.dictionary.engine.IndexBuilder;
-import com.hughes.android.dictionary.parser.WikiTokenizer;
-import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
-import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
-import com.hughes.util.ListUtil;
-import com.hughes.util.MapUtil;
-import com.hughes.util.StringUtil;
-
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
+import com.hughes.android.dictionary.engine.EntryTypeName;
+import com.hughes.android.dictionary.engine.IndexBuilder;
+import com.hughes.android.dictionary.parser.WikiTokenizer;
+import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
+import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
+import com.hughes.util.ListUtil;
+import com.hughes.util.MapUtil;
+import com.hughes.util.StringUtil;
+
class EnFunctionCallbacks {
- static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<String, FunctionCallback<EnParser>>();
+ static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<>();
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
- FunctionCallback<T> callback = new Gender<T>();
+ FunctionCallback<T> callback = new Gender<>();
callbacks.put("m", callback);
callbacks.put("f", callback);
callbacks.put("n", callback);
callbacks.put("p", callback);
callbacks.put("g", callback);
- callbacks.put("etyl", new etyl<T>());
- callbacks.put("term", new term<T>());
-
- callback = new EncodingCallback<T>();
- Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
- "IPA", "IPAchar", // Not really encodings, but it works.
- "zh-ts", "zh-tsp",
- "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
- "fa-Arab", "Khmr", "Cyrl", "ug-Arab", "ko-inline",
- "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
- "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
+ callbacks.put("etyl", new etyl<>());
+ callbacks.put("term", new term<>());
+
+ callback = new EncodingCallback<>();
+ Set<String> encodings = new LinkedHashSet<>(Arrays.asList(
+ "IPA", "IPAchar", // Not really encodings, but it works.
+ "zh-ts", "zh-tsp",
+ "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
+ "fa-Arab", "Khmr", "Cyrl", "ug-Arab", "ko-inline",
+ "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
+ "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
for (final String encoding : encodings) {
callbacks.put(encoding, callback);
}
- callback = new Ignore<T>();
+ callback = new Ignore<>();
callbacks.put("trreq", callback);
callbacks.put("t-image", callback);
callbacks.put("defn", callback);
callbacks.put("der-mid3", callback);
callbacks.put("der-bottom", callback);
- callback = new AppendName<T>();
+ callback = new AppendName<>();
callbacks.put("...", callback);
- callbacks.put("qualifier", new QualifierCallback<T>());
- callbacks.put("italbrac", new italbrac<T>());
- callbacks.put("gloss", new gloss<T>());
- callbacks.put("not used", new not_used<T>());
- callbacks.put("wikipedia", new wikipedia<T>());
+ callbacks.put("qualifier", new QualifierCallback<>());
+ callbacks.put("italbrac", new italbrac<>());
+ callbacks.put("gloss", new gloss<>());
+ callbacks.put("not used", new not_used<>());
+ callbacks.put("wikipedia", new wikipedia<>());
- final it_conj<T> it_conj_cb = new it_conj<T>();
+ final it_conj<T> it_conj_cb = new it_conj<>();
callbacks.put("it-conj", it_conj_cb);
- callbacks.put("it-conj-are", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-arsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-care", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-carsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-ciare", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-ciarsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iare", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iarsi", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iare-b", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-iarsi-b", new it_conj_are<T>(it_conj_cb));
- callbacks.put("it-conj-ire", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-irsi", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-ire-b", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-irsi-b", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-cire", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-cirsi", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-ire", new it_conj_ire<T>(it_conj_cb));
- callbacks.put("it-conj-ere", new it_conj_ere<T>(it_conj_cb));
- callbacks.put("it-conj-ersi", new it_conj_ere<T>(it_conj_cb));
- callbacks.put("it-conj-urre", new it_conj_urre<T>(it_conj_cb));
- callbacks.put("it-conj-ursi", new it_conj_urre<T>(it_conj_cb));
- callbacks.put("it-conj-fare", new it_conj_fare<T>(it_conj_cb));
+ callbacks.put("it-conj-are", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-arsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-care", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-carsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-ciare", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-ciarsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iare", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iarsi", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iare-b", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-iarsi-b", new it_conj_are<>(it_conj_cb));
+ callbacks.put("it-conj-ire", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-irsi", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-ire-b", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-irsi-b", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-cire", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-cirsi", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-ire", new it_conj_ire<>(it_conj_cb));
+ callbacks.put("it-conj-ere", new it_conj_ere<>(it_conj_cb));
+ callbacks.put("it-conj-ersi", new it_conj_ere<>(it_conj_cb));
+ callbacks.put("it-conj-urre", new it_conj_urre<>(it_conj_cb));
+ callbacks.put("it-conj-ursi", new it_conj_urre<>(it_conj_cb));
+ callbacks.put("it-conj-fare", new it_conj_fare<>(it_conj_cb));
//"{{it-conj-fare|putre|avere}}\n" +
static {
addGenericCallbacks(DEFAULT);
- FunctionCallback<EnParser> callback = new TranslationCallback<EnParser>();
+ FunctionCallback<EnParser> callback = new TranslationCallback<>();
DEFAULT.put("t", callback);
DEFAULT.put("t+", callback);
DEFAULT.put("t-", callback);
DEFAULT.put("head", callback);
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
// ------------------------------------------------------------------
namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
if (args.size() < 2) {
if (!name.equals("ttbc")) {
- EnParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title + ", " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title + ", " + wikiTokenizer.token());
}
return false;
}
// Catch-all for anything else...
if (!namedArgs.isEmpty()) {
appendAndIndexWikiCallback.builder.append(" {");
- EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
+ AbstractWiktionaryParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
appendAndIndexWikiCallback.builder.append("}");
}
final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
namedArgs.remove("lang");
if (!namedArgs.isEmpty()) {
- EnParser.LOG.warning("weird qualifier: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("weird qualifier: " + wikiTokenizer.token());
return false;
}
appendAndIndexWikiCallback.builder.append("(");
final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
namedArgs.remove("lang");
if (!namedArgs.isEmpty()) {
- EnParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
return false;
}
if (args.size() == 0) {
}
appendAndIndexWikiCallback.builder.append("{");
appendAndIndexWikiCallback.builder.append(name);
- for (int i = 0; i < args.size(); ++i) {
- appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
+ for (String arg : args) {
+ appendAndIndexWikiCallback.builder.append("|").append(arg);
}
appendAndIndexWikiCallback.builder.append("}");
return true;
if (displayText != null) {
appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
} else {
- EnParser.LOG.warning("no display text: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("no display text: " + wikiTokenizer.token());
}
final String tr = namedArgs.remove("tr");
namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
if (!namedArgs.isEmpty()) {
appendAndIndexWikiCallback.builder.append(" {").append(name);
- EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
+ AbstractWiktionaryParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
appendAndIndexWikiCallback.builder.append("}");
}
formName = ListUtil.remove(args, 0, null);
}
if (formName == null) {
- EnParser.LOG.warning("Missing form name: " + parser.title);
+ AbstractWiktionaryParser.LOG.warning("Missing form name: " + parser.title);
formName = "form of";
}
String baseForm = ListUtil.get(args, 1, "");
parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
} else {
// null baseForm happens in Danish.
- EnParser.LOG.warning("Null baseform: " + parser.title);
+ AbstractWiktionaryParser.LOG.warning("Null baseform: " + parser.title);
}
return true;
}
if (args.size() > 1 || !namedArgs.isEmpty()) {
// Unindexed!
return false;
- } else if (args.size() == 1) {
- return false;
- } else {
- return true;
- }
+ } else return args.size() != 1;
}
}
return false;
}
String langName = WiktionaryLangs.getEnglishName(langCode);
- if (langName != null) {
- appendAndIndexWikiCallback.dispatch(langName, null);
- } else {
- appendAndIndexWikiCallback.dispatch("lang:" + langCode, null);
- }
+ appendAndIndexWikiCallback.dispatch(langName == null ? "lang:" + langCode : langName, null);
return true;
}
}
if (!StringUtil.isNullOrEmpty(literally)) {
literally = String.format("literally %s", literally);
}
- final List<String> inParens = new ArrayList<String>(Arrays.asList(tr, pos, gloss, literally));
+ final List<String> inParens = new ArrayList<>(Arrays.asList(tr, pos, gloss, literally));
cleanList(inParens);
appendCommaSeparatedList(appendAndIndexWikiCallback, inParens);
}
parser.wordForms.add(singular);
if (!namedArgs.isEmpty() || args.size() > 4) {
- EnParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
+ AbstractWiktionaryParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
}
return true;
}
}
static {
- DEFAULT.put("it-proper noun", new it_proper_noun<EnParser>());
+ DEFAULT.put("it-proper noun", new it_proper_noun<>());
}
static final class it_proper_noun<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
@Override
}
}
- static final Map<String,String> it_indicativePronouns = new LinkedHashMap<String, String>();
+ static final Map<String,String> it_indicativePronouns = new LinkedHashMap<>();
static {
it_indicativePronouns.put("1s", "io");
it_indicativePronouns.put("2s", "tu");
it_indicativePronouns.put("3p", "essi/esse");
}
- static final Map<String,String> it_subjunctivePronouns = new LinkedHashMap<String, String>();
+ static final Map<String,String> it_subjunctivePronouns = new LinkedHashMap<>();
static {
it_subjunctivePronouns.put("1s", "che io");
it_subjunctivePronouns.put("2s", "che tu");
it_subjunctivePronouns.put("3p", "che essi/esse");
}
- static final Map<String,String> it_imperativePronouns = new LinkedHashMap<String, String>();
+ static final Map<String,String> it_imperativePronouns = new LinkedHashMap<>();
static {
it_imperativePronouns.put("1s", "-");
it_imperativePronouns.put("2s", "tu");
final List<String> prefixes = (inf != null && inf.endsWith("si")) ? it_reflexive_pronouns : it_empty;
String style = " style=\"background:#c0cfe4\"";
- outputDataRow(appendAndIndexWikiCallback, style, "indicativo", style, "th", "", new LinkedHashMap<String, String>(it_indicativePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "indicativo", style, "th", "", new LinkedHashMap<>(it_indicativePronouns), it_empty, false);
outputDataRow(appendAndIndexWikiCallback, style, "presente", "", "td", "pres", namedArgs, prefixes, true);
outputDataRow(appendAndIndexWikiCallback, style, "imperfetto", "", "td", "imperf", namedArgs, prefixes, true);
outputDataRow(appendAndIndexWikiCallback, style, "passato remoto", "", "td", "prem", namedArgs, prefixes, true);
outputDataRow(appendAndIndexWikiCallback, style, "futuro", "", "td", "fut", namedArgs, prefixes, true);
style = " style=\"background:#c0d8e4\"";
- outputDataRow(appendAndIndexWikiCallback, style, "condizionale", style, "th", "", new LinkedHashMap<String, String>(it_indicativePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "condizionale", style, "th", "", new LinkedHashMap<>(it_indicativePronouns), it_empty, false);
outputDataRow(appendAndIndexWikiCallback, style, "presente", "", "td", "cond", namedArgs, prefixes, true);
style = " style=\"background:#c0e4c0\"";
- outputDataRow(appendAndIndexWikiCallback, style, "congiuntivo", style, "th", "", new LinkedHashMap<String, String>(it_subjunctivePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "congiuntivo", style, "th", "", new LinkedHashMap<>(it_subjunctivePronouns), it_empty, false);
namedArgs.put("sub3s2", namedArgs.remove("sub3s"));
namedArgs.put("sub1s", namedArgs.get("sub123s"));
namedArgs.put("sub2s", namedArgs.get("sub123s"));
outputDataRow(appendAndIndexWikiCallback, style, "imperfetto", "", "td", "impsub", namedArgs, prefixes, true);
style = " style=\"background:#e4d4c0\"";
- outputDataRow(appendAndIndexWikiCallback, style, "imperativo", style, "th", "", new LinkedHashMap<String, String>(it_imperativePronouns), it_empty, false);
+ outputDataRow(appendAndIndexWikiCallback, style, "imperativo", style, "th", "", new LinkedHashMap<>(it_imperativePronouns), it_empty, false);
outputDataRow(appendAndIndexWikiCallback, style, "", "", "td", "imp", namedArgs, it_empty, false); // these are attached to the stem.
builder.append("</table>\n");
for (final String number : it_number_s_p) {
for (final String person : it_person_1_2_3) {
// Output <td> or <th>
- builder.append("<").append(type2).append("").append(col2Style).append(">");
+ builder.append("<").append(type2).append(col2Style).append(">");
final String keyBase = String.format("%s%s%s", moodName, person, number);
appendAndIndexWikiCallback.dispatch(prefixes.get(i++), null);
outputKeyVariations(appendAndIndexWikiCallback, builder, keyBase, namedArgs, isForm);
"Particle|Interjection|Pronominal adverb|" +
"Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable");
- static final Set<String> USELESS_WIKI_ARGS = new LinkedHashSet<String>(
- Arrays.asList(
- "lang",
- "sc",
- "sort",
- "cat",
- "cat2",
- "xs",
- "nodot"));
+ static final Set<String> USELESS_WIKI_ARGS = new LinkedHashSet<>(
+ Arrays.asList(
+ "lang",
+ "sc",
+ "sort",
+ "cat",
+ "cat2",
+ "xs",
+ "nodot"));
static boolean isIgnorableTitle(final String title) {
return title.startsWith("Wiktionary:") ||
State state = null;
public boolean entryIsFormOfSomething = false;
- final Collection<String> wordForms = new ArrayList<String>();
+ final Collection<String> wordForms = new ArrayList<>();
boolean titleAppended = false;
final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback = new AppendAndIndexCallback(this);
{
appendAndIndexWikiCallback.functionCallbacks.putAll(EnFunctionCallbacks.DEFAULT);
- for (final String key : new ArrayList<String>(appendAndIndexWikiCallback.functionCallbacks.keySet())) {
+ for (final String key : new ArrayList<>(appendAndIndexWikiCallback.functionCallbacks.keySet())) {
// Don't handle the it-conj functions here.
if (key.startsWith("it-conj")) {
appendAndIndexWikiCallback.functionCallbacks.remove(key);
final Pair pair = new Pair(trim(englishText.toString()), trim(foreignText.toString()), swap);
pairEntry.pairs.add(pair);
if (!pairsAdded.add(pair.toString())) {
- LOG.warning("Duplicate pair: " + pair.toString());
+ LOG.warning("Duplicate pair: " + pair);
incrementCount("WARNING: Duplicate pair" );
}
}
package com.hughes.android.dictionary.parser.wiktionary;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.HashSet;
import java.util.regex.Pattern;
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.engine.PairEntry;
-import com.hughes.android.dictionary.engine.PairEntry.Pair;
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.android.dictionary.parser.wiktionary.EnFunctionCallbacks.TranslationCallback;
import com.hughes.util.ListUtil;
PairEntry pairEntry = null;
IndexedEntry indexedEntry = null;
StringBuilder[] builders = null;
- HashSet<Pair> allPairs = new HashSet<Pair>();
+ final HashSet<PairEntry.Pair> allPairs = new HashSet<>();
public static final String NAME = "EnTranslationToTranslation";
- final Set<String> Ts = new LinkedHashSet<String>(Arrays.asList("t", "t+",
+ final Set<String> Ts = new LinkedHashSet<>(Arrays.asList("t", "t+",
"t-", "tø", "apdx-t", "ttbc"));
public EnTranslationToTranslationParser(final List<IndexBuilder> indexBuilders,
}
}
- final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<EnTranslationToTranslationParser>();
+ final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<>();
- final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<EnTranslationToTranslationParser>(
- this);
+ final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<>(
+ this);
{
for (final String t : Ts) {
appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback);
final String lang1 = builders[0].toString();
final String lang2 = builders[1].toString();
if (lang1.length() > 0 && lang2.length() > 0) {
- final Pair newPair = new Pair(lang1, lang2);
+ final PairEntry.Pair newPair = new PairEntry.Pair(lang1, lang2);
// brute-force approach to prevent adding duplicates
if (!allPairs.contains(newPair))
{
allPairs.add(newPair);
- pairEntry.pairs.add(new Pair(lang1, lang2));
+ pairEntry.pairs.add(new PairEntry.Pair(lang1, lang2));
indexedEntry.isValid = true;
}
}
package com.hughes.android.dictionary.parser.wiktionary;
+import java.util.List;
+import java.util.Map;
+
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
import com.hughes.android.dictionary.parser.wiktionary.ItFunctionCallbacks.Redispatch;
-import java.util.List;
-import java.util.Map;
-
class FrFunctionCallbacks {
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
- callbacks.put("-étym-", new Redispatch<T>("\n==== Étymologie ====\n"));
- callbacks.put("-pron-", new Redispatch<T>("\n==== Prononciation ====\n"));
- callbacks.put("-voir-", new Redispatch<T>("\n==== Voir aussi ====\n"));
- callbacks.put("-drv-", new Redispatch<T>("\n==== Dérivés ====\n"));
- callbacks.put("-syn-", new Redispatch<T>("\n==== Synonymes ====\n"));
+ callbacks.put("-étym-", new Redispatch<>("\n==== Étymologie ====\n"));
+ callbacks.put("-pron-", new Redispatch<>("\n==== Prononciation ====\n"));
+ callbacks.put("-voir-", new Redispatch<>("\n==== Voir aussi ====\n"));
+ callbacks.put("-drv-", new Redispatch<>("\n==== Dérivés ====\n"));
+ callbacks.put("-syn-", new Redispatch<>("\n==== Synonymes ====\n"));
- callbacks.put("-apr-", new Redispatch<T>("\n==== Apparentés étymologiques ====\n"));
- callbacks.put("-hyper-", new Redispatch<T>("\n==== Hyperonymes ====\n"));
- callbacks.put("-hypo-", new Redispatch<T>("\n==== Hyponymes ====\n"));
- callbacks.put("-réf-", new Redispatch<T>("\n==== Références ====\n"));
- callbacks.put("-homo-", new Redispatch<T>("\n==== Homophones ====\n"));
- callbacks.put("-anagr-", new Redispatch<T>("\n==== Anagrammes ====\n"));
- callbacks.put("-voc-", new Redispatch<T>("\n==== Vocabulaire apparenté par le sens ====\n"));
- callbacks.put("-exp-", new Redispatch<T>("\n==== Expressions ====\n"));
- callbacks.put("-note-", new Redispatch<T>("\n==== Note ====\n"));
+ callbacks.put("-apr-", new Redispatch<>("\n==== Apparentés étymologiques ====\n"));
+ callbacks.put("-hyper-", new Redispatch<>("\n==== Hyperonymes ====\n"));
+ callbacks.put("-hypo-", new Redispatch<>("\n==== Hyponymes ====\n"));
+ callbacks.put("-réf-", new Redispatch<>("\n==== Références ====\n"));
+ callbacks.put("-homo-", new Redispatch<>("\n==== Homophones ====\n"));
+ callbacks.put("-anagr-", new Redispatch<>("\n==== Anagrammes ====\n"));
+ callbacks.put("-voc-", new Redispatch<>("\n==== Vocabulaire apparenté par le sens ====\n"));
+ callbacks.put("-exp-", new Redispatch<>("\n==== Expressions ====\n"));
+ callbacks.put("-note-", new Redispatch<>("\n==== Note ====\n"));
- callbacks.put("-trad-", new ItFunctionCallbacks.SkipSection<T>());
+ callbacks.put("-trad-", new ItFunctionCallbacks.SkipSection<>());
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
static final class MakeHeadingFromName<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
package com.hughes.android.dictionary.parser.wiktionary;
+import java.util.List;
+import java.util.Map;
+
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
-import java.util.List;
-import java.util.Map;
-
class ItFunctionCallbacks {
static <T extends AbstractWiktionaryParser> void addGenericCallbacks(
Map<String, FunctionCallback<T>> callbacks) {
- callbacks.put("-hyph-", new Redispatch<T>("\n==== Sillabazione ====\n"));
- callbacks.put("-pron-", new Redispatch<T>("\n==== Pronuncia ====\n"));
- callbacks.put("-etim-", new Redispatch<T>("\n==== Etimologia / Derivazione ====\n"));
- callbacks.put("-syn-", new Redispatch<T>("\n==== Sinonimi ====\n"));
- callbacks.put("-ant-", new Redispatch<T>("\n==== Antonimi/Contrari ====\n"));
- callbacks.put("-drv-", new Redispatch<T>("\n==== Parole derivate ====\n"));
- callbacks.put("-prov-", new Redispatch<T>("\n==== Proverbi e modi di dire ====\n"));
- callbacks.put("-ref-", new Redispatch<T>("\n==== Note / Riferimenti ====\n"));
- callbacks.put("-rel-", new Redispatch<T>("\n==== Termini correlati ====\n"));
- callbacks.put("-var-", new Redispatch<T>("\n==== Varianti ====\n"));
+ callbacks.put("-hyph-", new Redispatch<>("\n==== Sillabazione ====\n"));
+ callbacks.put("-pron-", new Redispatch<>("\n==== Pronuncia ====\n"));
+ callbacks.put("-etim-", new Redispatch<>("\n==== Etimologia / Derivazione ====\n"));
+ callbacks.put("-syn-", new Redispatch<>("\n==== Sinonimi ====\n"));
+ callbacks.put("-ant-", new Redispatch<>("\n==== Antonimi/Contrari ====\n"));
+ callbacks.put("-drv-", new Redispatch<>("\n==== Parole derivate ====\n"));
+ callbacks.put("-prov-", new Redispatch<>("\n==== Proverbi e modi di dire ====\n"));
+ callbacks.put("-ref-", new Redispatch<>("\n==== Note / Riferimenti ====\n"));
+ callbacks.put("-rel-", new Redispatch<>("\n==== Termini correlati ====\n"));
+ callbacks.put("-var-", new Redispatch<>("\n==== Varianti ====\n"));
- callbacks.put("-trans1-", new SkipSection<T>());
- callbacks.put("-trans2-", new SkipSection<T>());
- callbacks.put("-ref-", new SkipSection<T>());
+ callbacks.put("-trans1-", new SkipSection<>());
+ callbacks.put("-trans2-", new SkipSection<>());
+ callbacks.put("-ref-", new SkipSection<>());
}
- static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
+ static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<>();
static final class Redispatch<T extends AbstractWiktionaryParser> implements
FunctionCallback<T> {
package com.hughes.android.dictionary.parser.wiktionary;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.commons.text.StringEscapeUtils;
+
import com.hughes.android.dictionary.engine.EntryTypeName;
import com.hughes.android.dictionary.engine.HtmlEntry;
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.util.StringUtil;
-import org.apache.commons.lang3.StringEscapeUtils;
-
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
public static final String NAME = "WholeSectionToHtmlParser";
void addFunctionCallbacks(
Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
}
- static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
+ static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<>();
static {
final Pattern enSkipSections = Pattern.compile(".*(Translations|Anagrams|References).*");
isoToLangConfig.put("EN", new LangConfig() {
if (sectionName.equalsIgnoreCase("Antonyms")) {
return EntryTypeName.ANTONYM_MULTI;
}
- if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) {
- // We need to put it in the other index, too (probably)
- return null;
- }
- if (sectionName.equalsIgnoreCase("Derived Terms")) {
- return null;
- }
+ // We need to put it in the other index, too (probably) ?
+ // EnParser.partOfSpeechHeader.matcher(sectionName).matches()
+
+ // Needs special handling?
+ // sectionName.equalsIgnoreCase("Derived Terms")
return null;
}
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Category:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Category:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Categoría:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Categoría:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Categoria:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Categoria:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Kategorie:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Kategorie:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Categoria:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Categoria:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Catégorie:")) {
- return true;
- }
- return false;
+ return wikiText.startsWith("Catégorie:");
}
@Override
public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
if (webUrlTemplate != null) {
final String webUrl = String.format(webUrlTemplate, title);
+ String asciiWebUrl = null;
// URI.create can raise an exception e.g. if webUrl contains %, just ignore those cases.
try {
- callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toASCIIString(), escapeHtmlLiteral(webUrl)));
+ asciiWebUrl = URI.create(webUrl).toASCIIString();
} catch (Exception e) {
}
+ if (asciiWebUrl != null) {
+ callback.builder.append("<p> <a href=\"");
+ callback.builder.append(asciiWebUrl);
+ callback.builder.append("\">");
+ callback.builder.append(escapeHtmlLiteral(webUrl));
+ callback.builder.append("</a>");
+ }
}
htmlEntry.html = callback.builder.toString();
indexedEntry.isValid = true;
titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName);
}
if (!StringUtil.isNullOrEmpty(linkDest)) {
- builder.append(String.format("<a href=\"%s\">", HtmlEntry.formatQuickdicUrl("", linkDest)));
+ builder.append("<a href=\"");
+ builder.append(HtmlEntry.formatQuickdicUrl("", linkDest));
+ builder.append("\">");
super.onWikiLink(wikiTokenizer);
- builder.append(String.format("</a>"));
+ builder.append("</a>");
} else {
super.onWikiLink(wikiTokenizer);
}
}
return;
}
- builder.append(String.format("\n<h%d>", depth));
+ builder.append("\n<h");
+ builder.append(depth);
+ builder.append('>');
dispatch(headingText, null);
- builder.append(String.format("</h%d>\n", depth));
+ builder.append("</h");
+ builder.append(depth);
+ builder.append(">\n");
}
- final List<Character> listPrefixStack = new ArrayList<Character>();
+ final List<Character> listPrefixStack = new ArrayList<>();
@Override
public void onListItem(WikiTokenizer wikiTokenizer) {
}
final String prefix = wikiTokenizer.listItemPrefix();
while (listPrefixStack.size() < prefix.length()) {
- builder.append(String.format("<%s>",
- WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
+ builder.append('<');
+ builder.append(WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size())));
+ builder.append('>');
listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
}
builder.append("<li>");
}
while (listPrefixStack.size() > nextListHeader.length()) {
final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1);
- builder.append(String.format("</%s>\n", WikiTokenizer.getListTag(prefixChar)));
+ builder.append("</");
+ builder.append(WikiTokenizer.getListTag(prefixChar));
+ builder.append(">\n");
}
}
package com.hughes.android.dictionary.parser.wiktionary;
-import com.hughes.android.dictionary.engine.Language;
-
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
import java.util.Map;
-import java.util.Set;
import java.util.regex.Pattern;
public class WiktionaryLangs {
- public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<String,String>();
+ public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<>();
static {
isoCodeToEnWikiName.put("AF", "Afrikaans");
isoCodeToEnWikiName.put("SQ", "Albanian");
isoCodeToEnWikiName.put("HT", "Haitian Creole");
isoCodeToEnWikiName.put("LB", "Luxembourgish");
isoCodeToEnWikiName.put("MK", "Macedonian");
- isoCodeToEnWikiName.put("GV", "Manx");
isoCodeToEnWikiName.put("scn", "Sicilian");
isoCodeToEnWikiName.put("cu", "Old Church Slavonic");
isoCodeToEnWikiName.put("rom", "Romani");
//assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
}
- public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
+ public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<>();
static {
Map<String,String> isoCodeToWikiName;
wikiCodeToIsoCodeToWikiName.put("en", isoCodeToEnWikiName);
// egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName);
isoCodeToWikiName.put("nds", "Niederdeutsch");
isoCodeToWikiName.put("DE", "Deutsch");
isoCodeToWikiName.put("RO", "Rumänisch");
// egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}"));
isoCodeToWikiName.put("SV", Pattern.quote("{{langue|sv}}"));
// egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("it", isoCodeToWikiName);
isoCodeToWikiName.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}"); // scn, nap, cal, lmo
isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}"));
// egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
isoCodeToWikiName.put("AR", Pattern.quote("{{lengua|ar}}"));
isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}"));
isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}"));
// Pattern seems to match Italian one
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName);
isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}"));
isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
int equalsIndex;
if (arg.startsWith("--") && (equalsIndex = arg.indexOf("=")) >= 0) {
final String key = arg.substring(2, equalsIndex);
- final String value = arg.substring(equalsIndex + 1, arg.length());
+ final String value = arg.substring(equalsIndex + 1);
dest.put(key, value);
}
}
@SuppressWarnings("WeakerAccess")
public final class EnumUtil {
- public static final <T extends Enum<T>> T min(final T e1, final T e2) {
+ public static <T extends Enum<T>> T min(final T e1, final T e2) {
if (e1 == null) {
return e2;
}
-// Copyright 2011 Google Inc. All Rights Reserved.\r
-//\r
-// Licensed under the Apache License, Version 2.0 (the "License");\r
-// you may not use this file except in compliance with the License.\r
-// You may obtain a copy of the License at\r
-//\r
-// http://www.apache.org/licenses/LICENSE-2.0\r
-//\r
-// Unless required by applicable law or agreed to in writing, software\r
-// distributed under the License is distributed on an "AS IS" BASIS,\r
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
-// See the License for the specific language governing permissions and\r
-// limitations under the License.\r
-\r
-package com.hughes.util;\r
-\r
-import java.io.BufferedReader;\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.FileOutputStream;\r
-import java.io.IOException;\r
-import java.io.InputStreamReader;\r
-import java.io.PrintStream;\r
-import java.io.RandomAccessFile;\r
-import java.util.ArrayList;\r
-import java.util.List;\r
-\r
-@SuppressWarnings("WeakerAccess")\r
-public final class FileUtil {\r
- public static String readLine(final RandomAccessFile file, final long startPos) throws IOException {\r
- file.seek(startPos);\r
- return file.readLine();\r
- }\r
-\r
- public static List<String> readLines(final File file) throws IOException {\r
- final List<String> result = new ArrayList<>();\r
- try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {\r
- String line;\r
- while ((line = in.readLine()) != null) {\r
- result.add(line);\r
- }\r
- }\r
- return result;\r
- }\r
-\r
- public static String readToString(final File file) throws IOException {\r
- StringBuilder result = new StringBuilder();\r
- try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {\r
- String line;\r
- while ((line = in.readLine()) != null) {\r
- result.append(line).append("\n");\r
- }\r
- }\r
- return result.toString();\r
- }\r
-\r
- public static void writeStringToUTF8File(final String string, final File file) {\r
- throw new IllegalStateException();\r
- }\r
-\r
- public static void printString(final File file, final String s) throws IOException {\r
- final PrintStream out = new PrintStream(new FileOutputStream(file));\r
- out.print(s);\r
- out.close();\r
- }\r
-\r
-}\r
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+
+@SuppressWarnings("WeakerAccess")
+public final class FileUtil {
+ public static String readLine(final RandomAccessFile file, final long startPos) throws IOException {
+ file.seek(startPos);
+ return file.readLine();
+ }
+
+ public static List<String> readLines(final File file) throws IOException {
+ final List<String> result = new ArrayList<>();
+ try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
+ String line;
+ while ((line = in.readLine()) != null) {
+ result.add(line);
+ }
+ }
+ return result;
+ }
+
+ public static String readToString(final File file) throws IOException {
+ StringBuilder result = new StringBuilder();
+ try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
+ String line;
+ while ((line = in.readLine()) != null) {
+ result.append(line).append("\n");
+ }
+ }
+ return result.toString();
+ }
+
+ public static void writeStringToUTF8File(final String string, final File file) {
+ throw new IllegalStateException();
+ }
+
+ public static void printString(final File file, final String s) throws IOException {
+ final PrintStream out = new PrintStream(new FileOutputStream(file));
+ out.print(s);
+ out.close();
+ }
+
+}
import java.util.Map;
-@SuppressWarnings({"WeakerAccess", "unused"})
public class MapUtil {
-
- public static <K,V> V safeGet(final Map<K,V> map, K key, V defaultValue) {
- if (!map.containsKey(key)) {
- return defaultValue;
- }
- return map.get(key);
- }
-
- public static <K,V> V safeGetOrPut(final Map<K,V> map, K key, V defaultValue) {
- if (!map.containsKey(key)) {
- map.put(key, defaultValue);
- }
- return map.get(key);
- }
-
- public static <K,V> V safeGet(final Map<K,V> map, K key, Class<V> valueClass) {
- if (!map.containsKey(key)) {
- try {
- map.put(key, valueClass.newInstance());
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
- return map.get(key);
- }
-
public static <K,V> V safeRemove(final Map<K,V> map, K key, V defaultValue) {
if (!map.containsKey(key)) {
return defaultValue;
}
return map.remove(key);
}
-
-
}
# Run to update ..//Dictionary/res/raw/dictionary_info.txt to reference
# all dictionaries in /data/outputs (needs to contain both zip and uncompressed files).
-CLASS=CheckDictionariesMain
-JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
-test -x "$JAVA" || JAVA=java
-$JAVA -classpath src:../Dictionary/Util/src/:../Dictionary/src/:/usr/share/java/com.ibm.icu.jar:/usr/share/java/xercesImpl.jar com.hughes.android.dictionary.engine.$CLASS "$@"
+RUNNER=./DictionaryPC
+if ! test -x "$RUNNER" ; then
+ JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
+ test -x "$JAVA" || JAVA=java
+ RUNNER="$JAVA -classpath bin/:/usr/share/java/com.ibm.icu.jar com.hughes.android.dictionary.engine.Runner"
+fi
+$RUNNER CheckDictionariesMain "$@"