X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FDictFileParser.java;fp=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FDictFileParser.java;h=1f08428761d42911a99a29c3c07a71f6c7f271cc;hp=8c6424b8543ab663d71eaf5d8a9a7892dc6de7fd;hb=020aa910526ece05ee8514e55a9a951b45ce1fea;hpb=370b6e57cca69c186cff4c2cdfd86edfdb3e2f8f diff --git a/src/com/hughes/android/dictionary/parser/DictFileParser.java b/src/com/hughes/android/dictionary/parser/DictFileParser.java index 8c6424b..1f08428 100644 --- a/src/com/hughes/android/dictionary/parser/DictFileParser.java +++ b/src/com/hughes/android/dictionary/parser/DictFileParser.java @@ -15,8 +15,11 @@ package com.hughes.android.dictionary.parser; import java.io.BufferedReader; +import java.io.BufferedInputStream; +import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.Charset; @@ -28,10 +31,13 @@ import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.hughes.android.dictionary.engine.AbstractEntry; import com.hughes.android.dictionary.engine.DictionaryBuilder; import com.hughes.android.dictionary.engine.EntrySource; import com.hughes.android.dictionary.engine.EntryTypeName; +import com.hughes.android.dictionary.engine.HtmlEntry; import com.hughes.android.dictionary.engine.IndexBuilder; +import com.hughes.android.dictionary.engine.IndexBuilder.TokenData; import com.hughes.android.dictionary.engine.IndexedEntry; import com.hughes.android.dictionary.engine.Language; import com.hughes.android.dictionary.engine.PairEntry; @@ -64,6 +70,8 @@ public class DictFileParser implements Parser { final String fieldSplit; final String subfieldSplit; + final boolean singleLang; + final DictionaryBuilder dictBuilder; EntrySource entrySource; @@ -72,18 +80,20 @@ public class DictFileParser implements Parser { public DictFileParser(final Charset charset, boolean flipCols, final String fieldSplit, final String subfieldSplit, + final boolean singleLang, final DictionaryBuilder dictBuilder) { this.charset = charset; this.flipCols = flipCols; this.fieldSplit = fieldSplit; this.subfieldSplit = subfieldSplit; + this.singleLang = singleLang; this.dictBuilder = dictBuilder; } @Override public void parse(final File file, final EntrySource entrySouce, final int pageLimit) throws IOException { this.entrySource = entrySouce; - final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset)); + final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file))); String line; int count = 0; while ((line = reader.readLine()) != null) { @@ -130,7 +140,6 @@ public class DictFileParser implements Parser { subfields[1] = new String[] { fields[1] }; } - final PairEntry pairEntry = new PairEntry(entrySource); for (int i = 0; i < subfields[0].length; ++i) { subfields[0][i] = subfields[0][i].trim(); subfields[1][i] = subfields[1][i].trim(); @@ -144,25 +153,56 @@ public class DictFileParser implements Parser { if (subfields[1][i].isEmpty()) { subfields[1][i] = "__"; } - pairEntry.pairs.add(new PairEntry.Pair(subfields[0][i], subfields[1][i])); } - final IndexedEntry entryData = new IndexedEntry(pairEntry); - entryData.isValid = true; - - for (int l = 0; l < 2; ++l) { - // alreadyDone.clear(); - - final IndexBuilder indexBuilder = dictBuilder.indexBuilders.get(l); - for (int j = 0; j < subfields[l].length; ++j) { - String subfield = subfields[l][j]; - if (indexBuilder.index.sortLanguage == Language.de) { - subfield = parseField_DE(indexBuilder, subfield, entryData, j); - } else if (indexBuilder.index.sortLanguage == Language.en) { - subfield = parseField_EN(indexBuilder, subfield, entryData, j); + + if (singleLang) { + HtmlEntry htmlEntry = new HtmlEntry(entrySource, fields[0]); + htmlEntry.html = StringUtil.escapeUnicodeToPureHtml(fields[1]); + + final IndexBuilder titleIndexBuilder = dictBuilder.indexBuilders.get(0); + htmlEntry.addToDictionary(titleIndexBuilder.index.dict); + + TokenData tokenData = titleIndexBuilder.getOrCreateTokenData(fields[0]); + tokenData.hasMainEntry = true; + tokenData.htmlEntries.add(0, htmlEntry); + + final String[] tokens = NON_CHAR.split(fields[0]); + if (tokens.length > 1) { + for (final String token : tokens) { + assert token.length() >= 1; + assert token.indexOf("-") == -1; + if (/*!alreadyDone.contains(token) && */!token.isEmpty()) { + tokenData = titleIndexBuilder.getOrCreateTokenData(token); + tokenData.htmlEntries.add(htmlEntry); + } + } + } + final IndexedEntry entryData = new IndexedEntry(htmlEntry); + entryData.isValid = true; + + } else { + PairEntry pairEntry = new PairEntry(entrySource); + for (int i = 0; i < subfields[0].length; ++i) { + pairEntry.pairs.add(new PairEntry.Pair(subfields[0][i], subfields[1][i])); + } + final IndexedEntry entryData = new IndexedEntry(pairEntry); + entryData.isValid = true; + for (int l = 0; l < 2; ++l) { + // alreadyDone.clear(); + + final IndexBuilder indexBuilder = dictBuilder.indexBuilders.get(l); + for (int j = 0; j < subfields[l].length; ++j) { + String subfield = subfields[l][j]; + if (indexBuilder.index.sortLanguage == Language.de) { + subfield = parseField_DE(indexBuilder, subfield, entryData, j); + } else if (indexBuilder.index.sortLanguage == Language.en) { + subfield = parseField_EN(indexBuilder, subfield, entryData, j); + } + parseFieldGeneric(indexBuilder, subfield, entryData, j, subfields[l].length); } - parseFieldGeneric(indexBuilder, subfield, entryData, j, subfields[l].length); } } + } private StringBuilder extractParenthesized(StringBuilder in, String startChar, String endChar) {