import java.util.regex.Pattern;
import com.hughes.android.dictionary.engine.DictionaryBuilder;
+import com.hughes.android.dictionary.engine.EntrySource;
import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.engine.EntryTypeName;
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.engine.PairEntry;
import com.hughes.android.dictionary.engine.PairEntry.Pair;
-public class DictFileParser {
+public class DictFileParser implements Parser {
static final Logger logger = Logger.getLogger(DictFileParser.class.getName());
final IndexBuilder[] langIndexBuilders;
final IndexBuilder bothIndexBuilder;
+ EntrySource entrySource; // Not final: reassigned on every parse() call and handed to each PairEntry constructed there.
+
// final Set<String> alreadyDone = new HashSet<String>();
public DictFileParser(final Charset charset, boolean flipCols,
this.bothIndexBuilder = bothIndexBuilder;
}
- public void parseFile(final File file) throws IOException {
+ @Override
+ public void parse(final File file, final EntrySource entrySouce, final int pageLimit) throws IOException {
+ this.entrySource = entrySouce;
final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));
String line;
int count = 0;
while ((line = reader.readLine()) != null) {
+ if (pageLimit >= 0 && count >= pageLimit) {
+ return;
+ }
if (count % 10000 == 0) {
logger.info("count=" + count + ", line=" + line);
}
subfields[1] = new String[] { fields[1] };
}
- final PairEntry pairEntry = new PairEntry();
+ final PairEntry pairEntry = new PairEntry(entrySource);
for (int i = 0; i < subfields[0].length; ++i) {
subfields[0][i] = subfields[0][i].trim();
subfields[1][i] = subfields[1][i].trim();
+ if (subfields[0][i].length() == 0 && subfields[1][i].length() == 0) {
+ logger.warning("Empty pair: " + line);
+ continue;
+ }
+ if (subfields[0][i].length() == 0) {
+ subfields[0][i] = "__";
+ }
+ if (subfields[1][i].length() == 0) {
+ subfields[1][i] = "__";
+ }
pairEntry.pairs.add(new Pair(subfields[0][i], subfields[1][i]));
}
final IndexedEntry entryData = new IndexedEntry(pairEntry);