From: Thad Hughes Date: Sun, 23 Dec 2012 18:38:15 +0000 (-0800) Subject: Multi word search now looks for exact matches of TokenRows. X-Git-Url: http://gitweb.fperrin.net/?p=Dictionary.git;a=commitdiff_plain;h=9f89cb85dcaa0313bb9714e24d10598c018c4c92 Multi word search now looks for exact matches of TokenRows. --- diff --git a/src/com/hughes/android/dictionary/DictionaryActivity.java b/src/com/hughes/android/dictionary/DictionaryActivity.java index 75751c7..b008ea9 100644 --- a/src/com/hughes/android/dictionary/DictionaryActivity.java +++ b/src/com/hughes/android/dictionary/DictionaryActivity.java @@ -79,6 +79,7 @@ import com.hughes.android.dictionary.engine.TokenRow; import com.hughes.android.dictionary.engine.TransliteratorManager; import com.hughes.android.util.IntentLauncher; import com.hughes.android.util.NonLinkClickableSpan; +import com.hughes.util.StringUtil; import java.io.File; import java.io.FileWriter; @@ -1022,7 +1023,7 @@ public class DictionaryActivity extends ListActivity { boolean done = false; SearchOperation(final String searchText, final Index index) { - this.searchText = searchText.trim(); + this.searchText = StringUtil.normalizeWhitespace(searchText); this.index = index; } @@ -1039,7 +1040,7 @@ public class DictionaryActivity extends ListActivity { searchResult = index.findInsertionPoint(searchText, interrupted); } else { searchTokens = Arrays.asList(searchTokenArray); - multiWordSearchResult = index.multiWordSearch(searchTokens, interrupted); + multiWordSearchResult = index.multiWordSearch(searchText, searchTokens, interrupted); } Log.d(LOG, "searchText=" + searchText + ", searchDuration=" diff --git a/src/com/hughes/android/dictionary/engine/HtmlEntry.java b/src/com/hughes/android/dictionary/engine/HtmlEntry.java index c7e0628..0610186 100644 --- a/src/com/hughes/android/dictionary/engine/HtmlEntry.java +++ b/src/com/hughes/android/dictionary/engine/HtmlEntry.java @@ -2,7 +2,6 @@ package com.hughes.android.dictionary.engine; import android.content.Intent; import android.net.Uri; -import android.util.Log; import com.hughes.android.dictionary.C; import com.hughes.util.StringUtil; diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java index 535250f..68a0dc2 100644 --- a/src/com/hughes/android/dictionary/engine/Index.java +++ b/src/com/hughes/android/dictionary/engine/Index.java @@ -21,6 +21,7 @@ import com.hughes.android.dictionary.DictionaryInfo; import com.hughes.android.dictionary.DictionaryInfo.IndexInfo; import com.hughes.android.dictionary.engine.RowBase.RowKey; import com.hughes.util.CachingList; +import com.hughes.util.CollectionUtil; import com.hughes.util.TransformingList; import com.hughes.util.raf.RAFList; import com.hughes.util.raf.RAFSerializable; @@ -315,7 +316,8 @@ public final class Index implements RAFSerializable { } - public final List multiWordSearch(final List searchTokens, final AtomicBoolean interrupted) { + public final List multiWordSearch( + final String searchText, final List searchTokens, final AtomicBoolean interrupted) { final long startMills = System.currentTimeMillis(); final List result = new ArrayList(); @@ -323,7 +325,7 @@ public final class Index implements RAFSerializable { String bestPrefix = null; int leastRows = Integer.MAX_VALUE; - final StringBuilder regex = new StringBuilder(); + final StringBuilder searchTokensRegex = new StringBuilder(); for (int i = 0; i < searchTokens.size(); ++i) { if (interrupted.get()) { return null; } final String searchToken = searchTokens.get(i); @@ -345,12 +347,12 @@ public final class Index implements RAFSerializable { } } - if (regex.length() > 0) { - regex.append("[\\s]*"); + if (searchTokensRegex.length() > 0) { + searchTokensRegex.append("[\\s]*"); } - regex.append(Pattern.quote(normalized)); + searchTokensRegex.append(Pattern.quote(normalized)); } - final Pattern pattern = Pattern.compile(regex.toString()); + final Pattern pattern = Pattern.compile(searchTokensRegex.toString()); if (bestPrefix == null) { bestPrefix = searchTokens.get(0); @@ -367,14 +369,22 @@ public final class Index implements RAFSerializable { } int matchCount = 0; - final Set cachedRowKeys = new HashSet(); -// for (final String searchToken : searchTokens) { - final String searchToken = bestPrefix; + final int exactMatchIndex = findInsertionPointIndex(searchText, interrupted); + if (exactMatchIndex != -1) { + final IndexEntry exactMatch = sortedIndexEntries.get(exactMatchIndex); + if (pattern.matcher(exactMatch.token).matches()) { + matches.get(RowMatchType.TITLE_MATCH).add(rows.get(exactMatch.startRow)); + } + } + + final String searchToken = bestPrefix; final int insertionPointIndex = findInsertionPointIndex(searchToken, interrupted); - - for (int index = insertionPointIndex; index < sortedIndexEntries.size() && matchCount < MAX_SEARCH_ROWS; ++index) { + final Set rowsAlreadySeen = new HashSet(); + for (int index = insertionPointIndex; + index < sortedIndexEntries.size() && matchCount < MAX_SEARCH_ROWS; + ++index) { if (interrupted.get()) { return null; } final IndexEntry indexEntry = sortedIndexEntries.get(index); if (!indexEntry.normalizedToken.startsWith(searchToken)) { @@ -384,14 +394,16 @@ public final class Index implements RAFSerializable { // System.out.println("Searching indexEntry: " + indexEntry.token); // Extra +1 to skip token row. - for (int rowIndex = indexEntry.startRow + 1; rowIndex < indexEntry.startRow + 1 + indexEntry.numRows && rowIndex < rows.size(); ++rowIndex) { + for (int rowIndex = indexEntry.startRow + 1; + rowIndex < indexEntry.startRow + 1 + indexEntry.numRows && rowIndex < rows.size(); + ++rowIndex) { if (interrupted.get()) { return null; } final RowBase row = rows.get(rowIndex); final RowBase.RowKey rowKey = row.getRowKey(); - if (cachedRowKeys.contains(rowKey)) { + if (rowsAlreadySeen.contains(rowKey)) { continue; } - cachedRowKeys.add(rowKey); + rowsAlreadySeen.add(rowKey); final RowMatchType matchType = row.matches(searchTokens, pattern, normalizer(), swapPairEntries); if (matchType != RowMatchType.NO_MATCH) { matches.get(matchType).add(row); @@ -409,7 +421,7 @@ public final class Index implements RAFSerializable { result.addAll(ordered); } - //System.out.println("searchDuration: " + (System.currentTimeMillis() - startMills)); + System.out.println("searchDuration: " + (System.currentTimeMillis() - startMills)); return result; } diff --git a/src/com/hughes/android/dictionary/engine/RowMatchType.java b/src/com/hughes/android/dictionary/engine/RowMatchType.java index 96ac4ad..e0be508 100644 --- a/src/com/hughes/android/dictionary/engine/RowMatchType.java +++ b/src/com/hughes/android/dictionary/engine/RowMatchType.java @@ -15,9 +15,8 @@ package com.hughes.android.dictionary.engine; public enum RowMatchType { - - ORDERED_MATCH, - BAG_OF_WORDS_MATCH, - NO_MATCH - + TITLE_MATCH, + ORDERED_MATCH, + BAG_OF_WORDS_MATCH, + NO_MATCH }