gitweb.fperrin.net Git - Dictionary.git/commitdiff
Multi word search now looks for exact matches of TokenRows.
author Thad Hughes <thadh@google.com>
Sun, 23 Dec 2012 18:38:15 +0000 (10:38 -0800)
committer Thad Hughes <thadh@google.com>
Sun, 23 Dec 2012 18:38:15 +0000 (10:38 -0800)
src/com/hughes/android/dictionary/DictionaryActivity.java
src/com/hughes/android/dictionary/engine/HtmlEntry.java
src/com/hughes/android/dictionary/engine/Index.java
src/com/hughes/android/dictionary/engine/RowMatchType.java

index 75751c711c1a165c01dd46544a5d0134c7136c9b..b008ea9677ae88d5088632e41e83120c058a16be 100644 (file)
@@ -79,6 +79,7 @@ import com.hughes.android.dictionary.engine.TokenRow;
 import com.hughes.android.dictionary.engine.TransliteratorManager;
 import com.hughes.android.util.IntentLauncher;
 import com.hughes.android.util.NonLinkClickableSpan;
+import com.hughes.util.StringUtil;
 
 import java.io.File;
 import java.io.FileWriter;
@@ -1022,7 +1023,7 @@ public class DictionaryActivity extends ListActivity {
         boolean done = false;
 
         SearchOperation(final String searchText, final Index index) {
-            this.searchText = searchText.trim();
+            this.searchText = StringUtil.normalizeWhitespace(searchText);
             this.index = index;
         }
 
@@ -1039,7 +1040,7 @@ public class DictionaryActivity extends ListActivity {
                     searchResult = index.findInsertionPoint(searchText, interrupted);
                 } else {
                     searchTokens = Arrays.asList(searchTokenArray);
-                    multiWordSearchResult = index.multiWordSearch(searchTokens, interrupted);
+                    multiWordSearchResult = index.multiWordSearch(searchText, searchTokens, interrupted);
                 }
                 Log.d(LOG,
                         "searchText=" + searchText + ", searchDuration="
index c7e06282e52aadec67c29d9f897ecc2e543601d2..06101860c3393a4bf65bd2848b12269c45ba9a7c 100644 (file)
@@ -2,7 +2,6 @@ package com.hughes.android.dictionary.engine;
 
 import android.content.Intent;
 import android.net.Uri;
-import android.util.Log;
 
 import com.hughes.android.dictionary.C;
 import com.hughes.util.StringUtil;
index 535250f33777021669687a6dd43ca081306b029d..68a0dc27a9d6ee3645cde0a322839bafa98bbfef 100644 (file)
@@ -21,6 +21,7 @@ import com.hughes.android.dictionary.DictionaryInfo;
 import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
 import com.hughes.android.dictionary.engine.RowBase.RowKey;
 import com.hughes.util.CachingList;
+import com.hughes.util.CollectionUtil;
 import com.hughes.util.TransformingList;
 import com.hughes.util.raf.RAFList;
 import com.hughes.util.raf.RAFSerializable;
@@ -315,7 +316,8 @@ public final class Index implements RAFSerializable<Index> {
   }
   
   
-  public final List<RowBase> multiWordSearch(final List<String> searchTokens, final AtomicBoolean interrupted) {
+  public final List<RowBase> multiWordSearch(
+          final String searchText, final List<String> searchTokens, final AtomicBoolean interrupted) {
     final long startMills = System.currentTimeMillis();
     final List<RowBase> result = new ArrayList<RowBase>();
     
@@ -323,7 +325,7 @@ public final class Index implements RAFSerializable<Index> {
     
     String bestPrefix = null;
     int leastRows = Integer.MAX_VALUE;
-    final StringBuilder regex = new StringBuilder();
+    final StringBuilder searchTokensRegex = new StringBuilder();
     for (int i = 0; i < searchTokens.size(); ++i) {
       if (interrupted.get()) { return null; }
       final String searchToken = searchTokens.get(i);
@@ -345,12 +347,12 @@ public final class Index implements RAFSerializable<Index> {
         }
       }
 
-      if (regex.length() > 0) {
-        regex.append("[\\s]*");
+      if (searchTokensRegex.length() > 0) {
+        searchTokensRegex.append("[\\s]*");
       }
-      regex.append(Pattern.quote(normalized));
+      searchTokensRegex.append(Pattern.quote(normalized));
     }
-    final Pattern pattern = Pattern.compile(regex.toString());
+    final Pattern pattern = Pattern.compile(searchTokensRegex.toString());
     
     if (bestPrefix == null) {
       bestPrefix = searchTokens.get(0);
@@ -367,14 +369,22 @@ public final class Index implements RAFSerializable<Index> {
     }
     
     int matchCount = 0;
-    final Set<RowKey> cachedRowKeys = new HashSet<RowBase.RowKey>();
     
-//    for (final String searchToken : searchTokens) {
-    final String searchToken = bestPrefix;
+    final int exactMatchIndex = findInsertionPointIndex(searchText, interrupted);
+    if (exactMatchIndex != -1) {
+        final IndexEntry exactMatch = sortedIndexEntries.get(exactMatchIndex);
+        if (pattern.matcher(exactMatch.token).matches()) {
+            matches.get(RowMatchType.TITLE_MATCH).add(rows.get(exactMatch.startRow));
+        }
+    }
+
     
+    final String searchToken = bestPrefix;
     final int insertionPointIndex = findInsertionPointIndex(searchToken, interrupted);
-
-    for (int index = insertionPointIndex; index < sortedIndexEntries.size() && matchCount < MAX_SEARCH_ROWS; ++index) {
+    final Set<RowKey> rowsAlreadySeen = new HashSet<RowBase.RowKey>();
+    for (int index = insertionPointIndex; 
+            index < sortedIndexEntries.size() && matchCount < MAX_SEARCH_ROWS; 
+            ++index) {
         if (interrupted.get()) { return null; }
         final IndexEntry indexEntry = sortedIndexEntries.get(index);
         if (!indexEntry.normalizedToken.startsWith(searchToken)) {
@@ -384,14 +394,16 @@ public final class Index implements RAFSerializable<Index> {
 //        System.out.println("Searching indexEntry: " + indexEntry.token);
 
         // Extra +1 to skip token row.
-        for (int rowIndex = indexEntry.startRow + 1; rowIndex < indexEntry.startRow + 1 + indexEntry.numRows && rowIndex < rows.size(); ++rowIndex) {
+        for (int rowIndex = indexEntry.startRow + 1; 
+                rowIndex < indexEntry.startRow + 1 + indexEntry.numRows && rowIndex < rows.size(); 
+                ++rowIndex) {
           if (interrupted.get()) { return null; }
           final RowBase row = rows.get(rowIndex);
           final RowBase.RowKey rowKey = row.getRowKey();
-          if (cachedRowKeys.contains(rowKey)) {
+          if (rowsAlreadySeen.contains(rowKey)) {
             continue;
           }
-          cachedRowKeys.add(rowKey);
+          rowsAlreadySeen.add(rowKey);
           final RowMatchType matchType = row.matches(searchTokens, pattern, normalizer(), swapPairEntries);
           if (matchType != RowMatchType.NO_MATCH) {
             matches.get(matchType).add(row);
@@ -409,7 +421,7 @@ public final class Index implements RAFSerializable<Index> {
       result.addAll(ordered);
     }
     
-    //System.out.println("searchDuration: " + (System.currentTimeMillis() - startMills));
+    System.out.println("searchDuration: " + (System.currentTimeMillis() - startMills));
     return result;
   }
   
index 96ac4ad364ee67e728fcd31e8be599a559dc8bd2..e0be508fb3fc2af9447cb8a791f32a46de195467 100644 (file)
@@ -15,9 +15,8 @@
 package com.hughes.android.dictionary.engine;
 
 public enum RowMatchType {
-  
-  ORDERED_MATCH,
-  BAG_OF_WORDS_MATCH,
-  NO_MATCH
-
+    TITLE_MATCH,
+    ORDERED_MATCH,
+    BAG_OF_WORDS_MATCH,
+    NO_MATCH
 }