2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.tool.rbbi;
\r
9 import java.io.DataOutputStream;
\r
10 import java.io.FileInputStream;
\r
11 import java.io.FileNotFoundException;
\r
12 import java.io.FileOutputStream;
\r
13 import java.io.IOException;
\r
14 import java.io.InputStreamReader;
\r
15 import java.io.OutputStreamWriter;
\r
16 import java.io.PrintWriter;
\r
17 import java.io.UnsupportedEncodingException;
\r
18 import java.util.Vector;
\r
20 import com.ibm.icu.text.UnicodeSet;
\r
21 import com.ibm.icu.util.CompactByteArray;
\r
23 public class BuildDictionaryFile {
\r
24 public static void main(String args[])
\r
25 throws FileNotFoundException, UnsupportedEncodingException, IOException {
\r
26 String filename = args[0];
\r
27 String encoding = "";
\r
28 String outputFile = "";
\r
29 String listingFile = "";
\r
31 if (args.length >= 2)
\r
34 if(args.length >= 3)
\r
35 outputFile = args[2];
\r
37 if (args.length >= 4)
\r
38 listingFile = args[3];
\r
40 BuildDictionaryFile dictionary = new BuildDictionaryFile();
\r
41 dictionary.build(filename, encoding);
\r
43 DataOutputStream out = null;
\r
44 if (outputFile.length() != 0) {
\r
45 out = new DataOutputStream(new FileOutputStream(outputFile));
\r
46 dictionary.writeDictionaryFile(out);
\r
49 PrintWriter listing = null;
\r
50 if (listingFile.length() != 0) {
\r
51 listing = new PrintWriter(new OutputStreamWriter(new FileOutputStream(listingFile), "UnicodeLittle"));
\r
52 dictionary.printWordList("", 0, listing);
\r
57 public BuildDictionaryFile() {
\r
60 public void build(String filename, String encoding)
\r
61 throws FileNotFoundException, UnsupportedEncodingException, IOException {
\r
62 FileInputStream file = new FileInputStream(filename);
\r
63 InputStreamReader in;
\r
64 if (encoding.length() == 0)
\r
65 in = new InputStreamReader(file);
\r
67 in = new InputStreamReader(file, encoding);
\r
71 file = new FileInputStream(filename);
\r
72 if (encoding.length() == 0)
\r
73 in = new InputStreamReader(file);
\r
75 in = new InputStreamReader(file, encoding);
\r
77 buildStateTable(in);
\r
81 public void buildColumnMap(InputStreamReader in) throws IOException {
\r
82 System.out.println("Building column map...");
\r
83 UnicodeSet charsInFile = new UnicodeSet();
\r
87 ++totalChars; if (totalChars > 0 && totalChars % 5000 == 0) System.out.println("Read " + totalChars + " characters...");
\r
89 charsInFile.add((char)c);
\r
92 // Test.debugPrintln(charsInFile.toString());
\r
94 StringBuffer tempReverseMap = new StringBuffer();
\r
95 tempReverseMap.append(' ');
\r
97 columnMap = new CompactByteArray();
\r
98 int n = charsInFile.getRangeCount();
\r
100 for (int i=0; i<n; ++i) {
\r
101 char start = (char) charsInFile.getRangeStart(i);
\r
102 char end = (char) charsInFile.getRangeEnd(i);
\r
103 for (char ch = start; ch <= end; ch++) {
\r
104 if (columnMap.elementAt(Character.toLowerCase(ch)) == 0) {
\r
105 columnMap.setElementAt(Character.toUpperCase(ch), Character.toUpperCase(ch),
\r
107 columnMap.setElementAt(Character.toLowerCase(ch), Character.toLowerCase(ch),
\r
110 tempReverseMap.append(ch);
\r
114 //System.out.println("Compacting...");
\r
115 columnMap.compact();
\r
117 //System.out.println(tempReverseMap.toString());
\r
118 reverseColumnMap = new char[p];
\r
120 tempReverseMap.getChars(0, p, reverseColumnMap, 0);
\r
123 System.out.println("total columns = " + p);
\r
125 numColGroups = (numCols >> 5) + 1;
\r
128 short[] index = columnMap.getIndexArray();
\r
129 System.out.println("Index:");
\r
130 for (int i = 0; i < index.length; i++) {
\r
132 System.out.println();
\r
133 System.out.print(" " + Integer.toHexString(i * 128) + ":");
\r
135 System.out.print("\t" + Integer.toHexString(index[i]));
\r
137 System.out.println();
\r
138 byte[] data = columnMap.getStringArray();
\r
139 System.out.print("Values:");
\r
140 for (int i = 0; i < data.length; i++) {
\r
142 System.out.println();
\r
143 System.out.print(" " + Integer.toHexString(i) + ":");
\r
146 System.out.print("\t.");
\r
148 System.out.print("\t" + Integer.toString(data[i]));
\r
150 System.out.println();
\r
154 public void buildStateTable(InputStreamReader in) throws IOException {
\r
155 Vector tempTable = new Vector();
\r
156 tempTable.addElement(new int[numCols + 1]);
\r
161 int charsInWord = 0;
\r
164 short column = columnMap.elementAt((char)c);
\r
166 row = (int[])(tempTable.elementAt(state));
\r
168 if (row[column] == 0) {
\r
169 row[column] = tempTable.size();
\r
171 state = (tempTable.size());
\r
172 tempTable.addElement(new int[numCols + 1]);
\r
175 state = row[column];
\r
177 else if (state != 0) {
\r
178 if (row[0] != -1) {
\r
182 totalUniqueWordChars += charsInWord;
\r
185 if (totalWords % 5000 == 0) System.out.println("Read " + totalWords + " words, " + tempTable.size() + " rows...");
\r
192 row = (int[])(tempTable.elementAt(state));
\r
193 if (row[0] != -1) {
\r
196 totalUniqueWordChars += charsInWord;
\r
201 compress(tempTable);
\r
203 table = new short[numCols * tempTable.size()];
\r
204 for (int i = 0; i < tempTable.size(); i++) {
\r
205 row = (int[])tempTable.elementAt(i);
\r
206 for (int j = 0; j < numCols; j++)
\r
207 table[i * numCols + j] = (short)row[j];
\r
211 private void compress(Vector tempTable) {
\r
212 System.out.println("Before compression:");
\r
213 System.out.println(" Number of rows = " + tempTable.size());
\r
214 System.out.println(" Number of columns = " + numCols);
\r
215 System.out.println(" Number of cells = " + tempTable.size() * numCols);
\r
216 deleteDuplicateRows(tempTable);
\r
217 System.out.println("After removing duplicate rows:");
\r
218 System.out.println(" Number of rows = " + tempTable.size());
\r
219 System.out.println(" Number of columns = " + numCols);
\r
220 System.out.println(" Number of cells = " + tempTable.size() * numCols);
\r
221 stackRows(tempTable);
\r
222 if (tempTable.size() > 32767) throw new IllegalArgumentException("Too many rows in table!");
\r
223 System.out.println("After doubling up on rows:");
\r
224 System.out.println(" Number of rows = " + tempTable.size());
\r
225 System.out.println(" Number of columns = " + numCols);
\r
226 System.out.println(" Number of cells = " + tempTable.size() * numCols);
\r
231 private void deleteDuplicateRows(Vector tempTable) {
\r
232 int[] rowNumMap = new int[tempTable.size()];
\r
233 for (int i = 0; i < rowNumMap.length; i++)
\r
236 int nextClass = numCols;
\r
240 int[] row1, row2, tempRow;
\r
244 System.out.println("Making a pass (" + nextClass + " classes)...");
\r
246 lastClass = nextClass;
\r
247 while (currentClass < nextClass) {
\r
248 System.out.println(" currentClass = " + currentClass);
\r
250 row1 = row2 = null;
\r
251 for (int i = 0; i < tempTable.size(); i++) {
\r
252 tempRow = (int[])tempTable.elementAt(i);
\r
253 if (tempRow[numCols] == currentClass) {
\r
254 if (row1 == null) {
\r
255 row1 = (int[])tempTable.elementAt(i);
\r
258 row2 = (int[])tempTable.elementAt(i);
\r
259 for (int j = 0; j < numCols; j++) {
\r
260 if ((row1[j] == 0) != (row2[j] == 0) ||
\r
261 (row1[j] == -1) != (row2[j] == -1)) {
\r
262 row2[numCols] = nextClass;
\r
266 else if (row1[j] != 0 && row2[j] != 0 && row1[j] != -1
\r
267 && row2[j] != -1) {
\r
268 tempRow = (int[])tempTable.elementAt(row1[j]);
\r
269 tempCat = tempRow[numCols];
\r
270 tempRow = (int[])tempTable.elementAt(row2[j]);
\r
271 if (tempCat != tempRow[numCols]) {
\r
272 row2[numCols] = nextClass;
\r
284 //System.out.println();
\r
286 } while (lastClass != nextClass);
\r
288 int[] representatives = new int[nextClass];
\r
289 for (int i = 1; i < tempTable.size(); i++) {
\r
290 tempRow = (int[])tempTable.elementAt(i);
\r
291 if (representatives[tempRow[numCols]] == 0)
\r
292 representatives[tempRow[numCols]] = i;
\r
294 rowNumMap[i] = representatives[tempRow[numCols]];
\r
296 System.out.println("Renumbering...");
\r
298 // renumber all remaining rows
\r
299 for (int i = 0; i < rowNumMap.length; i++)
\r
300 if (rowNumMap[i] != i)
\r
301 tempTable.setElementAt(null, i);
\r
303 for (int i = 0; i < rowNumMap.length; i++)
\r
304 if (tempTable.elementAt(i) != null)
\r
305 rowNumMap[i] = newRowNum++;
\r
306 for (int i = 0; i < rowNumMap.length; i++)
\r
307 if (tempTable.elementAt(i) == null)
\r
308 rowNumMap[i] = rowNumMap[rowNumMap[i]];
\r
310 for (int i = tempTable.size() - 1; i >= 0; i--) {
\r
311 tempRow = (int[])tempTable.elementAt(i);
\r
312 if (tempRow == null)
\r
313 tempTable.removeElementAt(i);
\r
315 for (int j = 0; j < numCols; j++)
\r
316 if (tempRow[j] != -1)
\r
317 tempRow[j] = rowNumMap[j];
\r
320 //for (int i = 1; i < rowNumMap.length; i++) rowNumMap[i] = i; int newRowNum = rowNumMap.length;
\r
324 private void deleteDuplicateRows(Vector tempTable) {
\r
325 Vector work = (Vector)(tempTable.clone());
\r
326 boolean didDeleteRow = true;
\r
328 Vector tempMapping = new Vector(work.size());
\r
329 int[] mapping = new int[work.size()];
\r
330 for (int i = 0; i < mapping.length; i++) {
\r
332 tempMapping.addElement(new Integer(i));
\r
334 boolean[] tbd = new boolean[work.size()];
\r
336 while (didDeleteRow) {
\r
337 System.out.println(" " + work.size() + " rows...");
\r
338 int deletedRows = 0;
\r
339 didDeleteRow = false;
\r
341 sortTable(work, tempMapping, mapping, 1, work.size());
\r
342 for (int i = 0; i < work.size() - 1; ) {
\r
343 System.out.print("Deleting, inspecting row " + i + ", deleted " + deletedRows + " rows...\r");
\r
344 int rowToDelete = ((Integer)(tempMapping.elementAt(i + 1))).intValue();
\r
345 int rowToMapTo = ((Integer)(tempMapping.elementAt(i))).intValue();
\r
346 if (compareRows((int[])work.elementAt(i), (int[])work.elementAt(i + 1),
\r
348 tbd[rowToDelete] = true;
\r
349 tempTable.setElementAt(null, rowToDelete);
\r
350 while (tbd[mapping[rowToMapTo]])
\r
351 mapping[rowToMapTo] = mapping[mapping[rowToMapTo]];
\r
352 mapping[rowToDelete] = mapping[rowToMapTo];
\r
353 didDeleteRow = true;
\r
355 work.removeElementAt(i + 1);
\r
356 tempMapping.removeElementAt(i + 1);
\r
361 for (int i = 0; i < mapping.length; i++) {
\r
362 if (tbd[i] && tbd[mapping[i]])
\r
363 mapping[i] = mapping[mapping[i]];
\r
367 int decrementBy = 0;
\r
368 for (int i = 0; i < mapping.length; i++) {
\r
372 mapping[i] -= decrementBy;
\r
374 for (int i = 0; i < mapping.length; i++) {
\r
376 mapping[i] = mapping[mapping[i]];
\r
378 for (int i = tempTable.size() - 1; i >= 0; i--) {
\r
380 tempTable.removeElementAt(i);
\r
382 int[] row = (int[])tempTable.elementAt(i);
\r
383 for (int j = 0; j < numCols; j++)
\r
384 row[j] = (row[j] == -1) ? -1 : mapping[row[j]];
\r
389 private void sortTable(Vector tbl, Vector tempMapping, int[] mapping, int start, int end) {
\r
390 System.out.print("Sorting (" + start + ", " + end + ")...\r");
\r
391 if (start + 1 >= end)
\r
393 else if (start + 10 >= end) {
\r
394 for (int i = start + 1; i < end; i++) {
\r
395 int[] row = (int[])tbl.elementAt(i);
\r
396 Integer tempMap = (Integer)tempMapping.elementAt(i);
\r
398 for (j = i - 1; j >= start; j--) {
\r
399 if (compareRows((int[])tbl.elementAt(j), row, mapping) > 0) {
\r
400 tbl.setElementAt((int[])tbl.elementAt(j), j + 1);
\r
401 tempMapping.setElementAt((Integer)tempMapping.elementAt(j), j + 1);
\r
404 tbl.setElementAt(row, j + 1);
\r
405 tempMapping.setElementAt(tempMap, j + 1);
\r
410 tbl.setElementAt(row, start);
\r
411 tempMapping.setElementAt(tempMap, start);
\r
416 int boundaryPos = (start + end) / 2;
\r
418 boolean allTheSame = true;
\r
419 int firstDifferent = 0;
\r
422 int[] boundary = (int[])tbl.elementAt(boundaryPos);
\r
428 row = (int[])tbl.elementAt(i);
\r
429 while (i <= j && compareRows(row, boundary, mapping) < 0) {
\r
431 row = (int[])tbl.elementAt(i);
\r
434 row = (int[])tbl.elementAt(j);
\r
435 compResult = compareRows(row, boundary, mapping);
\r
436 while (i <= j && (compResult >= 0)) {
\r
437 if (compResult != 0) {
\r
438 allTheSame = false;
\r
439 firstDifferent = j;
\r
442 row = (int[])tbl.elementAt(j);
\r
443 compResult = compareRows(row, boundary, mapping);
\r
446 row = (int[])tbl.elementAt(j);
\r
447 tbl.setElementAt(tbl.elementAt(i), j);
\r
448 tbl.setElementAt(row, i);
\r
449 Object temp = tempMapping.elementAt(j);
\r
450 tempMapping.setElementAt(tempMapping.elementAt(i), j);
\r
451 tempMapping.setElementAt(temp, i);
\r
458 boundaryPos = firstDifferent;
\r
460 } while (i <= start);
\r
461 sortTable(tbl, tempMapping, mapping, start, i);
\r
462 sortTable(tbl, tempMapping, mapping, i, end);
\r
466 private byte compareRows(int[] row1, int[] row2, int[] mapping) {
\r
467 for (int i = 0; i < numCols; i++) {
\r
468 int c1 = (row1[i] == -1) ? -1 : mapping[row1[i]];
\r
469 int c2 = (row2[i] == -1) ? -1 : mapping[row2[i]];
\r
478 private int[] buildRowIndex(Vector tempTable) {
\r
479 int[] tempRowIndex = new int[tempTable.size()];
\r
480 rowIndexFlagsIndex = new short[tempTable.size()];
\r
481 Vector tempRowIndexFlags = new Vector();
\r
482 rowIndexShifts = new byte[tempTable.size()];
\r
484 // build the row index. Each entry in the row index starts out referring
\r
485 // to the original row (so it doesn't actually do any mapping), and we set
\r
486 // up the index flags to show which cells in the row are populated
\r
487 for (int i = 0; i < tempTable.size(); i++) {
\r
488 tempRowIndex[i] = i;
\r
490 int[] row = (int[])tempTable.elementAt(i);
\r
491 if (row[numCols] == 1 && row[0] == 0) {
\r
493 while (row[j] == 0)
\r
495 rowIndexFlagsIndex[i] = (short)(-j);
\r
498 int[] flags = new int[numColGroups];
\r
501 for (int j = 0; j < numCols; j++) {
\r
503 flags[colGroup] |= nextFlag;
\r
505 if (nextFlag == 0) {
\r
512 while (j < tempRowIndexFlags.size()) {
\r
513 if (((Integer)tempRowIndexFlags.elementAt(j)).intValue() ==
\r
517 if (colGroup >= numColGroups)
\r
520 else if (colGroup != 0)
\r
525 rowIndexFlagsIndex[i] = (short)(j - colGroup);
\r
526 while (colGroup < numColGroups) {
\r
527 tempRowIndexFlags.addElement(new Integer(flags[colGroup]));
\r
532 rowIndexFlags = new int[tempRowIndexFlags.size()];
\r
533 for (int i = 0; i < rowIndexFlags.length; i++)
\r
534 rowIndexFlags[i] = ((Integer)tempRowIndexFlags.elementAt(i)).intValue();
\r
535 System.out.println("Number of column groups = " + numColGroups);
\r
536 System.out.println("Size of rowIndexFlags = " + rowIndexFlags.length);
\r
538 return tempRowIndex;
\r
541 private void stackRows(Vector tempTable) {
\r
543 System.out.print("Row:\t");
\r
544 for (int i = 0; i < numCols; i++)
\r
545 System.out.print(reverseColumnMap[i] + "\t");
\r
546 System.out.println();
\r
547 for (int i = 0; i < tempTable.size(); i++) {
\r
548 System.out.print(Integer.toString(i) + ":\t");
\r
549 int[] row = (int[])tempTable.elementAt(i);
\r
550 for (int j = 0; j < row.length; j++)
\r
551 if (row[j] != 0) System.out.print(Integer.toString(row[j]) + "\t");
\r
552 else System.out.print(".\t");
\r
553 System.out.println();
\r
557 int[] tempRowIndex = buildRowIndex(tempTable);
\r
558 boolean[] tbd = new boolean[tempTable.size()];
\r
560 // now we actually go through and stack rows together
\r
561 for (int i = 0; i < tempTable.size(); i++) {
\r
564 System.out.print("Stacking, inspecting row " + i + "...\r");
\r
565 //System.out.println("Stacking, inspecting row " + i + "...");
\r
567 int[] destRow = (int[])tempTable.elementAt(i);
\r
569 boolean[] tempFlags = new boolean[numCols];
\r
570 boolean[] filledCells = new boolean[numCols];
\r
571 for (int j = 0; j < numCols; j++)
\r
572 filledCells[j] = destRow[j] != 0;
\r
574 for (int j = i + 1; destRow[numCols] < numCols && j < tempTable.size(); j++) {
\r
578 int[] srcRow = (int[])tempTable.elementAt(j);
\r
579 if (srcRow[numCols] + destRow[numCols] > numCols)
\r
582 int maxLeftShift = -999;
\r
583 int maxRightShift = 0;
\r
584 for (int k = 0; k < numCols; k++) {
\r
585 tempFlags[k] = srcRow[k] != 0;
\r
586 if (tempFlags[k]) {
\r
587 if (maxLeftShift == -999)
\r
589 maxRightShift = (numCols - 1) - k;
\r
594 for (shift = maxLeftShift; shift <= maxRightShift; shift++) {
\r
596 for (k = 0; k < numCols; k++) {
\r
597 if (tempFlags[k] && filledCells[k + shift])
\r
603 if (shift <= maxRightShift) {
\r
604 //System.out.println("Packing row " + j + " into row " + i + " with shift = " + shift);
\r
605 for (int k = 0; k < numCols; k++) {
\r
606 if (tempFlags[k]) {
\r
607 filledCells[k + shift] = true;
\r
608 destRow[k + shift] = srcRow[k];
\r
609 ++destRow[numCols];
\r
613 tempRowIndex[j] = i;
\r
614 rowIndexShifts[j] = (byte)shift;
\r
619 // finally, we squeeze out all the deleted rows
\r
620 int decrementBy = 0;
\r
621 for (int i = 0; i < tempRowIndex.length; i++) {
\r
623 tempRowIndex[i] -= decrementBy;
\r
627 rowIndex = new short[tempRowIndex.length];
\r
628 for (int i = tempRowIndex.length - 1; i >= 0; i--) {
\r
630 rowIndex[i] = (short)(tempRowIndex[tempRowIndex[i]]);
\r
631 tempTable.removeElementAt(i);
\r
634 rowIndex[i] = (short)tempRowIndex[i];
\r
638 // private void printTable() {
\r
640 // int populatedCells = 0;
\r
642 // System.out.println("Conceptual table:");
\r
643 // System.out.println(" Row: a b c d e f g h i j k l m n"
\r
644 // + " o p q r s t u v w x y z ' #");
\r
646 // boolean[] rowPrintFlags = new boolean[rowIndex.length];
\r
647 // printConceptualTable("", 0, rowPrintFlags);
\r
650 // System.out.println();
\r
651 // System.out.println("Conceptual table:");
\r
652 // System.out.print(" Row:");
\r
653 // for (int i = 0; i < reverseColumnMap.length; i++) {
\r
654 // System.out.print(" " + reverseColumnMap[i]);
\r
656 // for (int i = 0; i < rowIndex.length; i++) {
\r
657 // System.out.println();
\r
658 // printNumber(i, 4);
\r
659 // System.out.print(":");
\r
660 // for (int j = 0; j < numCols; j++)
\r
661 // printNumber(at(i, j), 4);
\r
663 // System.out.println('\n');
\r
665 // System.out.println();
\r
666 // System.out.println("Internally stored table:");
\r
667 // System.out.print(" Row:");
\r
668 // for (int i = 0; i < reverseColumnMap.length; i++) {
\r
669 // System.out.print(" " + reverseColumnMap[i]);
\r
671 // for (int i = 0; i < table.length; i++) {
\r
672 // if (i % numCols == 0) {
\r
673 // System.out.println();
\r
674 // printNumber(i / numCols, 4);
\r
675 // System.out.print(":");
\r
677 // cell = table[i];
\r
679 // populatedCells++;
\r
680 // printNumber(cell, 4);
\r
682 // System.out.println('\n');
\r
684 //System.out.println("Row index:");
\r
685 //for (int i = 0; i < rowIndex.length; i++) {
\r
686 // System.out.print(" " + i + " -> " + rowIndex[i]);
\r
687 // if (rowIndexFlagsIndex[i] < 0)
\r
688 // System.out.print(", flags = " + Integer.toBinaryString((1 << (-rowIndexFlagsIndex[i]))) + " (" + rowIndexFlagsIndex[i]);
\r
690 // System.out.print(", flags = " + Integer.toBinaryString(rowIndexFlags[rowIndexFlagsIndex[i]]) + " (" + rowIndexFlagsIndex[i]);
\r
691 // System.out.println("), shift = " + rowIndexShifts[i]);
\r
694 // int theoreticalMinRows = populatedCells / numCols;
\r
695 // if (populatedCells % numCols != 0)
\r
696 // theoreticalMinRows++;
\r
697 // int oneCellRows = 0;
\r
698 // for (int i = 0; i < rowIndexFlags.length; i++) {
\r
699 // double temp = Math.log(rowIndexFlags[i]) / Math.log(2);
\r
700 // if (temp == (int)temp)
\r
704 // System.out.println('\n');
\r
705 // System.out.println("Total words in input = " + totalWords);
\r
706 // System.out.println("Total unique words = " + uniqueWords + ", comprising " +
\r
707 // totalUniqueWordChars + " characters\n");
\r
708 // System.out.println("Number of populated cells = " + populatedCells);
\r
709 // System.out.println("Total number of cells = " + (table.length));
\r
710 // System.out.println("Residency = " + ((float)populatedCells / table.length * 100) + '%');
\r
711 // System.out.println("Ratio of populated cells to unique-word characters = " +
\r
712 // ((float)populatedCells / totalUniqueWordChars * 100) + '%');
\r
713 // System.out.println("Ratio of total cells to unique-word characters = " +
\r
714 // ((float)table.length / totalUniqueWordChars * 100) + '%');
\r
715 // System.out.println("Number of rows = " + (table.length / numCols));
\r
716 // System.out.println("Theoretical minimum number of rows = " + theoreticalMinRows);
\r
717 // System.out.println("Ratio of number of rows to theoretical minimum = " +
\r
718 // ((float)(table.length / numCols) / theoreticalMinRows * 100) + '%');
\r
719 // System.out.println("Number of conceptual rows = " + rowIndex.length);
\r
720 // System.out.println("Conceptual rows with only one populated cell = " + oneCellRows);
\r
721 // System.out.println("Ratio of one-cell rows to total conceptual rows = " + (((float)oneCellRows)
\r
722 // / rowIndex.length * 100) + '%');
\r
723 // System.out.println("Average number of populated cells in multi-cell rows = " +
\r
724 // ((float)(populatedCells - oneCellRows) / (rowIndex.length - oneCellRows)));
\r
726 // int storageUsed = table.length * 2 + rowIndex.length * 2
\r
727 // + rowIndexFlags.length * 4 + rowIndexShifts.length;
\r
728 // System.out.println("Total number of bytes in table (including indexes) = " +
\r
730 // System.out.println("Bytes of overhead per unique-word character = " + ((double)(storageUsed
\r
731 // - (totalUniqueWordChars * 2)) / totalUniqueWordChars));
\r
735 // private void printConceptualTable(String initialString, int state, boolean[] flags) {
\r
736 // if (initialString.length() == 0)
\r
737 // System.out.println("root:");
\r
739 // System.out.println(initialString + ':');
\r
741 // if (!flags[state]) {
\r
742 // flags[state] = true;
\r
743 // printNumber(state, 4);
\r
744 // System.out.print(":");
\r
745 // for (int i = 0; i < numCols; i++)
\r
746 // printNumber(at(state, i), 4);
\r
747 // System.out.println();
\r
751 // for (int i = 0; i < numCols; i++) {
\r
752 // nextState = at(state, i);
\r
753 // if (nextState > 0 && !flags[nextState]) {
\r
754 // printNumber(nextState, 4);
\r
755 // System.out.print(":");
\r
756 // for (int j = 0; j < numCols; j++)
\r
757 // printNumber(at(nextState, j), 4);
\r
758 // System.out.println();
\r
761 // for (int i = 0; i < numCols; i++) {
\r
762 // nextState = at(state, i);
\r
763 // if (nextState > 0 && !flags[nextState]) {
\r
765 // if (nextState == 27)
\r
767 // else if (nextState == 26)
\r
768 // nextChar = '\'';
\r
770 // nextChar = (char)(i + 'a');
\r
771 // flags[nextState] = true;
\r
772 // printConceptualTable(initialString + nextChar, nextState, flags);
\r
777 private void printWordList(String partialWord, int state, PrintWriter out)
\r
778 throws IOException {
\r
780 System.out.println(partialWord);
\r
782 out.println(partialWord);
\r
785 for (int i = 0; i < numCols; i++) {
\r
786 if (at(state, i) != 0)
\r
787 printWordList(partialWord + reverseColumnMap[i], at(state, i), out);
\r
792 private void writeDictionaryFile(DataOutputStream out) throws IOException {
\r
793 out.writeInt(0); // version number
\r
795 char[] columnMapIndexes = columnMap.getIndexArray();
\r
796 out.writeInt(columnMapIndexes.length);
\r
797 for (int i = 0; i < columnMapIndexes.length; i++)
\r
798 out.writeShort((short)columnMapIndexes[i]);
\r
799 byte[] columnMapValues = columnMap.getValueArray();
\r
800 out.writeInt(columnMapValues.length);
\r
801 for (int i = 0; i < columnMapValues.length; i++)
\r
802 out.writeByte((byte)columnMapValues[i]);
\r
804 out.writeInt(numCols);
\r
805 out.writeInt(numColGroups);
\r
807 out.writeInt(rowIndex.length);
\r
808 for (int i = 0; i < rowIndex.length; i++)
\r
809 out.writeShort(rowIndex[i]);
\r
811 out.writeInt(rowIndexFlagsIndex.length);
\r
812 for (int i = 0; i < rowIndexFlagsIndex.length; i++)
\r
813 out.writeShort(rowIndexFlagsIndex[i]);
\r
814 out.writeInt(rowIndexFlags.length);
\r
815 for (int i = 0; i < rowIndexFlags.length; i++)
\r
816 out.writeInt(rowIndexFlags[i]);
\r
818 out.writeInt(rowIndexShifts.length);
\r
819 for (int i = 0; i < rowIndexShifts.length; i++)
\r
820 out.writeByte(rowIndexShifts[i]);
\r
822 out.writeInt(table.length);
\r
823 for (int i = 0; i < table.length; i++)
\r
824 out.writeShort(table[i]);
\r
829 // private void printNumber(int x, int width) {
\r
830 // String s = String.valueOf(x);
\r
831 // if (width > s.length())
\r
832 // System.out.print(spaces.substring(0, width - s.length()));
\r
834 // System.out.print(s);
\r
836 // System.out.print('.');
\r
839 public final short at(int row, char ch) {
\r
840 int col = columnMap.elementAt(ch);
\r
841 return at(row, col);
\r
844 public final short at(int row, int col) {
\r
845 if (cellIsPopulated(row, col))
\r
846 return internalAt(rowIndex[row], col + rowIndexShifts[row]);
\r
851 private final boolean cellIsPopulated(int row, int col) {
\r
852 if (rowIndexFlagsIndex[row] < 0)
\r
853 return col == -rowIndexFlagsIndex[row];
\r
855 int flags = rowIndexFlags[rowIndexFlagsIndex[row] + (col >> 5)];
\r
856 return (flags & (1 << (col & 0x1f))) != 0;
\r
860 private final short internalAt(int row, int col) {
\r
861 return table[row * numCols + col];
\r
864 private CompactByteArray columnMap = null;
\r
865 private char[] reverseColumnMap = null;
\r
866 private int numCols;
\r
867 private int numColGroups;
\r
868 private short[] table = null;
\r
869 private short[] rowIndex = null;
\r
870 private int[] rowIndexFlags = null;
\r
871 private short[] rowIndexFlagsIndex = null;
\r
872 private byte[] rowIndexShifts = null;
\r
874 private int totalWords = 0;
\r
875 private int uniqueWords = 0;
\r
876 private int totalUniqueWordChars = 0;
\r
878 //private static final String spaces = " ";
\r