+
+ /**
+  * Serializes this index to {@code out}: the names, sort language, normalizer rules and
+  * swap flag first, then the sorted index entries, the stoplist and finally the rows.
+  *
+  * @param out destination; it is cast to {@link RandomAccessFile}, so any other
+  *            {@code DataOutput} implementation fails with a {@code ClassCastException}
+  * @throws IOException if any of the underlying writes fails
+  */
+ @Override
+ public void write(final DataOutput out) throws IOException {
+     final RandomAccessFile file = (RandomAccessFile) out;
+
+     // Fixed header fields.
+     file.writeUTF(shortName);
+     file.writeUTF(longName);
+     file.writeUTF(sortLanguage.getIsoCode());
+     file.writeUTF(normalizerRules);
+     file.writeBoolean(swapPairEntries);
+
+     // mainTokenCount is only emitted when the dictionary file version is at least 2.
+     if (dict.dictFileVersion >= 2) {
+         file.writeInt(mainTokenCount);
+     }
+
+     RAFList.write(file, sortedIndexEntries, indexEntrySerializer, 32, true);
+
+     // Stoplist: a var-int element count followed by every element as a UTF string.
+     StringUtil.writeVarInt(file, stoplist.size());
+     for (final String stoplistItem : stoplist) {
+         file.writeUTF(stoplistItem);
+     }
+
+     UniformRAFList.write(file, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
+ }
+
+ /**
+  * Prints every row of this index to {@code out}, in the iteration order of {@code rows}.
+  *
+  * @param out stream that is handed to each row's {@code print} method
+  */
+ public void print(final PrintStream out) {
+     for (final RowBase currentRow : rows) {
+         currentRow.print(out);
+     }
+ }
+
+ /** Serializer that reads and writes {@link IndexEntry} objects on behalf of this index. */
+ private final RAFSerializer<IndexEntry> indexEntrySerializer = new RAFSerializer<IndexEntry>() {
+     /** Constructs an entry from {@code input}, passing the enclosing index along. */
+     @Override
+     public IndexEntry read(final DataInput input) throws IOException {
+         final IndexEntry entry = new IndexEntry(Index.this, input);
+         return entry;
+     }
+
+     /** Delegates to the entry's own {@code write} method. */
+     @Override
+     public void write(final DataOutput output, final IndexEntry entry) throws IOException {
+         entry.write(output);
+     }
+ };
+
+ public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
+ public final String token;
+ private final String normalizedToken;
+ public final int startRow;
+ public final int numRows; // doesn't count the token row!
+ public List<HtmlEntry> htmlEntries;
+
+ /**
+  * Creates an entry from already-known values; the HTML entry list starts out as an
+  * empty {@link ArrayList}.
+  *
+  * @param index not used by this constructor
+  * @param token the token; asserted to be non-empty and unchanged by {@link String#trim()}
+  * @param normalizedToken the normalized form of {@code token}
+  * @param startRow value stored in {@link #startRow}
+  * @param numRows value stored in {@link #numRows}
+  */
+ public IndexEntry(final Index index, final String token, final String normalizedToken,
+         final int startRow, final int numRows) {
+     // Sanity checks; they only run when assertions are enabled.
+     assert token.trim().equals(token);
+     assert !token.isEmpty();
+
+     this.htmlEntries = new ArrayList<HtmlEntry>();
+     this.numRows = numRows;
+     this.startRow = startRow;
+     this.normalizedToken = normalizedToken;
+     this.token = token;
+ }
+
+ /**
+  * Reads an entry from {@code raf}. The layout depends on {@code index.dict.dictFileVersion}:
+  * <ul>
+  * <li>version 7 and later: the row fields are var-ints, and the HTML entries are a var-int
+  *     count followed by that many var-int indices into {@code index.dict.htmlEntries};</li>
+  * <li>version 6: the row fields are plain ints, and the HTML entries come from a
+  *     {@code RAFList} created with the current file pointer, wrapped in a
+  *     {@code CachingList};</li>
+  * <li>older versions: the row fields are plain ints and the HTML entry list is empty.</li>
+  * </ul>
+  *
+  * @param index the index this entry belongs to; supplies the file version and, for
+  *              version 7 and later, the list the HTML entry indices resolve against
+  * @param raf input to read from; it is cast to {@link RandomAccessFile} for version 6 files
+  * @throws IOException if reading from {@code raf} fails
+  */
+ public IndexEntry(final Index index, final DataInput raf) throws IOException {
+     token = raf.readUTF();
+
+     // Version 7 switched the numeric fields and the HTML entry references to var-ints.
+     final boolean varIntLayout = index.dict.dictFileVersion >= 7;
+     startRow = varIntLayout ? StringUtil.readVarInt(raf) : raf.readInt();
+     numRows = varIntLayout ? StringUtil.readVarInt(raf) : raf.readInt();
+
+     // A boolean flag tells whether a distinct normalized form follows; if not, reuse token.
+     if (raf.readBoolean()) {
+         normalizedToken = raf.readUTF();
+     } else {
+         normalizedToken = token;
+     }
+
+     if (varIntLayout) {
+         final int entryCount = StringUtil.readVarInt(raf);
+         if (entryCount != 0) {
+             final int[] entryIds = new int[entryCount];
+             for (int pos = 0; pos < entryCount; pos++) {
+                 entryIds[pos] = StringUtil.readVarInt(raf);
+             }
+             // Read-only view that looks each HTML entry up in the dictionary on access.
+             this.htmlEntries = new AbstractList<HtmlEntry>() {
+                 @Override
+                 public HtmlEntry get(int position) {
+                     return index.dict.htmlEntries.get(entryIds[position]);
+                 }
+
+                 @Override
+                 public int size() {
+                     return entryIds.length;
+                 }
+             };
+         } else {
+             this.htmlEntries = Collections.emptyList();
+         }
+     } else if (index.dict.dictFileVersion >= 6) {
+         final RandomAccessFile file = (RandomAccessFile) raf;
+         this.htmlEntries = CachingList.create(
+                 RAFList.create(file, index.dict.htmlEntryIndexSerializer,
+                         file.getFilePointer(), index.dict.dictFileVersion,
+                         index.dict.dictInfo + " htmlEntries: "), 1);
+     } else {
+         this.htmlEntries = Collections.emptyList();
+     }
+ }
+
+ /**
+  * Writes this entry to {@code raf}: the token, the start row and row count as var-ints,
+  * a boolean flag followed by the normalized token when it differs from the token, and
+  * finally the number of HTML entries and each entry's index as var-ints.
+  *
+  * @param raf destination to write to
+  * @throws IOException if writing to {@code raf} fails
+  */
+ public void write(DataOutput raf) throws IOException {
+     raf.writeUTF(token);
+     StringUtil.writeVarInt(raf, startRow);
+     StringUtil.writeVarInt(raf, numRows);
+
+     // The normalized token is only stored when it is not identical to the token.
+     if (token.equals(normalizedToken)) {
+         raf.writeBoolean(false);
+     } else {
+         raf.writeBoolean(true);
+         raf.writeUTF(normalizedToken);
+     }
+
+     StringUtil.writeVarInt(raf, htmlEntries.size());
+     for (final HtmlEntry htmlEntry : htmlEntries) {
+         StringUtil.writeVarInt(raf, htmlEntry.index());
+     }
+ }
+
+ /**
+  * Returns a short description of the form {@code token@startRow(numRows)}.
+  * The numbers are formatted by {@link String#format(String, Object...)}, which uses the
+  * default locale.
+  *
+  * @return the token followed by its start row and row count
+  */
+ @Override
+ public String toString() {
+     return String.format("%s@%d(%d)", token, startRow, numRows);
+ }
+
+ /**
+  * Returns the normalized token stored in this entry.
+  *
+  * @return the value of the {@code normalizedToken} field
+  */
+ public String normalizedToken() {
+     return this.normalizedToken;
+ }