]> gitweb.fperrin.net Git - DictionaryPC.git/blob - src/com/hughes/android/dictionary/engine/DictionaryV6Writer.java
Minor automated code simplifications.
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / DictionaryV6Writer.java
1 // Copyright 2020 Reimar Döffinger. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 package com.hughes.android.dictionary.engine;
16
17 import java.io.BufferedOutputStream;
18 import java.io.ByteArrayOutputStream;
19 import java.io.DataOutputStream;
20 import java.io.FileOutputStream;
21 import java.io.IOException;
22 import java.io.ObjectOutputStream;
23 import java.io.RandomAccessFile;
24 import java.nio.charset.StandardCharsets;
25 import java.util.Collections;
26 import java.util.List;
27 import java.util.zip.GZIPOutputStream;
28
29 public class DictionaryV6Writer {
30     private final Dictionary d;
31
32     public DictionaryV6Writer(Dictionary dictionary) {
33         d = dictionary;
34     }
35
36     private void writev6Sources(RandomAccessFile out) throws IOException {
37         ByteArrayOutputStream toc = new ByteArrayOutputStream();
38         DataOutputStream tocout = new DataOutputStream(toc);
39
40         out.writeInt(d.sources.size());
41         long tocPos = out.getFilePointer();
42         out.seek(tocPos + d.sources.size() * 8 + 8);
43         for (EntrySource s : d.sources) {
44             long dataPos = out.getFilePointer();
45             tocout.writeLong(dataPos);
46
47             out.writeUTF(s.getName());
48             out.writeInt(s.getNumEntries());
49         }
50         long dataPos = out.getFilePointer();
51         tocout.writeLong(dataPos);
52         tocout.close();
53
54         out.seek(tocPos);
55         out.write(toc.toByteArray());
56         out.seek(dataPos);
57     }
58
59     private void writev6PairEntries(RandomAccessFile out) throws IOException {
60         ByteArrayOutputStream toc = new ByteArrayOutputStream();
61         DataOutputStream tocout = new DataOutputStream(toc);
62
63         long tocPos = out.getFilePointer();
64         long dataPos = tocPos + 4 + d.pairEntries.size() * 8 + 8;
65
66         out.seek(dataPos);
67         DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
68
69         tocout.writeInt(d.pairEntries.size());
70         for (PairEntry pe : d.pairEntries) {
71             tocout.writeLong(dataPos + outb.size());
72
73             outb.writeShort(pe.entrySource.index());
74             outb.writeInt(pe.pairs.size());
75             for (PairEntry.Pair p : pe.pairs) {
76                 outb.writeUTF(p.lang1);
77                 outb.writeUTF(p.lang2);
78             }
79         }
80         dataPos += outb.size();
81         outb.flush();
82         tocout.writeLong(dataPos);
83         tocout.close();
84
85         out.seek(tocPos);
86         out.write(toc.toByteArray());
87         out.seek(dataPos);
88     }
89
90     private void writev6TextEntries(RandomAccessFile out) throws IOException {
91         ByteArrayOutputStream toc = new ByteArrayOutputStream();
92         DataOutputStream tocout = new DataOutputStream(toc);
93
94         out.writeInt(d.textEntries.size());
95         long tocPos = out.getFilePointer();
96         out.seek(tocPos + d.textEntries.size() * 8 + 8);
97         for (TextEntry t : d.textEntries) {
98             long dataPos = out.getFilePointer();
99             tocout.writeLong(dataPos);
100
101             out.writeShort(t.entrySource.index());
102             out.writeUTF(t.text);
103         }
104         long dataPos = out.getFilePointer();
105         tocout.writeLong(dataPos);
106         tocout.close();
107
108         out.seek(tocPos);
109         out.write(toc.toByteArray());
110         out.seek(dataPos);
111     }
112
113     private void writev6EmptyList(RandomAccessFile out) throws IOException {
114         out.writeInt(0);
115         out.writeLong(out.getFilePointer() + 8);
116     }
117
118     private void writev6HtmlEntries(RandomAccessFile out) throws IOException {
119         ByteArrayOutputStream toc = new ByteArrayOutputStream();
120         DataOutputStream tocout = new DataOutputStream(toc);
121
122         long tocPos = out.getFilePointer();
123         long dataPos = tocPos + 4 + d.htmlEntries.size() * 8 + 8;
124
125         out.seek(dataPos);
126         DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
127
128         tocout.writeInt(d.htmlEntries.size());
129         for (HtmlEntry h : d.htmlEntries) {
130             tocout.writeLong(dataPos + outb.size());
131
132             outb.writeShort(h.entrySource.index());
133             outb.writeUTF(h.title);
134             byte[] data = h.getHtml().getBytes(StandardCharsets.UTF_8);
135             outb.writeInt(data.length);
136             ByteArrayOutputStream baos = new ByteArrayOutputStream();
137             GZIPOutputStream gzout = new GZIPOutputStream(baos);
138             gzout.write(data);
139             gzout.close();
140             outb.writeInt(baos.size());
141             outb.write(baos.toByteArray());
142         }
143         dataPos += outb.size();
144         outb.flush();
145         tocout.writeLong(dataPos);
146         tocout.close();
147
148         out.seek(tocPos);
149         out.write(toc.toByteArray());
150         out.seek(dataPos);
151     }
152
153     private void writev6HtmlIndices(DataOutputStream out, long pos, List<HtmlEntry> entries) throws IOException {
154         long dataPos = pos + 4 + entries.size() * 8 + 8;
155
156         out.writeInt(entries.size());
157
158         // TOC is trivial, so optimize writing it
159         for (int i = 0; i < entries.size(); i++) {
160             out.writeLong(dataPos);
161             dataPos += 4;
162         }
163         out.writeLong(dataPos);
164
165         for (HtmlEntry e : entries) {
166             out.writeInt(e.index());
167         }
168     }
169
170     private void writev6IndexEntries(RandomAccessFile out, List<Index.IndexEntry> entries, int[] prunedRowIdx) throws IOException {
171         ByteArrayOutputStream toc = new ByteArrayOutputStream();
172         DataOutputStream tocout = new DataOutputStream(toc);
173
174         long tocPos = out.getFilePointer();
175         long dataPos = tocPos + 4 + entries.size() * 8 + 8;
176
177         out.seek(dataPos);
178         DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
179
180         tocout.writeInt(entries.size());
181         for (Index.IndexEntry e : entries) {
182             tocout.writeLong(dataPos + outb.size());
183
184             outb.writeUTF(e.token);
185
186             int startRow = e.startRow;
187             int numRows = e.numRows;
188             if (prunedRowIdx != null) {
189                 // note: the start row will always be a TokenRow
190                 // and thus never be pruned
191                 int newNumRows = 1;
192                 for (int i = 1; i < numRows; i++) {
193                     if (prunedRowIdx[startRow + i] >= 0) newNumRows++;
194                 }
195                 startRow = prunedRowIdx[startRow];
196                 numRows = newNumRows;
197             }
198
199             outb.writeInt(startRow);
200             outb.writeInt(numRows);
201             final boolean hasNormalizedForm = !e.token.equals(e.normalizedToken());
202             outb.writeBoolean(hasNormalizedForm);
203             if (hasNormalizedForm) outb.writeUTF(e.normalizedToken());
204             writev6HtmlIndices(outb, dataPos + outb.size(),
205                                prunedRowIdx == null ? e.htmlEntries : Collections.emptyList());
206         }
207         dataPos += outb.size();
208         outb.flush();
209         tocout.writeLong(dataPos);
210         tocout.close();
211
212         out.seek(tocPos);
213         out.write(toc.toByteArray());
214         out.seek(dataPos);
215     }
216
217     private void writev6Index(RandomAccessFile out, boolean skipHtml) throws IOException {
218         ByteArrayOutputStream toc = new ByteArrayOutputStream();
219         DataOutputStream tocout = new DataOutputStream(toc);
220
221         out.writeInt(d.indices.size());
222         long tocPos = out.getFilePointer();
223         out.seek(tocPos + d.indices.size() * 8 + 8);
224         for (Index idx : d.indices) {
225             // create pruned index for skipHtml feature
226             int[] prunedRowIdx = null;
227             int prunedSize = 0;
228             if (skipHtml) {
229                 prunedRowIdx = new int[idx.rows.size()];
230                 for (int i = 0; i < idx.rows.size(); i++) {
231                     final RowBase r = idx.rows.get(i);
232                     // prune Html entries
233                     boolean pruned = r instanceof HtmlEntry.Row;
234                     prunedRowIdx[i] = pruned ? -1 : prunedSize;
235                     if (!pruned) prunedSize++;
236                 }
237             }
238
239             long dataPos = out.getFilePointer();
240             tocout.writeLong(dataPos);
241
242             out.writeUTF(idx.shortName);
243             out.writeUTF(idx.longName);
244             out.writeUTF(idx.sortLanguage.getIsoCode());
245             out.writeUTF(idx.normalizerRules);
246             out.writeBoolean(idx.swapPairEntries);
247             out.writeInt(idx.mainTokenCount);
248             writev6IndexEntries(out, idx.sortedIndexEntries, prunedRowIdx);
249
250             // write stoplist, serializing the whole Set *shudder*
251             final ByteArrayOutputStream baos = new ByteArrayOutputStream();
252             final ObjectOutputStream oos = new ObjectOutputStream(baos);
253             oos.writeObject(idx.stoplist);
254             oos.close();
255             final byte[] bytes = baos.toByteArray();
256
257
258             DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(out.getFD())));
259             outb.writeInt(bytes.length);
260             outb.write(bytes);
261
262             outb.writeInt(skipHtml ? prunedSize : idx.rows.size());
263             outb.writeInt(5);
264             for (RowBase r : idx.rows) {
265                 int type = 0;
266                 if (r instanceof PairEntry.Row) {
267                     type = 0;
268                 } else if (r instanceof TokenRow) {
269                     final TokenRow tokenRow = (TokenRow)r;
270                     type = tokenRow.hasMainEntry ? 1 : 3;
271                 } else if (r instanceof TextEntry.Row) {
272                     type = 2;
273                 } else if (r instanceof HtmlEntry.Row) {
274                     type = 4;
275                     if (skipHtml) continue;
276                 } else {
277                     throw new RuntimeException("Row type not supported for v6");
278                 }
279                 outb.writeByte(type);
280                 outb.writeInt(r.referenceIndex);
281             }
282             outb.flush();
283         }
284         long dataPos = out.getFilePointer();
285         tocout.writeLong(dataPos);
286         tocout.close();
287
288         out.seek(tocPos);
289         out.write(toc.toByteArray());
290         out.seek(dataPos);
291     }
292
293     public void writev6(RandomAccessFile raf, boolean skipHtml) throws IOException {
294         raf.writeInt(6);
295         raf.writeLong(d.creationMillis);
296         raf.writeUTF(d.dictInfo);
297         System.out.println("sources start: " + raf.getFilePointer());
298         writev6Sources(raf);
299         System.out.println("pair start: " + raf.getFilePointer());
300         writev6PairEntries(raf);
301         System.out.println("text start: " + raf.getFilePointer());
302         writev6TextEntries(raf);
303         System.out.println("html index start: " + raf.getFilePointer());
304         if (skipHtml) writev6EmptyList(raf);
305         else writev6HtmlEntries(raf);
306         System.out.println("indices start: " + raf.getFilePointer());
307         writev6Index(raf, skipHtml);
308         System.out.println("end: " + raf.getFilePointer());
309         raf.writeUTF("END OF DICTIONARY");
310     }
311 }