Add support for '?' and '*' in search
app/src/main/java/eu/lepiller/nani/dictionary/FileDictionary.java
3 | 3 | import android.util.Log; | |
4 | 4 | import android.util.Pair; | |
5 | 5 | ||
6 | + | import androidx.annotation.NonNull; | |
7 | + | ||
6 | 8 | import java.io.File; | |
7 | 9 | import java.io.FileNotFoundException; | |
8 | 10 | import java.io.IOException; | |
… | |||
37 | 39 | } | |
38 | 40 | ||
39 | 41 | static abstract class TrieParser<T> { | |
40 | - | protected Parser<T> valParser; | |
41 | - | TrieParser(Parser<T> parser) { | |
42 | + | protected Parser<List<T>> valParser; | |
43 | + | TrieParser(Parser<List<T>> parser) { | |
42 | 44 | valParser = parser; | |
43 | 45 | } | |
44 | 46 | ||
45 | - | final T decodeVals(RandomAccessFile file, long pos) throws IOException { | |
46 | - | seek(file, pos); | |
47 | - | Log.d(TAG, "if it's a list, it has " + file.readShort() + " elements."); | |
47 | + | final List<T> decodeVals(RandomAccessFile file, long pos) throws IOException { | |
48 | 48 | seek(file, pos); | |
49 | 49 | return valParser.parse(file); | |
50 | 50 | } | |
… | |||
56 | 56 | abstract void skipVals(RandomAccessFile file, long pos) throws IOException; | |
57 | 57 | } | |
58 | 58 | ||
59 | + | static abstract class SingleTrieParser<T> extends TrieParser<T> { | |
60 | + | SingleTrieParser(Parser<T> parser) { | |
61 | + | super(new Parser<List<T>>() { | |
62 | + | @Override | |
63 | + | List<T> parse(RandomAccessFile file) throws IOException { | |
64 | + | T obj = parser.parse(file); | |
65 | + | ArrayList<T> list = new ArrayList<>(); | |
66 | + | list.add(obj); | |
67 | + | return list; | |
68 | + | } | |
69 | + | }); | |
70 | + | } | |
71 | + | } | |
72 | + | ||
59 | 73 | private final String mUrl; | |
60 | 74 | private final static String TAG = "FileDictionary"; | |
61 | 75 | ||
… | |||
291 | 305 | } | |
292 | 306 | } | |
293 | 307 | ||
294 | - | static<T> T searchTrie(RandomAccessFile file, long triePos, byte[] txt, TrieParser<T> decoder) throws IOException { | |
295 | - | file.seek(triePos); | |
296 | - | if(txt.length == 0) { | |
297 | - | Log.v(TAG, "found trie value, reading values"); | |
298 | - | return decoder.decodeVals(file, triePos); | |
308 | + | static class TrieSearchParam { | |
309 | + | public final byte[] key; | |
310 | + | public final byte[] partialKey; | |
311 | + | public final long pos; | |
312 | + | ||
313 | + | public TrieSearchParam(long pos, byte[] key, byte[] partialKey){ | |
314 | + | this.pos = pos; | |
315 | + | this.key = key; | |
316 | + | this.partialKey = partialKey; | |
299 | 317 | } | |
318 | + | } | |
300 | 319 | ||
301 | - | decoder.skipVals(file, triePos); | |
320 | + | static<T> List<T> searchTrie(RandomAccessFile file, long pos, byte[] key, int limit, TrieParser<T> decoder) throws IOException { | |
321 | + | ArrayList<TrieSearchParam> queue = new ArrayList<>(); | |
322 | + | ArrayList<T> results = new ArrayList<>(); | |
323 | + | queue.add(new TrieSearchParam(pos, key, new byte[0])); | |
302 | 324 | ||
303 | - | int transitionLength = file.readByte(); | |
304 | - | Log.v(TAG, "number of transitions: " + transitionLength); | |
325 | + | byte[] partialKey; | |
305 | 326 | ||
306 | - | for(int i = 0; i < transitionLength; i++) { | |
307 | - | byte letter = file.readByte(); | |
308 | - | Log.v(TAG, "Possible transition " + letter + "; Expected transition: " + txt[0]); | |
309 | - | if(letter == txt[0]) { | |
310 | - | long nextPos = file.readInt(); | |
311 | - | Log.v(TAG, "Taking transition "+letter+" to " + nextPos); | |
312 | - | byte[] ntxt = new byte[txt.length-1]; | |
313 | - | System.arraycopy(txt, 1, ntxt, 0, txt.length-1); | |
314 | - | return searchTrie(file, nextPos, ntxt, decoder); | |
315 | - | } else { | |
316 | - | file.skipBytes(4); | |
327 | + | while(queue.size() > 0) { | |
328 | + | pos = queue.get(0).pos; | |
329 | + | key = queue.get(0).key; | |
330 | + | partialKey = queue.get(0).partialKey; | |
331 | + | queue.remove(0); | |
332 | + | ||
333 | + | if (key.length == 0) { | |
334 | + | results.addAll(decoder.decodeVals(file, pos)); | |
335 | + | limit -= results.size(); | |
336 | + | if (limit <= 0) | |
337 | + | return results; | |
317 | 338 | } | |
318 | - | } | |
319 | 339 | ||
320 | - | return null; | |
340 | + | // when the key starts with '*', we actually look for '' (the '*' dropped) or '?*' (one character, then '*' again) | |
341 | + | if(key.length > 0 && key[0] == '*' && partialKey.length == 0) { | |
342 | + | byte[] nkeyempty = new byte[key.length-1]; | |
343 | + | System.arraycopy(key, 1, nkeyempty, 0, key.length-1); | |
344 | + | byte[] nkeyoption = new byte[key.length+1]; | |
345 | + | nkeyoption[0] = '?'; | |
346 | + | System.arraycopy(key, 0, nkeyoption, 1, key.length); | |
347 | + | ||
348 | + | queue.add(new TrieSearchParam(pos, nkeyempty, partialKey)); | |
349 | + | queue.add(new TrieSearchParam(pos, nkeyoption, partialKey)); | |
350 | + | continue; | |
351 | + | } | |
352 | + | ||
353 | + | file.seek(pos); | |
354 | + | decoder.skipVals(file, pos); | |
355 | + | int transitionCount = file.readByte(); | |
356 | + | for (int i = 0; i < transitionCount; i++) { | |
357 | + | // go through each transition. If a transition is selected, add it to the back of the queue | |
358 | + | byte letter = file.readByte(); | |
359 | + | if (key.length == 0) { | |
360 | + | long nextPos = file.readInt(); | |
361 | + | // the key is already fully consumed, so follow every transition with the same (empty) key | |
362 | + | queue.add(new TrieSearchParam(nextPos, key, partialKey)); | |
363 | + | } else if (key[0] == '?') { | |
364 | + | byte first = partialKey.length > 0? partialKey[0]: letter; | |
365 | + | int byteCount = ((first & 0b10000000) == 0)? 1: ((first & 0b11100000) == 0b11000000)? 2: ((first & 0b11110000) == 0b11100000)? 3: 4; | |
366 | + | StringBuilder partial = new StringBuilder(); | |
367 | + | for (byte b : partialKey) { | |
368 | + | partial.append(String.format("%02x", b)); | |
369 | + | partial.append(" "); | |
370 | + | } | |
371 | + | Log.d(TAG, "?: partialKey is " + partial.toString() + ", taking transition " + String.format("%02x", letter)); | |
372 | + | if(partialKey.length == byteCount-1) { | |
373 | + | long nextPos = file.readInt(); | |
374 | + | byte[] nkey = new byte[key.length-1]; | |
375 | + | System.arraycopy(key, 1, nkey, 0, key.length-1); | |
376 | + | queue.add(new TrieSearchParam(nextPos, nkey, new byte[0])); | |
377 | + | } else { | |
378 | + | long nextPos = file.readInt(); | |
379 | + | byte[] npartialKey = new byte[partialKey.length + 1]; | |
380 | + | System.arraycopy(partialKey, 0, npartialKey, 0, partialKey.length); | |
381 | + | npartialKey[partialKey.length] = letter; | |
382 | + | queue.add(new TrieSearchParam(nextPos, key, npartialKey)); | |
383 | + | } | |
384 | + | } else if (key[0] == letter) { | |
385 | + | long nextPos = file.readInt(); | |
386 | + | Log.v(TAG, "Taking transition "+letter+" to " + nextPos); | |
387 | + | byte[] nkey = new byte[key.length-1]; | |
388 | + | System.arraycopy(key, 1, nkey, 0, key.length-1); | |
389 | + | queue.add(new TrieSearchParam(nextPos, nkey, partialKey)); | |
390 | + | // can only be one transition like this, so we can stop early | |
391 | + | break; | |
392 | + | } else { | |
393 | + | file.skipBytes(4); | |
394 | + | } | |
395 | + | } | |
396 | + | ||
397 | + | if(results.size() >= limit) | |
398 | + | break; | |
399 | + | } | |
400 | + | return results; | |
321 | 401 | } | |
322 | 402 | } |
app/src/main/java/eu/lepiller/nani/dictionary/KanjiDict.java
94 | 94 | ||
95 | 95 | Log.d(TAG, "trie pos: " + kanjiTriePos); | |
96 | 96 | ||
97 | - | return searchTrie(file, kanjiTriePos, search, new TrieParser<KanjiResult>(new ResultParser(kanji)) { | |
97 | + | List<KanjiResult> results = searchTrie(file, kanjiTriePos, search, 1, new SingleTrieParser<KanjiResult>(new ResultParser(kanji)) { | |
98 | 98 | @Override | |
99 | 99 | public void skipVals(RandomAccessFile file1, long pos) throws IOException { | |
100 | 100 | file1.seek(pos); | |
… | |||
107 | 107 | file.seek(file.readInt()); | |
108 | 108 | } | |
109 | 109 | }); | |
110 | + | if(results.isEmpty()) | |
111 | + | return null; | |
112 | + | return results.get(0); | |
110 | 113 | } catch (FileNotFoundException e) { | |
111 | 114 | e.printStackTrace(); | |
112 | 115 | } catch (IOException e) { |
app/src/main/java/eu/lepiller/nani/dictionary/KanjiVG.java
70 | 70 | ||
71 | 71 | Log.d(TAG, "trie pos: " + kanjiTriePos); | |
72 | 72 | ||
73 | - | return searchTrie(file, kanjiTriePos, search, new TrieParser<KanjiResult>(new ResultParser(kanji)) { | |
73 | + | List<KanjiResult> results = searchTrie(file, kanjiTriePos, search, 1, new SingleTrieParser<KanjiResult>(new ResultParser(kanji)) { | |
74 | 74 | @Override | |
75 | 75 | public void skipVals(RandomAccessFile file1, long pos) throws IOException { | |
76 | 76 | file1.seek(pos); | |
… | |||
83 | 83 | file.seek(file.readInt()); | |
84 | 84 | } | |
85 | 85 | }); | |
86 | + | if(results.isEmpty()) | |
87 | + | return null; | |
88 | + | return results.get(0); | |
86 | 89 | } catch (FileNotFoundException e) { | |
87 | 90 | e.printStackTrace(); | |
88 | 91 | } catch (IOException e) { |
app/src/main/java/eu/lepiller/nani/dictionary/ResultDictionary.java
118 | 118 | } | |
119 | 119 | ||
120 | 120 | private List<Integer> searchTrie(RandomAccessFile file, long triePos, byte[] txt) throws IOException { | |
121 | - | return searchTrie(file, triePos, txt, new TrieParser<List<Integer>>(new ValuesParser()) { | |
121 | + | return searchTrie(file, triePos, txt, 50, new TrieParser<Integer>(new ValuesParser()) { | |
122 | 122 | @Override | |
123 | 123 | public void skipVals(RandomAccessFile file, long pos) throws IOException { | |
124 | 124 | file.seek(pos); |
app/src/main/java/eu/lepiller/nani/dictionary/WadokuPitchDictionary.java
29 | 29 | ||
30 | 30 | private String findPitch(String kanji, String reading, RandomAccessFile file) throws IOException { | |
31 | 31 | String concat = kanji + reading; | |
32 | - | return searchTrie(file, triePos, concat.getBytes(), new TrieParser<String>(new HuffmanStringParser(huffman)) { | |
32 | + | List<String> results = searchTrie(file, triePos, concat.getBytes(), 1, new SingleTrieParser<String>(new HuffmanStringParser(huffman)) { | |
33 | 33 | @Override | |
34 | 34 | public void skipVals(RandomAccessFile file, long pos) throws IOException { | |
35 | 35 | file.seek(pos); | |
36 | 36 | new HuffmanStringParser(huffman).parse(file); | |
37 | 37 | } | |
38 | 38 | }); | |
39 | + | if(results.isEmpty()) | |
40 | + | return null; | |
41 | + | return results.get(0); | |
39 | 42 | } | |
40 | 43 | ||
41 | 44 | @Override |