Add support for '?' and '*' in search
app/src/main/java/eu/lepiller/nani/dictionary/FileDictionary.java
| 3 | 3 | import android.util.Log; | |
| 4 | 4 | import android.util.Pair; | |
| 5 | 5 | ||
| 6 | + | import androidx.annotation.NonNull; | |
| 7 | + | ||
| 6 | 8 | import java.io.File; | |
| 7 | 9 | import java.io.FileNotFoundException; | |
| 8 | 10 | import java.io.IOException; | |
… | |||
| 37 | 39 | } | |
| 38 | 40 | ||
| 39 | 41 | static abstract class TrieParser<T> { | |
| 40 | - | protected Parser<T> valParser; | |
| 41 | - | TrieParser(Parser<T> parser) { | |
| 42 | + | protected Parser<List<T>> valParser; | |
| 43 | + | TrieParser(Parser<List<T>> parser) { | |
| 42 | 44 | valParser = parser; | |
| 43 | 45 | } | |
| 44 | 46 | ||
| 45 | - | final T decodeVals(RandomAccessFile file, long pos) throws IOException { | |
| 46 | - | seek(file, pos); | |
| 47 | - | Log.d(TAG, "if it's a list, it has " + file.readShort() + " elements."); | |
| 47 | + | final List<T> decodeVals(RandomAccessFile file, long pos) throws IOException { | |
| 48 | 48 | seek(file, pos); | |
| 49 | 49 | return valParser.parse(file); | |
| 50 | 50 | } | |
… | |||
| 56 | 56 | abstract void skipVals(RandomAccessFile file, long pos) throws IOException; | |
| 57 | 57 | } | |
| 58 | 58 | ||
| 59 | + | static abstract class SingleTrieParser<T> extends TrieParser<T> { | |
| 60 | + | SingleTrieParser(Parser<T> parser) { | |
| 61 | + | super(new Parser<List<T>>() { | |
| 62 | + | @Override | |
| 63 | + | List<T> parse(RandomAccessFile file) throws IOException { | |
| 64 | + | T obj = parser.parse(file); | |
| 65 | + | ArrayList<T> list = new ArrayList<>(); | |
| 66 | + | list.add(obj); | |
| 67 | + | return list; | |
| 68 | + | } | |
| 69 | + | }); | |
| 70 | + | } | |
| 71 | + | } | |
| 72 | + | ||
| 59 | 73 | private final String mUrl; | |
| 60 | 74 | private final static String TAG = "FileDictionary"; | |
| 61 | 75 | ||
… | |||
| 291 | 305 | } | |
| 292 | 306 | } | |
| 293 | 307 | ||
| 294 | - | static<T> T searchTrie(RandomAccessFile file, long triePos, byte[] txt, TrieParser<T> decoder) throws IOException { | |
| 295 | - | file.seek(triePos); | |
| 296 | - | if(txt.length == 0) { | |
| 297 | - | Log.v(TAG, "found trie value, reading values"); | |
| 298 | - | return decoder.decodeVals(file, triePos); | |
| 308 | + | static class TrieSearchParam { | |
| 309 | + | public final byte[] key; | |
| 310 | + | public final byte[] partialKey; | |
| 311 | + | public final long pos; | |
| 312 | + | ||
| 313 | + | public TrieSearchParam(long pos, byte[] key, byte[] partialKey){ | |
| 314 | + | this.pos = pos; | |
| 315 | + | this.key = key; | |
| 316 | + | this.partialKey = partialKey; | |
| 299 | 317 | } | |
| 318 | + | } | |
| 300 | 319 | ||
| 301 | - | decoder.skipVals(file, triePos); | |
| 320 | + | static<T> List<T> searchTrie(RandomAccessFile file, long pos, byte[] key, int limit, TrieParser<T> decoder) throws IOException { | |
| 321 | + | ArrayList<TrieSearchParam> queue = new ArrayList<>(); | |
| 322 | + | ArrayList<T> results = new ArrayList<>(); | |
| 323 | + | queue.add(new TrieSearchParam(pos, key, new byte[0])); | |
| 302 | 324 | ||
| 303 | - | int transitionLength = file.readByte(); | |
| 304 | - | Log.v(TAG, "number of transitions: " + transitionLength); | |
| 325 | + | byte[] partialKey; | |
| 305 | 326 | ||
| 306 | - | for(int i = 0; i < transitionLength; i++) { | |
| 307 | - | byte letter = file.readByte(); | |
| 308 | - | Log.v(TAG, "Possible transition " + letter + "; Expected transition: " + txt[0]); | |
| 309 | - | if(letter == txt[0]) { | |
| 310 | - | long nextPos = file.readInt(); | |
| 311 | - | Log.v(TAG, "Taking transition "+letter+" to " + nextPos); | |
| 312 | - | byte[] ntxt = new byte[txt.length-1]; | |
| 313 | - | System.arraycopy(txt, 1, ntxt, 0, txt.length-1); | |
| 314 | - | return searchTrie(file, nextPos, ntxt, decoder); | |
| 315 | - | } else { | |
| 316 | - | file.skipBytes(4); | |
| 327 | + | while(queue.size() > 0) { | |
| 328 | + | pos = queue.get(0).pos; | |
| 329 | + | key = queue.get(0).key; | |
| 330 | + | partialKey = queue.get(0).partialKey; | |
| 331 | + | queue.remove(0); | |
| 332 | + | ||
| 333 | + | if (key.length == 0) { | |
| 334 | + | results.addAll(decoder.decodeVals(file, pos)); | |
| 335 | + | limit -= results.size(); | |
| 336 | + | if (limit <= 0) | |
| 337 | + | return results; | |
| 317 | 338 | } | |
| 318 | - | } | |
| 319 | 339 | ||
| 320 | - | return null; | |
| 340 | + | // if looking for '*', we actually look for '' (the '*' matches nothing) or '?*' (one character, then '*' again) | |
| 341 | + | if(key.length > 0 && key[0] == '*' && partialKey.length == 0) { | |
| 342 | + | byte[] nkeyempty = new byte[key.length-1]; | |
| 343 | + | System.arraycopy(key, 1, nkeyempty, 0, key.length-1); | |
| 344 | + | byte[] nkeyoption = new byte[key.length+1]; | |
| 345 | + | nkeyoption[0] = '?'; | |
| 346 | + | System.arraycopy(key, 0, nkeyoption, 1, key.length); | |
| 347 | + | ||
| 348 | + | queue.add(new TrieSearchParam(pos, nkeyempty, partialKey)); | |
| 349 | + | queue.add(new TrieSearchParam(pos, nkeyoption, partialKey)); | |
| 350 | + | continue; | |
| 351 | + | } | |
| 352 | + | ||
| 353 | + | file.seek(pos); | |
| 354 | + | decoder.skipVals(file, pos); | |
| 355 | + | int transitionCount = file.readByte(); | |
| 356 | + | for (int i = 0; i < transitionCount; i++) { | |
| 357 | + | // go through each transition. If a transition is selected, add it to the back of the queue | |
| 358 | + | byte letter = file.readByte(); | |
| 359 | + | if (key.length == 0) { | |
| 360 | + | long nextPos = file.readInt(); | |
| 361 | + | // if we already reached the end of the key, follow every transition so that the nodes below are visited too | |
| 362 | + | queue.add(new TrieSearchParam(nextPos, key, partialKey)); | |
| 363 | + | } else if (key[0] == '?') { | |
| 364 | + | byte first = partialKey.length > 0? partialKey[0]: letter; | |
| 365 | + | int byteCount = ((first & 0b10000000) == 0)? 1: ((first & 0b11100000) == 0b11000000)? 2: ((first & 0b11110000) == 0b11100000)? 3: 4; | |
| 366 | + | StringBuilder partial = new StringBuilder(); | |
| 367 | + | for (byte b : partialKey) { | |
| 368 | + | partial.append(String.format("%02x", b)); | |
| 369 | + | partial.append(" "); | |
| 370 | + | } | |
| 371 | + | Log.d(TAG, "?: partialKey is " + partial.toString() + ", taking transition " + String.format("%02x", letter)); | |
| 372 | + | if(partialKey.length == byteCount-1) { | |
| 373 | + | long nextPos = file.readInt(); | |
| 374 | + | byte[] nkey = new byte[key.length-1]; | |
| 375 | + | System.arraycopy(key, 1, nkey, 0, key.length-1); | |
| 376 | + | queue.add(new TrieSearchParam(nextPos, nkey, new byte[0])); | |
| 377 | + | } else { | |
| 378 | + | long nextPos = file.readInt(); | |
| 379 | + | byte[] npartialKey = new byte[partialKey.length + 1]; | |
| 380 | + | System.arraycopy(partialKey, 0, npartialKey, 0, partialKey.length); | |
| 381 | + | npartialKey[partialKey.length] = letter; | |
| 382 | + | queue.add(new TrieSearchParam(nextPos, key, npartialKey)); | |
| 383 | + | } | |
| 384 | + | } else if (key[0] == letter) { | |
| 385 | + | long nextPos = file.readInt(); | |
| 386 | + | Log.v(TAG, "Taking transition "+letter+" to " + nextPos); | |
| 387 | + | byte[] nkey = new byte[key.length-1]; | |
| 388 | + | System.arraycopy(key, 1, nkey, 0, key.length-1); | |
| 389 | + | queue.add(new TrieSearchParam(nextPos, nkey, partialKey)); | |
| 390 | + | // there can only be one transition like this, so we can stop early | |
| 391 | + | break; | |
| 392 | + | } else { | |
| 393 | + | file.skipBytes(4); | |
| 394 | + | } | |
| 395 | + | } | |
| 396 | + | ||
| 397 | + | if(results.size() >= limit) | |
| 398 | + | break; | |
| 399 | + | } | |
| 400 | + | return results; | |
| 321 | 401 | } | |
| 322 | 402 | } | |
app/src/main/java/eu/lepiller/nani/dictionary/KanjiDict.java
| 94 | 94 | ||
| 95 | 95 | Log.d(TAG, "trie pos: " + kanjiTriePos); | |
| 96 | 96 | ||
| 97 | - | return searchTrie(file, kanjiTriePos, search, new TrieParser<KanjiResult>(new ResultParser(kanji)) { | |
| 97 | + | List<KanjiResult> results = searchTrie(file, kanjiTriePos, search, 1, new SingleTrieParser<KanjiResult>(new ResultParser(kanji)) { | |
| 98 | 98 | @Override | |
| 99 | 99 | public void skipVals(RandomAccessFile file1, long pos) throws IOException { | |
| 100 | 100 | file1.seek(pos); | |
… | |||
| 107 | 107 | file.seek(file.readInt()); | |
| 108 | 108 | } | |
| 109 | 109 | }); | |
| 110 | + | if(results.isEmpty()) | |
| 111 | + | return null; | |
| 112 | + | return results.get(0); | |
| 110 | 113 | } catch (FileNotFoundException e) { | |
| 111 | 114 | e.printStackTrace(); | |
| 112 | 115 | } catch (IOException e) { | |
app/src/main/java/eu/lepiller/nani/dictionary/KanjiVG.java
| 70 | 70 | ||
| 71 | 71 | Log.d(TAG, "trie pos: " + kanjiTriePos); | |
| 72 | 72 | ||
| 73 | - | return searchTrie(file, kanjiTriePos, search, new TrieParser<KanjiResult>(new ResultParser(kanji)) { | |
| 73 | + | List<KanjiResult> results = searchTrie(file, kanjiTriePos, search, 1, new SingleTrieParser<KanjiResult>(new ResultParser(kanji)) { | |
| 74 | 74 | @Override | |
| 75 | 75 | public void skipVals(RandomAccessFile file1, long pos) throws IOException { | |
| 76 | 76 | file1.seek(pos); | |
… | |||
| 83 | 83 | file.seek(file.readInt()); | |
| 84 | 84 | } | |
| 85 | 85 | }); | |
| 86 | + | if(results.isEmpty()) | |
| 87 | + | return null; | |
| 88 | + | return results.get(0); | |
| 86 | 89 | } catch (FileNotFoundException e) { | |
| 87 | 90 | e.printStackTrace(); | |
| 88 | 91 | } catch (IOException e) { | |
app/src/main/java/eu/lepiller/nani/dictionary/ResultDictionary.java
| 118 | 118 | } | |
| 119 | 119 | ||
| 120 | 120 | private List<Integer> searchTrie(RandomAccessFile file, long triePos, byte[] txt) throws IOException { | |
| 121 | - | return searchTrie(file, triePos, txt, new TrieParser<List<Integer>>(new ValuesParser()) { | |
| 121 | + | return searchTrie(file, triePos, txt, 50, new TrieParser<Integer>(new ValuesParser()) { | |
| 122 | 122 | @Override | |
| 123 | 123 | public void skipVals(RandomAccessFile file, long pos) throws IOException { | |
| 124 | 124 | file.seek(pos); |
app/src/main/java/eu/lepiller/nani/dictionary/WadokuPitchDictionary.java
| 29 | 29 | ||
| 30 | 30 | private String findPitch(String kanji, String reading, RandomAccessFile file) throws IOException { | |
| 31 | 31 | String concat = kanji + reading; | |
| 32 | - | return searchTrie(file, triePos, concat.getBytes(), new TrieParser<String>(new HuffmanStringParser(huffman)) { | |
| 32 | + | List<String> results = searchTrie(file, triePos, concat.getBytes(), 1, new SingleTrieParser<String>(new HuffmanStringParser(huffman)) { | |
| 33 | 33 | @Override | |
| 34 | 34 | public void skipVals(RandomAccessFile file, long pos) throws IOException { | |
| 35 | 35 | file.seek(pos); | |
| 36 | 36 | new HuffmanStringParser(huffman).parse(file); | |
| 37 | 37 | } | |
| 38 | 38 | }); | |
| 39 | + | if(results.isEmpty()) | |
| 40 | + | return null; | |
| 41 | + | return results.get(0); | |
| 39 | 42 | } | |
| 40 | 43 | ||
| 41 | 44 | @Override |