Add support for '?' and '*' in search

Julien Lepiller, Sat Jul 02 16:36:40 +0200 2022

1944065

Add support for '?' and '*' in search

app/src/main/java/eu/lepiller/nani/dictionary/FileDictionary.java

33
import android.util.Log;
44
import android.util.Pair;
55
6+
import androidx.annotation.NonNull;
7+
68
import java.io.File;
79
import java.io.FileNotFoundException;
810
import java.io.IOException;

3739
    }
3840
3941
    static abstract class TrieParser<T> {
40-
        protected Parser<T> valParser;
41-
        TrieParser(Parser<T> parser) {
42+
        protected Parser<List<T>> valParser;
43+
        TrieParser(Parser<List<T>> parser) {
4244
            valParser = parser;
4345
        }
4446
45-
        final T decodeVals(RandomAccessFile file, long pos) throws IOException {
46-
            seek(file, pos);
47-
            Log.d(TAG, "if it's a list, it has " + file.readShort() + " elements.");
47+
        final List<T> decodeVals(RandomAccessFile file, long pos) throws IOException {
4848
            seek(file, pos);
4949
            return valParser.parse(file);
5050
        }

5656
        abstract void skipVals(RandomAccessFile file, long pos) throws IOException;
5757
    }
5858
59+
    static abstract class SingleTrieParser<T> extends TrieParser<T> {
60+
        SingleTrieParser(Parser<T> parser) {
61+
            super(new Parser<List<T>>() {
62+
                @Override
63+
                List<T> parse(RandomAccessFile file) throws IOException {
64+
                    T obj = parser.parse(file);
65+
                    ArrayList<T> list = new ArrayList<>();
66+
                    list.add(obj);
67+
                    return list;
68+
                }
69+
            });
70+
        }
71+
    }
72+
5973
    private final String mUrl;
6074
    private final static String TAG = "FileDictionary";
6175

291305
        }
292306
    }
293307
294-
    static<T> T searchTrie(RandomAccessFile file, long triePos, byte[] txt, TrieParser<T> decoder) throws IOException {
295-
        file.seek(triePos);
296-
        if(txt.length == 0) {
297-
            Log.v(TAG, "found trie value, reading values");
298-
            return decoder.decodeVals(file, triePos);
308+
    static class TrieSearchParam {
309+
        public final byte[] key;
310+
        public final byte[] partialKey;
311+
        public final long pos;
312+
313+
        public TrieSearchParam(long pos, byte[] key, byte[] partialKey){
314+
            this.pos = pos;
315+
            this.key = key;
316+
            this.partialKey = partialKey;
299317
        }
318+
    }
300319
301-
        decoder.skipVals(file, triePos);
320+
    static<T> List<T> searchTrie(RandomAccessFile file, long pos, byte[] key, int limit, TrieParser<T> decoder) throws IOException {
321+
        ArrayList<TrieSearchParam> queue = new ArrayList<>();
322+
        ArrayList<T> results = new ArrayList<>();
323+
        queue.add(new TrieSearchParam(pos, key, new byte[0]));
302324
303-
        int transitionLength = file.readByte();
304-
        Log.v(TAG, "number of transitions: " + transitionLength);
325+
        byte[] partialKey;
305326
306-
        for(int i = 0; i < transitionLength; i++) {
307-
            byte letter = file.readByte();
308-
            Log.v(TAG, "Possible transition " + letter + "; Expected transition: " + txt[0]);
309-
            if(letter == txt[0]) {
310-
                long nextPos = file.readInt();
311-
                Log.v(TAG, "Taking transition "+letter+" to " + nextPos);
312-
                byte[] ntxt = new byte[txt.length-1];
313-
                System.arraycopy(txt, 1, ntxt, 0, txt.length-1);
314-
                return searchTrie(file, nextPos, ntxt, decoder);
315-
            } else {
316-
                file.skipBytes(4);
327+
        while(queue.size() > 0) {
328+
            pos = queue.get(0).pos;
329+
            key = queue.get(0).key;
330+
            partialKey = queue.get(0).partialKey;
331+
            queue.remove(0);
332+
333+
            if (key.length == 0) {
334+
                results.addAll(decoder.decodeVals(file, pos));
335+
                limit -= results.size();
336+
                if (limit <= 0)
337+
                    return results;
317338
            }
318-
        }
319339
320-
        return null;
340+
            // if looking for '*', we actually look for '' or '?*'
341+
            if(key.length > 0 && key[0] == '*' && partialKey.length == 0) {
342+
                byte[] nkeyempty = new byte[key.length-1];
343+
                System.arraycopy(key, 1, nkeyempty, 0, key.length-1);
344+
                byte[] nkeyoption = new byte[key.length+1];
345+
                nkeyoption[0] = '?';
346+
                System.arraycopy(key, 0, nkeyoption, 1, key.length);
347+
348+
                queue.add(new TrieSearchParam(pos, nkeyempty, partialKey));
349+
                queue.add(new TrieSearchParam(pos, nkeyoption, partialKey));
350+
                continue;
351+
            }
352+
353+
            file.seek(pos);
354+
            decoder.skipVals(file, pos);
355+
            int transitionCount = file.readByte();
356+
            for (int i = 0; i < transitionCount; i++) {
357+
                // go through each transitions. If a transition is selected, add it to the back of the queue
358+
                byte letter = file.readByte();
359+
                if (key.length == 0) {
360+
                    long nextPos = file.readInt();
361+
                    // if we already reached the end,
362+
                    queue.add(new TrieSearchParam(nextPos, key, partialKey));
363+
                } else if (key[0] == '?') {
364+
                    byte first = partialKey.length > 0? partialKey[0]: letter;
365+
                    int byteCount = ((first & 0b10000000) == 0)? 1: ((first & 0b11100000) == 0b11000000)? 2: ((first & 0b11110000) == 0b11100000)? 3: 4;
366+
                    StringBuilder partial = new StringBuilder();
367+
                    for (byte b : partialKey) {
368+
                        partial.append(String.format("%02x", b));
369+
                        partial.append(" ");
370+
                    }
371+
                    Log.d(TAG, "?: partialKey is " + partial.toString() + ", taking transition " + String.format("%02x", letter));
372+
                    if(partialKey.length == byteCount-1) {
373+
                        long nextPos = file.readInt();
374+
                        byte[] nkey = new byte[key.length-1];
375+
                        System.arraycopy(key, 1, nkey, 0, key.length-1);
376+
                        queue.add(new TrieSearchParam(nextPos, nkey, new byte[0]));
377+
                    } else {
378+
                        long nextPos = file.readInt();
379+
                        byte[] npartialKey = new byte[partialKey.length + 1];
380+
                        System.arraycopy(partialKey, 0, npartialKey, 0, partialKey.length);
381+
                        npartialKey[partialKey.length] = letter;
382+
                        queue.add(new TrieSearchParam(nextPos, key, npartialKey));
383+
                    }
384+
                } else if (key[0] == letter) {
385+
                    long nextPos = file.readInt();
386+
                    Log.v(TAG, "Taking transition "+letter+" to " + nextPos);
387+
                    byte[] nkey = new byte[key.length-1];
388+
                    System.arraycopy(key, 1, nkey, 0, key.length-1);
389+
                    queue.add(new TrieSearchParam(nextPos, nkey, partialKey));
390+
                    // can only be one transition like this, so we can stop early
391+
                    break;
392+
                } else {
393+
                    file.skipBytes(4);
394+
                }
395+
            }
396+
397+
            if(results.size() >= limit)
398+
                break;
399+
        }
400+
        return results;
321401
    }
322402
}

app/src/main/java/eu/lepiller/nani/dictionary/KanjiDict.java

9494
9595
                Log.d(TAG, "trie pos: " + kanjiTriePos);
9696
97-
                return searchTrie(file, kanjiTriePos, search, new TrieParser<KanjiResult>(new ResultParser(kanji)) {
97+
                List<KanjiResult> results = searchTrie(file, kanjiTriePos, search, 1, new SingleTrieParser<KanjiResult>(new ResultParser(kanji)) {
9898
                    @Override
9999
                    public void skipVals(RandomAccessFile file1, long pos) throws IOException {
100100
                        file1.seek(pos);

107107
                        file.seek(file.readInt());
108108
                    }
109109
                });
110+
                if(results.isEmpty())
111+
                    return null;
112+
                return results.get(0);
110113
            } catch (FileNotFoundException e) {
111114
                e.printStackTrace();
112115
            } catch (IOException e) {

app/src/main/java/eu/lepiller/nani/dictionary/KanjiVG.java

7070
7171
                Log.d(TAG, "trie pos: " + kanjiTriePos);
7272
73-
                return searchTrie(file, kanjiTriePos, search, new TrieParser<KanjiResult>(new ResultParser(kanji)) {
73+
                List<KanjiResult> results = searchTrie(file, kanjiTriePos, search, 1, new SingleTrieParser<KanjiResult>(new ResultParser(kanji)) {
7474
                    @Override
7575
                    public void skipVals(RandomAccessFile file1, long pos) throws IOException {
7676
                        file1.seek(pos);

8383
                        file.seek(file.readInt());
8484
                    }
8585
                });
86+
                if(results.isEmpty())
87+
                    return null;
88+
                return results.get(0);
8689
            } catch (FileNotFoundException e) {
8790
                e.printStackTrace();
8891
            } catch (IOException e) {

app/src/main/java/eu/lepiller/nani/dictionary/ResultDictionary.java

118118
    }
119119
120120
    private List<Integer> searchTrie(RandomAccessFile file, long triePos, byte[] txt) throws IOException {
121-
        return searchTrie(file, triePos, txt, new TrieParser<List<Integer>>(new ValuesParser()) {
121+
        return searchTrie(file, triePos, txt, 50, new TrieParser<Integer>(new ValuesParser()) {
122122
            @Override
123123
            public void skipVals(RandomAccessFile file, long pos) throws IOException {
124124
                file.seek(pos);

app/src/main/java/eu/lepiller/nani/dictionary/WadokuPitchDictionary.java

2929
3030
    private String findPitch(String kanji, String reading, RandomAccessFile file) throws IOException {
3131
        String concat = kanji + reading;
32-
        return searchTrie(file, triePos, concat.getBytes(), new TrieParser<String>(new HuffmanStringParser(huffman)) {
32+
        List<String> results = searchTrie(file, triePos, concat.getBytes(), 1, new SingleTrieParser<String>(new HuffmanStringParser(huffman)) {
3333
            @Override
3434
            public void skipVals(RandomAccessFile file, long pos) throws IOException {
3535
                file.seek(pos);
3636
                new HuffmanStringParser(huffman).parse(file);
3737
            }
3838
        });
39+
        if(results.isEmpty())
40+
            return null;
41+
        return results.get(0);
3942
    }
4043
4144
    @Override