Fix furigana with katakana in headword

Julien Lepiller, Sat Jul 31 02:58:54 +0200 2021

ad7f304

Fix furigana with katakana in headword

CHANGELOG.md

1414
  This fixes the issues where the furigana was not fully visible if it took
1515
  too much space, as well as the view disappearing completely when the text was
1616
  too large.
17+
* Fixed furigana when the headword contains katakana. This used to make the furigana matching
18+
  fail completely.
1719
1820
### Features
1921

app/src/main/java/eu/lepiller/nani/ResultPagerAdapter.java

1818
import java.util.List;
1919
import java.util.Map;
2020
21-
import eu.lepiller.views.RubyTextView;
21+
import me.weilunli.views.RubyTextView;
2222
import eu.lepiller.nani.result.KanjiResult;
2323
import eu.lepiller.nani.result.Result;
2424

app/src/main/java/eu/lepiller/nani/result/Result.java

1010
import java.util.regex.Pattern;
1111
1212
import static java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
13+
import static java.lang.Character.UnicodeBlock.KATAKANA;
1314
1415
public class Result {
1516
    public static class Source {

139140
140141
        // split the text into kanji / not kanji portions
141142
        ArrayList<String> portions = new ArrayList<>();
143+
        ArrayList<String> portionsMatcher = new ArrayList<>();
142144
143145
        StringBuilder current = new StringBuilder();
146+
        StringBuilder currentMatcher = new StringBuilder();
144147
        Character.UnicodeBlock b = CJK_UNIFIED_IDEOGRAPHS;
145148
149+
        MojiConverter converter = new MojiConverter();
146150
        for(int i=0; i<txt.length(); i++) {
147151
            Character.UnicodeBlock b2 = Character.UnicodeBlock.of(txt.charAt(i));
148152
            if(b == b2) {
153+
                // if the headword contains katakana, convert it to hiragana to match pronunciation
154+
                // better.
149155
                current.append(txt.charAt(i));
156+
                if(b2 == KATAKANA) {
157+
                    String s = new String(new char[]{txt.charAt(i)});
158+
                    String hiragana = converter.convertRomajiToHiragana(converter.convertKanaToRomaji(s));
159+
                    currentMatcher.append(hiragana.charAt(0));
160+
                } else {
161+
                    currentMatcher.append(txt.charAt(i));
162+
                }
150163
            } else {
151164
                String s = current.toString();
152165
                if(!s.isEmpty())
153166
                    portions.add(s);
167+
                s = currentMatcher.toString();
168+
                if(!s.isEmpty())
169+
                    portionsMatcher.add(s);
154170
                current = new StringBuilder();
171+
                currentMatcher = new StringBuilder();
155172
                current.append(txt.charAt(i));
173+
                if(b2 == KATAKANA) {
174+
                    String katakana = new String(new char[]{txt.charAt(i)});
175+
                    String hiragana = converter.convertRomajiToHiragana(converter.convertKanaToRomaji(katakana));
176+
                    currentMatcher.append(hiragana.charAt(0));
177+
                } else {
178+
                    currentMatcher.append(txt.charAt(i));
179+
                }
156180
            }
157181
158182
            b = b2;

160184
        String str = current.toString();
161185
        if(!str.isEmpty())
162186
            portions.add(str);
187+
        str = currentMatcher.toString();
188+
        if(!str.isEmpty()) {
189+
            portionsMatcher.add(str);
190+
        }
163191
164192
        // Create a regexp to match kanji places
165193
        current = new StringBuilder();
166194
        current.append("^");
167-
        for(String s: portions) {
195+
        for(String s: portionsMatcher) {
168196
            if(Character.UnicodeBlock.of(s.charAt(0)) == CJK_UNIFIED_IDEOGRAPHS) {
169197
                current.append("(.*)");
170198
            } else {

179207
        Matcher m = p.matcher(reading);
180208
181209
        if(!m.matches()) {
182-
            Log.v("RESULT", "Finaly: " + txt);
210+
            Log.v("RESULT", "Finally: " + txt);
183211
            return txt;
184212
        }
185213