Improve furigana matching
app/src/main/java/eu/lepiller/nani/result/Result.java
| 140 | 140 | ||
| 141 | 141 | // split the text into kanji / not kanji portions | |
| 142 | 142 | ArrayList<String> portions = new ArrayList<>(); | |
| 143 | - | ArrayList<String> portionsMatcher = new ArrayList<>(); | |
| 144 | 143 | ||
| 145 | 144 | StringBuilder current = new StringBuilder(); | |
| 146 | - | StringBuilder currentMatcher = new StringBuilder(); | |
| 147 | 145 | Character.UnicodeBlock b = CJK_UNIFIED_IDEOGRAPHS; | |
| 148 | 146 | ||
| 149 | 147 | MojiConverter converter = new MojiConverter(); | |
… | |||
| 153 | 151 | // if the headword contains katakana, convert it to hiragana to match pronunciation | |
| 154 | 152 | // better. | |
| 155 | 153 | current.append(txt.charAt(i)); | |
| 156 | - | if(b2 == KATAKANA) { | |
| 157 | - | String s = new String(new char[]{txt.charAt(i)}); | |
| 158 | - | String hiragana = converter.convertRomajiToHiragana(converter.convertKanaToRomaji(s)); | |
| 159 | - | currentMatcher.append(hiragana.charAt(0)); | |
| 160 | - | } else { | |
| 161 | - | currentMatcher.append(txt.charAt(i)); | |
| 162 | - | } | |
| 163 | 154 | } else { | |
| 164 | 155 | String s = current.toString(); | |
| 165 | 156 | if(!s.isEmpty()) | |
| 166 | 157 | portions.add(s); | |
| 167 | - | s = currentMatcher.toString(); | |
| 168 | - | if(!s.isEmpty()) | |
| 169 | - | portionsMatcher.add(s); | |
| 170 | 158 | current = new StringBuilder(); | |
| 171 | - | currentMatcher = new StringBuilder(); | |
| 172 | 159 | current.append(txt.charAt(i)); | |
| 173 | - | if(b2 == KATAKANA) { | |
| 174 | - | String katakana = new String(new char[]{txt.charAt(i)}); | |
| 175 | - | String hiragana = converter.convertRomajiToHiragana(converter.convertKanaToRomaji(katakana)); | |
| 176 | - | currentMatcher.append(hiragana.charAt(0)); | |
| 177 | - | } else { | |
| 178 | - | currentMatcher.append(txt.charAt(i)); | |
| 179 | - | } | |
| 180 | 160 | } | |
| 181 | 161 | ||
| 182 | 162 | b = b2; | |
… | |||
| 184 | 164 | String str = current.toString(); | |
| 185 | 165 | if(!str.isEmpty()) | |
| 186 | 166 | portions.add(str); | |
| 187 | - | str = currentMatcher.toString(); | |
| 188 | - | if(!str.isEmpty()) { | |
| 189 | - | portionsMatcher.add(str); | |
| 190 | - | } | |
| 191 | 167 | ||
| 192 | 168 | // Create a regexp to match kanji places | |
| 193 | 169 | current = new StringBuilder(); | |
| 194 | 170 | current.append("^"); | |
| 195 | - | for(String s: portionsMatcher) { | |
| 171 | + | for(String s: portions) { | |
| 196 | 172 | if(Character.UnicodeBlock.of(s.charAt(0)) == CJK_UNIFIED_IDEOGRAPHS) { | |
| 197 | 173 | current.append("(.*)"); | |
| 198 | 174 | } else { | |
| 199 | - | current.append(s); | |
| 175 | + | for(Character c: s.toCharArray()) { | |
| 176 | + | if(Character.UnicodeBlock.of(c) == KATAKANA) { | |
| 177 | + | current.append("["); | |
| 178 | + | current.append(c); | |
| 179 | + | current.append(converter.convertRomajiToHiragana(converter.convertKanaToRomaji(new String(new char[]{c})))); | |
| 180 | + | current.append("]"); | |
| 181 | + | } else { | |
| 182 | + | current.append(c); | |
| 183 | + | } | |
| 184 | + | } | |
| 200 | 185 | } | |
| 201 | 186 | } | |
| 202 | 187 | current.append("$"); | |