Improve furigana matching
app/src/main/java/eu/lepiller/nani/result/Result.java
140 | 140 | ||
141 | 141 | // split the text into kanji / not kanji portions | |
142 | 142 | ArrayList<String> portions = new ArrayList<>(); | |
143 | - | ArrayList<String> portionsMatcher = new ArrayList<>(); | |
144 | 143 | ||
145 | 144 | StringBuilder current = new StringBuilder(); | |
146 | - | StringBuilder currentMatcher = new StringBuilder(); | |
147 | 145 | Character.UnicodeBlock b = CJK_UNIFIED_IDEOGRAPHS; | |
148 | 146 | ||
149 | 147 | MojiConverter converter = new MojiConverter(); | |
… | |||
153 | 151 | // if the headword contains katakana, convert it to hiragana to match pronunciation | |
154 | 152 | // better. | |
155 | 153 | current.append(txt.charAt(i)); | |
156 | - | if(b2 == KATAKANA) { | |
157 | - | String s = new String(new char[]{txt.charAt(i)}); | |
158 | - | String hiragana = converter.convertRomajiToHiragana(converter.convertKanaToRomaji(s)); | |
159 | - | currentMatcher.append(hiragana.charAt(0)); | |
160 | - | } else { | |
161 | - | currentMatcher.append(txt.charAt(i)); | |
162 | - | } | |
163 | 154 | } else { | |
164 | 155 | String s = current.toString(); | |
165 | 156 | if(!s.isEmpty()) | |
166 | 157 | portions.add(s); | |
167 | - | s = currentMatcher.toString(); | |
168 | - | if(!s.isEmpty()) | |
169 | - | portionsMatcher.add(s); | |
170 | 158 | current = new StringBuilder(); | |
171 | - | currentMatcher = new StringBuilder(); | |
172 | 159 | current.append(txt.charAt(i)); | |
173 | - | if(b2 == KATAKANA) { | |
174 | - | String katakana = new String(new char[]{txt.charAt(i)}); | |
175 | - | String hiragana = converter.convertRomajiToHiragana(converter.convertKanaToRomaji(katakana)); | |
176 | - | currentMatcher.append(hiragana.charAt(0)); | |
177 | - | } else { | |
178 | - | currentMatcher.append(txt.charAt(i)); | |
179 | - | } | |
180 | 160 | } | |
181 | 161 | ||
182 | 162 | b = b2; | |
… | |||
184 | 164 | String str = current.toString(); | |
185 | 165 | if(!str.isEmpty()) | |
186 | 166 | portions.add(str); | |
187 | - | str = currentMatcher.toString(); | |
188 | - | if(!str.isEmpty()) { | |
189 | - | portionsMatcher.add(str); | |
190 | - | } | |
191 | 167 | ||
192 | 168 | // Create a regexp to match kanji places | |
193 | 169 | current = new StringBuilder(); | |
194 | 170 | current.append("^"); | |
195 | - | for(String s: portionsMatcher) { | |
171 | + | for(String s: portions) { | |
196 | 172 | if(Character.UnicodeBlock.of(s.charAt(0)) == CJK_UNIFIED_IDEOGRAPHS) { | |
197 | 173 | current.append("(.*)"); | |
198 | 174 | } else { | |
199 | - | current.append(s); | |
175 | + | for(Character c: s.toCharArray()) { | |
176 | + | if(Character.UnicodeBlock.of(c) == KATAKANA) { | |
177 | + | current.append("["); | |
178 | + | current.append(c); | |
179 | + | current.append(converter.convertRomajiToHiragana(converter.convertKanaToRomaji(new String(new char[]{c})))); | |
180 | + | current.append("]"); | |
181 | + | } else { | |
182 | + | current.append(c); | |
183 | + | } | |
184 | + | } | |
200 | 185 | } | |
201 | 186 | } | |
202 | 187 | current.append("$"); |