Add wadoku dictionary modules
modules/nani/parse-result.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2020 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani parse-result) | |
20 | + | #:use-module (ice-9 binary-ports) | |
21 | + | #:use-module (rnrs bytevectors) | |
22 | + | #:export (parse-result-list | |
23 | + | parse-result-char | |
24 | + | parse-result-int | |
25 | + | parse-result-boolean | |
26 | + | parse-result-string)) | |
27 | + | ||
28 | + | (define* (parse-result-list port parse-result-element) | |
29 | + | (let ((size (bytevector-u16-ref (get-bytevector-n port 2) 0 (endianness big)))) | |
30 | + | (let loop ((result '()) (remaining size)) | |
31 | + | (if (= remaining 0) | |
32 | + | (reverse result) | |
33 | + | (loop (cons (parse-result-element port) result) (- remaining 1)))))) | |
34 | + | ||
35 | + | (define (parse-result-char port) | |
36 | + | (get-u8 port)) | |
37 | + | ||
38 | + | (define (parse-result-int port) | |
39 | + | (bytevector-u32-ref (get-bytevector-n port 4) 0 (endianness big))) | |
40 | + | ||
41 | + | (define (parse-result-boolean port) | |
42 | + | (= (get-u8 port) 1)) | |
43 | + | ||
44 | + | (define (parse-result-string port) | |
45 | + | (define (get-result-string port) | |
46 | + | (let loop ((lu8 '()) (char (get-u8 port))) | |
47 | + | (if (= char 0) | |
48 | + | lu8 | |
49 | + | (loop (cons char lu8) (get-u8 port))))) | |
50 | + | (utf8->string (u8-list->bytevector (reverse (get-result-string port))))) |
modules/nani/wadoku/entities.scm unknown status 1
1 | + | (define-module (nani wadoku entities) | |
2 | + | #:use-module (nani result) | |
3 | + | #:use-module ((nani tags) #:prefix tag:) | |
4 | + | #:use-module (ice-9 match) | |
5 | + | #:export (get-tag hint->tag) | |
6 | + | #:re-export (tag:get-points)) | |
7 | + | ||
8 | + | (define wadoku-entities | |
9 | + | `(("Aerodyn." . "aero") | |
10 | + | ("Agrochem." . "chem") | |
11 | + | ("??gypt. Gesch." . "hist") | |
12 | + | ("??gypt. Mythol." . "rel") | |
13 | + | ("Aktien" . "econ") | |
14 | + | ("Akupunktur" . "med") | |
15 | + | ("Akustik" . "music") | |
16 | + | ("American Football" . "sports") | |
17 | + | ("amerik. Gesch." . "hist") | |
18 | + | ("amerik. Pol." . "politics") | |
19 | + | ("amerik. Rechtsw." . "law") | |
20 | + | ("Anat." . "anat") | |
21 | + | ("Angeln" . "sports") | |
22 | + | ("Anime" . "work") | |
23 | + | ("Anthropol." . "antho") | |
24 | + | ("arab. Gesch." . "hist") | |
25 | + | ("Arch??ol." . "archeo") | |
26 | + | ("Archit." . "archit") | |
27 | + | ("Astrol." . "astron") | |
28 | + | ("Astron." . "astron") | |
29 | + | ("atmosph??r. Optik" . "physics") | |
30 | + | ("Atomphys." . "physics") | |
31 | + | ("Audio" . "music") | |
32 | + | ("Augenheilk." . "med") | |
33 | + | ("Autorennen" . "sports") | |
34 | + | ("Backw." . "food") | |
35 | + | ("Badminton" . "sports") | |
36 | + | ("Bahnhofsn." . "station") | |
37 | + | ("Bakteriol." . "biol") | |
38 | + | ("Ballet" . "music") | |
39 | + | ("Ballett" . "music") | |
40 | + | ("Ballsport" . "sports") | |
41 | + | ("Bankw." . "finc") | |
42 | + | ("Baseb." . "baseb") | |
43 | + | ("Baseball" . "baseb") | |
44 | + | ("Basketball" . "sports") | |
45 | + | ("Bauingenieurw." . "archit") | |
46 | + | ("Bautechnik" . "archit") | |
47 | + | ("Bauw." . "archit") | |
48 | + | ("Befestigungsw." . "archit") | |
49 | + | ("Beh??rde" . "law") | |
50 | + | ("Bergb." . "archit") | |
51 | + | ("Bergbau" . "archit") | |
52 | + | ("Bergn." . "place") | |
53 | + | ("Bergsteigen" . "sports") | |
54 | + | ("Bibel" . "Christn") | |
55 | + | ("Bibliotheksw." . "biblio") | |
56 | + | ("Bildhauerei" . "work") | |
57 | + | ("bild. Kunst" . "work") | |
58 | + | ("Billard" . "game") | |
59 | + | ("Biochem." . "biol") | |
60 | + | ("Biol." . "biol") | |
61 | + | ("Bodenkunde" . "agri") | |
62 | + | ("Bogenschie??en" . "sports") | |
63 | + | ("B??rse" . "econ") | |
64 | + | ("Bot." . "bot") | |
65 | + | ("Bowling" . "sports") | |
66 | + | ("Boxen" . "sports") | |
67 | + | ("Brauereiw." . "alco") | |
68 | + | ("Brettspiel" . "game") | |
69 | + | ("brit. Gesch." . "hist") | |
70 | + | ("brit. Rechtsw." . "law") | |
71 | + | ("Br??ckenbau" . "engr") | |
72 | + | ("Bsp." . "exp") | |
73 | + | ("Buchbinden" . "print") | |
74 | + | ("B??chereiw." . "biblio") | |
75 | + | ("Buchtitel" . "work") | |
76 | + | ("Buchw." . "biblio") | |
77 | + | ("Buddh." . "Buddh") | |
78 | + | ("Bud??" . "MA") | |
79 | + | ("B??d??" . "MA") | |
80 | + | ("Chem." . "chem") | |
81 | + | ("chin. Arch??ol." . "archeo") | |
82 | + | ("chin. Astron." . "astron") | |
83 | + | ("chin. Gesch." . "hist") | |
84 | + | ("chin. Gramm." . "ling") | |
85 | + | ("chin. Literaturw." . "work") | |
86 | + | ("chin. Mus." . "music") | |
87 | + | ("chin. Musikinstr." . "music") | |
88 | + | ("chin. Mythol." . "rel") | |
89 | + | ("chin. Philos." . "rel") | |
90 | + | ("chin. Phon." . "ling") | |
91 | + | ("chin. Poet." . "poet") | |
92 | + | ("chin. Pol." . "politics") | |
93 | + | ("chin. Theater" . "work") | |
94 | + | ("Chirurgie" . "med") | |
95 | + | ("Christent." . "Christn") | |
96 | + | ("Cocktail" . "alco") | |
97 | + | ("Computerling." . "comp") | |
98 | + | ("Curling" . "sports") | |
99 | + | ("Dammbau" . "engr") | |
100 | + | ("Demogr." . "demo") | |
101 | + | ("Diplomat." . "politics") | |
102 | + | ("Druckw." . "print") | |
103 | + | ("dtsch. Gesch." . "hist") | |
104 | + | ("dtsch. Gramm." . "ling") | |
105 | + | ("EDV" . "comp") | |
106 | + | ("Einzel-Kanji" . "ling") | |
107 | + | ("Streckenkilometertafel" . "train") | |
108 | + | ("Eishockey" . "sports") | |
109 | + | ("Eiskunstl." . "sports") | |
110 | + | ("Eiskunstlauf" . "sports") | |
111 | + | ("Eisschnelllauf" . "sports") | |
112 | + | ("Elektrochem." . "chem") | |
113 | + | ("Elektrot." . "engr") | |
114 | + | ("Embryol." . "biol") | |
115 | + | ("engl. Gesch." . "hist") | |
116 | + | ("Entwicklungsphysiol." . "biol") | |
117 | + | ("Entwicklungspsychol." . "psy") | |
118 | + | ("Ergonomie" . "engr") | |
119 | + | ("Ethik" . "philo") | |
120 | + | ("Ethnol." . "ethno") | |
121 | + | ("europ. Gesch." . "hist") | |
122 | + | ("Fahrrad" . "sports") | |
123 | + | ("Familienn." . "surname") | |
124 | + | ("Familienn.." . "surname") | |
125 | + | ("Farbe" . "color") | |
126 | + | ("F??rberei" . "build") | |
127 | + | ("Fechten" . "build") | |
128 | + | ("Fertigungstechnik" . "engr") | |
129 | + | ("Film" . "work") | |
130 | + | ("Filmtitel" . "work") | |
131 | + | ("Finanzw." . "finc") | |
132 | + | ("Firmenn." . "company") | |
133 | + | ("Fischerei" . "sports") | |
134 | + | ("Fischk." . "food") | |
135 | + | ("Flugw." . "engr") | |
136 | + | ("Flussn." . "place") | |
137 | + | ("Forstw." . "bot") | |
138 | + | ("Fotog." . "photo") | |
139 | + | ("Frachtw." . "trans") | |
140 | + | ("franz. Gesch." . "hist") | |
141 | + | ("Frisur" . "fashion") | |
142 | + | ("Funkt." . "physics") | |
143 | + | ("Fu??ball" . "sports") | |
144 | + | ||
145 | + | ("Gagaku" . "music") | |
146 | + | ("Garten" . "bota") | |
147 | + | ("Gartenk." . "bota") | |
148 | + | ("Gateball" . "sports") | |
149 | + | ("Gebietsn." . "place") | |
150 | + | ("Geldw." . "finc") | |
151 | + | ("Genetik" . "biol") | |
152 | + | ("Geogr." . "place") | |
153 | + | ("Geol." . "geol") | |
154 | + | ("Geom." . "geom") | |
155 | + | ("Geophys." . "physics") | |
156 | + | ("Gerberei" . "build") | |
157 | + | ("Gesch." . "hist") | |
158 | + | ("Gewichtheben" . "sports") | |
159 | + | ("Glasherst." . "build") | |
160 | + | ("Glaziol." . "sci") | |
161 | + | ("Go" . "game") | |
162 | + | ("Golf" . "sports") | |
163 | + | ("Gramm." . "ling") | |
164 | + | ("griech. Christent." . "Christn") | |
165 | + | ("griech. Gesch." . "hist") | |
166 | + | ("griech. Myth." . "rel") | |
167 | + | ("griech. Mythol." . "rel") | |
168 | + | ("griech. Theat." . "work") | |
169 | + | ("Gymnastik" . "sports") | |
170 | + | ("Handball" . "sports") | |
171 | + | ("Handel" . "econ") | |
172 | + | ("Handwerkskunst" . "build") | |
173 | + | ("Heraldik" . "sci") | |
174 | + | ("Hindu." . "rel") | |
175 | + | ("Hochsprung" . "sports") | |
176 | + | ("Hockey" . "sports") | |
177 | + | ("Holzbau" . "engr") | |
178 | + | ("Holzbearb." . "engr") | |
179 | + | ("Holzschnitt" . "engr") | |
180 | + | ("Hunderasse" . "zool") | |
181 | + | ("Hydrodynamik" . "physics") | |
182 | + | ("I Ging" . "rel") | |
183 | + | ("Ikebana" . "build") | |
184 | + | ("Immunol." . "med") | |
185 | + | ("ind. Gesch." . "hist") | |
186 | + | ("indian. Mythol." . "rel") | |
187 | + | ("ind. Mythol." . "rel") | |
188 | + | ("indones. Gesch." . "hist") | |
189 | + | ("ind. Philos." . "philo") | |
190 | + | ("Insektenk." . "zool") | |
191 | + | ("Inseln." . "place") | |
192 | + | ("Internet" . "comp") | |
193 | + | ("Islam" . "rel") | |
194 | + | ("islam. Mythol." . "rel") | |
195 | + | ("islam. Rechtsw." . "law") | |
196 | + | ("islam. Rel." . "rel") | |
197 | + | ("israel. Gesch." . "hist") | |
198 | + | ("ital. Gesch." . "hist") | |
199 | + | ("japan. Archit." . "archit") | |
200 | + | ("japan. Astrologie" . "aston") | |
201 | + | ("japan. bild. Kunst" . "work") | |
202 | + | ("japan. Christent." . "Christn") | |
203 | + | ("japan. Gesch." . "hist") | |
204 | + | ("japan. Gramm." . "ling") | |
205 | + | ("japan. Literaturw." . "work") | |
206 | + | ("japan. Med." . "med") | |
207 | + | ("japan. Mus." . "music") | |
208 | + | ("japan. Mythol." . "rel") | |
209 | + | ("japan. Philos." . "philo") | |
210 | + | ("japan. Pol." . "politics") | |
211 | + | ("japan. Schwimmen" . "sports") | |
212 | + | ("japan. Soziol." . "sci") | |
213 | + | ("japan. Sprachw." . "ling") | |
214 | + | ("J??ruri" . "music") | |
215 | + | ("Judent." . "rel") | |
216 | + | ("J??d??" . "MA") | |
217 | + | ("Kabuki" . "music") | |
218 | + | ("Kagura" . "music") | |
219 | + | ("Kalligraphie" . "build") | |
220 | + | ("Kanbun" . "ling") | |
221 | + | ("Kanji" . "ling") | |
222 | + | ("Kanp??" . "med") | |
223 | + | ("Karten" . "games") | |
224 | + | ("Kartenspiel" . "game") | |
225 | + | ("Kartogr." . "place") | |
226 | + | ("kath. Christent." . "Christn") | |
227 | + | ("Kend??" . "MA") | |
228 | + | ("Keramik" . "build") | |
229 | + | ("Kernphys." . "physics") | |
230 | + | ("Kfz-W." . "engr") | |
231 | + | ("KI" . "comp") | |
232 | + | ("Kleidung" . "fashion") | |
233 | + | ("Kochk." . "food") | |
234 | + | ("Konditoreiw." . "food") | |
235 | + | ("Konfuz." . "rel") | |
236 | + | ("korean. Gesch." . "hist") | |
237 | + | ("korean. Rel." . "rel") | |
238 | + | ("korean. Theater" . "work") | |
239 | + | ("Kosmetik" . "fashion") | |
240 | + | ("Kricket" . "sports") | |
241 | + | ("Kristallogr." . "physics") | |
242 | + | ("Krocket" . "sports") | |
243 | + | ("Kunst" . "work") | |
244 | + | ("Kunstgesch." . "hist") | |
245 | + | ("Kunst??? und Turmsprung" . "build") | |
246 | + | ("Ky??gen" . "music") | |
247 | + | ("Lack" . "work") | |
248 | + | ("L??ndern." . "place") | |
249 | + | ("Landw." . "agri") | |
250 | + | ("Leichtathl." . "sports") | |
251 | + | ("Liedtitel" . "work") | |
252 | + | ("Literaturw." . "work") | |
253 | + | ("Logik" . "math") | |
254 | + | ("Luftf." . "aero") | |
255 | + | ("Machinenb." . "engr") | |
256 | + | ("Mah-Jongg" . "mahj") | |
257 | + | ("Management" . "bus") | |
258 | + | ("Manga" . "work") | |
259 | + | ("m??nnl. Name" . "given") | |
260 | + | ("m??nnl. Vorn." . "given") | |
261 | + | ("m??nnl. Vorname" . "given") | |
262 | + | ("Mantik" . "rel") | |
263 | + | ("Marine" . "sail") | |
264 | + | ("Marketing" . "bus") | |
265 | + | ("Maschinenb." . "engr") | |
266 | + | ("Ma??" . "engr") | |
267 | + | ("Math." . "math") | |
268 | + | ("Mech." . "physics") | |
269 | + | ("Med." . "med") | |
270 | + | ("Meeresk." . "place") | |
271 | + | ("Meeresn." . "place") | |
272 | + | ("Meinungsforschung" . "politics") | |
273 | + | ("Messw." . "hobby") | |
274 | + | ("Metall." . "engr") | |
275 | + | ("Metallbearb." . "engr") | |
276 | + | ("Metallguss" . "engr") | |
277 | + | ("Meteor." . "meteor") | |
278 | + | ("mexikan. Kochk." . "food") | |
279 | + | ("Mikrobiol." . "biol") | |
280 | + | ("Milit." . "mil") | |
281 | + | ("Mineral." . "geol") | |
282 | + | ("M??belbau" . "build") | |
283 | + | ("Mode" . "fashion") | |
284 | + | ("Motorsport" . "sports") | |
285 | + | ("Mus." . "music") | |
286 | + | ("Muschelk." . "zool") | |
287 | + | ("Musikinstr." . "music") | |
288 | + | ("Mykol." . "bota") | |
289 | + | ("Mythol." . "rel") | |
290 | + | ("Nachrichtent." . "politics") | |
291 | + | ("N??hen" . "fashion") | |
292 | + | ("Naniwabushi" . "music") | |
293 | + | ("Naturmed." . "med") | |
294 | + | ("Naturph??n." . "physics") | |
295 | + | ("Naturphilos." . "philo") | |
296 | + | ("Neng??" . "hist") | |
297 | + | ("Netzwerktechnik" . "comp") | |
298 | + | ("Neurol." . "med") | |
299 | + | ("N??" . "music") | |
300 | + | ("nord. Mythol." . "rel") | |
301 | + | ("Numismatik" . "build") | |
302 | + | ("??kol." . "biol") | |
303 | + | ("Oper" . "music") | |
304 | + | ("Optik" . "physics") | |
305 | + | ("Org." . "organization") | |
306 | + | ("Origami" . "build") | |
307 | + | ("Ortsn." . "place") | |
308 | + | ("P??d." . "school") | |
309 | + | ("Pal??ontol." . "hist") | |
310 | + | ("Papierherst." . "build") | |
311 | + | ("Parapsych." . "psy") | |
312 | + | ("Patentw." . "engr") | |
313 | + | ("Pathol." . "med") | |
314 | + | ("pers. Gesch." . "hist") | |
315 | + | ("Pers??nlichk." . "person") | |
316 | + | ("Petrochem." . "chem") | |
317 | + | ("Pferderennen" . "sports") | |
318 | + | ("Pferdesport" . "sports") | |
319 | + | ("Pharm." . "med") | |
320 | + | ("Philos." . "philo") | |
321 | + | ("philos. Anthrop." . "anth") | |
322 | + | ("Phon." . "ling") | |
323 | + | ("Phys." . "physics") | |
324 | + | ("Physiol." . "anat") | |
325 | + | ("Poetik" . "poet") | |
326 | + | ("Pol." . "politics") | |
327 | + | ("Postw." . "telecom") | |
328 | + | ("Psych." . "psy") | |
329 | + | ("Quantenphys." . "physics") | |
330 | + | ("Radio" . "telecom") | |
331 | + | ("Radrennen" . "sports") | |
332 | + | ("Radsport" . "sports") | |
333 | + | ("Rakugo" . "music") | |
334 | + | ("Raumf." . "engr") | |
335 | + | ("Reaktort." . "engr") | |
336 | + | ("Rechnungsw." . "bus") | |
337 | + | ("Rechtsw." . "law") | |
338 | + | ("Redew." . "exp") | |
339 | + | ("Regelungstechnik" . "engr") | |
340 | + | ("Reiten" . "sports") | |
341 | + | ("Rel." . "rel") | |
342 | + | ("Rennsport" . "sports") | |
343 | + | ("Rhetorik" . "philo") | |
344 | + | ("Ringen" . "sports") | |
345 | + | ("r??m. Gesch." . "hist") | |
346 | + | ("r??m. Mythol." . "rel") | |
347 | + | ("Rudern" . "sports") | |
348 | + | ("Rugby" . "sports") | |
349 | + | ("Rundfunk" . "telecom") | |
350 | + | ("russ. Gesch." . "hist") | |
351 | + | ("R??stung" . "mil") | |
352 | + | ("Schach" . "game") | |
353 | + | ("Schie??sport" . "sports") | |
354 | + | ("Schiff" . "sail") | |
355 | + | ("Schiffbau" . "sail") | |
356 | + | ("Schifffahrt" . "sail") | |
357 | + | ("Schmuck" . "build") | |
358 | + | ("Schneckenk." . "zool") | |
359 | + | ("Schneiderei" . "fashion") | |
360 | + | ("Schuhe" . "fashion") | |
361 | + | ("Schule" . "school") | |
362 | + | ("Schwei??en" . "build") | |
363 | + | ("Schwert" . "mil") | |
364 | + | ("Schwimmen" . "sports") | |
365 | + | ("Seef." . "sail") | |
366 | + | ("Seen." . "place") | |
367 | + | ("Seerechtsw." . "law") | |
368 | + | ("Seew." . "sail") | |
369 | + | ("Segeln" . "sail") | |
370 | + | ("Seide" . "build") | |
371 | + | ("Seismol." . "physics") | |
372 | + | ("SF" . "work") | |
373 | + | ("Shingon" . "Buddh") | |
374 | + | ("Shint??" . "Shinto") | |
375 | + | ("Sh??gi" . "shogi") | |
376 | + | ("Ski" . "sports") | |
377 | + | ("Skispringen" . "sports") | |
378 | + | ("Softball" . "sports") | |
379 | + | ("Sozialpsych." . "psy") | |
380 | + | ("Soziol." . "sci") | |
381 | + | ("span. Gesch." . "hist") | |
382 | + | ("Spiel" . "game") | |
383 | + | ("Spiele-Titel" . "game") | |
384 | + | ("Spieltheorie" . "game") | |
385 | + | ("Spinnen" . "zool") | |
386 | + | ("Spinnenk." . "zool") | |
387 | + | ("Sport" . "sports") | |
388 | + | ("Sprache" . "ling") | |
389 | + | ("Sprachw." . "ling") | |
390 | + | ("Sprichw." . "ling") | |
391 | + | ("Stadtn." . "place") | |
392 | + | ("Stadtplanung" . "engr") | |
393 | + | ("Statistik" . "math") | |
394 | + | ("Steuerw." . "econ") | |
395 | + | ("Sticken" . "build") | |
396 | + | ("Stra??enbau" . "engr") | |
397 | + | ("Stricken" . "build") | |
398 | + | ("Strukturalismus" . "philo") | |
399 | + | ("Sum??" . "sumo") | |
400 | + | ("Surfen" . "sports") | |
401 | + | ("Systemanalyse" . "engr") | |
402 | + | ("Systemtheorie" . "engr") | |
403 | + | ("taiwan. Pol." . "politics") | |
404 | + | ("Tanz" . "art") | |
405 | + | ("Tanzen" . "art") | |
406 | + | ("Taoismus" . "rel") | |
407 | + | ("Tauchen" . "sports") | |
408 | + | ("Technik" . "engr") | |
409 | + | ("Tee" . "hobby") | |
410 | + | ("Telegrafie" . "telecom") | |
411 | + | ("Telekom." . "telecom") | |
412 | + | ("Tempeln." . "place") | |
413 | + | ("Tennis" . "sports") | |
414 | + | ("Tenn??" . "surname") | |
415 | + | ("Textilt." . "engr") | |
416 | + | ("Theat." . "work") | |
417 | + | ("Theol." . "rel") | |
418 | + | ("tibet. Buddh." . "Buddh") | |
419 | + | ("Tiefb." . "engr") | |
420 | + | ("Tiermed." . "med") | |
421 | + | ("Tischtennis" . "sports") | |
422 | + | ("Tourismus" . "hobby") | |
423 | + | ("Tunnelbau" . "engr") | |
424 | + | ("t??rk. Gesch." . "hist") | |
425 | + | ("Turnen" . "sports") | |
426 | + | ("TV" . "telecom") | |
427 | + | ("TV-Prog." . "telecom") | |
428 | + | ("U-Bahn" . "train") | |
429 | + | ("Umwelt" . "biol") | |
430 | + | ("Univ.-N." . "place") | |
431 | + | ("Verhaltensbiol." . "biol") | |
432 | + | ("Verkehrsw." . "engr") | |
433 | + | ("Verlagsn." . "company") | |
434 | + | ("Verlagsw." . "econ") | |
435 | + | ("Versicherungsw." . "bus") | |
436 | + | ("Video" . "telecom") | |
437 | + | ("Videospiel" . "game") | |
438 | + | ("vietnam. Gesch." . "hist") | |
439 | + | ("Vogelk." . "zool") | |
440 | + | ("V??lkerk." . "ethno") | |
441 | + | ("V??lkerr." . "law") | |
442 | + | ("Volleyball" . "sports") | |
443 | + | ("Vorgesch." . "hist") | |
444 | + | ("VWL" . "econ") | |
445 | + | ("Waffenk." . "mil") | |
446 | + | ("Walfang" . "hobby") | |
447 | + | ("Wasserball" . "sports") | |
448 | + | ("Wasserbau" . "engr") | |
449 | + | ("Wassersport" . "sports") | |
450 | + | ("Weben" . "build") | |
451 | + | ("weibl. Name" . "given") | |
452 | + | ("weibl. Vorn." . "given") | |
453 | + | ("Weichtierk." . "zool") | |
454 | + | ("Wein" . "alco") | |
455 | + | ("Werbung" . "bus") | |
456 | + | ("Werktitel" . "work") | |
457 | + | ("Wintersport" . "sports") | |
458 | + | ("Wirtsch." . "econ") | |
459 | + | ("Wrestling" . "sports") | |
460 | + | ("Wz." . "exp") | |
461 | + | ("Yoga" . "sports") | |
462 | + | ("Z??hlw." . "finc") | |
463 | + | ("Zahnmed." . "med") | |
464 | + | ("Zeitschriftenn." . "work") | |
465 | + | ("Zeitungsn." . "work") | |
466 | + | ("Zeitungsw." . "print") | |
467 | + | ("Zellbiol." . "biol") | |
468 | + | ("Zen" . "rel") | |
469 | + | ("Zirkus" . "hobby") | |
470 | + | ("Zitat" . "exp") | |
471 | + | ("Zollw." . "bus") | |
472 | + | ("Zool." . "biol"))) | |
473 | + | ||
474 | + | (define (hint->tag hint) | |
475 | + | (match hint | |
476 | + | ("Kansai-Dial." "ksb") | |
477 | + | ("Ky??to-Dial." "kyb") | |
478 | + | ("??saka-Dial." "osb") | |
479 | + | ("Kant??-Dial." "ktb") | |
480 | + | ("T??hoku-Dial." "thb") | |
481 | + | ("Tsugaru-Dial." "tsug") | |
482 | + | ("Ky??sh??-Dial." "kyu") | |
483 | + | ("Hokkaid??-Dial." "hob") | |
484 | + | ("altert??ml." "arch") | |
485 | + | ("arch." "arch") | |
486 | + | ("Edo-zeitl." "arch") | |
487 | + | ("ehrerb.-h??fl." "hon") | |
488 | + | ("h??fl.-ehrerb." "hon") | |
489 | + | ("besch.-h??fl." "hum") | |
490 | + | ("abwertend" "derog") | |
491 | + | ("etwas altmodisch" "obs") | |
492 | + | ("Gaunerjargon" "thief") | |
493 | + | ("Frauenspr. der Edo-Zeit" "arch") | |
494 | + | ("Geheimspr. der Gauner" "thief") | |
495 | + | ("hist." "hist") | |
496 | + | ("h??fl." "pol") | |
497 | + | ("kinderspr." "chn") | |
498 | + | ("M??nnersprache" "male") | |
499 | + | ("Medizinerjargon" "med") | |
500 | + | ("Milit??rjargon" "mil") | |
501 | + | ("obsol." "obs") | |
502 | + | ("onomat." "on-mim") | |
503 | + | ("poet." "poet") | |
504 | + | ("Polizeijargon" "law") | |
505 | + | ("Slang" "sl") | |
506 | + | ("veraltet" "arch") | |
507 | + | (_ #f))) | |
508 | + | ||
509 | + | (define (wadoku->tag-name wadoku-tag) | |
510 | + | (let ((tag (assoc-ref wadoku-entities wadoku-tag))) | |
511 | + | (or tag wadoku-tag))) | |
512 | + | ||
513 | + | (define (get-tag tag) | |
514 | + | (tag:get-tag (wadoku->tag-name tag))) |
modules/nani/wadoku/xml.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani wadoku xml) | |
20 | + | #:use-module (ice-9 match) | |
21 | + | #:use-module (ice-9 rdelim) | |
22 | + | #:use-module (nani frequency) | |
23 | + | #:use-module (nani result) | |
24 | + | #:use-module (nani wadoku entities) | |
25 | + | #:use-module (srfi srfi-9) | |
26 | + | #:use-module (sxml ssax) | |
27 | + | #:export (xml->results)) | |
28 | + | ||
29 | + | (define-record-type usage | |
30 | + | (make-usage type reg content) | |
31 | + | usage? | |
32 | + | (type usage-type) | |
33 | + | (reg usage-reg) | |
34 | + | (content usage-content)) | |
35 | + | ||
36 | + | (define (flatten lst) | |
37 | + | (match lst | |
38 | + | (() '()) | |
39 | + | ((head lst ...) | |
40 | + | (if (list? head) | |
41 | + | (append (flatten head) (flatten lst)) | |
42 | + | (cons head (flatten lst)))) | |
43 | + | (lst (list lst)))) | |
44 | + | ||
45 | + | (define* (update-usage usage #:key (type (usage-type usage)) | |
46 | + | (reg (usage-reg usage)) | |
47 | + | (content (usage-content usage))) | |
48 | + | (make-usage type reg content)) | |
49 | + | ||
50 | + | ;; TODO | |
51 | + | (define (usage->tag usage) | |
52 | + | (let ((content (usage-content usage)) | |
53 | + | (type (usage-type usage)) | |
54 | + | (reg (usage-reg usage))) | |
55 | + | `(,@(if (string-null? content) | |
56 | + | (match type | |
57 | + | ("" '())) | |
58 | + | (match type | |
59 | + | ("dom" `((tag ,content))) | |
60 | + | ("hint" (let ((tag (hint->tag content))) | |
61 | + | (if tag `(tag ,tag) '()))))) | |
62 | + | ,@(match reg | |
63 | + | ("" '()) | |
64 | + | ("lit" `((tag "litf"))) | |
65 | + | ("kinderspr." `((tag "chn"))) | |
66 | + | ("kleinkindspr." `((tag "baby"))) | |
67 | + | ("jugendspr." `((tag "young"))) | |
68 | + | ("coll" `((tag "col"))) | |
69 | + | ("vulg." `((tag "vulg"))) | |
70 | + | ("Slang" `((tag "sl"))) | |
71 | + | ("poet." `((tag "poet"))) | |
72 | + | ("hist." `((tag "hist"))) | |
73 | + | ("hist" `((tag "hist"))) | |
74 | + | ("obsol." `((tag "obs"))) | |
75 | + | ("veraltet" `((tag "arch"))) | |
76 | + | ("sch??lerspr." `((tag "stud"))) | |
77 | + | ("studentenspr." `((tag "stud"))) | |
78 | + | ("frauenspr." `((tag "fem"))) | |
79 | + | ("gaunerspr." `((tag "thief"))) | |
80 | + | ("geheimspr." `((tag "secret"))) | |
81 | + | ("hofdamenspr." `((tag "fem") (tag "pol"))) | |
82 | + | ("seemannsspr." `((tag "sail"))))))) | |
83 | + | ||
84 | + | (define (usg->tag lst) | |
85 | + | (define (usg->usage lst) | |
86 | + | (let loop ((result (make-usage "" "" "")) (lst lst)) | |
87 | + | (if (null? lst) | |
88 | + | result | |
89 | + | (loop | |
90 | + | (match (car lst) | |
91 | + | (('type . type) (update-usage result #:type type)) | |
92 | + | (('reg . reg) (update-usage result #:reg reg)) | |
93 | + | (((? symbol? s) . v) | |
94 | + | (throw 'unknow-usage-attr s v)) | |
95 | + | ((? string? content) (update-usage result #:content content)) | |
96 | + | ((? list? l) (loop result l)) | |
97 | + | (_ result)) | |
98 | + | (cdr lst))))) | |
99 | + | (usage->tag (usg->usage lst))) | |
100 | + | ||
101 | + | (define (ruigo->ref lst) | |
102 | + | (let loop ((ref #f) (lst lst)) | |
103 | + | (match lst | |
104 | + | (() ref) | |
105 | + | ((('id id) lst ...) | |
106 | + | (loop id lst)) | |
107 | + | (((a . b) lst ...) | |
108 | + | (loop (loop ref (list (list a b))) lst)) | |
109 | + | (((? list? l) lst ...) | |
110 | + | (loop (loop ref l) lst))))) | |
111 | + | ||
112 | + | (define (merge-senses s1 s2) | |
113 | + | (update-sense s2 | |
114 | + | #:references (append (sense-references s1) (sense-references s2)) | |
115 | + | #:tags (append (sense-tags s1) (sense-tags s2)) | |
116 | + | #:glosses (append (sense-glosses s1) (sense-glosses s2)))) | |
117 | + | ||
118 | + | (define (sxml->sense lst) | |
119 | + | (let loop ((sense (make-sense '() '() '() '() '() '() "ger")) (lst lst)) | |
120 | + | (if (null? lst) | |
121 | + | sense | |
122 | + | (loop | |
123 | + | (match (car lst) | |
124 | + | (#f sense) | |
125 | + | ((? sense? s) | |
126 | + | (merge-senses s sense)) | |
127 | + | ((? source? s) | |
128 | + | (update-sense sense #:sources (cons s (sense-sources sense)))) | |
129 | + | (('ref (? string? r)) | |
130 | + | (update-sense sense #:references (cons r (sense-references sense)))) | |
131 | + | (('tag (? string? r)) | |
132 | + | (update-sense sense #:tags (cons (get-tag r) (sense-tags sense)))) | |
133 | + | (('trans (? string? r)) | |
134 | + | (update-sense sense #:glosses (cons r (sense-glosses sense)))) | |
135 | + | (('related . _) sense) | |
136 | + | (('transcr . _) sense) | |
137 | + | (('pitch . _) sense) | |
138 | + | (((? symbol? s) v) | |
139 | + | (throw 'unknown-symbol s v)) | |
140 | + | ((? list? l) (loop sense l)) | |
141 | + | ((? string? _) sense)) | |
142 | + | (cdr lst))))) | |
143 | + | ||
144 | + | ;; TODO | |
145 | + | (define (sxml->ref lst) | |
146 | + | (define (sxml->ref-name lst) | |
147 | + | (let loop ((result #f) (lst lst)) | |
148 | + | (if (null? lst) | |
149 | + | result | |
150 | + | (loop | |
151 | + | (match (car lst) | |
152 | + | (('id . id) id) | |
153 | + | (((? symbol? s) . _) result) | |
154 | + | (((? symbol? s) _) result) | |
155 | + | ((? list? l) (loop result l)) | |
156 | + | (_ result)) | |
157 | + | (cdr lst))))) | |
158 | + | (let ((ref (sxml->ref-name lst))) | |
159 | + | (if (string? ref) | |
160 | + | `(ref ,ref) | |
161 | + | (throw 'no-ref ref 'from lst)))) | |
162 | + | ||
163 | + | (define (sxml->source lst) | |
164 | + | (let loop ((source (make-source '() #f "" "")) (lst lst)) | |
165 | + | (if (null? lst) | |
166 | + | source | |
167 | + | (loop | |
168 | + | (match (car lst) | |
169 | + | (('impli impli) (update-source source #:lang impli)) | |
170 | + | (('foreign foreign) (update-source source #:content foreign)) | |
171 | + | ((? list? l) (loop source l)) | |
172 | + | (_ source)) | |
173 | + | (cdr lst))))) | |
174 | + | ||
175 | + | (define (sxml->reading lst) | |
176 | + | (let loop ((reading (make-reading '() '() '())) (lst lst)) | |
177 | + | (if (null? lst) | |
178 | + | reading | |
179 | + | (loop | |
180 | + | (match (car lst) | |
181 | + | (('reading r) | |
182 | + | (update-reading reading #:readings (cons r (reading-readings reading)))) | |
183 | + | (_ reading)) | |
184 | + | (cdr lst))))) | |
185 | + | ||
186 | + | (define (sxml->result sxml frq) | |
187 | + | (define (sxml->result-aux sxml) | |
188 | + | (let loop ((result (make-result 0 0 '() '() '())) (last-source #f) (lst sxml)) | |
189 | + | (if (null? lst) | |
190 | + | result | |
191 | + | (match (car lst) | |
192 | + | (('kanji kanji) | |
193 | + | (loop | |
194 | + | (update-result result #:kanjis (cons kanji (result-kanjis result))) | |
195 | + | last-source (cdr lst))) | |
196 | + | ((? reading? reading) | |
197 | + | (loop | |
198 | + | (update-result result #:readings (cons reading (result-readings result))) | |
199 | + | last-source (cdr lst))) | |
200 | + | ((? sense? sense) | |
201 | + | (loop | |
202 | + | (update-result result | |
203 | + | #:senses | |
204 | + | (cons | |
205 | + | (if last-source | |
206 | + | (update-sense sense | |
207 | + | #:sources (cons last-source (sense-sources sense))) | |
208 | + | sense) | |
209 | + | (result-senses result))) | |
210 | + | last-source (cdr lst))) | |
211 | + | ((? list? l) | |
212 | + | (loop (loop result last-source l) last-source (cdr lst))) | |
213 | + | (_ (loop result last-source (cdr lst))))))) | |
214 | + | (let* ((result (sxml->result-aux sxml)) | |
215 | + | (word (if (null? (result-kanjis result)) | |
216 | + | (car (reading-readings (car (result-readings result)))) | |
217 | + | (car (result-kanjis result)))) | |
218 | + | (entity (frequency-entity frq word)) | |
219 | + | (sense1 (car (result-senses result))) | |
220 | + | (sense1 (if entity | |
221 | + | (update-sense sense1 | |
222 | + | #:tags (cons (get-tag entity) (sense-tags sense1))) | |
223 | + | sense1)) | |
224 | + | (senses (cons sense1 (cdr (result-senses result))))) | |
225 | + | (update-result result | |
226 | + | #:points (tag:get-points (update-result result #:senses senses)) | |
227 | + | #:senses senses))) | |
228 | + | ||
229 | + | (define (gram-tag lst) | |
230 | + | (map | |
231 | + | (lambda (attr) | |
232 | + | (match attr | |
233 | + | (('suru . suru) | |
234 | + | `(tag ,(string-append "suru-" suru))) | |
235 | + | (('ni . ni) | |
236 | + | `(tag ,(string-append "ni-" ni))) | |
237 | + | (('shiku . shiku) | |
238 | + | `(tag ,(string-append "shiku-" shiku))) | |
239 | + | (('ku . ku) | |
240 | + | `(tag ,(string-append "ku-" ku))) | |
241 | + | (('to . to) | |
242 | + | `(tag ,(string-append "to-" to))) | |
243 | + | (('no . no) | |
244 | + | `(tag ,(string-append "no-" no))) | |
245 | + | (('nari . nari) | |
246 | + | `(tag ,(string-append "nari-" nari))) | |
247 | + | (('taru . taru) | |
248 | + | `(tag ,(string-append "taru-" taru))) | |
249 | + | (('level . level) | |
250 | + | `(tag ,(string-append "level-" level))) | |
251 | + | (('godanrow . tr) | |
252 | + | `(tag ,(string-append "godanrow-" tr))) | |
253 | + | (('onbin . tr) | |
254 | + | `(tag ,(string-append "onbin-" tr))) | |
255 | + | (('transitivity . tr) | |
256 | + | `(tag ,(string-append "transitivity-" tr))))) | |
257 | + | lst)) | |
258 | + | ||
259 | + | (define (sxml->string lst) | |
260 | + | (define (sub-loop loop tags result lst l) | |
261 | + | (let ((result (loop tags result l))) | |
262 | + | (if (list? result) | |
263 | + | (loop (append tags (filter list? result)) | |
264 | + | (apply string-append (filter string? result)) | |
265 | + | lst) | |
266 | + | (loop tags result lst)))) | |
267 | + | ||
268 | + | (let loop ((tags '()) (result "") (lst lst)) | |
269 | + | (match lst | |
270 | + | (() (if (null? tags) | |
271 | + | result | |
272 | + | (append tags (list result)))) | |
273 | + | ((? string? s) | |
274 | + | (loop tags (string-append result s) '())) | |
275 | + | (((? string? s) lst ...) | |
276 | + | (loop tags (string-append result s) lst)) | |
277 | + | ((('prior . _) lst ...) | |
278 | + | (loop tags result lst)) | |
279 | + | ((('options . _) lst ...) | |
280 | + | (loop tags result lst)) | |
281 | + | ((('firstname . _) lst ...) | |
282 | + | (loop tags result lst)) | |
283 | + | ((('ausn . _) lst ...) | |
284 | + | (loop tags result lst)) | |
285 | + | ((('lang . _) lst ...) | |
286 | + | (loop tags result lst)) | |
287 | + | ((('meta . _) lst ...) | |
288 | + | (loop tags result lst)) | |
289 | + | ((('genki . genki) lst ...) | |
290 | + | (loop (cons `(tag ,genki) tags) result lst)) | |
291 | + | ((('jlpt . jlpt) lst ...) | |
292 | + | (loop (cons `(tag ,(string-append "jlpt-" jlpt)) tags) result lst)) | |
293 | + | ((('ref . (? string? ref)) lst ...) | |
294 | + | (loop (cons `(ref ,ref) tags) result lst)) | |
295 | + | ((('jap l) lst ...) | |
296 | + | (sub-loop loop tags result lst l)) | |
297 | + | ((('foreign l) lst ...) | |
298 | + | (sub-loop loop tags result lst l)) | |
299 | + | ((('transcr l) lst ...) | |
300 | + | (sub-loop loop tags result lst l)) | |
301 | + | ((((? symbol? s) v) lst ...) | |
302 | + | (throw 'unsupported-symbol s v)) | |
303 | + | ((((? symbol? s) . v) lst ...) | |
304 | + | (throw 'unsupported-symbol s v)) | |
305 | + | (((? list? l) lst ...) | |
306 | + | (sub-loop loop tags result lst l))))) | |
307 | + | ||
308 | + | (define (sxml->element lst elem frq) | |
309 | + | (let ((elem (match elem | |
310 | + | ((_ . elem) elem) | |
311 | + | (_ elem)))) | |
312 | + | (match elem | |
313 | + | ('orth (let ((kanji (filter string? lst))) | |
314 | + | (if (null? kanji) #f `(kanji ,(car kanji))))) | |
315 | + | ('entry (sxml->result lst frq)) | |
316 | + | ('hira `(reading ,(car lst))) | |
317 | + | ('hatsuon `(hatsuon ,(car lst))) | |
318 | + | ('accent `(pitch ,(car lst))) | |
319 | + | ('reading (sxml->reading lst)) | |
320 | + | ('form lst) | |
321 | + | ('impli `(impli ,(car lst))) | |
322 | + | ('text (let loop ((text "") (lst lst)) | |
323 | + | (match lst | |
324 | + | (() text) | |
325 | + | ((('hasPrecedingSpace . _) lst ...) | |
326 | + | (loop (string-append " " text) lst)) | |
327 | + | ((('hasFollowingSpace . _) lst ...) | |
328 | + | (string-append (loop text lst) " ")) | |
329 | + | (((? string? s) lst ...) | |
330 | + | (loop (string-append text s) lst))))) | |
331 | + | ('famn (sxml->string lst)) | |
332 | + | ('expl (sxml->string lst)) | |
333 | + | ('expli (filter list? lst)) | |
334 | + | ('abbrev (filter list? lst)) | |
335 | + | ('token (filter string? lst)) | |
336 | + | ('tr (sxml->string lst)) | |
337 | + | ('transcr `(transcr ,lst)) | |
338 | + | ('trans | |
339 | + | (append (filter list? lst) | |
340 | + | (map (lambda (s) `(trans ,s)) (filter string? lst)))) | |
341 | + | ('jap `(jap ,lst)) | |
342 | + | ('emph lst) | |
343 | + | ('title (filter string? lst)) | |
344 | + | ('transl (filter string? lst)) | |
345 | + | ('topic (filter string? lst)) | |
346 | + | ('iron (filter string? lst)) | |
347 | + | ('specchar (filter string? lst)) | |
348 | + | ('scientif (filter string? lst)) | |
349 | + | ('wikide #f) | |
350 | + | ('wikija #f) | |
351 | + | ('link #f) | |
352 | + | ('ref (sxml->ref lst)) | |
353 | + | ('sref (sxml->ref lst)) | |
354 | + | ('etym (sxml->source lst)) | |
355 | + | ('literal (sxml->string (list "???" (sxml->string lst) "???"))) | |
356 | + | ('def (sxml->string (list "(" (sxml->string lst) ")"))) | |
357 | + | ('date (sxml->string (list "(" (sxml->string lst) ")"))) | |
358 | + | ('birthdeath (sxml->string (list "(" (sxml->string lst) ")"))) | |
359 | + | ('descr (sxml->string (list "(" (sxml->string lst) ")"))) | |
360 | + | ('bracket (sxml->string (list "[" (sxml->string lst) "]"))) | |
361 | + | ('foreign (if (null? lst) #f `(foreign ,(car lst)))) | |
362 | + | ('seasonword `(tag ,(string-append "season-" (assoc-ref lst 'type)))) | |
363 | + | ('usg (usg->tag lst)) | |
364 | + | ('sense (sxml->sense lst)) | |
365 | + | ('steinhaus (let ((ref (sxml->string lst))) | |
366 | + | (if (string? ref) | |
367 | + | `(ref ,(sxml->string lst)) | |
368 | + | (throw 'not-steinhaus ref)))) | |
369 | + | ('pos '()); TODO: actually find what tag to use | |
370 | + | ('wordcomponent (cons `(tag "wordcomponent") (gram-tag lst))) | |
371 | + | ('meishi (cons `(tag "meishi") (gram-tag lst))) | |
372 | + | ('setsuzokushi (cons `(tag "setsuzokushi") (gram-tag lst))) | |
373 | + | ('daimeishi (cons `(tag "daimeishi") (gram-tag lst))) | |
374 | + | ('doushi (cons `(tag "doushi") (gram-tag lst))) | |
375 | + | ('kandoushi (cons `(tag "kandoushi") (gram-tag lst))) | |
376 | + | ('keiyoudoushi (cons `(tag "keiyoudoushi") (gram-tag lst))) | |
377 | + | ('keiyoushi (cons `(tag "keiyoushi") (gram-tag lst))) | |
378 | + | ('fukushi (cons `(tag "fukushi") (gram-tag lst))) | |
379 | + | ('rengo (cons `(tag "rengo") (gram-tag lst))) | |
380 | + | ('suffix (cons `(tag "suffix") (gram-tag lst))) | |
381 | + | ('prefix (cons `(tag "prefix") (gram-tag lst))) | |
382 | + | ('kanji (cons `(tag "kanji") (gram-tag lst))) | |
383 | + | ('rentaishi (cons `(tag "rentaishi") (gram-tag lst))) | |
384 | + | ('specialcharacter (cons `(tag "specialcharacter") (gram-tag lst))) | |
385 | + | ('joshi (cons `(tag "joshi") (gram-tag lst))) | |
386 | + | ('fukujoshi (cons `(tag "fukujoshi") (gram-tag lst))) | |
387 | + | ('kakujoshi (cons `(tag "kakujoshi") (gram-tag lst))) | |
388 | + | ('kakarijoshi (cons `(tag "kakarijoshi") (gram-tag lst))) | |
389 | + | ('shuujoshi (cons `(tag "shuujoshi") (gram-tag lst))) | |
390 | + | ('setsuzokujoshi (cons `(tag "setsuzokujoshi") (gram-tag lst))) | |
391 | + | ('jokeiyoushi (cons `(tag "jokeiyoushi") (gram-tag lst))) | |
392 | + | ('jodoushi (cons `(tag "jodoushi") (gram-tag lst))) | |
393 | + | ('ruigos lst) | |
394 | + | ('ruigo (ruigo->ref lst)) | |
395 | + | ('gramGrp lst)))) | |
396 | + | ||
397 | + | (define (create-parser frq) | |
398 | + | (ssax:make-parser | |
399 | + | NEW-LEVEL-SEED | |
400 | + | (lambda (elem-gi attributes namespaces expected-content seed) | |
401 | + | attributes) | |
402 | + | ||
403 | + | FINISH-ELEMENT | |
404 | + | (lambda (elem-gi attributes namespaces parent-seed seed) | |
405 | + | (if (equal? elem-gi 'entries) | |
406 | + | seed | |
407 | + | (let* ((seed (reverse seed)) | |
408 | + | (element (sxml->element seed elem-gi frq))) | |
409 | + | (cons element parent-seed)))) | |
410 | + | ||
411 | + | CHAR-DATA-HANDLER | |
412 | + | (lambda (string1 string2 seed) | |
413 | + | (cons (string-append string1 string2) seed)))) | |
414 | + | ||
415 | + | (define (xml->results port frq) | |
416 | + | (let ((resuts (filter result? ((create-parser frq) port '())))) | |
417 | + | (sort results (lambda (a b) (> (result-points a) (result-points b)))))) |