Add wadoku dictionary modules
modules/nani/parse-result.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2020 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani parse-result) | |
| 20 | + | #:use-module (ice-9 binary-ports) | |
| 21 | + | #:use-module (rnrs bytevectors) | |
| 22 | + | #:export (parse-result-list | |
| 23 | + | parse-result-char | |
| 24 | + | parse-result-int | |
| 25 | + | parse-result-boolean | |
| 26 | + | parse-result-string)) | |
| 27 | + | ||
| 28 | + | (define* (parse-result-list port parse-result-element) | |
| 29 | + | (let ((size (bytevector-u16-ref (get-bytevector-n port 2) 0 (endianness big)))) | |
| 30 | + | (let loop ((result '()) (remaining size)) | |
| 31 | + | (if (= remaining 0) | |
| 32 | + | (reverse result) | |
| 33 | + | (loop (cons (parse-result-element port) result) (- remaining 1)))))) | |
| 34 | + | ||
| 35 | + | (define (parse-result-char port) | |
| 36 | + | (get-u8 port)) | |
| 37 | + | ||
| 38 | + | (define (parse-result-int port) | |
| 39 | + | (bytevector-u32-ref (get-bytevector-n port 4) 0 (endianness big))) | |
| 40 | + | ||
| 41 | + | (define (parse-result-boolean port) | |
| 42 | + | (= (get-u8 port) 1)) | |
| 43 | + | ||
| 44 | + | (define (parse-result-string port) | |
| 45 | + | (define (get-result-string port) | |
| 46 | + | (let loop ((lu8 '()) (char (get-u8 port))) | |
| 47 | + | (if (= char 0) | |
| 48 | + | lu8 | |
| 49 | + | (loop (cons char lu8) (get-u8 port))))) | |
| 50 | + | (utf8->string (u8-list->bytevector (reverse (get-result-string port))))) |
modules/nani/wadoku/entities.scm unknown status 1
| 1 | + | (define-module (nani wadoku entities) | |
| 2 | + | #:use-module (nani result) | |
| 3 | + | #:use-module ((nani tags) #:prefix tag:) | |
| 4 | + | #:use-module (ice-9 match) | |
| 5 | + | #:export (get-tag hint->tag) | |
| 6 | + | #:re-export (tag:get-points)) | |
| 7 | + | ||
| 8 | + | (define wadoku-entities | |
| 9 | + | `(("Aerodyn." . "aero") | |
| 10 | + | ("Agrochem." . "chem") | |
| 11 | + | ("??gypt. Gesch." . "hist") | |
| 12 | + | ("??gypt. Mythol." . "rel") | |
| 13 | + | ("Aktien" . "econ") | |
| 14 | + | ("Akupunktur" . "med") | |
| 15 | + | ("Akustik" . "music") | |
| 16 | + | ("American Football" . "sports") | |
| 17 | + | ("amerik. Gesch." . "hist") | |
| 18 | + | ("amerik. Pol." . "politics") | |
| 19 | + | ("amerik. Rechtsw." . "law") | |
| 20 | + | ("Anat." . "anat") | |
| 21 | + | ("Angeln" . "sports") | |
| 22 | + | ("Anime" . "work") | |
| 23 | + | ("Anthropol." . "antho") | |
| 24 | + | ("arab. Gesch." . "hist") | |
| 25 | + | ("Arch??ol." . "archeo") | |
| 26 | + | ("Archit." . "archit") | |
| 27 | + | ("Astrol." . "astron") | |
| 28 | + | ("Astron." . "astron") | |
| 29 | + | ("atmosph??r. Optik" . "physics") | |
| 30 | + | ("Atomphys." . "physics") | |
| 31 | + | ("Audio" . "music") | |
| 32 | + | ("Augenheilk." . "med") | |
| 33 | + | ("Autorennen" . "sports") | |
| 34 | + | ("Backw." . "food") | |
| 35 | + | ("Badminton" . "sports") | |
| 36 | + | ("Bahnhofsn." . "station") | |
| 37 | + | ("Bakteriol." . "biol") | |
| 38 | + | ("Ballet" . "music") | |
| 39 | + | ("Ballett" . "music") | |
| 40 | + | ("Ballsport" . "sports") | |
| 41 | + | ("Bankw." . "finc") | |
| 42 | + | ("Baseb." . "baseb") | |
| 43 | + | ("Baseball" . "baseb") | |
| 44 | + | ("Basketball" . "sports") | |
| 45 | + | ("Bauingenieurw." . "archit") | |
| 46 | + | ("Bautechnik" . "archit") | |
| 47 | + | ("Bauw." . "archit") | |
| 48 | + | ("Befestigungsw." . "archit") | |
| 49 | + | ("Beh??rde" . "law") | |
| 50 | + | ("Bergb." . "archit") | |
| 51 | + | ("Bergbau" . "archit") | |
| 52 | + | ("Bergn." . "place") | |
| 53 | + | ("Bergsteigen" . "sports") | |
| 54 | + | ("Bibel" . "Christn") | |
| 55 | + | ("Bibliotheksw." . "biblio") | |
| 56 | + | ("Bildhauerei" . "work") | |
| 57 | + | ("bild. Kunst" . "work") | |
| 58 | + | ("Billard" . "game") | |
| 59 | + | ("Biochem." . "biol") | |
| 60 | + | ("Biol." . "biol") | |
| 61 | + | ("Bodenkunde" . "agri") | |
| 62 | + | ("Bogenschie??en" . "sports") | |
| 63 | + | ("B??rse" . "econ") | |
| 64 | + | ("Bot." . "bot") | |
| 65 | + | ("Bowling" . "sports") | |
| 66 | + | ("Boxen" . "sports") | |
| 67 | + | ("Brauereiw." . "alco") | |
| 68 | + | ("Brettspiel" . "game") | |
| 69 | + | ("brit. Gesch." . "hist") | |
| 70 | + | ("brit. Rechtsw." . "law") | |
| 71 | + | ("Br??ckenbau" . "engr") | |
| 72 | + | ("Bsp." . "exp") | |
| 73 | + | ("Buchbinden" . "print") | |
| 74 | + | ("B??chereiw." . "biblio") | |
| 75 | + | ("Buchtitel" . "work") | |
| 76 | + | ("Buchw." . "biblio") | |
| 77 | + | ("Buddh." . "Buddh") | |
| 78 | + | ("Bud??" . "MA") | |
| 79 | + | ("B??d??" . "MA") | |
| 80 | + | ("Chem." . "chem") | |
| 81 | + | ("chin. Arch??ol." . "archeo") | |
| 82 | + | ("chin. Astron." . "astron") | |
| 83 | + | ("chin. Gesch." . "hist") | |
| 84 | + | ("chin. Gramm." . "ling") | |
| 85 | + | ("chin. Literaturw." . "work") | |
| 86 | + | ("chin. Mus." . "music") | |
| 87 | + | ("chin. Musikinstr." . "music") | |
| 88 | + | ("chin. Mythol." . "rel") | |
| 89 | + | ("chin. Philos." . "rel") | |
| 90 | + | ("chin. Phon." . "ling") | |
| 91 | + | ("chin. Poet." . "poet") | |
| 92 | + | ("chin. Pol." . "politics") | |
| 93 | + | ("chin. Theater" . "work") | |
| 94 | + | ("Chirurgie" . "med") | |
| 95 | + | ("Christent." . "Christn") | |
| 96 | + | ("Cocktail" . "alco") | |
| 97 | + | ("Computerling." . "comp") | |
| 98 | + | ("Curling" . "sports") | |
| 99 | + | ("Dammbau" . "engr") | |
| 100 | + | ("Demogr." . "demo") | |
| 101 | + | ("Diplomat." . "politics") | |
| 102 | + | ("Druckw." . "print") | |
| 103 | + | ("dtsch. Gesch." . "hist") | |
| 104 | + | ("dtsch. Gramm." . "ling") | |
| 105 | + | ("EDV" . "comp") | |
| 106 | + | ("Einzel-Kanji" . "ling") | |
| 107 | + | ("Streckenkilometertafel" . "train") | |
| 108 | + | ("Eishockey" . "sports") | |
| 109 | + | ("Eiskunstl." . "sports") | |
| 110 | + | ("Eiskunstlauf" . "sports") | |
| 111 | + | ("Eisschnelllauf" . "sports") | |
| 112 | + | ("Elektrochem." . "chem") | |
| 113 | + | ("Elektrot." . "engr") | |
| 114 | + | ("Embryol." . "biol") | |
| 115 | + | ("engl. Gesch." . "hist") | |
| 116 | + | ("Entwicklungsphysiol." . "biol") | |
| 117 | + | ("Entwicklungspsychol." . "psy") | |
| 118 | + | ("Ergonomie" . "engr") | |
| 119 | + | ("Ethik" . "philo") | |
| 120 | + | ("Ethnol." . "ethno") | |
| 121 | + | ("europ. Gesch." . "hist") | |
| 122 | + | ("Fahrrad" . "sports") | |
| 123 | + | ("Familienn." . "surname") | |
| 124 | + | ("Familienn.." . "surname") | |
| 125 | + | ("Farbe" . "color") | |
| 126 | + | ("F??rberei" . "build") | |
| 127 | + | ("Fechten" . "build") | |
| 128 | + | ("Fertigungstechnik" . "engr") | |
| 129 | + | ("Film" . "work") | |
| 130 | + | ("Filmtitel" . "work") | |
| 131 | + | ("Finanzw." . "finc") | |
| 132 | + | ("Firmenn." . "company") | |
| 133 | + | ("Fischerei" . "sports") | |
| 134 | + | ("Fischk." . "food") | |
| 135 | + | ("Flugw." . "engr") | |
| 136 | + | ("Flussn." . "place") | |
| 137 | + | ("Forstw." . "bot") | |
| 138 | + | ("Fotog." . "photo") | |
| 139 | + | ("Frachtw." . "trans") | |
| 140 | + | ("franz. Gesch." . "hist") | |
| 141 | + | ("Frisur" . "fashion") | |
| 142 | + | ("Funkt." . "physics") | |
| 143 | + | ("Fu??ball" . "sports") | |
| 144 | + | ||
| 145 | + | ("Gagaku" . "music") | |
| 146 | + | ("Garten" . "bota") | |
| 147 | + | ("Gartenk." . "bota") | |
| 148 | + | ("Gateball" . "sports") | |
| 149 | + | ("Gebietsn." . "place") | |
| 150 | + | ("Geldw." . "finc") | |
| 151 | + | ("Genetik" . "biol") | |
| 152 | + | ("Geogr." . "place") | |
| 153 | + | ("Geol." . "geol") | |
| 154 | + | ("Geom." . "geom") | |
| 155 | + | ("Geophys." . "physics") | |
| 156 | + | ("Gerberei" . "build") | |
| 157 | + | ("Gesch." . "hist") | |
| 158 | + | ("Gewichtheben" . "sports") | |
| 159 | + | ("Glasherst." . "build") | |
| 160 | + | ("Glaziol." . "sci") | |
| 161 | + | ("Go" . "game") | |
| 162 | + | ("Golf" . "sports") | |
| 163 | + | ("Gramm." . "ling") | |
| 164 | + | ("griech. Christent." . "Christn") | |
| 165 | + | ("griech. Gesch." . "hist") | |
| 166 | + | ("griech. Myth." . "rel") | |
| 167 | + | ("griech. Mythol." . "rel") | |
| 168 | + | ("griech. Theat." . "work") | |
| 169 | + | ("Gymnastik" . "sports") | |
| 170 | + | ("Handball" . "sports") | |
| 171 | + | ("Handel" . "econ") | |
| 172 | + | ("Handwerkskunst" . "build") | |
| 173 | + | ("Heraldik" . "sci") | |
| 174 | + | ("Hindu." . "rel") | |
| 175 | + | ("Hochsprung" . "sports") | |
| 176 | + | ("Hockey" . "sports") | |
| 177 | + | ("Holzbau" . "engr") | |
| 178 | + | ("Holzbearb." . "engr") | |
| 179 | + | ("Holzschnitt" . "engr") | |
| 180 | + | ("Hunderasse" . "zool") | |
| 181 | + | ("Hydrodynamik" . "physics") | |
| 182 | + | ("I Ging" . "rel") | |
| 183 | + | ("Ikebana" . "build") | |
| 184 | + | ("Immunol." . "med") | |
| 185 | + | ("ind. Gesch." . "hist") | |
| 186 | + | ("indian. Mythol." . "rel") | |
| 187 | + | ("ind. Mythol." . "rel") | |
| 188 | + | ("indones. Gesch." . "hist") | |
| 189 | + | ("ind. Philos." . "philo") | |
| 190 | + | ("Insektenk." . "zool") | |
| 191 | + | ("Inseln." . "place") | |
| 192 | + | ("Internet" . "comp") | |
| 193 | + | ("Islam" . "rel") | |
| 194 | + | ("islam. Mythol." . "rel") | |
| 195 | + | ("islam. Rechtsw." . "law") | |
| 196 | + | ("islam. Rel." . "rel") | |
| 197 | + | ("israel. Gesch." . "hist") | |
| 198 | + | ("ital. Gesch." . "hist") | |
| 199 | + | ("japan. Archit." . "archit") | |
| 200 | + | ("japan. Astrologie" . "aston") | |
| 201 | + | ("japan. bild. Kunst" . "work") | |
| 202 | + | ("japan. Christent." . "Christn") | |
| 203 | + | ("japan. Gesch." . "hist") | |
| 204 | + | ("japan. Gramm." . "ling") | |
| 205 | + | ("japan. Literaturw." . "work") | |
| 206 | + | ("japan. Med." . "med") | |
| 207 | + | ("japan. Mus." . "music") | |
| 208 | + | ("japan. Mythol." . "rel") | |
| 209 | + | ("japan. Philos." . "philo") | |
| 210 | + | ("japan. Pol." . "politics") | |
| 211 | + | ("japan. Schwimmen" . "sports") | |
| 212 | + | ("japan. Soziol." . "sci") | |
| 213 | + | ("japan. Sprachw." . "ling") | |
| 214 | + | ("J??ruri" . "music") | |
| 215 | + | ("Judent." . "rel") | |
| 216 | + | ("J??d??" . "MA") | |
| 217 | + | ("Kabuki" . "music") | |
| 218 | + | ("Kagura" . "music") | |
| 219 | + | ("Kalligraphie" . "build") | |
| 220 | + | ("Kanbun" . "ling") | |
| 221 | + | ("Kanji" . "ling") | |
| 222 | + | ("Kanp??" . "med") | |
| 223 | + | ("Karten" . "games") | |
| 224 | + | ("Kartenspiel" . "game") | |
| 225 | + | ("Kartogr." . "place") | |
| 226 | + | ("kath. Christent." . "Christn") | |
| 227 | + | ("Kend??" . "MA") | |
| 228 | + | ("Keramik" . "build") | |
| 229 | + | ("Kernphys." . "physics") | |
| 230 | + | ("Kfz-W." . "engr") | |
| 231 | + | ("KI" . "comp") | |
| 232 | + | ("Kleidung" . "fashion") | |
| 233 | + | ("Kochk." . "food") | |
| 234 | + | ("Konditoreiw." . "food") | |
| 235 | + | ("Konfuz." . "rel") | |
| 236 | + | ("korean. Gesch." . "hist") | |
| 237 | + | ("korean. Rel." . "rel") | |
| 238 | + | ("korean. Theater" . "work") | |
| 239 | + | ("Kosmetik" . "fashion") | |
| 240 | + | ("Kricket" . "sports") | |
| 241 | + | ("Kristallogr." . "physics") | |
| 242 | + | ("Krocket" . "sports") | |
| 243 | + | ("Kunst" . "work") | |
| 244 | + | ("Kunstgesch." . "hist") | |
| 245 | + | ("Kunst??? und Turmsprung" . "build") | |
| 246 | + | ("Ky??gen" . "music") | |
| 247 | + | ("Lack" . "work") | |
| 248 | + | ("L??ndern." . "place") | |
| 249 | + | ("Landw." . "agri") | |
| 250 | + | ("Leichtathl." . "sports") | |
| 251 | + | ("Liedtitel" . "work") | |
| 252 | + | ("Literaturw." . "work") | |
| 253 | + | ("Logik" . "math") | |
| 254 | + | ("Luftf." . "aero") | |
| 255 | + | ("Machinenb." . "engr") | |
| 256 | + | ("Mah-Jongg" . "mahj") | |
| 257 | + | ("Management" . "bus") | |
| 258 | + | ("Manga" . "work") | |
| 259 | + | ("m??nnl. Name" . "given") | |
| 260 | + | ("m??nnl. Vorn." . "given") | |
| 261 | + | ("m??nnl. Vorname" . "given") | |
| 262 | + | ("Mantik" . "rel") | |
| 263 | + | ("Marine" . "sail") | |
| 264 | + | ("Marketing" . "bus") | |
| 265 | + | ("Maschinenb." . "engr") | |
| 266 | + | ("Ma??" . "engr") | |
| 267 | + | ("Math." . "math") | |
| 268 | + | ("Mech." . "physics") | |
| 269 | + | ("Med." . "med") | |
| 270 | + | ("Meeresk." . "place") | |
| 271 | + | ("Meeresn." . "place") | |
| 272 | + | ("Meinungsforschung" . "politics") | |
| 273 | + | ("Messw." . "hobby") | |
| 274 | + | ("Metall." . "engr") | |
| 275 | + | ("Metallbearb." . "engr") | |
| 276 | + | ("Metallguss" . "engr") | |
| 277 | + | ("Meteor." . "meteor") | |
| 278 | + | ("mexikan. Kochk." . "food") | |
| 279 | + | ("Mikrobiol." . "biol") | |
| 280 | + | ("Milit." . "mil") | |
| 281 | + | ("Mineral." . "geol") | |
| 282 | + | ("M??belbau" . "build") | |
| 283 | + | ("Mode" . "fashion") | |
| 284 | + | ("Motorsport" . "sports") | |
| 285 | + | ("Mus." . "music") | |
| 286 | + | ("Muschelk." . "zool") | |
| 287 | + | ("Musikinstr." . "music") | |
| 288 | + | ("Mykol." . "bota") | |
| 289 | + | ("Mythol." . "rel") | |
| 290 | + | ("Nachrichtent." . "politics") | |
| 291 | + | ("N??hen" . "fashion") | |
| 292 | + | ("Naniwabushi" . "music") | |
| 293 | + | ("Naturmed." . "med") | |
| 294 | + | ("Naturph??n." . "physics") | |
| 295 | + | ("Naturphilos." . "philo") | |
| 296 | + | ("Neng??" . "hist") | |
| 297 | + | ("Netzwerktechnik" . "comp") | |
| 298 | + | ("Neurol." . "med") | |
| 299 | + | ("N??" . "music") | |
| 300 | + | ("nord. Mythol." . "rel") | |
| 301 | + | ("Numismatik" . "build") | |
| 302 | + | ("??kol." . "biol") | |
| 303 | + | ("Oper" . "music") | |
| 304 | + | ("Optik" . "physics") | |
| 305 | + | ("Org." . "organization") | |
| 306 | + | ("Origami" . "build") | |
| 307 | + | ("Ortsn." . "place") | |
| 308 | + | ("P??d." . "school") | |
| 309 | + | ("Pal??ontol." . "hist") | |
| 310 | + | ("Papierherst." . "build") | |
| 311 | + | ("Parapsych." . "psy") | |
| 312 | + | ("Patentw." . "engr") | |
| 313 | + | ("Pathol." . "med") | |
| 314 | + | ("pers. Gesch." . "hist") | |
| 315 | + | ("Pers??nlichk." . "person") | |
| 316 | + | ("Petrochem." . "chem") | |
| 317 | + | ("Pferderennen" . "sports") | |
| 318 | + | ("Pferdesport" . "sports") | |
| 319 | + | ("Pharm." . "med") | |
| 320 | + | ("Philos." . "philo") | |
| 321 | + | ("philos. Anthrop." . "anth") | |
| 322 | + | ("Phon." . "ling") | |
| 323 | + | ("Phys." . "physics") | |
| 324 | + | ("Physiol." . "anat") | |
| 325 | + | ("Poetik" . "poet") | |
| 326 | + | ("Pol." . "politics") | |
| 327 | + | ("Postw." . "telecom") | |
| 328 | + | ("Psych." . "psy") | |
| 329 | + | ("Quantenphys." . "physics") | |
| 330 | + | ("Radio" . "telecom") | |
| 331 | + | ("Radrennen" . "sports") | |
| 332 | + | ("Radsport" . "sports") | |
| 333 | + | ("Rakugo" . "music") | |
| 334 | + | ("Raumf." . "engr") | |
| 335 | + | ("Reaktort." . "engr") | |
| 336 | + | ("Rechnungsw." . "bus") | |
| 337 | + | ("Rechtsw." . "law") | |
| 338 | + | ("Redew." . "exp") | |
| 339 | + | ("Regelungstechnik" . "engr") | |
| 340 | + | ("Reiten" . "sports") | |
| 341 | + | ("Rel." . "rel") | |
| 342 | + | ("Rennsport" . "sports") | |
| 343 | + | ("Rhetorik" . "philo") | |
| 344 | + | ("Ringen" . "sports") | |
| 345 | + | ("r??m. Gesch." . "hist") | |
| 346 | + | ("r??m. Mythol." . "rel") | |
| 347 | + | ("Rudern" . "sports") | |
| 348 | + | ("Rugby" . "sports") | |
| 349 | + | ("Rundfunk" . "telecom") | |
| 350 | + | ("russ. Gesch." . "hist") | |
| 351 | + | ("R??stung" . "mil") | |
| 352 | + | ("Schach" . "game") | |
| 353 | + | ("Schie??sport" . "sports") | |
| 354 | + | ("Schiff" . "sail") | |
| 355 | + | ("Schiffbau" . "sail") | |
| 356 | + | ("Schifffahrt" . "sail") | |
| 357 | + | ("Schmuck" . "build") | |
| 358 | + | ("Schneckenk." . "zool") | |
| 359 | + | ("Schneiderei" . "fashion") | |
| 360 | + | ("Schuhe" . "fashion") | |
| 361 | + | ("Schule" . "school") | |
| 362 | + | ("Schwei??en" . "build") | |
| 363 | + | ("Schwert" . "mil") | |
| 364 | + | ("Schwimmen" . "sports") | |
| 365 | + | ("Seef." . "sail") | |
| 366 | + | ("Seen." . "place") | |
| 367 | + | ("Seerechtsw." . "law") | |
| 368 | + | ("Seew." . "sail") | |
| 369 | + | ("Segeln" . "sail") | |
| 370 | + | ("Seide" . "build") | |
| 371 | + | ("Seismol." . "physics") | |
| 372 | + | ("SF" . "work") | |
| 373 | + | ("Shingon" . "Buddh") | |
| 374 | + | ("Shint??" . "Shinto") | |
| 375 | + | ("Sh??gi" . "shogi") | |
| 376 | + | ("Ski" . "sports") | |
| 377 | + | ("Skispringen" . "sports") | |
| 378 | + | ("Softball" . "sports") | |
| 379 | + | ("Sozialpsych." . "psy") | |
| 380 | + | ("Soziol." . "sci") | |
| 381 | + | ("span. Gesch." . "hist") | |
| 382 | + | ("Spiel" . "game") | |
| 383 | + | ("Spiele-Titel" . "game") | |
| 384 | + | ("Spieltheorie" . "game") | |
| 385 | + | ("Spinnen" . "zool") | |
| 386 | + | ("Spinnenk." . "zool") | |
| 387 | + | ("Sport" . "sports") | |
| 388 | + | ("Sprache" . "ling") | |
| 389 | + | ("Sprachw." . "ling") | |
| 390 | + | ("Sprichw." . "ling") | |
| 391 | + | ("Stadtn." . "place") | |
| 392 | + | ("Stadtplanung" . "engr") | |
| 393 | + | ("Statistik" . "math") | |
| 394 | + | ("Steuerw." . "econ") | |
| 395 | + | ("Sticken" . "build") | |
| 396 | + | ("Stra??enbau" . "engr") | |
| 397 | + | ("Stricken" . "build") | |
| 398 | + | ("Strukturalismus" . "philo") | |
| 399 | + | ("Sum??" . "sumo") | |
| 400 | + | ("Surfen" . "sports") | |
| 401 | + | ("Systemanalyse" . "engr") | |
| 402 | + | ("Systemtheorie" . "engr") | |
| 403 | + | ("taiwan. Pol." . "politics") | |
| 404 | + | ("Tanz" . "art") | |
| 405 | + | ("Tanzen" . "art") | |
| 406 | + | ("Taoismus" . "rel") | |
| 407 | + | ("Tauchen" . "sports") | |
| 408 | + | ("Technik" . "engr") | |
| 409 | + | ("Tee" . "hobby") | |
| 410 | + | ("Telegrafie" . "telecom") | |
| 411 | + | ("Telekom." . "telecom") | |
| 412 | + | ("Tempeln." . "place") | |
| 413 | + | ("Tennis" . "sports") | |
| 414 | + | ("Tenn??" . "surname") | |
| 415 | + | ("Textilt." . "engr") | |
| 416 | + | ("Theat." . "work") | |
| 417 | + | ("Theol." . "rel") | |
| 418 | + | ("tibet. Buddh." . "Buddh") | |
| 419 | + | ("Tiefb." . "engr") | |
| 420 | + | ("Tiermed." . "med") | |
| 421 | + | ("Tischtennis" . "sports") | |
| 422 | + | ("Tourismus" . "hobby") | |
| 423 | + | ("Tunnelbau" . "engr") | |
| 424 | + | ("t??rk. Gesch." . "hist") | |
| 425 | + | ("Turnen" . "sports") | |
| 426 | + | ("TV" . "telecom") | |
| 427 | + | ("TV-Prog." . "telecom") | |
| 428 | + | ("U-Bahn" . "train") | |
| 429 | + | ("Umwelt" . "biol") | |
| 430 | + | ("Univ.-N." . "place") | |
| 431 | + | ("Verhaltensbiol." . "biol") | |
| 432 | + | ("Verkehrsw." . "engr") | |
| 433 | + | ("Verlagsn." . "company") | |
| 434 | + | ("Verlagsw." . "econ") | |
| 435 | + | ("Versicherungsw." . "bus") | |
| 436 | + | ("Video" . "telecom") | |
| 437 | + | ("Videospiel" . "game") | |
| 438 | + | ("vietnam. Gesch." . "hist") | |
| 439 | + | ("Vogelk." . "zool") | |
| 440 | + | ("V??lkerk." . "ethno") | |
| 441 | + | ("V??lkerr." . "law") | |
| 442 | + | ("Volleyball" . "sports") | |
| 443 | + | ("Vorgesch." . "hist") | |
| 444 | + | ("VWL" . "econ") | |
| 445 | + | ("Waffenk." . "mil") | |
| 446 | + | ("Walfang" . "hobby") | |
| 447 | + | ("Wasserball" . "sports") | |
| 448 | + | ("Wasserbau" . "engr") | |
| 449 | + | ("Wassersport" . "sports") | |
| 450 | + | ("Weben" . "build") | |
| 451 | + | ("weibl. Name" . "given") | |
| 452 | + | ("weibl. Vorn." . "given") | |
| 453 | + | ("Weichtierk." . "zool") | |
| 454 | + | ("Wein" . "alco") | |
| 455 | + | ("Werbung" . "bus") | |
| 456 | + | ("Werktitel" . "work") | |
| 457 | + | ("Wintersport" . "sports") | |
| 458 | + | ("Wirtsch." . "econ") | |
| 459 | + | ("Wrestling" . "sports") | |
| 460 | + | ("Wz." . "exp") | |
| 461 | + | ("Yoga" . "sports") | |
| 462 | + | ("Z??hlw." . "finc") | |
| 463 | + | ("Zahnmed." . "med") | |
| 464 | + | ("Zeitschriftenn." . "work") | |
| 465 | + | ("Zeitungsn." . "work") | |
| 466 | + | ("Zeitungsw." . "print") | |
| 467 | + | ("Zellbiol." . "biol") | |
| 468 | + | ("Zen" . "rel") | |
| 469 | + | ("Zirkus" . "hobby") | |
| 470 | + | ("Zitat" . "exp") | |
| 471 | + | ("Zollw." . "bus") | |
| 472 | + | ("Zool." . "biol"))) | |
| 473 | + | ||
| 474 | + | (define (hint->tag hint) | |
| 475 | + | (match hint | |
| 476 | + | ("Kansai-Dial." "ksb") | |
| 477 | + | ("Ky??to-Dial." "kyb") | |
| 478 | + | ("??saka-Dial." "osb") | |
| 479 | + | ("Kant??-Dial." "ktb") | |
| 480 | + | ("T??hoku-Dial." "thb") | |
| 481 | + | ("Tsugaru-Dial." "tsug") | |
| 482 | + | ("Ky??sh??-Dial." "kyu") | |
| 483 | + | ("Hokkaid??-Dial." "hob") | |
| 484 | + | ("altert??ml." "arch") | |
| 485 | + | ("arch." "arch") | |
| 486 | + | ("Edo-zeitl." "arch") | |
| 487 | + | ("ehrerb.-h??fl." "hon") | |
| 488 | + | ("h??fl.-ehrerb." "hon") | |
| 489 | + | ("besch.-h??fl." "hum") | |
| 490 | + | ("abwertend" "derog") | |
| 491 | + | ("etwas altmodisch" "obs") | |
| 492 | + | ("Gaunerjargon" "thief") | |
| 493 | + | ("Frauenspr. der Edo-Zeit" "arch") | |
| 494 | + | ("Geheimspr. der Gauner" "thief") | |
| 495 | + | ("hist." "hist") | |
| 496 | + | ("h??fl." "pol") | |
| 497 | + | ("kinderspr." "chn") | |
| 498 | + | ("M??nnersprache" "male") | |
| 499 | + | ("Medizinerjargon" "med") | |
| 500 | + | ("Milit??rjargon" "mil") | |
| 501 | + | ("obsol." "obs") | |
| 502 | + | ("onomat." "on-mim") | |
| 503 | + | ("poet." "poet") | |
| 504 | + | ("Polizeijargon" "law") | |
| 505 | + | ("Slang" "sl") | |
| 506 | + | ("veraltet" "arch") | |
| 507 | + | (_ #f))) | |
| 508 | + | ||
| 509 | + | (define (wadoku->tag-name wadoku-tag) | |
| 510 | + | (let ((tag (assoc-ref wadoku-entities wadoku-tag))) | |
| 511 | + | (or tag wadoku-tag))) | |
| 512 | + | ||
| 513 | + | (define (get-tag tag) | |
| 514 | + | (tag:get-tag (wadoku->tag-name tag))) |
modules/nani/wadoku/xml.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani wadoku xml) | |
| 20 | + | #:use-module (ice-9 match) | |
| 21 | + | #:use-module (ice-9 rdelim) | |
| 22 | + | #:use-module (nani frequency) | |
| 23 | + | #:use-module (nani result) | |
| 24 | + | #:use-module (nani wadoku entities) | |
| 25 | + | #:use-module (srfi srfi-9) | |
| 26 | + | #:use-module (sxml ssax) | |
| 27 | + | #:export (xml->results)) | |
| 28 | + | ||
| 29 | + | (define-record-type usage | |
| 30 | + | (make-usage type reg content) | |
| 31 | + | usage? | |
| 32 | + | (type usage-type) | |
| 33 | + | (reg usage-reg) | |
| 34 | + | (content usage-content)) | |
| 35 | + | ||
| 36 | + | (define (flatten lst) | |
| 37 | + | (match lst | |
| 38 | + | (() '()) | |
| 39 | + | ((head lst ...) | |
| 40 | + | (if (list? head) | |
| 41 | + | (append (flatten head) (flatten lst)) | |
| 42 | + | (cons head (flatten lst)))) | |
| 43 | + | (lst (list lst)))) | |
| 44 | + | ||
| 45 | + | (define* (update-usage usage #:key (type (usage-type usage)) | |
| 46 | + | (reg (usage-reg usage)) | |
| 47 | + | (content (usage-content usage))) | |
| 48 | + | (make-usage type reg content)) | |
| 49 | + | ||
| 50 | + | ;; TODO | |
| 51 | + | (define (usage->tag usage) | |
| 52 | + | (let ((content (usage-content usage)) | |
| 53 | + | (type (usage-type usage)) | |
| 54 | + | (reg (usage-reg usage))) | |
| 55 | + | `(,@(if (string-null? content) | |
| 56 | + | (match type | |
| 57 | + | ("" '())) | |
| 58 | + | (match type | |
| 59 | + | ("dom" `((tag ,content))) | |
| 60 | + | ("hint" (let ((tag (hint->tag content))) | |
| 61 | + | (if tag `(tag ,tag) '()))))) | |
| 62 | + | ,@(match reg | |
| 63 | + | ("" '()) | |
| 64 | + | ("lit" `((tag "litf"))) | |
| 65 | + | ("kinderspr." `((tag "chn"))) | |
| 66 | + | ("kleinkindspr." `((tag "baby"))) | |
| 67 | + | ("jugendspr." `((tag "young"))) | |
| 68 | + | ("coll" `((tag "col"))) | |
| 69 | + | ("vulg." `((tag "vulg"))) | |
| 70 | + | ("Slang" `((tag "sl"))) | |
| 71 | + | ("poet." `((tag "poet"))) | |
| 72 | + | ("hist." `((tag "hist"))) | |
| 73 | + | ("hist" `((tag "hist"))) | |
| 74 | + | ("obsol." `((tag "obs"))) | |
| 75 | + | ("veraltet" `((tag "arch"))) | |
| 76 | + | ("sch??lerspr." `((tag "stud"))) | |
| 77 | + | ("studentenspr." `((tag "stud"))) | |
| 78 | + | ("frauenspr." `((tag "fem"))) | |
| 79 | + | ("gaunerspr." `((tag "thief"))) | |
| 80 | + | ("geheimspr." `((tag "secret"))) | |
| 81 | + | ("hofdamenspr." `((tag "fem") (tag "pol"))) | |
| 82 | + | ("seemannsspr." `((tag "sail"))))))) | |
| 83 | + | ||
| 84 | + | (define (usg->tag lst) | |
| 85 | + | (define (usg->usage lst) | |
| 86 | + | (let loop ((result (make-usage "" "" "")) (lst lst)) | |
| 87 | + | (if (null? lst) | |
| 88 | + | result | |
| 89 | + | (loop | |
| 90 | + | (match (car lst) | |
| 91 | + | (('type . type) (update-usage result #:type type)) | |
| 92 | + | (('reg . reg) (update-usage result #:reg reg)) | |
| 93 | + | (((? symbol? s) . v) | |
| 94 | + | (throw 'unknow-usage-attr s v)) | |
| 95 | + | ((? string? content) (update-usage result #:content content)) | |
| 96 | + | ((? list? l) (loop result l)) | |
| 97 | + | (_ result)) | |
| 98 | + | (cdr lst))))) | |
| 99 | + | (usage->tag (usg->usage lst))) | |
| 100 | + | ||
| 101 | + | (define (ruigo->ref lst) | |
| 102 | + | (let loop ((ref #f) (lst lst)) | |
| 103 | + | (match lst | |
| 104 | + | (() ref) | |
| 105 | + | ((('id id) lst ...) | |
| 106 | + | (loop id lst)) | |
| 107 | + | (((a . b) lst ...) | |
| 108 | + | (loop (loop ref (list (list a b))) lst)) | |
| 109 | + | (((? list? l) lst ...) | |
| 110 | + | (loop (loop ref l) lst))))) | |
| 111 | + | ||
| 112 | + | (define (merge-senses s1 s2) | |
| 113 | + | (update-sense s2 | |
| 114 | + | #:references (append (sense-references s1) (sense-references s2)) | |
| 115 | + | #:tags (append (sense-tags s1) (sense-tags s2)) | |
| 116 | + | #:glosses (append (sense-glosses s1) (sense-glosses s2)))) | |
| 117 | + | ||
| 118 | + | (define (sxml->sense lst) | |
| 119 | + | (let loop ((sense (make-sense '() '() '() '() '() '() "ger")) (lst lst)) | |
| 120 | + | (if (null? lst) | |
| 121 | + | sense | |
| 122 | + | (loop | |
| 123 | + | (match (car lst) | |
| 124 | + | (#f sense) | |
| 125 | + | ((? sense? s) | |
| 126 | + | (merge-senses s sense)) | |
| 127 | + | ((? source? s) | |
| 128 | + | (update-sense sense #:sources (cons s (sense-sources sense)))) | |
| 129 | + | (('ref (? string? r)) | |
| 130 | + | (update-sense sense #:references (cons r (sense-references sense)))) | |
| 131 | + | (('tag (? string? r)) | |
| 132 | + | (update-sense sense #:tags (cons (get-tag r) (sense-tags sense)))) | |
| 133 | + | (('trans (? string? r)) | |
| 134 | + | (update-sense sense #:glosses (cons r (sense-glosses sense)))) | |
| 135 | + | (('related . _) sense) | |
| 136 | + | (('transcr . _) sense) | |
| 137 | + | (('pitch . _) sense) | |
| 138 | + | (((? symbol? s) v) | |
| 139 | + | (throw 'unknown-symbol s v)) | |
| 140 | + | ((? list? l) (loop sense l)) | |
| 141 | + | ((? string? _) sense)) | |
| 142 | + | (cdr lst))))) | |
| 143 | + | ||
| 144 | + | ;; TODO | |
| 145 | + | (define (sxml->ref lst) | |
| 146 | + | (define (sxml->ref-name lst) | |
| 147 | + | (let loop ((result #f) (lst lst)) | |
| 148 | + | (if (null? lst) | |
| 149 | + | result | |
| 150 | + | (loop | |
| 151 | + | (match (car lst) | |
| 152 | + | (('id . id) id) | |
| 153 | + | (((? symbol? s) . _) result) | |
| 154 | + | (((? symbol? s) _) result) | |
| 155 | + | ((? list? l) (loop result l)) | |
| 156 | + | (_ result)) | |
| 157 | + | (cdr lst))))) | |
| 158 | + | (let ((ref (sxml->ref-name lst))) | |
| 159 | + | (if (string? ref) | |
| 160 | + | `(ref ,ref) | |
| 161 | + | (throw 'no-ref ref 'from lst)))) | |
| 162 | + | ||
| 163 | + | (define (sxml->source lst) | |
| 164 | + | (let loop ((source (make-source '() #f "" "")) (lst lst)) | |
| 165 | + | (if (null? lst) | |
| 166 | + | source | |
| 167 | + | (loop | |
| 168 | + | (match (car lst) | |
| 169 | + | (('impli impli) (update-source source #:lang impli)) | |
| 170 | + | (('foreign foreign) (update-source source #:content foreign)) | |
| 171 | + | ((? list? l) (loop source l)) | |
| 172 | + | (_ source)) | |
| 173 | + | (cdr lst))))) | |
| 174 | + | ||
| 175 | + | (define (sxml->reading lst) | |
| 176 | + | (let loop ((reading (make-reading '() '() '())) (lst lst)) | |
| 177 | + | (if (null? lst) | |
| 178 | + | reading | |
| 179 | + | (loop | |
| 180 | + | (match (car lst) | |
| 181 | + | (('reading r) | |
| 182 | + | (update-reading reading #:readings (cons r (reading-readings reading)))) | |
| 183 | + | (_ reading)) | |
| 184 | + | (cdr lst))))) | |
| 185 | + | ||
| 186 | + | (define (sxml->result sxml frq) | |
| 187 | + | (define (sxml->result-aux sxml) | |
| 188 | + | (let loop ((result (make-result 0 0 '() '() '())) (last-source #f) (lst sxml)) | |
| 189 | + | (if (null? lst) | |
| 190 | + | result | |
| 191 | + | (match (car lst) | |
| 192 | + | (('kanji kanji) | |
| 193 | + | (loop | |
| 194 | + | (update-result result #:kanjis (cons kanji (result-kanjis result))) | |
| 195 | + | last-source (cdr lst))) | |
| 196 | + | ((? reading? reading) | |
| 197 | + | (loop | |
| 198 | + | (update-result result #:readings (cons reading (result-readings result))) | |
| 199 | + | last-source (cdr lst))) | |
| 200 | + | ((? sense? sense) | |
| 201 | + | (loop | |
| 202 | + | (update-result result | |
| 203 | + | #:senses | |
| 204 | + | (cons | |
| 205 | + | (if last-source | |
| 206 | + | (update-sense sense | |
| 207 | + | #:sources (cons last-source (sense-sources sense))) | |
| 208 | + | sense) | |
| 209 | + | (result-senses result))) | |
| 210 | + | last-source (cdr lst))) | |
| 211 | + | ((? list? l) | |
| 212 | + | (loop (loop result last-source l) last-source (cdr lst))) | |
| 213 | + | (_ (loop result last-source (cdr lst))))))) | |
| 214 | + | (let* ((result (sxml->result-aux sxml)) | |
| 215 | + | (word (if (null? (result-kanjis result)) | |
| 216 | + | (car (reading-readings (car (result-readings result)))) | |
| 217 | + | (car (result-kanjis result)))) | |
| 218 | + | (entity (frequency-entity frq word)) | |
| 219 | + | (sense1 (car (result-senses result))) | |
| 220 | + | (sense1 (if entity | |
| 221 | + | (update-sense sense1 | |
| 222 | + | #:tags (cons (get-tag entity) (sense-tags sense1))) | |
| 223 | + | sense1)) | |
| 224 | + | (senses (cons sense1 (cdr (result-senses result))))) | |
| 225 | + | (update-result result | |
| 226 | + | #:points (tag:get-points (update-result result #:senses senses)) | |
| 227 | + | #:senses senses))) | |
| 228 | + | ||
| 229 | + | (define (gram-tag lst) | |
| 230 | + | (map | |
| 231 | + | (lambda (attr) | |
| 232 | + | (match attr | |
| 233 | + | (('suru . suru) | |
| 234 | + | `(tag ,(string-append "suru-" suru))) | |
| 235 | + | (('ni . ni) | |
| 236 | + | `(tag ,(string-append "ni-" ni))) | |
| 237 | + | (('shiku . shiku) | |
| 238 | + | `(tag ,(string-append "shiku-" shiku))) | |
| 239 | + | (('ku . ku) | |
| 240 | + | `(tag ,(string-append "ku-" ku))) | |
| 241 | + | (('to . to) | |
| 242 | + | `(tag ,(string-append "to-" to))) | |
| 243 | + | (('no . no) | |
| 244 | + | `(tag ,(string-append "no-" no))) | |
| 245 | + | (('nari . nari) | |
| 246 | + | `(tag ,(string-append "nari-" nari))) | |
| 247 | + | (('taru . taru) | |
| 248 | + | `(tag ,(string-append "taru-" taru))) | |
| 249 | + | (('level . level) | |
| 250 | + | `(tag ,(string-append "level-" level))) | |
| 251 | + | (('godanrow . tr) | |
| 252 | + | `(tag ,(string-append "godanrow-" tr))) | |
| 253 | + | (('onbin . tr) | |
| 254 | + | `(tag ,(string-append "onbin-" tr))) | |
| 255 | + | (('transitivity . tr) | |
| 256 | + | `(tag ,(string-append "transitivity-" tr))))) | |
| 257 | + | lst)) | |
| 258 | + | ||
| 259 | + | (define (sxml->string lst) | |
| 260 | + | (define (sub-loop loop tags result lst l) | |
| 261 | + | (let ((result (loop tags result l))) | |
| 262 | + | (if (list? result) | |
| 263 | + | (loop (append tags (filter list? result)) | |
| 264 | + | (apply string-append (filter string? result)) | |
| 265 | + | lst) | |
| 266 | + | (loop tags result lst)))) | |
| 267 | + | ||
| 268 | + | (let loop ((tags '()) (result "") (lst lst)) | |
| 269 | + | (match lst | |
| 270 | + | (() (if (null? tags) | |
| 271 | + | result | |
| 272 | + | (append tags (list result)))) | |
| 273 | + | ((? string? s) | |
| 274 | + | (loop tags (string-append result s) '())) | |
| 275 | + | (((? string? s) lst ...) | |
| 276 | + | (loop tags (string-append result s) lst)) | |
| 277 | + | ((('prior . _) lst ...) | |
| 278 | + | (loop tags result lst)) | |
| 279 | + | ((('options . _) lst ...) | |
| 280 | + | (loop tags result lst)) | |
| 281 | + | ((('firstname . _) lst ...) | |
| 282 | + | (loop tags result lst)) | |
| 283 | + | ((('ausn . _) lst ...) | |
| 284 | + | (loop tags result lst)) | |
| 285 | + | ((('lang . _) lst ...) | |
| 286 | + | (loop tags result lst)) | |
| 287 | + | ((('meta . _) lst ...) | |
| 288 | + | (loop tags result lst)) | |
| 289 | + | ((('genki . genki) lst ...) | |
| 290 | + | (loop (cons `(tag ,genki) tags) result lst)) | |
| 291 | + | ((('jlpt . jlpt) lst ...) | |
| 292 | + | (loop (cons `(tag ,(string-append "jlpt-" jlpt)) tags) result lst)) | |
| 293 | + | ((('ref . (? string? ref)) lst ...) | |
| 294 | + | (loop (cons `(ref ,ref) tags) result lst)) | |
| 295 | + | ((('jap l) lst ...) | |
| 296 | + | (sub-loop loop tags result lst l)) | |
| 297 | + | ((('foreign l) lst ...) | |
| 298 | + | (sub-loop loop tags result lst l)) | |
| 299 | + | ((('transcr l) lst ...) | |
| 300 | + | (sub-loop loop tags result lst l)) | |
| 301 | + | ((((? symbol? s) v) lst ...) | |
| 302 | + | (throw 'unsupported-symbol s v)) | |
| 303 | + | ((((? symbol? s) . v) lst ...) | |
| 304 | + | (throw 'unsupported-symbol s v)) | |
| 305 | + | (((? list? l) lst ...) | |
| 306 | + | (sub-loop loop tags result lst l))))) | |
| 307 | + | ||
| 308 | + | (define (sxml->element lst elem frq) | |
| 309 | + | (let ((elem (match elem | |
| 310 | + | ((_ . elem) elem) | |
| 311 | + | (_ elem)))) | |
| 312 | + | (match elem | |
| 313 | + | ('orth (let ((kanji (filter string? lst))) | |
| 314 | + | (if (null? kanji) #f `(kanji ,(car kanji))))) | |
| 315 | + | ('entry (sxml->result lst frq)) | |
| 316 | + | ('hira `(reading ,(car lst))) | |
| 317 | + | ('hatsuon `(hatsuon ,(car lst))) | |
| 318 | + | ('accent `(pitch ,(car lst))) | |
| 319 | + | ('reading (sxml->reading lst)) | |
| 320 | + | ('form lst) | |
| 321 | + | ('impli `(impli ,(car lst))) | |
| 322 | + | ('text (let loop ((text "") (lst lst)) | |
| 323 | + | (match lst | |
| 324 | + | (() text) | |
| 325 | + | ((('hasPrecedingSpace . _) lst ...) | |
| 326 | + | (loop (string-append " " text) lst)) | |
| 327 | + | ((('hasFollowingSpace . _) lst ...) | |
| 328 | + | (string-append (loop text lst) " ")) | |
| 329 | + | (((? string? s) lst ...) | |
| 330 | + | (loop (string-append text s) lst))))) | |
| 331 | + | ('famn (sxml->string lst)) | |
| 332 | + | ('expl (sxml->string lst)) | |
| 333 | + | ('expli (filter list? lst)) | |
| 334 | + | ('abbrev (filter list? lst)) | |
| 335 | + | ('token (filter string? lst)) | |
| 336 | + | ('tr (sxml->string lst)) | |
| 337 | + | ('transcr `(transcr ,lst)) | |
| 338 | + | ('trans | |
| 339 | + | (append (filter list? lst) | |
| 340 | + | (map (lambda (s) `(trans ,s)) (filter string? lst)))) | |
| 341 | + | ('jap `(jap ,lst)) | |
| 342 | + | ('emph lst) | |
| 343 | + | ('title (filter string? lst)) | |
| 344 | + | ('transl (filter string? lst)) | |
| 345 | + | ('topic (filter string? lst)) | |
| 346 | + | ('iron (filter string? lst)) | |
| 347 | + | ('specchar (filter string? lst)) | |
| 348 | + | ('scientif (filter string? lst)) | |
| 349 | + | ('wikide #f) | |
| 350 | + | ('wikija #f) | |
| 351 | + | ('link #f) | |
| 352 | + | ('ref (sxml->ref lst)) | |
| 353 | + | ('sref (sxml->ref lst)) | |
| 354 | + | ('etym (sxml->source lst)) | |
| 355 | + | ('literal (sxml->string (list "???" (sxml->string lst) "???"))) | |
| 356 | + | ('def (sxml->string (list "(" (sxml->string lst) ")"))) | |
| 357 | + | ('date (sxml->string (list "(" (sxml->string lst) ")"))) | |
| 358 | + | ('birthdeath (sxml->string (list "(" (sxml->string lst) ")"))) | |
| 359 | + | ('descr (sxml->string (list "(" (sxml->string lst) ")"))) | |
| 360 | + | ('bracket (sxml->string (list "[" (sxml->string lst) "]"))) | |
| 361 | + | ('foreign (if (null? lst) #f `(foreign ,(car lst)))) | |
| 362 | + | ('seasonword `(tag ,(string-append "season-" (assoc-ref lst 'type)))) | |
| 363 | + | ('usg (usg->tag lst)) | |
| 364 | + | ('sense (sxml->sense lst)) | |
| 365 | + | ('steinhaus (let ((ref (sxml->string lst))) | |
| 366 | + | (if (string? ref) | |
| 367 | + | `(ref ,(sxml->string lst)) | |
| 368 | + | (throw 'not-steinhaus ref)))) | |
| 369 | + | ('pos '()); TODO: actually find what tag to use | |
| 370 | + | ('wordcomponent (cons `(tag "wordcomponent") (gram-tag lst))) | |
| 371 | + | ('meishi (cons `(tag "meishi") (gram-tag lst))) | |
| 372 | + | ('setsuzokushi (cons `(tag "setsuzokushi") (gram-tag lst))) | |
| 373 | + | ('daimeishi (cons `(tag "daimeishi") (gram-tag lst))) | |
| 374 | + | ('doushi (cons `(tag "doushi") (gram-tag lst))) | |
| 375 | + | ('kandoushi (cons `(tag "kandoushi") (gram-tag lst))) | |
| 376 | + | ('keiyoudoushi (cons `(tag "keiyoudoushi") (gram-tag lst))) | |
| 377 | + | ('keiyoushi (cons `(tag "keiyoushi") (gram-tag lst))) | |
| 378 | + | ('fukushi (cons `(tag "fukushi") (gram-tag lst))) | |
| 379 | + | ('rengo (cons `(tag "rengo") (gram-tag lst))) | |
| 380 | + | ('suffix (cons `(tag "suffix") (gram-tag lst))) | |
| 381 | + | ('prefix (cons `(tag "prefix") (gram-tag lst))) | |
| 382 | + | ('kanji (cons `(tag "kanji") (gram-tag lst))) | |
| 383 | + | ('rentaishi (cons `(tag "rentaishi") (gram-tag lst))) | |
| 384 | + | ('specialcharacter (cons `(tag "specialcharacter") (gram-tag lst))) | |
| 385 | + | ('joshi (cons `(tag "joshi") (gram-tag lst))) | |
| 386 | + | ('fukujoshi (cons `(tag "fukujoshi") (gram-tag lst))) | |
| 387 | + | ('kakujoshi (cons `(tag "kakujoshi") (gram-tag lst))) | |
| 388 | + | ('kakarijoshi (cons `(tag "kakarijoshi") (gram-tag lst))) | |
| 389 | + | ('shuujoshi (cons `(tag "shuujoshi") (gram-tag lst))) | |
| 390 | + | ('setsuzokujoshi (cons `(tag "setsuzokujoshi") (gram-tag lst))) | |
| 391 | + | ('jokeiyoushi (cons `(tag "jokeiyoushi") (gram-tag lst))) | |
| 392 | + | ('jodoushi (cons `(tag "jodoushi") (gram-tag lst))) | |
| 393 | + | ('ruigos lst) | |
| 394 | + | ('ruigo (ruigo->ref lst)) | |
| 395 | + | ('gramGrp lst)))) | |
| 396 | + | ||
| 397 | + | (define (create-parser frq) | |
| 398 | + | (ssax:make-parser | |
| 399 | + | NEW-LEVEL-SEED | |
| 400 | + | (lambda (elem-gi attributes namespaces expected-content seed) | |
| 401 | + | attributes) | |
| 402 | + | ||
| 403 | + | FINISH-ELEMENT | |
| 404 | + | (lambda (elem-gi attributes namespaces parent-seed seed) | |
| 405 | + | (if (equal? elem-gi 'entries) | |
| 406 | + | seed | |
| 407 | + | (let* ((seed (reverse seed)) | |
| 408 | + | (element (sxml->element seed elem-gi frq))) | |
| 409 | + | (cons element parent-seed)))) | |
| 410 | + | ||
| 411 | + | CHAR-DATA-HANDLER | |
| 412 | + | (lambda (string1 string2 seed) | |
| 413 | + | (cons (string-append string1 string2) seed)))) | |
| 414 | + | ||
| 415 | + | (define (xml->results port frq) | |
| 416 | + | (let ((resuts (filter result? ((create-parser frq) port '())))) | |
| 417 | + | (sort results (lambda (a b) (> (result-points a) (result-points b)))))) |