Add jmdict data builder
modules/nani/jmdict/serialize.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani jmdict serialize) | |
| 20 | + | #:use-module (nani result) | |
| 21 | + | #:use-module (nani trie) | |
| 22 | + | #:use-module (rnrs bytevectors) | |
| 23 | + | #:export (serialize-jmdict)) | |
| 24 | + | ||
| 25 | + | (define (merge-bvs bvs) | |
| 26 | + | (let* ((size (apply + (map bytevector-length bvs))) | |
| 27 | + | (bv (make-bytevector size 0))) | |
| 28 | + | (let loop ((bvs bvs) (pos 0)) | |
| 29 | + | (unless (null? bvs) | |
| 30 | + | (let ((sz (bytevector-length (car bvs)))) | |
| 31 | + | (bytevector-copy! (car bvs) 0 bv pos sz) | |
| 32 | + | (loop (cdr bvs) (+ pos sz))))) | |
| 33 | + | bv)) | |
| 34 | + | ||
| 35 | + | (define (serialize-trie trie results pos) | |
| 36 | + | (define (serialize-trie-aux transitions pos) | |
| 37 | + | (let loop ((pos pos) | |
| 38 | + | (trs transitions) | |
| 39 | + | (bvs '())) | |
| 40 | + | (if (null? trs) | |
| 41 | + | (cons pos bvs) | |
| 42 | + | (let* ((next-trie (cdr (car trs))) | |
| 43 | + | (bv (serialize-trie next-trie results pos)) | |
| 44 | + | (pos (car bv)) | |
| 45 | + | (bv (cdr bv))) | |
| 46 | + | (loop pos (cdr trs) (append bvs bv)))))) | |
| 47 | + | (trie-position-set! trie pos) | |
| 48 | + | (let* ((vals-sz (list-size (trie-vals trie) int-size)) | |
| 49 | + | (trs-sz (list-size (trie-transitions trie) (const 5))) | |
| 50 | + | (sz (+ vals-sz trs-sz)) | |
| 51 | + | (bv (make-bytevector sz 0))) | |
| 52 | + | (serialize-list (map (lambda (pos) (result-position (array-ref results pos))) | |
| 53 | + | (trie-vals trie)) | |
| 54 | + | serialize-int 0 bv) | |
| 55 | + | (let* ((bvs (serialize-trie-aux | |
| 56 | + | (trie-transitions trie) | |
| 57 | + | (+ pos (bytevector-length bv)))) | |
| 58 | + | (next-pos (car bvs)) | |
| 59 | + | (bvs (cdr bvs))) | |
| 60 | + | (serialize-list (trie-transitions trie) serialize-pointer | |
| 61 | + | vals-sz bv) | |
| 62 | + | (cons next-pos (cons bv bvs))))) | |
| 63 | + | ||
| 64 | + | (define (serialize-pointer ptr pos bv) | |
| 65 | + | (bytevector-u8-set! bv pos (car ptr)) | |
| 66 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness little)) | |
| 67 | + | (+ pos 5)) | |
| 68 | + | ||
| 69 | + | (define (serialize-int int pos bv) | |
| 70 | + | (bytevector-u32-set! bv pos int (endianness little)) | |
| 71 | + | (+ pos 4)) | |
| 72 | + | (define int-size (const 4)) | |
| 73 | + | ||
| 74 | + | (define (serialize-boolean bool pos bv) | |
| 75 | + | (bytevector-u8-set! bv pos (if bool 1 0)) | |
| 76 | + | (+ pos 1)) | |
| 77 | + | (define boolean-size (const 1)) | |
| 78 | + | ||
| 79 | + | (define (serialize-string str pos bv) | |
| 80 | + | (let ((sbv (string->utf8 str))) | |
| 81 | + | (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness little)) | |
| 82 | + | (bytevector-copy! sbv 0 bv (+ pos 4) (bytevector-length sbv)) | |
| 83 | + | (+ pos 4 (bytevector-length sbv)))) | |
| 84 | + | (define (string-size str) | |
| 85 | + | (let ((sbv (string->utf8 str))) | |
| 86 | + | (+ 4 (bytevector-length sbv)))) | |
| 87 | + | ||
| 88 | + | (define* (serialize-list lst serialize pos bv #:key (size? #t)) | |
| 89 | + | (when (not (list? lst)) (throw 'not-list lst)) | |
| 90 | + | (when size? | |
| 91 | + | (bytevector-u32-set! bv pos (length lst) (endianness little))) | |
| 92 | + | (let loop ((lst lst) (pos (+ pos (if size? 4 0)))) | |
| 93 | + | (if (null? lst) | |
| 94 | + | pos | |
| 95 | + | (loop (cdr lst) (serialize (car lst) pos bv))))) | |
| 96 | + | (define* (list-size lst size #:key (size? #t)) | |
| 97 | + | (when (not (list? lst)) (throw 'not-list lst)) | |
| 98 | + | (apply + (if size? 4 0) (map size lst))) | |
| 99 | + | ||
| 100 | + | (define (serialize-source source pos bv) | |
| 101 | + | (when (not (source? source)) (throw 'not-source source)) | |
| 102 | + | (let* ((pos (serialize-list (source-content source) serialize-string pos bv)) | |
| 103 | + | (pos (serialize-boolean (source-wasei? source) pos bv)) | |
| 104 | + | (pos (serialize-string (source-type source) pos bv)) | |
| 105 | + | (pos (serialize-string (source-lang source) pos bv))) | |
| 106 | + | pos)) | |
| 107 | + | (define (source-size source) | |
| 108 | + | (when (not (source? source)) (throw 'not-source source)) | |
| 109 | + | (+ (list-size (source-content source) string-size) | |
| 110 | + | (boolean-size (source-wasei? source)) | |
| 111 | + | (string-size (source-type source)) | |
| 112 | + | (string-size (source-lang source)))) | |
| 113 | + | ||
| 114 | + | (define (serialize-reading reading pos bv) | |
| 115 | + | (when (not (reading? reading)) (throw 'not-reading reading)) | |
| 116 | + | (let* ((pos (serialize-list (reading-kanjis reading) serialize-string pos bv)) | |
| 117 | + | (pos (serialize-list (reading-info reading) serialize-string pos bv)) | |
| 118 | + | (pos (serialize-list (reading-readings reading) serialize-string pos bv))) | |
| 119 | + | pos)) | |
| 120 | + | (define (reading-size reading) | |
| 121 | + | (when (not (reading? reading)) (throw 'not-reading reading)) | |
| 122 | + | (+ (list-size (reading-kanjis reading) string-size) | |
| 123 | + | (list-size (reading-info reading) string-size) | |
| 124 | + | (list-size (reading-readings reading) string-size))) | |
| 125 | + | ||
| 126 | + | (define (serialize-sense sense pos bv) | |
| 127 | + | (when (not (sense? sense)) (throw 'not-sense sense)) | |
| 128 | + | (let* ((pos (serialize-list (sense-references sense) serialize-string pos bv)) | |
| 129 | + | (pos (serialize-list (sense-limits sense) serialize-string pos bv)) | |
| 130 | + | (pos (serialize-list (sense-infos sense) serialize-string pos bv)) | |
| 131 | + | (pos (serialize-list (sense-sources sense) serialize-source pos bv)) | |
| 132 | + | (pos (serialize-list (sense-tags sense) serialize-string pos bv)) | |
| 133 | + | (pos (serialize-list (sense-glosses sense) serialize-string pos bv)) | |
| 134 | + | (pos (serialize-string (sense-language sense) pos bv))) | |
| 135 | + | pos)) | |
| 136 | + | (define (sense-size sense) | |
| 137 | + | (when (not (sense? sense)) (throw 'not-sense sense)) | |
| 138 | + | (+ (list-size (sense-references sense) string-size) | |
| 139 | + | (list-size (sense-limits sense) string-size) | |
| 140 | + | (list-size (sense-infos sense) string-size) | |
| 141 | + | (list-size (sense-sources sense) source-size) | |
| 142 | + | (list-size (sense-tags sense) string-size) | |
| 143 | + | (list-size (sense-glosses sense) string-size) | |
| 144 | + | (string-size (sense-language sense)))) | |
| 145 | + | ||
| 146 | + | (define (serialize-result result pos bv) | |
| 147 | + | (when (not (result? result)) (throw 'not-result result)) | |
| 148 | + | (result-position-set! result pos) | |
| 149 | + | (let* ((pos (serialize-list (result-kanjis result) serialize-string pos bv)) | |
| 150 | + | (pos (serialize-list (result-readings result) serialize-reading pos bv)) | |
| 151 | + | (pos (serialize-list (result-senses result) serialize-sense pos bv))) | |
| 152 | + | pos)) | |
| 153 | + | (define (result-size result) | |
| 154 | + | (when (not (result? result)) (throw 'not-result result)) | |
| 155 | + | (+ (list-size (result-kanjis result) string-size) | |
| 156 | + | (list-size (result-readings result) reading-size) | |
| 157 | + | (list-size (result-senses result) sense-size))) | |
| 158 | + | ||
| 159 | + | (define (serialize-jmdict results kanji-trie reading-trie sense-trie) | |
| 160 | + | (let* ((header (string->utf8 "NANI_JMDICT")) | |
| 161 | + | (header-size (bytevector-length header)) | |
| 162 | + | (pointers (make-bytevector 12 0)) | |
| 163 | + | (results-size (list-size results result-size #:size? #f)) | |
| 164 | + | (results-bv (make-bytevector (+ header-size 12 results-size 0)))) | |
| 165 | + | (serialize-list results serialize-result (+ header-size 12) results-bv #:size? #f) | |
| 166 | + | (let* ((results (list->array 1 results)) | |
| 167 | + | (pos (+ header-size 12 (bytevector-length results-bv))) | |
| 168 | + | (kanji-bvs (serialize-trie kanji-trie results pos)) | |
| 169 | + | (pos (car kanji-bvs)) | |
| 170 | + | (reading-bvs (serialize-trie reading-trie results pos)) | |
| 171 | + | (pos (car reading-bvs)) | |
| 172 | + | (meaning-bvs (serialize-trie sense-trie results pos))) | |
| 173 | + | (bytevector-copy! header 0 results-bv 0 header-size) | |
| 174 | + | (bytevector-copy! pointers 0 results-bv header-size 12) | |
| 175 | + | (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs) | |
| 176 | + | (cdr meaning-bvs)))))) |
modules/nani/jmdict/trie.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani jmdict trie) | |
| 20 | + | #:use-module (nani trie) | |
| 21 | + | #:use-module (nani result) | |
| 22 | + | #:use-module (rnrs bytevectors) | |
| 23 | + | #:export (make-kanji-trie | |
| 24 | + | make-reading-trie | |
| 25 | + | make-meaning-trie)) | |
| 26 | + | ||
| 27 | + | (define (make-key key) | |
| 28 | + | (apply append | |
| 29 | + | (map | |
| 30 | + | (lambda (c) | |
| 31 | + | (list (quotient c 16) (modulo c 16))) | |
| 32 | + | (bytevector->u8-list (string->utf8 key))))) | |
| 33 | + | ||
| 34 | + | (define (make-kanji-trie results) | |
| 35 | + | (let ((trie (make-empty-trie))) | |
| 36 | + | (let loop ((results results) (i 0)) | |
| 37 | + | (if (null? results) | |
| 38 | + | trie | |
| 39 | + | (begin | |
| 40 | + | (for-each | |
| 41 | + | (lambda (key) | |
| 42 | + | (add-to-trie! trie (make-key key) i)) | |
| 43 | + | (result-kanjis (car results))) | |
| 44 | + | (loop (cdr results) (+ i 1))))))) | |
| 45 | + | ||
| 46 | + | (define (make-reading-trie results) | |
| 47 | + | (let ((trie (make-empty-trie))) | |
| 48 | + | (let loop ((results results) (i 0)) | |
| 49 | + | (if (null? results) | |
| 50 | + | trie | |
| 51 | + | (begin | |
| 52 | + | (for-each | |
| 53 | + | (lambda (reading) | |
| 54 | + | (for-each | |
| 55 | + | (lambda (key) | |
| 56 | + | (add-to-trie! trie (make-key key) i)) | |
| 57 | + | (reading-readings reading))) | |
| 58 | + | (result-readings (car results))) | |
| 59 | + | (loop (cdr results) (+ i 1))))))) | |
| 60 | + | ||
| 61 | + | (define (make-meaning-trie results) | |
| 62 | + | (let ((trie (make-empty-trie))) | |
| 63 | + | (let loop ((results results) (i 0)) | |
| 64 | + | (if (null? results) | |
| 65 | + | trie | |
| 66 | + | (begin | |
| 67 | + | (for-each | |
| 68 | + | (lambda (meaning) | |
| 69 | + | (for-each | |
| 70 | + | (lambda (key) | |
| 71 | + | (add-to-trie! trie (make-key key) i)) | |
| 72 | + | (sense-glosses meaning))) | |
| 73 | + | (result-senses (car results))) | |
| 74 | + | (loop (cdr results) (+ i 1))))))) |
modules/nani/jmdict/xml.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani jmdict xml) | |
| 20 | + | #:use-module (ice-9 match) | |
| 21 | + | #:use-module (ice-9 rdelim) | |
| 22 | + | #:use-module (sxml fold) | |
| 23 | + | #:use-module (sxml simple) | |
| 24 | + | #:use-module (nani result) | |
| 25 | + | #:export (load-dic sxml->results)) | |
| 26 | + | ||
| 27 | + | (define (load-dic file) | |
| 28 | + | (xml->sxml (call-with-input-file file read-string))) | |
| 29 | + | ||
| 30 | + | (define (sxml->reading lst) | |
| 31 | + | (let loop ((reading (make-reading '() '() '())) (lst lst)) | |
| 32 | + | (if (null? lst) | |
| 33 | + | reading | |
| 34 | + | (loop | |
| 35 | + | (match (car lst) | |
| 36 | + | (('reading r) (update-reading reading #:readings (cons r (reading-readings reading)))) | |
| 37 | + | (('info r) (update-reading reading #:info (cons r (reading-info reading)))) | |
| 38 | + | (('limit r) (update-reading reading #:kanjis (cons r (reading-kanjis reading)))) | |
| 39 | + | ((? string? _) reading)) | |
| 40 | + | (cdr lst))))) | |
| 41 | + | ||
| 42 | + | (define (sxml->source lst) | |
| 43 | + | (let loop ((source (make-source '() #f "" "")) (lst lst)) | |
| 44 | + | (if (null? lst) | |
| 45 | + | source | |
| 46 | + | (loop | |
| 47 | + | (match (car lst) | |
| 48 | + | (('content c) (update-source source #:content (cons c (source-content source)))) | |
| 49 | + | (('wasei _) (update-source source #:wasei? #t)) | |
| 50 | + | (('type t) (update-source source #:type t)) | |
| 51 | + | (('lang l) (update-source source #:lang l)) | |
| 52 | + | ((? list? l) (loop source l)) | |
| 53 | + | ((? string? _) source)) | |
| 54 | + | (cdr lst))))) | |
| 55 | + | ||
| 56 | + | (define (sxml->sense lst) | |
| 57 | + | (let loop ((sense (make-sense '() '() '() '() '() '() "eng")) (lst lst)) | |
| 58 | + | (if (null? lst) | |
| 59 | + | sense | |
| 60 | + | (loop | |
| 61 | + | (match (car lst) | |
| 62 | + | (('ref (? string? r)) (update-sense sense #:references (cons r (sense-references sense)))) | |
| 63 | + | (('limit (? string? r)) (update-sense sense #:limits (cons r (sense-limits sense)))) | |
| 64 | + | (('info (? string? r)) (update-sense sense #:infos (cons r (sense-infos sense)))) | |
| 65 | + | ((? source? s) (update-sense sense #:sources (cons s (sense-sources sense)))) | |
| 66 | + | (('tag (? string? r)) (update-sense sense #:tags (cons r (sense-tags sense)))) | |
| 67 | + | (('gloss (? string? r)) (update-sense sense #:glosses (cons r (sense-glosses sense)))) | |
| 68 | + | (('lang (? string? l)) (update-sense sense #:language l)) | |
| 69 | + | ((? list? l) (loop sense l)) | |
| 70 | + | ((? string? _) sense)) | |
| 71 | + | (cdr lst))))) | |
| 72 | + | ||
| 73 | + | (define (sxml->result lst) | |
| 74 | + | (let loop ((result (make-result 0 '() '() '())) (lst lst)) | |
| 75 | + | (if (null? lst) | |
| 76 | + | result | |
| 77 | + | (loop | |
| 78 | + | (match (car lst) | |
| 79 | + | (('kanji kanji) (update-result result #:kanjis (cons kanji (result-kanjis result)))) | |
| 80 | + | ((? reading? r) (update-result result #:readings (cons r (result-readings result)))) | |
| 81 | + | ((? sense? s) (update-result result #:senses (cons s (result-senses result)))) | |
| 82 | + | ((? string? _) result)) | |
| 83 | + | (cdr lst))))) | |
| 84 | + | ||
| 85 | + | (define (sxml->results sxml) | |
| 86 | + | (foldt | |
| 87 | + | (lambda (xml) | |
| 88 | + | (match xml | |
| 89 | + | (('ent_seq _) "") | |
| 90 | + | (('ke_pri _) "") | |
| 91 | + | (('re_pri _) "") | |
| 92 | + | (('pri _) "") | |
| 93 | + | (('keb kanji) `(kanji ,kanji)) | |
| 94 | + | (('ke_inf _) "") | |
| 95 | + | (('k_ele lst ...) (car (filter list? lst))) | |
| 96 | + | (('reb reading) `(reading ,reading)) | |
| 97 | + | (('re_nokanji _ ...) "") | |
| 98 | + | (('re_restr r) `(limit ,r)) | |
| 99 | + | (('re_inf r) `(info ,r)) | |
| 100 | + | (('r_ele lst ...) (sxml->reading lst)) | |
| 101 | + | (('ant pos) `(tag ,pos)) | |
| 102 | + | (('dial pos) `(tag ,pos)) | |
| 103 | + | (('field pos) `(tag ,pos)) | |
| 104 | + | (('misc pos) `(tag ,pos)) | |
| 105 | + | (('pos pos) `(tag ,pos)) | |
| 106 | + | (('xref pos) `(ref ,pos)) | |
| 107 | + | (('g_type _ ...) "") | |
| 108 | + | (('gloss (? string? g)) xml) | |
| 109 | + | (('gloss (? list? g)) g) | |
| 110 | + | (('gloss attr g) (cons `(gloss ,g) attr)) | |
| 111 | + | (('stagk k) `(limit ,k)) | |
| 112 | + | (('stagr r) `(limit ,r)) | |
| 113 | + | (('s_inf r) `(info ,r)) | |
| 114 | + | (('lsource lst ...) (sxml->source lst)) | |
| 115 | + | (('sense lst ...) (sxml->sense lst)) | |
| 116 | + | (('entry lst ...) (sxml->result lst)) | |
| 117 | + | (('JMdict lst ...) lst) | |
| 118 | + | (('xml:lang l) `(lang ,l)) | |
| 119 | + | (('ls_wasei _) '(wasei #t)) | |
| 120 | + | (('ls_type t) `(type ,t)) | |
| 121 | + | (('@ lst ...) lst) | |
| 122 | + | (('*TOP* _ l) (filter result? l)) | |
| 123 | + | (('*PI* _ ...) #f))) | |
| 124 | + | (lambda (a) a) | |
| 125 | + | sxml)) |
modules/nani/result.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani result) | |
| 20 | + | #:use-module (srfi srfi-9) | |
| 21 | + | #:export (make-result | |
| 22 | + | result? | |
| 23 | + | result-position | |
| 24 | + | result-position-set! | |
| 25 | + | result-kanjis | |
| 26 | + | result-readings | |
| 27 | + | result-senses | |
| 28 | + | ||
| 29 | + | make-reading | |
| 30 | + | reading? | |
| 31 | + | reading-kanjis | |
| 32 | + | reading-info | |
| 33 | + | reading-readings | |
| 34 | + | ||
| 35 | + | make-sense | |
| 36 | + | sense? | |
| 37 | + | sense-references | |
| 38 | + | sense-limits | |
| 39 | + | sense-infos | |
| 40 | + | sense-sources | |
| 41 | + | sense-tags | |
| 42 | + | sense-glosses | |
| 43 | + | sense-language | |
| 44 | + | ||
| 45 | + | make-source | |
| 46 | + | source? | |
| 47 | + | source-content | |
| 48 | + | source-wasei? | |
| 49 | + | source-type | |
| 50 | + | source-lang | |
| 51 | + | ||
| 52 | + | update-result | |
| 53 | + | update-reading | |
| 54 | + | update-sense | |
| 55 | + | update-source)) | |
| 56 | + | ||
| 57 | + | (define-record-type result | |
| 58 | + | (make-result position kanjis readings senses) | |
| 59 | + | result? | |
| 60 | + | (position result-position result-position-set!) ; integer | |
| 61 | + | (kanjis result-kanjis) ; string-list | |
| 62 | + | (readings result-readings) ; reanding-list | |
| 63 | + | (senses result-senses)) ; sense-list | |
| 64 | + | ||
| 65 | + | (define-record-type reading | |
| 66 | + | (make-reading kanjis info readings) | |
| 67 | + | reading? | |
| 68 | + | (kanjis reading-kanjis) ; string-list | |
| 69 | + | (info reading-info) ; string-list | |
| 70 | + | (readings reading-readings)) ; string-list | |
| 71 | + | ||
| 72 | + | (define-record-type sense | |
| 73 | + | (make-sense references limits infos sources tags glosses language) | |
| 74 | + | sense? | |
| 75 | + | (references sense-references) ; string-list | |
| 76 | + | (limits sense-limits) ; string-list | |
| 77 | + | (infos sense-infos) ; string-list | |
| 78 | + | (sources sense-sources) ; source-list | |
| 79 | + | (tags sense-tags) ; symbol-list | |
| 80 | + | (glosses sense-glosses) ; string-list | |
| 81 | + | (language sense-language)) ; string | |
| 82 | + | ||
| 83 | + | (define-record-type source | |
| 84 | + | (make-source content wasei? type lang) | |
| 85 | + | source? | |
| 86 | + | (content source-content) ; string-list | |
| 87 | + | (wasei? source-wasei?) ; boolean | |
| 88 | + | (type source-type) ; string | |
| 89 | + | (lang source-lang)) ; string | |
| 90 | + | ||
| 91 | + | (define* (update-result result | |
| 92 | + | #:key (kanjis (result-kanjis result)) | |
| 93 | + | (readings (result-readings result)) | |
| 94 | + | (senses (result-senses result))) | |
| 95 | + | (make-result (result-position result) kanjis readings senses)) | |
| 96 | + | ||
| 97 | + | (define* (update-reading reading | |
| 98 | + | #:key (kanjis (reading-kanjis reading)) | |
| 99 | + | (info (reading-info reading)) | |
| 100 | + | (readings (reading-readings reading))) | |
| 101 | + | (make-reading kanjis info readings)) | |
| 102 | + | ||
| 103 | + | (define* (update-sense sense | |
| 104 | + | #:key (references (sense-references sense)) | |
| 105 | + | (limits (sense-limits sense)) | |
| 106 | + | (infos (sense-infos sense)) | |
| 107 | + | (sources (sense-sources sense)) | |
| 108 | + | (tags (sense-tags sense)) | |
| 109 | + | (glosses (sense-glosses sense)) | |
| 110 | + | (language (sense-language sense))) | |
| 111 | + | (make-sense references limits infos sources tags glosses language)) | |
| 112 | + | ||
| 113 | + | (define* (update-source source | |
| 114 | + | #:key (content (source-content source)) | |
| 115 | + | (wasei? (source-wasei? source)) | |
| 116 | + | (type (source-type source)) | |
| 117 | + | (lang (source-lang source))) | |
| 118 | + | (make-source content wasei? type lang)) |
modules/nani/trie.scm unknown status 1
| 1 | + | ;;; Nani Project website | |
| 2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
| 3 | + | ;;; | |
| 4 | + | ;;; This file is part of the Nani Project website. | |
| 5 | + | ;;; | |
| 6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
| 7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
| 8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
| 9 | + | ;;; your option) any later version. | |
| 10 | + | ;;; | |
| 11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
| 12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 | + | ;;; GNU Affero General Public License for more details. | |
| 15 | + | ;;; | |
| 16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
| 17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | + | ||
| 19 | + | (define-module (nani trie) | |
| 20 | + | #:use-module (srfi srfi-9) | |
| 21 | + | #:export (make-trie | |
| 22 | + | trie? | |
| 23 | + | trie-position | |
| 24 | + | trie-position-set! | |
| 25 | + | trie-vals | |
| 26 | + | trie-vals-set! | |
| 27 | + | trie-transitions | |
| 28 | + | trie-transitions-set! | |
| 29 | + | ||
| 30 | + | make-empty-trie | |
| 31 | + | add-to-trie! | |
| 32 | + | compress-trie)) | |
| 33 | + | ||
| 34 | + | (define-record-type trie | |
| 35 | + | (make-trie position vals transitions) | |
| 36 | + | trie? | |
| 37 | + | (position trie-position trie-position-set!) ; integer | |
| 38 | + | (vals trie-vals trie-vals-set!) ; list | |
| 39 | + | (transitions trie-transitions trie-transitions-set!)) ; array or alist | |
| 40 | + | ||
| 41 | + | (define (make-empty-trie) | |
| 42 | + | (make-trie 0 '() (make-array #f 16))) | |
| 43 | + | ||
| 44 | + | (define (add-to-trie! trie key value) | |
| 45 | + | (if (null? key) | |
| 46 | + | (trie-vals-set! trie (cons value (trie-vals trie))) | |
| 47 | + | (let ((next-trie (array-ref (trie-transitions trie) (car key)))) | |
| 48 | + | (if next-trie | |
| 49 | + | (add-to-trie! next-trie (cdr key) value) | |
| 50 | + | (let ((next-trie (make-empty-trie))) | |
| 51 | + | (array-set! (trie-transitions trie) next-trie (car key)) | |
| 52 | + | (add-to-trie! next-trie (cdr key) value)))))) | |
| 53 | + | ||
| 54 | + | (define (convert-trie-transitions! trie) | |
| 55 | + | (define (get-new-transitions transitions) | |
| 56 | + | (let loop ((i 0) (tr '())) | |
| 57 | + | (if (= i 16) | |
| 58 | + | tr | |
| 59 | + | (let ((elem (array-ref transitions i))) | |
| 60 | + | (if elem | |
| 61 | + | (begin | |
| 62 | + | (convert-trie-transitions! elem) | |
| 63 | + | (loop (+ i 1) (cons (cons i elem) tr))) | |
| 64 | + | (loop (+ i 1) tr)))))) | |
| 65 | + | (let* ((transitions (trie-transitions trie)) | |
| 66 | + | (transitions (get-new-transitions transitions))) | |
| 67 | + | (trie-transitions-set! trie transitions))) | |
| 68 | + | ||
| 69 | + | (define (compress-trie trie) | |
| 70 | + | (define (compress-aux trie) | |
| 71 | + | (make-trie | |
| 72 | + | (trie-position trie) | |
| 73 | + | (trie-vals trie) | |
| 74 | + | (apply append | |
| 75 | + | (map | |
| 76 | + | (lambda (tr) | |
| 77 | + | (let ((trie (cdr tr))) | |
| 78 | + | (map | |
| 79 | + | (lambda (tr2) | |
| 80 | + | (cons (+ (car tr2) (* 16 (car tr))) | |
| 81 | + | (compress-aux (cdr tr2)))) | |
| 82 | + | (trie-transitions trie)))) | |
| 83 | + | (trie-transitions trie))))) | |
| 84 | + | (convert-trie-transitions! trie) | |
| 85 | + | (compress-aux trie)) |