Add jmdict data builder
modules/nani/jmdict/serialize.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani jmdict serialize) | |
20 | + | #:use-module (nani result) | |
21 | + | #:use-module (nani trie) | |
22 | + | #:use-module (rnrs bytevectors) | |
23 | + | #:export (serialize-jmdict)) | |
24 | + | ||
25 | + | (define (merge-bvs bvs) | |
26 | + | (let* ((size (apply + (map bytevector-length bvs))) | |
27 | + | (bv (make-bytevector size 0))) | |
28 | + | (let loop ((bvs bvs) (pos 0)) | |
29 | + | (unless (null? bvs) | |
30 | + | (let ((sz (bytevector-length (car bvs)))) | |
31 | + | (bytevector-copy! (car bvs) 0 bv pos sz) | |
32 | + | (loop (cdr bvs) (+ pos sz))))) | |
33 | + | bv)) | |
34 | + | ||
35 | + | (define (serialize-trie trie results pos) | |
36 | + | (define (serialize-trie-aux transitions pos) | |
37 | + | (let loop ((pos pos) | |
38 | + | (trs transitions) | |
39 | + | (bvs '())) | |
40 | + | (if (null? trs) | |
41 | + | (cons pos bvs) | |
42 | + | (let* ((next-trie (cdr (car trs))) | |
43 | + | (bv (serialize-trie next-trie results pos)) | |
44 | + | (pos (car bv)) | |
45 | + | (bv (cdr bv))) | |
46 | + | (loop pos (cdr trs) (append bvs bv)))))) | |
47 | + | (trie-position-set! trie pos) | |
48 | + | (let* ((vals-sz (list-size (trie-vals trie) int-size)) | |
49 | + | (trs-sz (list-size (trie-transitions trie) (const 5))) | |
50 | + | (sz (+ vals-sz trs-sz)) | |
51 | + | (bv (make-bytevector sz 0))) | |
52 | + | (serialize-list (map (lambda (pos) (result-position (array-ref results pos))) | |
53 | + | (trie-vals trie)) | |
54 | + | serialize-int 0 bv) | |
55 | + | (let* ((bvs (serialize-trie-aux | |
56 | + | (trie-transitions trie) | |
57 | + | (+ pos (bytevector-length bv)))) | |
58 | + | (next-pos (car bvs)) | |
59 | + | (bvs (cdr bvs))) | |
60 | + | (serialize-list (trie-transitions trie) serialize-pointer | |
61 | + | vals-sz bv) | |
62 | + | (cons next-pos (cons bv bvs))))) | |
63 | + | ||
64 | + | (define (serialize-pointer ptr pos bv) | |
65 | + | (bytevector-u8-set! bv pos (car ptr)) | |
66 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness little)) | |
67 | + | (+ pos 5)) | |
68 | + | ||
69 | + | (define (serialize-int int pos bv) | |
70 | + | (bytevector-u32-set! bv pos int (endianness little)) | |
71 | + | (+ pos 4)) | |
72 | + | (define int-size (const 4)) | |
73 | + | ||
74 | + | (define (serialize-boolean bool pos bv) | |
75 | + | (bytevector-u8-set! bv pos (if bool 1 0)) | |
76 | + | (+ pos 1)) | |
77 | + | (define boolean-size (const 1)) | |
78 | + | ||
79 | + | (define (serialize-string str pos bv) | |
80 | + | (let ((sbv (string->utf8 str))) | |
81 | + | (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness little)) | |
82 | + | (bytevector-copy! sbv 0 bv (+ pos 4) (bytevector-length sbv)) | |
83 | + | (+ pos 4 (bytevector-length sbv)))) | |
84 | + | (define (string-size str) | |
85 | + | (let ((sbv (string->utf8 str))) | |
86 | + | (+ 4 (bytevector-length sbv)))) | |
87 | + | ||
88 | + | (define* (serialize-list lst serialize pos bv #:key (size? #t)) | |
89 | + | (when (not (list? lst)) (throw 'not-list lst)) | |
90 | + | (when size? | |
91 | + | (bytevector-u32-set! bv pos (length lst) (endianness little))) | |
92 | + | (let loop ((lst lst) (pos (+ pos (if size? 4 0)))) | |
93 | + | (if (null? lst) | |
94 | + | pos | |
95 | + | (loop (cdr lst) (serialize (car lst) pos bv))))) | |
96 | + | (define* (list-size lst size #:key (size? #t)) | |
97 | + | (when (not (list? lst)) (throw 'not-list lst)) | |
98 | + | (apply + (if size? 4 0) (map size lst))) | |
99 | + | ||
100 | + | (define (serialize-source source pos bv) | |
101 | + | (when (not (source? source)) (throw 'not-source source)) | |
102 | + | (let* ((pos (serialize-list (source-content source) serialize-string pos bv)) | |
103 | + | (pos (serialize-boolean (source-wasei? source) pos bv)) | |
104 | + | (pos (serialize-string (source-type source) pos bv)) | |
105 | + | (pos (serialize-string (source-lang source) pos bv))) | |
106 | + | pos)) | |
107 | + | (define (source-size source) | |
108 | + | (when (not (source? source)) (throw 'not-source source)) | |
109 | + | (+ (list-size (source-content source) string-size) | |
110 | + | (boolean-size (source-wasei? source)) | |
111 | + | (string-size (source-type source)) | |
112 | + | (string-size (source-lang source)))) | |
113 | + | ||
114 | + | (define (serialize-reading reading pos bv) | |
115 | + | (when (not (reading? reading)) (throw 'not-reading reading)) | |
116 | + | (let* ((pos (serialize-list (reading-kanjis reading) serialize-string pos bv)) | |
117 | + | (pos (serialize-list (reading-info reading) serialize-string pos bv)) | |
118 | + | (pos (serialize-list (reading-readings reading) serialize-string pos bv))) | |
119 | + | pos)) | |
120 | + | (define (reading-size reading) | |
121 | + | (when (not (reading? reading)) (throw 'not-reading reading)) | |
122 | + | (+ (list-size (reading-kanjis reading) string-size) | |
123 | + | (list-size (reading-info reading) string-size) | |
124 | + | (list-size (reading-readings reading) string-size))) | |
125 | + | ||
126 | + | (define (serialize-sense sense pos bv) | |
127 | + | (when (not (sense? sense)) (throw 'not-sense sense)) | |
128 | + | (let* ((pos (serialize-list (sense-references sense) serialize-string pos bv)) | |
129 | + | (pos (serialize-list (sense-limits sense) serialize-string pos bv)) | |
130 | + | (pos (serialize-list (sense-infos sense) serialize-string pos bv)) | |
131 | + | (pos (serialize-list (sense-sources sense) serialize-source pos bv)) | |
132 | + | (pos (serialize-list (sense-tags sense) serialize-string pos bv)) | |
133 | + | (pos (serialize-list (sense-glosses sense) serialize-string pos bv)) | |
134 | + | (pos (serialize-string (sense-language sense) pos bv))) | |
135 | + | pos)) | |
136 | + | (define (sense-size sense) | |
137 | + | (when (not (sense? sense)) (throw 'not-sense sense)) | |
138 | + | (+ (list-size (sense-references sense) string-size) | |
139 | + | (list-size (sense-limits sense) string-size) | |
140 | + | (list-size (sense-infos sense) string-size) | |
141 | + | (list-size (sense-sources sense) source-size) | |
142 | + | (list-size (sense-tags sense) string-size) | |
143 | + | (list-size (sense-glosses sense) string-size) | |
144 | + | (string-size (sense-language sense)))) | |
145 | + | ||
146 | + | (define (serialize-result result pos bv) | |
147 | + | (when (not (result? result)) (throw 'not-result result)) | |
148 | + | (result-position-set! result pos) | |
149 | + | (let* ((pos (serialize-list (result-kanjis result) serialize-string pos bv)) | |
150 | + | (pos (serialize-list (result-readings result) serialize-reading pos bv)) | |
151 | + | (pos (serialize-list (result-senses result) serialize-sense pos bv))) | |
152 | + | pos)) | |
153 | + | (define (result-size result) | |
154 | + | (when (not (result? result)) (throw 'not-result result)) | |
155 | + | (+ (list-size (result-kanjis result) string-size) | |
156 | + | (list-size (result-readings result) reading-size) | |
157 | + | (list-size (result-senses result) sense-size))) | |
158 | + | ||
159 | + | (define (serialize-jmdict results kanji-trie reading-trie sense-trie) | |
160 | + | (let* ((header (string->utf8 "NANI_JMDICT")) | |
161 | + | (header-size (bytevector-length header)) | |
162 | + | (pointers (make-bytevector 12 0)) | |
163 | + | (results-size (list-size results result-size #:size? #f)) | |
164 | + | (results-bv (make-bytevector (+ header-size 12 results-size 0)))) | |
165 | + | (serialize-list results serialize-result (+ header-size 12) results-bv #:size? #f) | |
166 | + | (let* ((results (list->array 1 results)) | |
167 | + | (pos (+ header-size 12 (bytevector-length results-bv))) | |
168 | + | (kanji-bvs (serialize-trie kanji-trie results pos)) | |
169 | + | (pos (car kanji-bvs)) | |
170 | + | (reading-bvs (serialize-trie reading-trie results pos)) | |
171 | + | (pos (car reading-bvs)) | |
172 | + | (meaning-bvs (serialize-trie sense-trie results pos))) | |
173 | + | (bytevector-copy! header 0 results-bv 0 header-size) | |
174 | + | (bytevector-copy! pointers 0 results-bv header-size 12) | |
175 | + | (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs) | |
176 | + | (cdr meaning-bvs)))))) |
modules/nani/jmdict/trie.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani jmdict trie) | |
20 | + | #:use-module (nani trie) | |
21 | + | #:use-module (nani result) | |
22 | + | #:use-module (rnrs bytevectors) | |
23 | + | #:export (make-kanji-trie | |
24 | + | make-reading-trie | |
25 | + | make-meaning-trie)) | |
26 | + | ||
27 | + | (define (make-key key) | |
28 | + | (apply append | |
29 | + | (map | |
30 | + | (lambda (c) | |
31 | + | (list (quotient c 16) (modulo c 16))) | |
32 | + | (bytevector->u8-list (string->utf8 key))))) | |
33 | + | ||
34 | + | (define (make-kanji-trie results) | |
35 | + | (let ((trie (make-empty-trie))) | |
36 | + | (let loop ((results results) (i 0)) | |
37 | + | (if (null? results) | |
38 | + | trie | |
39 | + | (begin | |
40 | + | (for-each | |
41 | + | (lambda (key) | |
42 | + | (add-to-trie! trie (make-key key) i)) | |
43 | + | (result-kanjis (car results))) | |
44 | + | (loop (cdr results) (+ i 1))))))) | |
45 | + | ||
46 | + | (define (make-reading-trie results) | |
47 | + | (let ((trie (make-empty-trie))) | |
48 | + | (let loop ((results results) (i 0)) | |
49 | + | (if (null? results) | |
50 | + | trie | |
51 | + | (begin | |
52 | + | (for-each | |
53 | + | (lambda (reading) | |
54 | + | (for-each | |
55 | + | (lambda (key) | |
56 | + | (add-to-trie! trie (make-key key) i)) | |
57 | + | (reading-readings reading))) | |
58 | + | (result-readings (car results))) | |
59 | + | (loop (cdr results) (+ i 1))))))) | |
60 | + | ||
61 | + | (define (make-meaning-trie results) | |
62 | + | (let ((trie (make-empty-trie))) | |
63 | + | (let loop ((results results) (i 0)) | |
64 | + | (if (null? results) | |
65 | + | trie | |
66 | + | (begin | |
67 | + | (for-each | |
68 | + | (lambda (meaning) | |
69 | + | (for-each | |
70 | + | (lambda (key) | |
71 | + | (add-to-trie! trie (make-key key) i)) | |
72 | + | (sense-glosses meaning))) | |
73 | + | (result-senses (car results))) | |
74 | + | (loop (cdr results) (+ i 1))))))) |
modules/nani/jmdict/xml.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani jmdict xml) | |
20 | + | #:use-module (ice-9 match) | |
21 | + | #:use-module (ice-9 rdelim) | |
22 | + | #:use-module (sxml fold) | |
23 | + | #:use-module (sxml simple) | |
24 | + | #:use-module (nani result) | |
25 | + | #:export (load-dic sxml->results)) | |
26 | + | ||
27 | + | (define (load-dic file) | |
28 | + | (xml->sxml (call-with-input-file file read-string))) | |
29 | + | ||
30 | + | (define (sxml->reading lst) | |
31 | + | (let loop ((reading (make-reading '() '() '())) (lst lst)) | |
32 | + | (if (null? lst) | |
33 | + | reading | |
34 | + | (loop | |
35 | + | (match (car lst) | |
36 | + | (('reading r) (update-reading reading #:readings (cons r (reading-readings reading)))) | |
37 | + | (('info r) (update-reading reading #:info (cons r (reading-info reading)))) | |
38 | + | (('limit r) (update-reading reading #:kanjis (cons r (reading-kanjis reading)))) | |
39 | + | ((? string? _) reading)) | |
40 | + | (cdr lst))))) | |
41 | + | ||
42 | + | (define (sxml->source lst) | |
43 | + | (let loop ((source (make-source '() #f "" "")) (lst lst)) | |
44 | + | (if (null? lst) | |
45 | + | source | |
46 | + | (loop | |
47 | + | (match (car lst) | |
48 | + | (('content c) (update-source source #:content (cons c (source-content source)))) | |
49 | + | (('wasei _) (update-source source #:wasei? #t)) | |
50 | + | (('type t) (update-source source #:type t)) | |
51 | + | (('lang l) (update-source source #:lang l)) | |
52 | + | ((? list? l) (loop source l)) | |
53 | + | ((? string? _) source)) | |
54 | + | (cdr lst))))) | |
55 | + | ||
56 | + | (define (sxml->sense lst) | |
57 | + | (let loop ((sense (make-sense '() '() '() '() '() '() "eng")) (lst lst)) | |
58 | + | (if (null? lst) | |
59 | + | sense | |
60 | + | (loop | |
61 | + | (match (car lst) | |
62 | + | (('ref (? string? r)) (update-sense sense #:references (cons r (sense-references sense)))) | |
63 | + | (('limit (? string? r)) (update-sense sense #:limits (cons r (sense-limits sense)))) | |
64 | + | (('info (? string? r)) (update-sense sense #:infos (cons r (sense-infos sense)))) | |
65 | + | ((? source? s) (update-sense sense #:sources (cons s (sense-sources sense)))) | |
66 | + | (('tag (? string? r)) (update-sense sense #:tags (cons r (sense-tags sense)))) | |
67 | + | (('gloss (? string? r)) (update-sense sense #:glosses (cons r (sense-glosses sense)))) | |
68 | + | (('lang (? string? l)) (update-sense sense #:language l)) | |
69 | + | ((? list? l) (loop sense l)) | |
70 | + | ((? string? _) sense)) | |
71 | + | (cdr lst))))) | |
72 | + | ||
73 | + | (define (sxml->result lst) | |
74 | + | (let loop ((result (make-result 0 '() '() '())) (lst lst)) | |
75 | + | (if (null? lst) | |
76 | + | result | |
77 | + | (loop | |
78 | + | (match (car lst) | |
79 | + | (('kanji kanji) (update-result result #:kanjis (cons kanji (result-kanjis result)))) | |
80 | + | ((? reading? r) (update-result result #:readings (cons r (result-readings result)))) | |
81 | + | ((? sense? s) (update-result result #:senses (cons s (result-senses result)))) | |
82 | + | ((? string? _) result)) | |
83 | + | (cdr lst))))) | |
84 | + | ||
85 | + | (define (sxml->results sxml) | |
86 | + | (foldt | |
87 | + | (lambda (xml) | |
88 | + | (match xml | |
89 | + | (('ent_seq _) "") | |
90 | + | (('ke_pri _) "") | |
91 | + | (('re_pri _) "") | |
92 | + | (('pri _) "") | |
93 | + | (('keb kanji) `(kanji ,kanji)) | |
94 | + | (('ke_inf _) "") | |
95 | + | (('k_ele lst ...) (car (filter list? lst))) | |
96 | + | (('reb reading) `(reading ,reading)) | |
97 | + | (('re_nokanji _ ...) "") | |
98 | + | (('re_restr r) `(limit ,r)) | |
99 | + | (('re_inf r) `(info ,r)) | |
100 | + | (('r_ele lst ...) (sxml->reading lst)) | |
101 | + | (('ant pos) `(tag ,pos)) | |
102 | + | (('dial pos) `(tag ,pos)) | |
103 | + | (('field pos) `(tag ,pos)) | |
104 | + | (('misc pos) `(tag ,pos)) | |
105 | + | (('pos pos) `(tag ,pos)) | |
106 | + | (('xref pos) `(ref ,pos)) | |
107 | + | (('g_type _ ...) "") | |
108 | + | (('gloss (? string? g)) xml) | |
109 | + | (('gloss (? list? g)) g) | |
110 | + | (('gloss attr g) (cons `(gloss ,g) attr)) | |
111 | + | (('stagk k) `(limit ,k)) | |
112 | + | (('stagr r) `(limit ,r)) | |
113 | + | (('s_inf r) `(info ,r)) | |
114 | + | (('lsource lst ...) (sxml->source lst)) | |
115 | + | (('sense lst ...) (sxml->sense lst)) | |
116 | + | (('entry lst ...) (sxml->result lst)) | |
117 | + | (('JMdict lst ...) lst) | |
118 | + | (('xml:lang l) `(lang ,l)) | |
119 | + | (('ls_wasei _) '(wasei #t)) | |
120 | + | (('ls_type t) `(type ,t)) | |
121 | + | (('@ lst ...) lst) | |
122 | + | (('*TOP* _ l) (filter result? l)) | |
123 | + | (('*PI* _ ...) #f))) | |
124 | + | (lambda (a) a) | |
125 | + | sxml)) |
modules/nani/result.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani result) | |
20 | + | #:use-module (srfi srfi-9) | |
21 | + | #:export (make-result | |
22 | + | result? | |
23 | + | result-position | |
24 | + | result-position-set! | |
25 | + | result-kanjis | |
26 | + | result-readings | |
27 | + | result-senses | |
28 | + | ||
29 | + | make-reading | |
30 | + | reading? | |
31 | + | reading-kanjis | |
32 | + | reading-info | |
33 | + | reading-readings | |
34 | + | ||
35 | + | make-sense | |
36 | + | sense? | |
37 | + | sense-references | |
38 | + | sense-limits | |
39 | + | sense-infos | |
40 | + | sense-sources | |
41 | + | sense-tags | |
42 | + | sense-glosses | |
43 | + | sense-language | |
44 | + | ||
45 | + | make-source | |
46 | + | source? | |
47 | + | source-content | |
48 | + | source-wasei? | |
49 | + | source-type | |
50 | + | source-lang | |
51 | + | ||
52 | + | update-result | |
53 | + | update-reading | |
54 | + | update-sense | |
55 | + | update-source)) | |
56 | + | ||
57 | + | (define-record-type result | |
58 | + | (make-result position kanjis readings senses) | |
59 | + | result? | |
60 | + | (position result-position result-position-set!) ; integer | |
61 | + | (kanjis result-kanjis) ; string-list | |
62 | + | (readings result-readings) ; reanding-list | |
63 | + | (senses result-senses)) ; sense-list | |
64 | + | ||
65 | + | (define-record-type reading | |
66 | + | (make-reading kanjis info readings) | |
67 | + | reading? | |
68 | + | (kanjis reading-kanjis) ; string-list | |
69 | + | (info reading-info) ; string-list | |
70 | + | (readings reading-readings)) ; string-list | |
71 | + | ||
72 | + | (define-record-type sense | |
73 | + | (make-sense references limits infos sources tags glosses language) | |
74 | + | sense? | |
75 | + | (references sense-references) ; string-list | |
76 | + | (limits sense-limits) ; string-list | |
77 | + | (infos sense-infos) ; string-list | |
78 | + | (sources sense-sources) ; source-list | |
79 | + | (tags sense-tags) ; symbol-list | |
80 | + | (glosses sense-glosses) ; string-list | |
81 | + | (language sense-language)) ; string | |
82 | + | ||
83 | + | (define-record-type source | |
84 | + | (make-source content wasei? type lang) | |
85 | + | source? | |
86 | + | (content source-content) ; string-list | |
87 | + | (wasei? source-wasei?) ; boolean | |
88 | + | (type source-type) ; string | |
89 | + | (lang source-lang)) ; string | |
90 | + | ||
91 | + | (define* (update-result result | |
92 | + | #:key (kanjis (result-kanjis result)) | |
93 | + | (readings (result-readings result)) | |
94 | + | (senses (result-senses result))) | |
95 | + | (make-result (result-position result) kanjis readings senses)) | |
96 | + | ||
97 | + | (define* (update-reading reading | |
98 | + | #:key (kanjis (reading-kanjis reading)) | |
99 | + | (info (reading-info reading)) | |
100 | + | (readings (reading-readings reading))) | |
101 | + | (make-reading kanjis info readings)) | |
102 | + | ||
103 | + | (define* (update-sense sense | |
104 | + | #:key (references (sense-references sense)) | |
105 | + | (limits (sense-limits sense)) | |
106 | + | (infos (sense-infos sense)) | |
107 | + | (sources (sense-sources sense)) | |
108 | + | (tags (sense-tags sense)) | |
109 | + | (glosses (sense-glosses sense)) | |
110 | + | (language (sense-language sense))) | |
111 | + | (make-sense references limits infos sources tags glosses language)) | |
112 | + | ||
113 | + | (define* (update-source source | |
114 | + | #:key (content (source-content source)) | |
115 | + | (wasei? (source-wasei? source)) | |
116 | + | (type (source-type source)) | |
117 | + | (lang (source-lang source))) | |
118 | + | (make-source content wasei? type lang)) |
modules/nani/trie.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani trie) | |
20 | + | #:use-module (srfi srfi-9) | |
21 | + | #:export (make-trie | |
22 | + | trie? | |
23 | + | trie-position | |
24 | + | trie-position-set! | |
25 | + | trie-vals | |
26 | + | trie-vals-set! | |
27 | + | trie-transitions | |
28 | + | trie-transitions-set! | |
29 | + | ||
30 | + | make-empty-trie | |
31 | + | add-to-trie! | |
32 | + | compress-trie)) | |
33 | + | ||
34 | + | (define-record-type trie | |
35 | + | (make-trie position vals transitions) | |
36 | + | trie? | |
37 | + | (position trie-position trie-position-set!) ; integer | |
38 | + | (vals trie-vals trie-vals-set!) ; list | |
39 | + | (transitions trie-transitions trie-transitions-set!)) ; array or alist | |
40 | + | ||
41 | + | (define (make-empty-trie) | |
42 | + | (make-trie 0 '() (make-array #f 16))) | |
43 | + | ||
44 | + | (define (add-to-trie! trie key value) | |
45 | + | (if (null? key) | |
46 | + | (trie-vals-set! trie (cons value (trie-vals trie))) | |
47 | + | (let ((next-trie (array-ref (trie-transitions trie) (car key)))) | |
48 | + | (if next-trie | |
49 | + | (add-to-trie! next-trie (cdr key) value) | |
50 | + | (let ((next-trie (make-empty-trie))) | |
51 | + | (array-set! (trie-transitions trie) next-trie (car key)) | |
52 | + | (add-to-trie! next-trie (cdr key) value)))))) | |
53 | + | ||
54 | + | (define (convert-trie-transitions! trie) | |
55 | + | (define (get-new-transitions transitions) | |
56 | + | (let loop ((i 0) (tr '())) | |
57 | + | (if (= i 16) | |
58 | + | tr | |
59 | + | (let ((elem (array-ref transitions i))) | |
60 | + | (if elem | |
61 | + | (begin | |
62 | + | (convert-trie-transitions! elem) | |
63 | + | (loop (+ i 1) (cons (cons i elem) tr))) | |
64 | + | (loop (+ i 1) tr)))))) | |
65 | + | (let* ((transitions (trie-transitions trie)) | |
66 | + | (transitions (get-new-transitions transitions))) | |
67 | + | (trie-transitions-set! trie transitions))) | |
68 | + | ||
69 | + | (define (compress-trie trie) | |
70 | + | (define (compress-aux trie) | |
71 | + | (make-trie | |
72 | + | (trie-position trie) | |
73 | + | (trie-vals trie) | |
74 | + | (apply append | |
75 | + | (map | |
76 | + | (lambda (tr) | |
77 | + | (let ((trie (cdr tr))) | |
78 | + | (map | |
79 | + | (lambda (tr2) | |
80 | + | (cons (+ (car tr2) (* 16 (car tr))) | |
81 | + | (compress-aux (cdr tr2)))) | |
82 | + | (trie-transitions trie)))) | |
83 | + | (trie-transitions trie))))) | |
84 | + | (convert-trie-transitions! trie) | |
85 | + | (compress-aux trie)) |