Add jmdict data builder

Julien LepillerThu Apr 11 23:52:44+0200 2019

ecfc791

Add jmdict data builder

modules/nani/jmdict/serialize.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani jmdict serialize)
20+
  #:use-module (nani result)
21+
  #:use-module (nani trie)
22+
  #:use-module (rnrs bytevectors)
23+
  #:export (serialize-jmdict))
24+
25+
(define (merge-bvs bvs)
26+
  (let* ((size (apply + (map bytevector-length bvs)))
27+
         (bv (make-bytevector size 0)))
28+
    (let loop ((bvs bvs) (pos 0))
29+
      (unless (null? bvs)
30+
        (let ((sz (bytevector-length (car bvs))))
31+
          (bytevector-copy! (car bvs) 0 bv pos sz)
32+
          (loop (cdr bvs) (+ pos sz)))))
33+
    bv))
34+
35+
(define (serialize-trie trie results pos)
36+
  (define (serialize-trie-aux transitions pos)
37+
    (let loop ((pos pos)
38+
               (trs transitions)
39+
               (bvs '()))
40+
      (if (null? trs)
41+
        (cons pos bvs)
42+
        (let* ((next-trie (cdr (car trs)))
43+
               (bv (serialize-trie next-trie results pos))
44+
               (pos (car bv))
45+
               (bv (cdr bv)))
46+
          (loop pos (cdr trs) (append bvs bv))))))
47+
  (trie-position-set! trie pos)
48+
  (let* ((vals-sz (list-size (trie-vals trie) int-size))
49+
         (trs-sz (list-size (trie-transitions trie) (const 5)))
50+
         (sz (+ vals-sz trs-sz))
51+
         (bv (make-bytevector sz 0)))
52+
    (serialize-list (map (lambda (pos) (result-position (array-ref results pos)))
53+
                         (trie-vals trie))
54+
                    serialize-int 0 bv)
55+
    (let* ((bvs (serialize-trie-aux
56+
                  (trie-transitions trie)
57+
                  (+ pos (bytevector-length bv))))
58+
           (next-pos (car bvs))
59+
           (bvs (cdr bvs)))
60+
      (serialize-list (trie-transitions trie) serialize-pointer
61+
                      vals-sz bv)
62+
      (cons next-pos (cons bv bvs)))))
63+
64+
(define (serialize-pointer ptr pos bv)
65+
  (bytevector-u8-set! bv pos (car ptr))
66+
  (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness little))
67+
  (+ pos 5))
68+
69+
(define (serialize-int int pos bv)
70+
  (bytevector-u32-set! bv pos int (endianness little))
71+
  (+ pos 4))
72+
(define int-size (const 4))
73+
74+
(define (serialize-boolean bool pos bv)
75+
  (bytevector-u8-set! bv pos (if bool 1 0))
76+
  (+ pos 1))
77+
(define boolean-size (const 1))
78+
79+
(define (serialize-string str pos bv)
80+
  (let ((sbv (string->utf8 str)))
81+
    (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness little))
82+
    (bytevector-copy! sbv 0 bv (+ pos 4) (bytevector-length sbv))
83+
    (+ pos 4 (bytevector-length sbv))))
84+
(define (string-size str)
85+
  (let ((sbv (string->utf8 str)))
86+
    (+ 4 (bytevector-length sbv))))
87+
88+
(define* (serialize-list lst serialize pos bv #:key (size? #t))
89+
  (when (not (list? lst)) (throw 'not-list lst))
90+
  (when size?
91+
    (bytevector-u32-set! bv pos (length lst) (endianness little)))
92+
  (let loop ((lst lst) (pos (+ pos (if size? 4 0))))
93+
    (if (null? lst)
94+
      pos
95+
      (loop (cdr lst) (serialize (car lst) pos bv)))))
96+
(define* (list-size lst size #:key (size? #t))
97+
  (when (not (list? lst)) (throw 'not-list lst))
98+
  (apply + (if size? 4 0) (map size lst)))
99+
100+
(define (serialize-source source pos bv)
101+
  (when (not (source? source)) (throw 'not-source source))
102+
  (let* ((pos (serialize-list (source-content source) serialize-string pos bv))
103+
         (pos (serialize-boolean (source-wasei? source) pos bv))
104+
         (pos (serialize-string (source-type source) pos bv))
105+
         (pos (serialize-string (source-lang source) pos bv)))
106+
    pos))
107+
(define (source-size source)
108+
  (when (not (source? source)) (throw 'not-source source))
109+
  (+ (list-size (source-content source) string-size)
110+
     (boolean-size (source-wasei? source))
111+
     (string-size (source-type source))
112+
     (string-size (source-lang source))))
113+
114+
(define (serialize-reading reading pos bv)
115+
  (when (not (reading? reading)) (throw 'not-reading reading))
116+
  (let* ((pos (serialize-list (reading-kanjis reading) serialize-string pos bv))
117+
         (pos (serialize-list (reading-info reading) serialize-string pos bv))
118+
         (pos (serialize-list (reading-readings reading) serialize-string pos bv)))
119+
    pos))
120+
(define (reading-size reading)
121+
  (when (not (reading? reading)) (throw 'not-reading reading))
122+
  (+ (list-size (reading-kanjis reading) string-size)
123+
     (list-size (reading-info reading) string-size)
124+
     (list-size (reading-readings reading) string-size)))
125+
126+
(define (serialize-sense sense pos bv)
127+
  (when (not (sense? sense)) (throw 'not-sense sense))
128+
  (let* ((pos (serialize-list (sense-references sense) serialize-string pos bv))
129+
         (pos (serialize-list (sense-limits sense) serialize-string pos bv))
130+
         (pos (serialize-list (sense-infos sense) serialize-string pos bv))
131+
         (pos (serialize-list (sense-sources sense) serialize-source pos bv))
132+
         (pos (serialize-list (sense-tags sense) serialize-string pos bv))
133+
         (pos (serialize-list (sense-glosses sense) serialize-string pos bv))
134+
         (pos (serialize-string (sense-language sense) pos bv)))
135+
    pos))
136+
(define (sense-size sense)
137+
  (when (not (sense? sense)) (throw 'not-sense sense))
138+
  (+ (list-size (sense-references sense) string-size)
139+
     (list-size (sense-limits sense) string-size)
140+
     (list-size (sense-infos sense) string-size)
141+
     (list-size (sense-sources sense) source-size)
142+
     (list-size (sense-tags sense) string-size)
143+
     (list-size (sense-glosses sense) string-size)
144+
     (string-size (sense-language sense))))
145+
146+
(define (serialize-result result pos bv)
147+
  (when (not (result? result)) (throw 'not-result result))
148+
  (result-position-set! result pos)
149+
  (let* ((pos (serialize-list (result-kanjis result) serialize-string pos bv))
150+
         (pos (serialize-list (result-readings result) serialize-reading pos bv))
151+
         (pos (serialize-list (result-senses result) serialize-sense pos bv)))
152+
    pos))
153+
(define (result-size result)
154+
  (when (not (result? result)) (throw 'not-result result))
155+
  (+ (list-size (result-kanjis result) string-size)
156+
     (list-size (result-readings result) reading-size)
157+
     (list-size (result-senses result) sense-size)))
158+
159+
(define (serialize-jmdict results kanji-trie reading-trie sense-trie)
160+
  (let* ((header (string->utf8 "NANI_JMDICT"))
161+
         (header-size (bytevector-length header))
162+
         (pointers (make-bytevector 12 0))
163+
         (results-size (list-size results result-size #:size? #f))
164+
         (results-bv (make-bytevector (+ header-size 12 results-size 0))))
165+
    (serialize-list results serialize-result (+ header-size 12) results-bv #:size? #f)
166+
    (let* ((results (list->array 1 results))
167+
           (pos (+ header-size 12 (bytevector-length results-bv)))
168+
           (kanji-bvs (serialize-trie kanji-trie results pos))
169+
           (pos (car kanji-bvs))
170+
           (reading-bvs (serialize-trie reading-trie results pos))
171+
           (pos (car reading-bvs))
172+
           (meaning-bvs (serialize-trie sense-trie results pos)))
173+
      (bytevector-copy! header 0 results-bv 0 header-size)
174+
      (bytevector-copy! pointers 0 results-bv header-size 12)
175+
      (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs)
176+
                         (cdr meaning-bvs))))))

modules/nani/jmdict/trie.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani jmdict trie)
20+
  #:use-module (nani trie)
21+
  #:use-module (nani result)
22+
  #:use-module (rnrs bytevectors)
23+
  #:export (make-kanji-trie
24+
            make-reading-trie
25+
            make-meaning-trie))
26+
27+
(define (make-key key)
28+
  (apply append
29+
    (map
30+
      (lambda (c)
31+
        (list (quotient c 16) (modulo c 16)))
32+
      (bytevector->u8-list (string->utf8 key)))))
33+
34+
(define (make-kanji-trie results)
35+
  (let ((trie (make-empty-trie)))
36+
    (let loop ((results results) (i 0))
37+
      (if (null? results)
38+
        trie
39+
        (begin
40+
          (for-each
41+
            (lambda (key)
42+
              (add-to-trie! trie (make-key key) i))
43+
            (result-kanjis (car results)))
44+
          (loop (cdr results) (+ i 1)))))))
45+
46+
(define (make-reading-trie results)
47+
  (let ((trie (make-empty-trie)))
48+
    (let loop ((results results) (i 0))
49+
      (if (null? results)
50+
        trie
51+
        (begin
52+
          (for-each
53+
            (lambda (reading)
54+
              (for-each
55+
                (lambda (key)
56+
                  (add-to-trie! trie (make-key key) i))
57+
                (reading-readings reading)))
58+
            (result-readings (car results)))
59+
          (loop (cdr results) (+ i 1)))))))
60+
61+
(define (make-meaning-trie results)
62+
  (let ((trie (make-empty-trie)))
63+
    (let loop ((results results) (i 0))
64+
      (if (null? results)
65+
        trie
66+
        (begin
67+
          (for-each
68+
            (lambda (meaning)
69+
              (for-each
70+
                (lambda (key)
71+
                  (add-to-trie! trie (make-key key) i))
72+
                (sense-glosses meaning)))
73+
            (result-senses (car results)))
74+
          (loop (cdr results) (+ i 1)))))))

modules/nani/jmdict/xml.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani jmdict xml)
20+
  #:use-module (ice-9 match)
21+
  #:use-module (ice-9 rdelim)
22+
  #:use-module (sxml fold)
23+
  #:use-module (sxml simple)
24+
  #:use-module (nani result)
25+
  #:export (load-dic sxml->results))
26+
27+
(define (load-dic file)
28+
  (xml->sxml (call-with-input-file file read-string)))
29+
30+
(define (sxml->reading lst)
31+
  (let loop ((reading (make-reading '() '() '())) (lst lst))
32+
    (if (null? lst)
33+
      reading
34+
      (loop
35+
        (match (car lst)
36+
          (('reading r) (update-reading reading #:readings (cons r (reading-readings reading))))
37+
          (('info r) (update-reading reading #:info (cons r (reading-info reading))))
38+
          (('limit r) (update-reading reading #:kanjis (cons r (reading-kanjis reading))))
39+
          ((? string? _) reading))
40+
        (cdr lst)))))
41+
42+
(define (sxml->source lst)
43+
  (let loop ((source (make-source '() #f "" "")) (lst lst))
44+
    (if (null? lst)
45+
      source
46+
      (loop
47+
        (match (car lst)
48+
          (('content c) (update-source source #:content (cons c (source-content source))))
49+
          (('wasei _) (update-source source #:wasei? #t))
50+
          (('type t) (update-source source #:type t))
51+
          (('lang l) (update-source source #:lang l))
52+
          ((? list? l) (loop source l))
53+
          ((? string? _) source))
54+
        (cdr lst)))))
55+
56+
(define (sxml->sense lst)
57+
  (let loop ((sense (make-sense '() '() '() '() '() '() "eng")) (lst lst))
58+
    (if (null? lst)
59+
      sense
60+
      (loop
61+
        (match (car lst)
62+
          (('ref (? string? r)) (update-sense sense #:references (cons r (sense-references sense))))
63+
          (('limit (? string? r)) (update-sense sense #:limits (cons r (sense-limits sense))))
64+
          (('info (? string? r)) (update-sense sense #:infos (cons r (sense-infos sense))))
65+
          ((? source? s) (update-sense sense #:sources (cons s (sense-sources sense))))
66+
          (('tag (? string? r)) (update-sense sense #:tags (cons r (sense-tags sense))))
67+
          (('gloss (? string? r)) (update-sense sense #:glosses (cons r (sense-glosses sense))))
68+
          (('lang (? string? l)) (update-sense sense #:language l))
69+
          ((? list? l) (loop sense l))
70+
          ((? string? _) sense))
71+
        (cdr lst)))))
72+
73+
(define (sxml->result lst)
74+
  (let loop ((result (make-result 0 '() '() '())) (lst lst))
75+
    (if (null? lst)
76+
      result
77+
      (loop
78+
        (match (car lst)
79+
          (('kanji kanji) (update-result result #:kanjis (cons kanji (result-kanjis result))))
80+
          ((? reading? r) (update-result result #:readings (cons r (result-readings result))))
81+
          ((? sense? s) (update-result result #:senses (cons s (result-senses result))))
82+
          ((? string? _) result))
83+
        (cdr lst)))))
84+
      
85+
(define (sxml->results sxml)
86+
  (foldt
87+
    (lambda (xml)
88+
      (match xml
89+
       (('ent_seq _) "")
90+
       (('ke_pri _) "")
91+
       (('re_pri _) "")
92+
       (('pri _) "")
93+
       (('keb kanji) `(kanji ,kanji))
94+
       (('ke_inf _) "")
95+
       (('k_ele lst ...) (car (filter list? lst)))
96+
       (('reb reading) `(reading ,reading))
97+
       (('re_nokanji _ ...) "")
98+
       (('re_restr r) `(limit ,r))
99+
       (('re_inf r) `(info ,r))
100+
       (('r_ele lst ...) (sxml->reading lst))
101+
       (('ant pos) `(tag ,pos))
102+
       (('dial pos) `(tag ,pos))
103+
       (('field pos) `(tag ,pos))
104+
       (('misc pos) `(tag ,pos))
105+
       (('pos pos) `(tag ,pos))
106+
       (('xref pos) `(ref ,pos))
107+
       (('g_type _ ...) "")
108+
       (('gloss (? string? g)) xml)
109+
       (('gloss (? list? g)) g)
110+
       (('gloss attr g) (cons `(gloss ,g) attr))
111+
       (('stagk k) `(limit ,k))
112+
       (('stagr r) `(limit ,r))
113+
       (('s_inf r) `(info ,r))
114+
       (('lsource lst ...) (sxml->source lst))
115+
       (('sense lst ...) (sxml->sense lst))
116+
       (('entry lst ...) (sxml->result lst))
117+
       (('JMdict lst ...) lst)
118+
       (('xml:lang l) `(lang ,l))
119+
       (('ls_wasei _) '(wasei #t))
120+
       (('ls_type t) `(type ,t))
121+
       (('@ lst ...) lst)
122+
       (('*TOP* _ l) (filter result? l))
123+
       (('*PI* _ ...) #f)))
124+
    (lambda (a) a)
125+
    sxml))

modules/nani/result.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani result)
20+
  #:use-module (srfi srfi-9)
21+
  #:export (make-result
22+
            result?
23+
            result-position
24+
            result-position-set!
25+
            result-kanjis
26+
            result-readings
27+
            result-senses
28+
            
29+
            make-reading
30+
            reading?
31+
            reading-kanjis
32+
            reading-info
33+
            reading-readings
34+
            
35+
            make-sense
36+
            sense?
37+
            sense-references
38+
            sense-limits
39+
            sense-infos
40+
            sense-sources
41+
            sense-tags
42+
            sense-glosses
43+
            sense-language
44+
            
45+
            make-source
46+
            source?
47+
            source-content
48+
            source-wasei?
49+
            source-type
50+
            source-lang
51+
            
52+
            update-result
53+
            update-reading
54+
            update-sense
55+
            update-source))
56+
57+
(define-record-type result
58+
  (make-result position kanjis readings senses)
59+
  result?
60+
  (position result-position result-position-set!) ; integer
61+
  (kanjis result-kanjis) ; string-list
62+
  (readings result-readings) ; reanding-list
63+
  (senses result-senses)) ; sense-list
64+
65+
(define-record-type reading
66+
  (make-reading kanjis info readings)
67+
  reading?
68+
  (kanjis reading-kanjis) ; string-list
69+
  (info reading-info) ; string-list
70+
  (readings reading-readings)) ; string-list
71+
72+
(define-record-type sense
73+
  (make-sense references limits infos sources tags glosses language)
74+
  sense?
75+
  (references sense-references) ; string-list
76+
  (limits sense-limits) ; string-list
77+
  (infos sense-infos) ; string-list
78+
  (sources sense-sources) ; source-list
79+
  (tags sense-tags) ; symbol-list
80+
  (glosses sense-glosses) ; string-list
81+
  (language sense-language)) ; string
82+
83+
(define-record-type source
84+
  (make-source content wasei? type lang)
85+
  source?
86+
  (content source-content) ; string-list
87+
  (wasei? source-wasei?) ; boolean
88+
  (type source-type) ; string
89+
  (lang source-lang)) ; string
90+
91+
(define* (update-result result
92+
           #:key (kanjis (result-kanjis result))
93+
                 (readings (result-readings result))
94+
                 (senses (result-senses result)))
95+
  (make-result (result-position result) kanjis readings senses))
96+
97+
(define* (update-reading reading
98+
           #:key (kanjis (reading-kanjis reading))
99+
                 (info (reading-info reading))
100+
                 (readings (reading-readings reading)))
101+
  (make-reading kanjis info readings))
102+
103+
(define* (update-sense sense
104+
           #:key (references (sense-references sense))
105+
                 (limits (sense-limits sense))
106+
                 (infos (sense-infos sense))
107+
                 (sources (sense-sources sense))
108+
                 (tags (sense-tags sense))
109+
                 (glosses (sense-glosses sense))
110+
                 (language (sense-language sense)))
111+
  (make-sense references limits infos sources tags glosses language))
112+
113+
(define* (update-source source
114+
           #:key (content (source-content source))
115+
                 (wasei? (source-wasei? source))
116+
                 (type (source-type source))
117+
                 (lang (source-lang source)))
118+
  (make-source content wasei? type lang))

modules/nani/trie.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani trie)
20+
  #:use-module (srfi srfi-9)
21+
  #:export (make-trie
22+
            trie?
23+
            trie-position
24+
            trie-position-set!
25+
            trie-vals
26+
            trie-vals-set!
27+
            trie-transitions
28+
            trie-transitions-set!
29+
            
30+
            make-empty-trie
31+
            add-to-trie!
32+
            compress-trie))
33+
34+
(define-record-type trie
35+
  (make-trie position vals transitions)
36+
  trie?
37+
  (position trie-position trie-position-set!) ; integer
38+
  (vals trie-vals trie-vals-set!) ; list
39+
  (transitions trie-transitions trie-transitions-set!)) ; array or alist
40+
41+
(define (make-empty-trie)
42+
  (make-trie 0 '() (make-array #f 16)))
43+
44+
(define (add-to-trie! trie key value)
45+
  (if (null? key)
46+
    (trie-vals-set! trie (cons value (trie-vals trie)))
47+
    (let ((next-trie (array-ref (trie-transitions trie) (car key))))
48+
      (if next-trie
49+
          (add-to-trie! next-trie (cdr key) value)
50+
          (let ((next-trie (make-empty-trie)))
51+
            (array-set! (trie-transitions trie) next-trie (car key))
52+
            (add-to-trie! next-trie (cdr key) value))))))
53+
54+
(define (convert-trie-transitions! trie)
55+
  (define (get-new-transitions transitions)
56+
    (let loop ((i 0) (tr '()))
57+
      (if (= i 16)
58+
        tr
59+
        (let ((elem (array-ref transitions i)))
60+
          (if elem
61+
            (begin
62+
              (convert-trie-transitions! elem)
63+
              (loop (+ i 1) (cons (cons i elem) tr)))
64+
            (loop (+ i 1) tr))))))
65+
  (let* ((transitions (trie-transitions trie))
66+
         (transitions (get-new-transitions transitions)))
67+
    (trie-transitions-set! trie transitions)))
68+
69+
(define (compress-trie trie)
70+
  (define (compress-aux trie)
71+
    (make-trie
72+
      (trie-position trie)
73+
      (trie-vals trie)
74+
      (apply append
75+
        (map
76+
          (lambda (tr)
77+
            (let ((trie (cdr tr)))
78+
              (map
79+
                (lambda (tr2)
80+
                  (cons (+ (car tr2) (* 16 (car tr)))
81+
                        (compress-aux (cdr tr2))))
82+
                (trie-transitions trie))))
83+
          (trie-transitions trie)))))
84+
  (convert-trie-transitions! trie)
85+
  (compress-aux trie))