Fix radk parsing and serializing
modules/nani/jmdict/serialize.scm
24 | 24 | #:use-module (rnrs bytevectors) | |
25 | 25 | #:export (serialize-jmdict)) | |
26 | 26 | ||
27 | + | (define (serialize-pointer ptr pos bv) | |
28 | + | (bytevector-u8-set! bv pos (car ptr)) | |
29 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
30 | + | (+ pos 5)) | |
31 | + | ||
27 | 32 | (define (serialize-trie trie results pos) | |
28 | 33 | (define (serialize-trie-aux transitions pos) | |
29 | 34 | (let loop ((pos pos) |
modules/nani/radk.scm
82 | 82 | (define (serialize-rad-kanji-element element pos bv) | |
83 | 83 | (match element | |
84 | 84 | ((radical kanji ...) | |
85 | - | (let* ((pos (serialize-string radical pos bv))) | |
86 | - | (serialize-list kanji serialize-string pos bv))))) | |
85 | + | (let* ((pos (serialize-string (radical-character radical) pos bv))) | |
86 | + | (serialize-string (string-join kanji "") pos bv))))) | |
87 | 87 | (define (rad-kanji-element-size element) | |
88 | 88 | (match element | |
89 | 89 | ((radical kanji ...) | |
90 | - | (+ (string-size radical) (list-size kanji string-size))))) | |
90 | + | (+ (string-size (radical-character radical)) | |
91 | + | (string-size (string-join kanji "")))))) | |
91 | 92 | ||
92 | 93 | (define (serialize-rad-kanji rad-kanji pos bv) | |
93 | 94 | (serialize-list rad-kanji serialize-rad-kanji-element pos bv)) | |
… | |||
97 | 98 | (define (serialize-rad-stroke-element element pos bv) | |
98 | 99 | (match element | |
99 | 100 | ((radical . stroke) | |
100 | - | (let ((pos (serialize-string radical pos bv))) | |
101 | + | (let ((pos (serialize-string (radical-character radical) pos bv))) | |
101 | 102 | (serialize-char stroke pos bv))))) | |
102 | 103 | (define (rad-stroke-element-size element) | |
103 | 104 | (match element | |
104 | 105 | ((radical . stroke) | |
105 | - | (+ (string-size radical) (char-size stroke))))) | |
106 | + | (+ (string-size (radical-character radical)) (char-size stroke))))) | |
106 | 107 | ||
107 | 108 | (define (serialize-rad-stroke rad-stroke pos bv) | |
108 | 109 | (serialize-list rad-stroke serialize-rad-stroke-element pos bv)) | |
109 | 110 | (define (rad-stroke-size rad-stroke) | |
110 | 111 | (list-size rad-stroke rad-stroke-element-size)) | |
111 | 112 | ||
112 | - | (define serialize-kanji-stroke serialize-rad-stroke) | |
113 | - | (define kanji-stroke-size rad-stroke-size) | |
113 | + | (define (serialize-kanji-stroke-element element pos bv) | |
114 | + | (match element | |
115 | + | ((kanji . stroke) | |
116 | + | (let ((pos (serialize-string kanji pos bv))) | |
117 | + | (serialize-char stroke pos bv))))) | |
118 | + | (define (kanji-stroke-element-size element) | |
119 | + | (match element | |
120 | + | ((kanji . stroke) | |
121 | + | (+ (string-size kanji) (char-size stroke))))) | |
122 | + | ||
123 | + | (define (serialize-kanji-stroke kanji-stroke pos bv) | |
124 | + | (serialize-list kanji-stroke serialize-kanji-stroke-element pos bv)) | |
125 | + | (define (kanji-stroke-size kanji-stroke) | |
126 | + | (list-size kanji-stroke kanji-stroke-element-size)) | |
114 | 127 | ||
115 | 128 | (let* ((header (string->utf8 "NANI_RADK001")) | |
116 | 129 | (header-size (bytevector-length header)) | |
… | |||
125 | 138 | (pos (serialize-kanji-stroke kanji-stroke pos bv))) | |
126 | 139 | bv))) | |
127 | 140 | ||
128 | - | ||
129 | - | ||
130 | - | ||
131 | - | ||
141 | + | (define (radical-character kanji) | |
142 | + | (match kanji | |
143 | + | ("化" "⺅") | |
144 | + | ("个" "𠆢") | |
145 | + | ("并" "丷") | |
146 | + | ("刈" "⺉") | |
147 | + | ("乞" "𠂉") | |
148 | + | ("込" "⻌") | |
149 | + | ("尚" "⺌") | |
150 | + | ("忙" "⺖") | |
151 | + | ("扎" "⺘") | |
152 | + | ("汁" "⺡") | |
153 | + | ("犯" "⺨") | |
154 | + | ("艾" "⺾") | |
155 | + | ("邦" "⻏") | |
156 | + | ("阡" "⻖") | |
157 | + | ("老" "⺹") | |
158 | + | ("杰" "⺣") | |
159 | + | ("礼" "⺭") | |
160 | + | ("疔" "疒") | |
161 | + | ("禹" "禸") | |
162 | + | ("初" "⻂") | |
163 | + | ("買" "⺲") | |
164 | + | ("滴" "啇") | |
165 | + | (_ kanji))) |
modules/nani/serialize.scm
20 | 20 | #:use-module (rnrs bytevectors) | |
21 | 21 | #:export (merge-bvs | |
22 | 22 | serialize-list list-size | |
23 | - | serialize-pointer | |
24 | 23 | serialize-char char-size | |
25 | 24 | serialize-int int-size | |
26 | 25 | serialize-boolean boolean-size | |
… | |||
48 | 47 | (when (not (list? lst)) (throw 'not-list lst)) | |
49 | 48 | (apply + (if size? 2 0) (map size lst))) | |
50 | 49 | ||
51 | - | (define (serialize-pointer ptr pos bv) | |
52 | - | (bytevector-u8-set! bv pos (car ptr)) | |
53 | - | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
54 | - | (+ pos 5)) | |
55 | - | ||
56 | 50 | (define (serialize-char int pos bv) | |
57 | 51 | (bytevector-u8-set! bv pos int) | |
58 | 52 | (+ pos 1)) |
tools/jmdict.scm
64 | 64 | (serialize-jmdict results kanji-trie reading-trie meaning-trie)))))) | |
65 | 65 | ||
66 | 66 | (define (print word dict) | |
67 | - | ()) | |
67 | + | #t) | |
68 | 68 | ||
69 | 69 | (match (command-line) | |
70 | 70 | ((_ cmd input lang output) |