Properly serialize jmdict
modules/nani/jmdict/serialize.scm
63 | 63 | ||
64 | 64 | (define (serialize-pointer ptr pos bv) | |
65 | 65 | (bytevector-u8-set! bv pos (car ptr)) | |
66 | - | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness little)) | |
66 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
67 | 67 | (+ pos 5)) | |
68 | 68 | ||
69 | 69 | (define (serialize-int int pos bv) | |
70 | - | (bytevector-u32-set! bv pos int (endianness little)) | |
70 | + | (bytevector-u32-set! bv pos int (endianness big)) | |
71 | 71 | (+ pos 4)) | |
72 | 72 | (define int-size (const 4)) | |
73 | 73 | ||
… | |||
78 | 78 | ||
79 | 79 | (define (serialize-string str pos bv) | |
80 | 80 | (let ((sbv (string->utf8 str))) | |
81 | - | (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness little)) | |
81 | + | (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness big)) | |
82 | 82 | (bytevector-copy! sbv 0 bv (+ pos 4) (bytevector-length sbv)) | |
83 | 83 | (+ pos 4 (bytevector-length sbv)))) | |
84 | 84 | (define (string-size str) | |
… | |||
88 | 88 | (define* (serialize-list lst serialize pos bv #:key (size? #t)) | |
89 | 89 | (when (not (list? lst)) (throw 'not-list lst)) | |
90 | 90 | (when size? | |
91 | - | (bytevector-u32-set! bv pos (length lst) (endianness little))) | |
91 | + | (bytevector-u32-set! bv pos (length lst) (endianness big))) | |
92 | 92 | (let loop ((lst lst) (pos (+ pos (if size? 4 0)))) | |
93 | 93 | (if (null? lst) | |
94 | 94 | pos | |
… | |||
157 | 157 | (list-size (result-senses result) sense-size))) | |
158 | 158 | ||
159 | 159 | (define (serialize-jmdict results kanji-trie reading-trie sense-trie) | |
160 | - | (let* ((header (string->utf8 "NANI_JMDICT")) | |
160 | + | (let* ((header (string->utf8 "NANI_JMDICT001")) | |
161 | 161 | (header-size (bytevector-length header)) | |
162 | 162 | (pointers (make-bytevector 12 0)) | |
163 | 163 | (results-size (list-size results result-size #:size? #f)) | |
164 | 164 | (results-bv (make-bytevector (+ header-size 12 results-size 0)))) | |
165 | 165 | (serialize-list results serialize-result (+ header-size 12) results-bv #:size? #f) | |
166 | 166 | (let* ((results (list->array 1 results)) | |
167 | - | (pos (+ header-size 12 (bytevector-length results-bv))) | |
167 | + | (pos (bytevector-length results-bv)) | |
168 | 168 | (kanji-bvs (serialize-trie kanji-trie results pos)) | |
169 | 169 | (pos (car kanji-bvs)) | |
170 | 170 | (reading-bvs (serialize-trie reading-trie results pos)) | |
171 | 171 | (pos (car reading-bvs)) | |
172 | 172 | (meaning-bvs (serialize-trie sense-trie results pos))) | |
173 | + | (bytevector-u32-set! pointers 0 (bytevector-length results-bv) (endianness big)) | |
174 | + | (bytevector-u32-set! pointers 4 (car kanji-bvs) (endianness big)) | |
175 | + | (bytevector-u32-set! pointers 8 (car reading-bvs) (endianness big)) | |
173 | 176 | (bytevector-copy! header 0 results-bv 0 header-size) | |
174 | 177 | (bytevector-copy! pointers 0 results-bv header-size 12) | |
178 | + | ;; give some feedback on the size of file's structures | |
179 | + | (format #t "results is ~a bytes long~%" (bytevector-length results-bv)) | |
180 | + | (format #t "kanjis is ~a bytes long~%" (apply + (map bytevector-length (cdr kanji-bvs)))) | |
181 | + | (format #t "readings is ~a bytes long~%" (apply + (map bytevector-length (cdr reading-bvs)))) | |
182 | + | (format #t "senses is ~a bytes long~%" (apply + (map bytevector-length (cdr meaning-bvs)))) | |
175 | 183 | (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs) | |
176 | 184 | (cdr meaning-bvs)))))) |