Properly serialize jmdict
modules/nani/jmdict/serialize.scm
| 63 | 63 | ||
| 64 | 64 | (define (serialize-pointer ptr pos bv) | |
| 65 | 65 | (bytevector-u8-set! bv pos (car ptr)) | |
| 66 | - | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness little)) | |
| 66 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
| 67 | 67 | (+ pos 5)) | |
| 68 | 68 | ||
| 69 | 69 | (define (serialize-int int pos bv) | |
| 70 | - | (bytevector-u32-set! bv pos int (endianness little)) | |
| 70 | + | (bytevector-u32-set! bv pos int (endianness big)) | |
| 71 | 71 | (+ pos 4)) | |
| 72 | 72 | (define int-size (const 4)) | |
| 73 | 73 | ||
… | |||
| 78 | 78 | ||
| 79 | 79 | (define (serialize-string str pos bv) | |
| 80 | 80 | (let ((sbv (string->utf8 str))) | |
| 81 | - | (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness little)) | |
| 81 | + | (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness big)) | |
| 82 | 82 | (bytevector-copy! sbv 0 bv (+ pos 4) (bytevector-length sbv)) | |
| 83 | 83 | (+ pos 4 (bytevector-length sbv)))) | |
| 84 | 84 | (define (string-size str) | |
… | |||
| 88 | 88 | (define* (serialize-list lst serialize pos bv #:key (size? #t)) | |
| 89 | 89 | (when (not (list? lst)) (throw 'not-list lst)) | |
| 90 | 90 | (when size? | |
| 91 | - | (bytevector-u32-set! bv pos (length lst) (endianness little))) | |
| 91 | + | (bytevector-u32-set! bv pos (length lst) (endianness big))) | |
| 92 | 92 | (let loop ((lst lst) (pos (+ pos (if size? 4 0)))) | |
| 93 | 93 | (if (null? lst) | |
| 94 | 94 | pos | |
… | |||
| 157 | 157 | (list-size (result-senses result) sense-size))) | |
| 158 | 158 | ||
| 159 | 159 | (define (serialize-jmdict results kanji-trie reading-trie sense-trie) | |
| 160 | - | (let* ((header (string->utf8 "NANI_JMDICT")) | |
| 160 | + | (let* ((header (string->utf8 "NANI_JMDICT001")) | |
| 161 | 161 | (header-size (bytevector-length header)) | |
| 162 | 162 | (pointers (make-bytevector 12 0)) | |
| 163 | 163 | (results-size (list-size results result-size #:size? #f)) | |
| 164 | 164 | (results-bv (make-bytevector (+ header-size 12 results-size 0)))) | |
| 165 | 165 | (serialize-list results serialize-result (+ header-size 12) results-bv #:size? #f) | |
| 166 | 166 | (let* ((results (list->array 1 results)) | |
| 167 | - | (pos (+ header-size 12 (bytevector-length results-bv))) | |
| 167 | + | (pos (bytevector-length results-bv)) | |
| 168 | 168 | (kanji-bvs (serialize-trie kanji-trie results pos)) | |
| 169 | 169 | (pos (car kanji-bvs)) | |
| 170 | 170 | (reading-bvs (serialize-trie reading-trie results pos)) | |
| 171 | 171 | (pos (car reading-bvs)) | |
| 172 | 172 | (meaning-bvs (serialize-trie sense-trie results pos))) | |
| 173 | + | (bytevector-u32-set! pointers 0 (bytevector-length results-bv) (endianness big)) | |
| 174 | + | (bytevector-u32-set! pointers 4 (car kanji-bvs) (endianness big)) | |
| 175 | + | (bytevector-u32-set! pointers 8 (car reading-bvs) (endianness big)) | |
| 173 | 176 | (bytevector-copy! header 0 results-bv 0 header-size) | |
| 174 | 177 | (bytevector-copy! pointers 0 results-bv header-size 12) | |
| 178 | + | ;; give some feedback on the size of file's structures | |
| 179 | + | (format #t "results is ~a bytes long~%" (bytevector-length results-bv)) | |
| 180 | + | (format #t "kanjis is ~a bytes long~%" (apply + (map bytevector-length (cdr kanji-bvs)))) | |
| 181 | + | (format #t "readings is ~a bytes long~%" (apply + (map bytevector-length (cdr reading-bvs)))) | |
| 182 | + | (format #t "senses is ~a bytes long~%" (apply + (map bytevector-length (cdr meaning-bvs)))) | |
| 175 | 183 | (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs) | |
| 176 | 184 | (cdr meaning-bvs)))))) | |