Properly serialize jmdict

Julien Lepiller, Sat Apr 13 21:48:17 +0200 2019

2e317df

Properly serialize jmdict

modules/nani/jmdict/serialize.scm

6363
6464
;; Write the pair PTR into bytevector BV starting at offset POS and return
;; the offset just past what was written (POS + 5).
;;
;; Layout: one byte holding (car ptr), followed by a 32-bit unsigned
;; integer holding (trie-position (cdr ptr)).
;;
;; Diff note: the line after the `66-` marker is the version removed by
;; this commit (little-endian); the line after `66+` is its replacement
;; (big-endian).
(define (serialize-pointer ptr pos bv)
6565
  (bytevector-u8-set! bv pos (car ptr))
66-
  (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness little))
66+
  (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big))
6767
  (+ pos 5))
6868
6969
;; Write INT as a 32-bit unsigned integer into bytevector BV at offset POS
;; and return the offset just past it (POS + 4).
;;
;; Diff note: the line after `70-` is the removed little-endian version;
;; the line after `70+` is the big-endian replacement.
(define (serialize-int int pos bv)
70-
  (bytevector-u32-set! bv pos int (endianness little))
70+
  (bytevector-u32-set! bv pos int (endianness big))
7171
  (+ pos 4))
7272
;; Size procedure for integers: `const' yields a procedure that ignores its
;; arguments and always returns 4, matching the 4 bytes that serialize-int
;; advances the position by.
(define int-size (const 4))
7373

7878
7979
;; Write STR into bytevector BV at offset POS as a length-prefixed UTF-8
;; string and return the offset just past the last byte written.
;;
;; Layout: a 32-bit unsigned integer holding the length in bytes of the
;; UTF-8 encoding (not the character count), followed by the encoded bytes.
;;
;; Diff note: the line after `81-` is the removed little-endian length
;; write; the line after `81+` is the big-endian replacement.
(define (serialize-string str pos bv)
8080
  (let ((sbv (string->utf8 str)))
81-
    (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness little))
81+
    (bytevector-u32-set! bv pos (bytevector-length sbv) (endianness big))
8282
    (bytevector-copy! sbv 0 bv (+ pos 4) (bytevector-length sbv))
8383
    (+ pos 4 (bytevector-length sbv))))
8484
(define (string-size str)

8888
(define* (serialize-list lst serialize pos bv #:key (size? #t))
8989
  (when (not (list? lst)) (throw 'not-list lst))
9090
  (when size?
91-
    (bytevector-u32-set! bv pos (length lst) (endianness little)))
91+
    (bytevector-u32-set! bv pos (length lst) (endianness big)))
9292
  (let loop ((lst lst) (pos (+ pos (if size? 4 0))))
9393
    (if (null? lst)
9494
      pos

157157
     (list-size (result-senses result) sense-size)))
158158
159159
;; Serialize RESULTS together with the three tries KANJI-TRIE, READING-TRIE
;; and SENSE-TRIE, returning whatever `merge-bvs' produces from the list of
;; generated bytevectors.
;;
;; The first bytevector (results-bv) is laid out as:
;;   - the UTF-8 bytes of the header string,
;;   - a 12-byte table of three big-endian 32-bit unsigned integers,
;;   - the serialized results, written without a length prefix
;;     (#:size? #f).
;; The bytevectors returned by `serialize-trie' for the kanji, reading and
;; sense tries follow it, in that order.
;;
;; NOTE(review): `serialize-trie' is not visible here; from its use below
;; its result appears to be a pair whose car is the position after the
;; serialized trie and whose cdr is a list of bytevectors -- confirm.
;;
;; Diff note: a line after an `N-' marker is the version removed by this
;; commit and a line after `N+' is added by it.  The commit changes the
;; header string, fixes the starting position passed to the first
;; `serialize-trie' call, fills in the pointer table and prints the size of
;; each section.
(define (serialize-jmdict results kanji-trie reading-trie sense-trie)
160-
  (let* ((header (string->utf8 "NANI_JMDICT"))
160+
  (let* ((header (string->utf8 "NANI_JMDICT001"))
161161
         (header-size (bytevector-length header))
162162
         (pointers (make-bytevector 12 0))
163163
         (results-size (list-size results result-size #:size? #f))
164164
         (results-bv (make-bytevector (+ header-size 12 results-size 0))))
165165
    (serialize-list results serialize-result (+ header-size 12) results-bv #:size? #f)
166166
    (let* ((results (list->array 1 results))
;; results-bv was allocated with room for the header and the pointer
;; table, so its length alone is the offset at which the first trie
;; starts; the removed line added header-size and 12 a second time.
167-
           (pos (+ header-size 12 (bytevector-length results-bv)))
167+
           (pos (bytevector-length results-bv))
168168
           (kanji-bvs (serialize-trie kanji-trie results pos))
169169
           (pos (car kanji-bvs))
170170
           (reading-bvs (serialize-trie reading-trie results pos))
171171
           (pos (car reading-bvs))
172172
           (meaning-bvs (serialize-trie sense-trie results pos)))
;; Fill the pointer table with the three positions at which the kanji,
;; reading and sense tries were asked to start (presumably their offsets
;; in the merged output -- confirm against serialize-trie and the reader).
173+
      (bytevector-u32-set! pointers 0 (bytevector-length results-bv) (endianness big))
174+
      (bytevector-u32-set! pointers 4 (car kanji-bvs) (endianness big))
175+
      (bytevector-u32-set! pointers 8 (car reading-bvs) (endianness big))
;; Copy the header to offset 0 and the pointer table right after it.
173176
      (bytevector-copy! header 0 results-bv 0 header-size)
174177
      (bytevector-copy! pointers 0 results-bv header-size 12)
178+
      ;; give some feedback on the size of file's structures
179+
      (format #t "results is ~a bytes long~%" (bytevector-length results-bv))
180+
      (format #t "kanjis is ~a bytes long~%" (apply + (map bytevector-length (cdr kanji-bvs))))
181+
      (format #t "readings is ~a bytes long~%" (apply + (map bytevector-length (cdr reading-bvs))))
182+
      (format #t "senses is ~a bytes long~%" (apply + (map bytevector-length (cdr meaning-bvs))))
;; Concatenation order: results-bv, then kanji, reading and sense tries.
175183
      (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs)
176184
                         (cdr meaning-bvs))))))