Fix radk parsing and serializing

Julien LepillerMon May 25 21:48:01+0200 2020

4cf6dc3

Fix radk parsing and serializing

modules/nani/jmdict/serialize.scm

2424
  #:use-module (rnrs bytevectors)
2525
  #:export (serialize-jmdict))
2626
27+
(define (serialize-pointer ptr pos bv)
28+
  (bytevector-u8-set! bv pos (car ptr))
29+
  (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big))
30+
  (+ pos 5))
31+
2732
(define (serialize-trie trie results pos)
2833
  (define (serialize-trie-aux transitions pos)
2934
    (let loop ((pos pos)

modules/nani/radk.scm

8282
  (define (serialize-rad-kanji-element element pos bv)
8383
    (match element
8484
      ((radical kanji ...)
85-
       (let* ((pos (serialize-string radical pos bv)))
86-
         (serialize-list kanji serialize-string pos bv)))))
85+
       (let* ((pos (serialize-string (radical-character radical) pos bv)))
86+
         (serialize-string (string-join kanji "") pos bv)))))
8787
  (define (rad-kanji-element-size element)
8888
    (match element
8989
      ((radical kanji ...)
90-
       (+ (string-size radical) (list-size kanji string-size)))))
90+
       (+ (string-size (radical-character radical))
91+
          (string-size (string-join kanji ""))))))
9192
9293
  (define (serialize-rad-kanji rad-kanji pos bv)
9394
    (serialize-list rad-kanji serialize-rad-kanji-element pos bv))

9798
  (define (serialize-rad-stroke-element element pos bv)
9899
    (match element
99100
      ((radical . stroke)
100-
       (let ((pos (serialize-string radical pos bv)))
101+
       (let ((pos (serialize-string (radical-character radical) pos bv)))
101102
         (serialize-char stroke pos bv)))))
102103
  (define (rad-stroke-element-size element)
103104
    (match element
104105
      ((radical . stroke)
105-
       (+ (string-size radical) (char-size stroke)))))
106+
       (+ (string-size (radical-character radical)) (char-size stroke)))))
106107
107108
  (define (serialize-rad-stroke rad-stroke pos bv)
108109
    (serialize-list rad-stroke serialize-rad-stroke-element pos bv))
109110
  (define (rad-stroke-size rad-stroke)
110111
    (list-size rad-stroke rad-stroke-element-size))
111112
112-
  (define serialize-kanji-stroke serialize-rad-stroke)
113-
  (define kanji-stroke-size rad-stroke-size)
113+
  (define (serialize-kanji-stroke-element element pos bv)
114+
    (match element
115+
      ((kanji . stroke)
116+
       (let ((pos (serialize-string kanji pos bv)))
117+
         (serialize-char stroke pos bv)))))
118+
  (define (kanji-stroke-element-size element)
119+
    (match element
120+
      ((kanji . stroke)
121+
       (+ (string-size kanji) (char-size stroke)))))
122+
123+
  (define (serialize-kanji-stroke kanji-stroke pos bv)
124+
    (serialize-list kanji-stroke serialize-kanji-stroke-element pos bv))
125+
  (define (kanji-stroke-size kanji-stroke)
126+
    (list-size kanji-stroke kanji-stroke-element-size))
114127
115128
  (let* ((header (string->utf8 "NANI_RADK001"))
116129
         (header-size (bytevector-length header))

125138
           (pos (serialize-kanji-stroke kanji-stroke pos bv)))
126139
      bv)))
127140
128-
129-
130-
131-
141+
(define (radical-character kanji)
142+
  (match kanji
143+
    ("???" "???")
144+
    ("???" "????")
145+
    ("???" "???")
146+
    ("???" "???")
147+
    ("???" "????")
148+
    ("???" "???")
149+
    ("???" "???")
150+
    ("???" "???")
151+
    ("???" "???")
152+
    ("???" "???")
153+
    ("???" "???")
154+
    ("???" "???")
155+
    ("???" "???")
156+
    ("???" "???")
157+
    ("???" "???")
158+
    ("???" "???")
159+
    ("???" "???")
160+
    ("???" "???")
161+
    ("???" "???")
162+
    ("???" "???")
163+
    ("???" "???")
164+
    ("???" "???")
165+
    (_ kanji)))

modules/nani/serialize.scm

2020
  #:use-module (rnrs bytevectors)
2121
  #:export (merge-bvs
2222
            serialize-list list-size
23-
            serialize-pointer
2423
            serialize-char char-size
2524
            serialize-int int-size
2625
            serialize-boolean boolean-size

4847
  (when (not (list? lst)) (throw 'not-list lst))
4948
  (apply + (if size? 2 0) (map size lst)))
5049
51-
(define (serialize-pointer ptr pos bv)
52-
  (bytevector-u8-set! bv pos (car ptr))
53-
  (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big))
54-
  (+ pos 5))
55-
5650
(define (serialize-char int pos bv)
5751
  (bytevector-u8-set! bv pos int)
5852
  (+ pos 1))

tools/jmdict.scm

6464
          (serialize-jmdict results kanji-trie reading-trie meaning-trie))))))
6565
6666
(define (print word dict)
67-
  ())
67+
  #t)
6868
6969
(match (command-line)
7070
  ((_ cmd input lang output)