Fix radk parsing and serializing
modules/nani/jmdict/serialize.scm
| 24 | 24 | #:use-module (rnrs bytevectors) | |
| 25 | 25 | #:export (serialize-jmdict)) | |
| 26 | 26 | ||
| 27 | + | (define (serialize-pointer ptr pos bv) | |
| 28 | + | (bytevector-u8-set! bv pos (car ptr)) | |
| 29 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
| 30 | + | (+ pos 5)) | |
| 31 | + | ||
| 27 | 32 | (define (serialize-trie trie results pos) | |
| 28 | 33 | (define (serialize-trie-aux transitions pos) | |
| 29 | 34 | (let loop ((pos pos) |
modules/nani/radk.scm
| 82 | 82 | (define (serialize-rad-kanji-element element pos bv) | |
| 83 | 83 | (match element | |
| 84 | 84 | ((radical kanji ...) | |
| 85 | - | (let* ((pos (serialize-string radical pos bv))) | |
| 86 | - | (serialize-list kanji serialize-string pos bv))))) | |
| 85 | + | (let* ((pos (serialize-string (radical-character radical) pos bv))) | |
| 86 | + | (serialize-string (string-join kanji "") pos bv))))) | |
| 87 | 87 | (define (rad-kanji-element-size element) | |
| 88 | 88 | (match element | |
| 89 | 89 | ((radical kanji ...) | |
| 90 | - | (+ (string-size radical) (list-size kanji string-size))))) | |
| 90 | + | (+ (string-size (radical-character radical)) | |
| 91 | + | (string-size (string-join kanji "")))))) | |
| 91 | 92 | ||
| 92 | 93 | (define (serialize-rad-kanji rad-kanji pos bv) | |
| 93 | 94 | (serialize-list rad-kanji serialize-rad-kanji-element pos bv)) | |
… | |||
| 97 | 98 | (define (serialize-rad-stroke-element element pos bv) | |
| 98 | 99 | (match element | |
| 99 | 100 | ((radical . stroke) | |
| 100 | - | (let ((pos (serialize-string radical pos bv))) | |
| 101 | + | (let ((pos (serialize-string (radical-character radical) pos bv))) | |
| 101 | 102 | (serialize-char stroke pos bv))))) | |
| 102 | 103 | (define (rad-stroke-element-size element) | |
| 103 | 104 | (match element | |
| 104 | 105 | ((radical . stroke) | |
| 105 | - | (+ (string-size radical) (char-size stroke))))) | |
| 106 | + | (+ (string-size (radical-character radical)) (char-size stroke))))) | |
| 106 | 107 | ||
| 107 | 108 | (define (serialize-rad-stroke rad-stroke pos bv) | |
| 108 | 109 | (serialize-list rad-stroke serialize-rad-stroke-element pos bv)) | |
| 109 | 110 | (define (rad-stroke-size rad-stroke) | |
| 110 | 111 | (list-size rad-stroke rad-stroke-element-size)) | |
| 111 | 112 | ||
| 112 | - | (define serialize-kanji-stroke serialize-rad-stroke) | |
| 113 | - | (define kanji-stroke-size rad-stroke-size) | |
| 113 | + | (define (serialize-kanji-stroke-element element pos bv) | |
| 114 | + | (match element | |
| 115 | + | ((kanji . stroke) | |
| 116 | + | (let ((pos (serialize-string kanji pos bv))) | |
| 117 | + | (serialize-char stroke pos bv))))) | |
| 118 | + | (define (kanji-stroke-element-size element) | |
| 119 | + | (match element | |
| 120 | + | ((kanji . stroke) | |
| 121 | + | (+ (string-size kanji) (char-size stroke))))) | |
| 122 | + | ||
| 123 | + | (define (serialize-kanji-stroke kanji-stroke pos bv) | |
| 124 | + | (serialize-list kanji-stroke serialize-kanji-stroke-element pos bv)) | |
| 125 | + | (define (kanji-stroke-size kanji-stroke) | |
| 126 | + | (list-size kanji-stroke kanji-stroke-element-size)) | |
| 114 | 127 | ||
| 115 | 128 | (let* ((header (string->utf8 "NANI_RADK001")) | |
| 116 | 129 | (header-size (bytevector-length header)) | |
… | |||
| 125 | 138 | (pos (serialize-kanji-stroke kanji-stroke pos bv))) | |
| 126 | 139 | bv))) | |
| 127 | 140 | ||
| 128 | - | ||
| 129 | - | ||
| 130 | - | ||
| 131 | - | ||
| 141 | + | (define (radical-character kanji) | |
| 142 | + | (match kanji | |
| 143 | + | ("化" "⺅") | |
| 144 | + | ("个" "𠆢") | |
| 145 | + | ("并" "丷") | |
| 146 | + | ("刈" "⺉") | |
| 147 | + | ("乞" "𠂉") | |
| 148 | + | ("込" "⻌") | |
| 149 | + | ("尚" "⺌") | |
| 150 | + | ("忙" "⺖") | |
| 151 | + | ("扎" "⺘") | |
| 152 | + | ("汁" "⺡") | |
| 153 | + | ("犯" "⺨") | |
| 154 | + | ("艾" "⺾") | |
| 155 | + | ("邦" "⻏") | |
| 156 | + | ("阡" "⻙") | |
| 157 | + | ("老" "⺹") | |
| 158 | + | ("杰" "⺣") | |
| 159 | + | ("礼" "⺭") | |
| 160 | + | ("疔" "疒") | |
| 161 | + | ("禹" "禸") | |
| 162 | + | ("初" "⻂") | |
| 163 | + | ("買" "⺲") | |
| 164 | + | ("滴" "啇") | |
| 165 | + | (_ kanji))) | |
modules/nani/serialize.scm
| 20 | 20 | #:use-module (rnrs bytevectors) | |
| 21 | 21 | #:export (merge-bvs | |
| 22 | 22 | serialize-list list-size | |
| 23 | - | serialize-pointer | |
| 24 | 23 | serialize-char char-size | |
| 25 | 24 | serialize-int int-size | |
| 26 | 25 | serialize-boolean boolean-size | |
… | |||
| 48 | 47 | (when (not (list? lst)) (throw 'not-list lst)) | |
| 49 | 48 | (apply + (if size? 2 0) (map size lst))) | |
| 50 | 49 | ||
| 51 | - | (define (serialize-pointer ptr pos bv) | |
| 52 | - | (bytevector-u8-set! bv pos (car ptr)) | |
| 53 | - | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
| 54 | - | (+ pos 5)) | |
| 55 | - | ||
| 56 | 50 | (define (serialize-char int pos bv) | |
| 57 | 51 | (bytevector-u8-set! bv pos int) | |
| 58 | 52 | (+ pos 1)) | |
tools/jmdict.scm
| 64 | 64 | (serialize-jmdict results kanji-trie reading-trie meaning-trie)))))) | |
| 65 | 65 | ||
| 66 | 66 | (define (print word dict) | |
| 67 | - | ()) | |
| 67 | + | #t) | |
| 68 | 68 | ||
| 69 | 69 | (match (command-line) | |
| 70 | 70 | ((_ cmd input lang output) |