Compress tags.
modules/nani/jmdict/entities.scm unknown status 1
| 1 | + | (define-module (nani jmdict entities) | |
| 2 | + | #:export (get-entity)) | |
| 3 | + | ||
;; Association list mapping JMdict entity names (strings) to small integer ids.
;; Ids are assigned consecutively, from 1 ("MA") up to 173 ("anat").
;; NOTE(review): serialize.scm writes each sense tag with bytevector-u8-set!,
;; so ids presumably have to stay below 256 -- confirm before adding entries.
| 4 | + | (define entities | |
| 5 | + | `(("MA" . 1) | |
| 6 | + | ("X" . 2) | |
| 7 | + | ("abbr" . 3) | |
| 8 | + | ("adj-i" . 4) | |
| 9 | + | ("adj-ix" . 5) | |
| 10 | + | ("adj-na" . 6) | |
| 11 | + | ("adj-no" . 7) | |
| 12 | + | ("adj-pn" . 8) | |
| 13 | + | ("adj-t" . 9) | |
| 14 | + | ("adj-f" . 10) | |
| 15 | + | ("adv" . 11) | |
| 16 | + | ("adv-to" . 12) | |
| 17 | + | ("arch" . 13) | |
| 18 | + | ("ateji" . 14) | |
| 19 | + | ("aux" . 15) | |
| 20 | + | ("aux-v" . 16) | |
| 21 | + | ("aux-adj" . 17) | |
| 22 | + | ("Buddh" . 18) | |
| 23 | + | ("chem" . 19) | |
| 24 | + | ("chn" . 20) | |
| 25 | + | ("col" . 21) | |
| 26 | + | ("comp" . 22) | |
| 27 | + | ("conj" . 23) | |
| 28 | + | ("cop-da" . 24) | |
| 29 | + | ("ctr" . 25) | |
| 30 | + | ("derog" . 26) | |
| 31 | + | ("eK" . 27) | |
| 32 | + | ("ek" . 28) | |
| 33 | + | ("exp" . 29) | |
| 34 | + | ("fam" . 30) | |
| 35 | + | ("fem" . 31) | |
| 36 | + | ("food" . 32) | |
| 37 | + | ("geom" . 33) | |
| 38 | + | ("gikun" . 34) | |
| 39 | + | ("hon" . 35) | |
| 40 | + | ("hum" . 36) | |
| 41 | + | ("iK" . 37) | |
| 42 | + | ("id" . 38) | |
| 43 | + | ("ik" . 39) | |
| 44 | + | ("int" . 40) | |
| 45 | + | ("io" . 41) | |
| 46 | + | ("iv" . 42) | |
| 47 | + | ("ling" . 43) | |
| 48 | + | ("m-sl" . 44) | |
| 49 | + | ("male" . 45) | |
| 50 | + | ("male-sl" . 46) | |
| 51 | + | ("math" . 47) | |
| 52 | + | ("mil" . 48) | |
| 53 | + | ("n" . 49) | |
| 54 | + | ("n-adv" . 50) | |
| 55 | + | ("n-suf" . 51) | |
| 56 | + | ("n-pref" . 52) | |
| 57 | + | ("n-t" . 53) | |
| 58 | + | ("num" . 54) | |
| 59 | + | ("oK" . 55) | |
| 60 | + | ("obs" . 56) | |
| 61 | + | ("obsc" . 57) | |
| 62 | + | ("ok" . 58) | |
| 63 | + | ("oik" . 59) | |
| 64 | + | ("on-mim" . 60) | |
| 65 | + | ("pn" . 61) | |
| 66 | + | ("poet" . 62) | |
| 67 | + | ("pol" . 63) | |
| 68 | + | ("pref" . 64) | |
| 69 | + | ("proverb" . 65) | |
| 70 | + | ("prt" . 66) | |
| 71 | + | ("physics" . 67) | |
| 72 | + | ("quote" . 68) | |
| 73 | + | ("rare" . 69) | |
| 74 | + | ("sens" . 70) | |
| 75 | + | ("sl" . 71) | |
| 76 | + | ("suf" . 72) | |
| 77 | + | ("uK" . 73) | |
| 78 | + | ("uk" . 74) | |
| 79 | + | ("unc" . 75) | |
| 80 | + | ("yoji" . 76) | |
| 81 | + | ("v1" . 77) | |
| 82 | + | ("v1-s" . 78) | |
| 83 | + | ("v2a-s" . 79) | |
| 84 | + | ("v4h" . 80) | |
| 85 | + | ("v4r" . 81) | |
| 86 | + | ("v5aru" . 82) | |
| 87 | + | ("v5b" . 83) | |
| 88 | + | ("v5g" . 84) | |
| 89 | + | ("v5k" . 85) | |
| 90 | + | ("v5k-s" . 86) | |
| 91 | + | ("v5m" . 87) | |
| 92 | + | ("v5n" . 88) | |
| 93 | + | ("v5r" . 89) | |
| 94 | + | ("v5r-i" . 90) | |
| 95 | + | ("v5s" . 91) | |
| 96 | + | ("v5t" . 92) | |
| 97 | + | ("v5u" . 93) | |
| 98 | + | ("v5u-s" . 94) | |
| 99 | + | ("v5uru" . 95) | |
| 100 | + | ("vz" . 96) | |
| 101 | + | ("vi" . 97) | |
| 102 | + | ("vk" . 98) | |
| 103 | + | ("vn" . 99) | |
| 104 | + | ("vr" . 100) | |
| 105 | + | ("vs" . 101) | |
| 106 | + | ("vs-c" . 102) | |
| 107 | + | ("vs-s" . 103) | |
| 108 | + | ("vs-i" . 104) | |
| 109 | + | ("kyb" . 105) | |
| 110 | + | ("osb" . 106) | |
| 111 | + | ("ksb" . 107) | |
| 112 | + | ("ktb" . 108) | |
| 113 | + | ("tsb" . 109) | |
| 114 | + | ("thb" . 110) | |
| 115 | + | ("tsug" . 111) | |
| 116 | + | ("kyu" . 112) | |
| 117 | + | ("rkb" . 113) | |
| 118 | + | ("nab" . 114) | |
| 119 | + | ("hob" . 115) | |
| 120 | + | ("vt" . 116) | |
| 121 | + | ("vulg" . 117) | |
| 122 | + | ("adj-kari" . 118) | |
| 123 | + | ("adj-ku" . 119) | |
| 124 | + | ("adj-shiku" . 120) | |
| 125 | + | ("adj-nari" . 121) | |
| 126 | + | ("n-pr" . 122) | |
| 127 | + | ("v-unspec" . 123) | |
| 128 | + | ("v4k" . 124) | |
| 129 | + | ("v4g" . 125) | |
| 130 | + | ("v4s" . 126) | |
| 131 | + | ("v4t" . 127) | |
| 132 | + | ("v4n" . 128) | |
| 133 | + | ("v4b" . 129) | |
| 134 | + | ("v4m" . 130) | |
| 135 | + | ("v2k-k" . 131) | |
| 136 | + | ("v2g-k" . 132) | |
| 137 | + | ("v2t-k" . 133) | |
| 138 | + | ("v2d-k" . 134) | |
| 139 | + | ("v2h-k" . 135) | |
| 140 | + | ("v2b-k" . 136) | |
| 141 | + | ("v2m-k" . 137) | |
| 142 | + | ("v2y-k" . 138) | |
| 143 | + | ("v2r-k" . 139) | |
| 144 | + | ("v2k-s" . 140) | |
| 145 | + | ("v2g-s" . 141) | |
| 146 | + | ("v2s-s" . 142) | |
| 147 | + | ("v2z-s" . 143) | |
| 148 | + | ("v2t-s" . 144) | |
| 149 | + | ("v2d-s" . 145) | |
| 150 | + | ("v2n-s" . 146) | |
| 151 | + | ("v2h-s" . 147) | |
| 152 | + | ("v2b-s" . 148) | |
| 153 | + | ("v2m-s" . 149) | |
| 154 | + | ("v2y-s" . 150) | |
| 155 | + | ("v2r-s" . 151) | |
| 156 | + | ("v2w-s" . 152) | |
| 157 | + | ("archit" . 153) | |
| 158 | + | ("astron" . 154) | |
| 159 | + | ("baseb" . 155) | |
| 160 | + | ("biol" . 156) | |
| 161 | + | ("bot" . 157) | |
| 162 | + | ("bus" . 158) | |
| 163 | + | ("econ" . 159) | |
| 164 | + | ("engr" . 160) | |
| 165 | + | ("finc" . 161) | |
| 166 | + | ("geol" . 162) | |
| 167 | + | ("law" . 163) | |
| 168 | + | ("mahj" . 164) | |
| 169 | + | ("med" . 165) | |
| 170 | + | ("music" . 166) | |
| 171 | + | ("Shinto" . 167) | |
| 172 | + | ("shogi" . 168) | |
| 173 | + | ("sports" . 169) | |
| 174 | + | ("sumo" . 170) | |
| 175 | + | ("zool" . 171) | |
| 176 | + | ("joc" . 172) | |
| 177 | + | ("anat" . 173))) | |
| 178 | + | ||
;; Look up ENT in the `entities` alist and return the integer id paired with it.
;; When ENT has no entry, print it through `pk` (labelled 'val) and return #f.
;; NOTE(review): xml.scm wraps this result as `(tag ,id)` and later matches tags
;; with `(? integer? r)`; a #f result would not match that pattern -- confirm
;; that silently dropping unknown entities is the intended behaviour.
| 179 | + | (define (get-entity ent) | |
| 180 | + | (let ((val (assoc-ref entities ent))) | |
| 181 | + | (if val val (begin (pk 'val ent) #f)))) |
modules/nani/jmdict/serialize.scm
| 66 | 66 | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
| 67 | 67 | (+ pos 5)) | |
| 68 | 68 | ||
;; Store INT as one unsigned byte at offset POS of bytevector BV and return the
;; offset just past it (POS + 1).
;; NOTE(review): despite the name, the argument is an integer rather than a
;; Scheme character; presumably it must fit in 0..255 -- verify against callers.
| 69 | + | (define (serialize-char int pos bv) | |
| 70 | + | (bytevector-u8-set! bv pos int) | |
| 71 | + | (+ pos 1)) | |
;; Size procedure matching serialize-char: ignores its argument and always
;; returns 1, i.e. one byte per element when passed to list-size.
| 72 | + | (define char-size (const 1)) | |
| 73 | + | ||
;; Store INT as an unsigned 32-bit big-endian value at offset POS of bytevector
;; BV and return the offset just past it (POS + 4).
| 69 | 74 | (define (serialize-int int pos bv) | |
| 70 | 75 | (bytevector-u32-set! bv pos int (endianness big)) | |
| 71 | 76 | (+ pos 4)) | |
… | |||
| 129 | 134 | (pos (serialize-list (sense-limits sense) serialize-string pos bv)) | |
| 130 | 135 | (pos (serialize-list (sense-infos sense) serialize-string pos bv)) | |
| 131 | 136 | (pos (serialize-list (sense-sources sense) serialize-source pos bv)) | |
| 132 | - | (pos (serialize-list (sense-tags sense) serialize-string pos bv)) | |
| 137 | + | (pos (serialize-list (sense-tags sense) serialize-char pos bv)) | |
| 133 | 138 | (pos (serialize-list (sense-glosses sense) serialize-string pos bv)) | |
| 134 | 139 | (pos (serialize-string (sense-language sense) pos bv))) | |
| 135 | 140 | pos)) | |
… | |||
| 139 | 144 | (list-size (sense-limits sense) string-size) | |
| 140 | 145 | (list-size (sense-infos sense) string-size) | |
| 141 | 146 | (list-size (sense-sources sense) source-size) | |
| 142 | - | (list-size (sense-tags sense) string-size) | |
| 147 | + | (list-size (sense-tags sense) char-size) | |
| 143 | 148 | (list-size (sense-glosses sense) string-size) | |
| 144 | 149 | (string-size (sense-language sense)))) | |
| 145 | 150 | ||
modules/nani/jmdict/xml.scm
| 22 | 22 | #:use-module (sxml fold) | |
| 23 | 23 | #:use-module (sxml simple) | |
| 24 | 24 | #:use-module (nani result) | |
| 25 | + | #:use-module (nani jmdict entities) | |
| 25 | 26 | #:export (load-dic sxml->results)) | |
| 26 | 27 | ||
| 27 | 28 | (define (load-dic file) | |
… | |||
| 63 | 64 | (('limit (? string? r)) (update-sense sense #:limits (cons r (sense-limits sense)))) | |
| 64 | 65 | (('info (? string? r)) (update-sense sense #:infos (cons r (sense-infos sense)))) | |
| 65 | 66 | ((? source? s) (update-sense sense #:sources (cons s (sense-sources sense)))) | |
| 66 | - | (('tag (? string? r)) (update-sense sense #:tags (cons r (sense-tags sense)))) | |
| 67 | + | (('tag (? integer? r)) (update-sense sense #:tags (cons r (sense-tags sense)))) | |
| 67 | 68 | (('gloss (? string? r)) (update-sense sense #:glosses (cons r (sense-glosses sense)))) | |
| 68 | 69 | (('lang (? string? l)) (update-sense sense #:language l)) | |
| 69 | 70 | ((? list? l) (loop sense l)) | |
… | |||
| 98 | 99 | (('re_restr r) `(limit ,r)) | |
| 99 | 100 | (('re_inf r) `(info ,r)) | |
| 100 | 101 | (('r_ele lst ...) (sxml->reading lst)) | |
| 101 | - | (('ant pos) `(tag ,pos)) | |
| 102 | - | (('dial pos) `(tag ,pos)) | |
| 103 | - | (('field pos) `(tag ,pos)) | |
| 104 | - | (('misc pos) `(tag ,pos)) | |
| 105 | - | (('pos pos) `(tag ,pos)) | |
| 102 | + | (('ant pos) `(ref ,pos)) | |
| 103 | + | (('dial pos) `(tag ,(get-entity pos))) | |
| 104 | + | (('field pos) `(tag ,(get-entity pos))) | |
| 105 | + | (('misc pos) `(tag ,(get-entity pos))) | |
| 106 | + | (('pos pos) `(tag ,(get-entity pos))) | |
| 106 | 107 | (('xref pos) `(ref ,pos)) | |
| 107 | 108 | (('g_type _ ...) "") | |
| 108 | 109 | (('gloss (? string? g)) xml) | |
modules/nani/result.scm
| 76 | 76 | (limits sense-limits) ; string-list | |
| 77 | 77 | (infos sense-infos) ; string-list | |
| 78 | 78 | (sources sense-sources) ; source-list | |
| 79 | - | (tags sense-tags) ; string-list | |
| 79 | + | (tags sense-tags) ; integer-list | |
| 80 | 80 | (glosses sense-glosses) ; string-list | |
| 81 | 81 | (language sense-language)) ; string | |
| 82 | 82 |