Compress tags.
modules/nani/jmdict/entities.scm unknown status 1
1 | + | (define-module (nani jmdict entities) | |
2 | + | #:export (get-entity)) | |
3 | + | ||
;; entities: association list mapping tag-name strings (presumably the JMdict
;; XML entity names -- confirm against the dictionary DTD) to integer codes.
;; Codes run consecutively from 1 ("MA") to 173 ("anat").  get-entity looks
;; names up here with assoc-ref, so keys are compared as whole strings and
;; case matters ("eK"/"ek", "iK"/"ik", "oK"/"ok", "uK"/"uk" are distinct keys).
;; NOTE(review): in this change sense tags are serialized with serialize-char,
;; which stores each code via bytevector-u8-set!; new codes must therefore stay
;; within one unsigned byte (<= 255).
;; NOTE(review): the list is quasiquoted but contains no unquote, so it is a
;; literal constant.
4 | + | (define entities | |
5 | + | `(("MA" . 1) | |
6 | + | ("X" . 2) | |
7 | + | ("abbr" . 3) | |
8 | + | ("adj-i" . 4) | |
9 | + | ("adj-ix" . 5) | |
10 | + | ("adj-na" . 6) | |
11 | + | ("adj-no" . 7) | |
12 | + | ("adj-pn" . 8) | |
13 | + | ("adj-t" . 9) | |
14 | + | ("adj-f" . 10) | |
15 | + | ("adv" . 11) | |
16 | + | ("adv-to" . 12) | |
17 | + | ("arch" . 13) | |
18 | + | ("ateji" . 14) | |
19 | + | ("aux" . 15) | |
20 | + | ("aux-v" . 16) | |
21 | + | ("aux-adj" . 17) | |
22 | + | ("Buddh" . 18) | |
23 | + | ("chem" . 19) | |
24 | + | ("chn" . 20) | |
25 | + | ("col" . 21) | |
26 | + | ("comp" . 22) | |
27 | + | ("conj" . 23) | |
28 | + | ("cop-da" . 24) | |
29 | + | ("ctr" . 25) | |
30 | + | ("derog" . 26) | |
31 | + | ("eK" . 27) | |
32 | + | ("ek" . 28) | |
33 | + | ("exp" . 29) | |
34 | + | ("fam" . 30) | |
35 | + | ("fem" . 31) | |
36 | + | ("food" . 32) | |
37 | + | ("geom" . 33) | |
38 | + | ("gikun" . 34) | |
39 | + | ("hon" . 35) | |
40 | + | ("hum" . 36) | |
41 | + | ("iK" . 37) | |
42 | + | ("id" . 38) | |
43 | + | ("ik" . 39) | |
44 | + | ("int" . 40) | |
45 | + | ("io" . 41) | |
46 | + | ("iv" . 42) | |
47 | + | ("ling" . 43) | |
48 | + | ("m-sl" . 44) | |
49 | + | ("male" . 45) | |
50 | + | ("male-sl" . 46) | |
51 | + | ("math" . 47) | |
52 | + | ("mil" . 48) | |
53 | + | ("n" . 49) | |
54 | + | ("n-adv" . 50) | |
55 | + | ("n-suf" . 51) | |
56 | + | ("n-pref" . 52) | |
57 | + | ("n-t" . 53) | |
58 | + | ("num" . 54) | |
59 | + | ("oK" . 55) | |
60 | + | ("obs" . 56) | |
61 | + | ("obsc" . 57) | |
62 | + | ("ok" . 58) | |
63 | + | ("oik" . 59) | |
64 | + | ("on-mim" . 60) | |
65 | + | ("pn" . 61) | |
66 | + | ("poet" . 62) | |
67 | + | ("pol" . 63) | |
68 | + | ("pref" . 64) | |
69 | + | ("proverb" . 65) | |
70 | + | ("prt" . 66) | |
71 | + | ("physics" . 67) | |
72 | + | ("quote" . 68) | |
73 | + | ("rare" . 69) | |
74 | + | ("sens" . 70) | |
75 | + | ("sl" . 71) | |
76 | + | ("suf" . 72) | |
77 | + | ("uK" . 73) | |
78 | + | ("uk" . 74) | |
79 | + | ("unc" . 75) | |
80 | + | ("yoji" . 76) | |
81 | + | ("v1" . 77) | |
82 | + | ("v1-s" . 78) | |
83 | + | ("v2a-s" . 79) | |
84 | + | ("v4h" . 80) | |
85 | + | ("v4r" . 81) | |
86 | + | ("v5aru" . 82) | |
87 | + | ("v5b" . 83) | |
88 | + | ("v5g" . 84) | |
89 | + | ("v5k" . 85) | |
90 | + | ("v5k-s" . 86) | |
91 | + | ("v5m" . 87) | |
92 | + | ("v5n" . 88) | |
93 | + | ("v5r" . 89) | |
94 | + | ("v5r-i" . 90) | |
95 | + | ("v5s" . 91) | |
96 | + | ("v5t" . 92) | |
97 | + | ("v5u" . 93) | |
98 | + | ("v5u-s" . 94) | |
99 | + | ("v5uru" . 95) | |
100 | + | ("vz" . 96) | |
101 | + | ("vi" . 97) | |
102 | + | ("vk" . 98) | |
103 | + | ("vn" . 99) | |
104 | + | ("vr" . 100) | |
105 | + | ("vs" . 101) | |
106 | + | ("vs-c" . 102) | |
107 | + | ("vs-s" . 103) | |
108 | + | ("vs-i" . 104) | |
109 | + | ("kyb" . 105) | |
110 | + | ("osb" . 106) | |
111 | + | ("ksb" . 107) | |
112 | + | ("ktb" . 108) | |
113 | + | ("tsb" . 109) | |
114 | + | ("thb" . 110) | |
115 | + | ("tsug" . 111) | |
116 | + | ("kyu" . 112) | |
117 | + | ("rkb" . 113) | |
118 | + | ("nab" . 114) | |
119 | + | ("hob" . 115) | |
120 | + | ("vt" . 116) | |
121 | + | ("vulg" . 117) | |
122 | + | ("adj-kari" . 118) | |
123 | + | ("adj-ku" . 119) | |
124 | + | ("adj-shiku" . 120) | |
125 | + | ("adj-nari" . 121) | |
126 | + | ("n-pr" . 122) | |
127 | + | ("v-unspec" . 123) | |
128 | + | ("v4k" . 124) | |
129 | + | ("v4g" . 125) | |
130 | + | ("v4s" . 126) | |
131 | + | ("v4t" . 127) | |
132 | + | ("v4n" . 128) | |
133 | + | ("v4b" . 129) | |
134 | + | ("v4m" . 130) | |
135 | + | ("v2k-k" . 131) | |
136 | + | ("v2g-k" . 132) | |
137 | + | ("v2t-k" . 133) | |
138 | + | ("v2d-k" . 134) | |
139 | + | ("v2h-k" . 135) | |
140 | + | ("v2b-k" . 136) | |
141 | + | ("v2m-k" . 137) | |
142 | + | ("v2y-k" . 138) | |
143 | + | ("v2r-k" . 139) | |
144 | + | ("v2k-s" . 140) | |
145 | + | ("v2g-s" . 141) | |
146 | + | ("v2s-s" . 142) | |
147 | + | ("v2z-s" . 143) | |
148 | + | ("v2t-s" . 144) | |
149 | + | ("v2d-s" . 145) | |
150 | + | ("v2n-s" . 146) | |
151 | + | ("v2h-s" . 147) | |
152 | + | ("v2b-s" . 148) | |
153 | + | ("v2m-s" . 149) | |
154 | + | ("v2y-s" . 150) | |
155 | + | ("v2r-s" . 151) | |
156 | + | ("v2w-s" . 152) | |
157 | + | ("archit" . 153) | |
158 | + | ("astron" . 154) | |
159 | + | ("baseb" . 155) | |
160 | + | ("biol" . 156) | |
161 | + | ("bot" . 157) | |
162 | + | ("bus" . 158) | |
163 | + | ("econ" . 159) | |
164 | + | ("engr" . 160) | |
165 | + | ("finc" . 161) | |
166 | + | ("geol" . 162) | |
167 | + | ("law" . 163) | |
168 | + | ("mahj" . 164) | |
169 | + | ("med" . 165) | |
170 | + | ("music" . 166) | |
171 | + | ("Shinto" . 167) | |
172 | + | ("shogi" . 168) | |
173 | + | ("sports" . 169) | |
174 | + | ("sumo" . 170) | |
175 | + | ("zool" . 171) | |
176 | + | ("joc" . 172) | |
177 | + | ("anat" . 173))) | |
178 | + | ||
;; get-entity ENT: look ENT up in the `entities' alist with assoc-ref.
;; Returns the associated integer code when ENT is a known key.
;; When ENT is not found, calls (pk 'val ENT) -- Guile's debugging "peek",
;; which prints its arguments -- and then returns #f, so callers must be
;; prepared for a non-integer result.
179 | + | (define (get-entity ent) | |
180 | + | (let ((val (assoc-ref entities ent))) | |
181 | + | (if val val (begin (pk 'val ent) #f)))) |
modules/nani/jmdict/serialize.scm
66 | 66 | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
67 | 67 | (+ pos 5)) | |
68 | 68 | ||
;; serialize-char INT POS BV: store INT as a single unsigned byte in
;; bytevector BV at offset POS (bytevector-u8-set!) and return POS + 1,
;; the offset just past the written byte.
;; INT must be representable as a u8 (0..255) for bytevector-u8-set!.
69 | + | (define (serialize-char int pos bv) | |
70 | + | (bytevector-u8-set! bv pos int) | |
71 | + | (+ pos 1)) | |
;; char-size: size function paired with serialize-char.  (const 1) yields a
;; procedure that ignores its arguments and always returns 1, i.e. one byte
;; per serialized element.
72 | + | (define char-size (const 1)) | |
73 | + | ||
;; serialize-int INT POS BV: store INT as a 32-bit unsigned big-endian value
;; in bytevector BV at offset POS (bytevector-u32-set! with (endianness big))
;; and return POS + 4, the offset just past the written word.
69 | 74 | (define (serialize-int int pos bv) | |
70 | 75 | (bytevector-u32-set! bv pos int (endianness big)) | |
71 | 76 | (+ pos 4)) | |
… | |||
129 | 134 | (pos (serialize-list (sense-limits sense) serialize-string pos bv)) | |
130 | 135 | (pos (serialize-list (sense-infos sense) serialize-string pos bv)) | |
131 | 136 | (pos (serialize-list (sense-sources sense) serialize-source pos bv)) | |
132 | - | (pos (serialize-list (sense-tags sense) serialize-string pos bv)) | |
137 | + | (pos (serialize-list (sense-tags sense) serialize-char pos bv)) | |
133 | 138 | (pos (serialize-list (sense-glosses sense) serialize-string pos bv)) | |
134 | 139 | (pos (serialize-string (sense-language sense) pos bv))) | |
135 | 140 | pos)) | |
… | |||
139 | 144 | (list-size (sense-limits sense) string-size) | |
140 | 145 | (list-size (sense-infos sense) string-size) | |
141 | 146 | (list-size (sense-sources sense) source-size) | |
142 | - | (list-size (sense-tags sense) string-size) | |
147 | + | (list-size (sense-tags sense) char-size) | |
143 | 148 | (list-size (sense-glosses sense) string-size) | |
144 | 149 | (string-size (sense-language sense)))) | |
145 | 150 |
modules/nani/jmdict/xml.scm
22 | 22 | #:use-module (sxml fold) | |
23 | 23 | #:use-module (sxml simple) | |
24 | 24 | #:use-module (nani result) | |
25 | + | #:use-module (nani jmdict entities) | |
25 | 26 | #:export (load-dic sxml->results)) | |
26 | 27 | ||
27 | 28 | (define (load-dic file) | |
… | |||
63 | 64 | (('limit (? string? r)) (update-sense sense #:limits (cons r (sense-limits sense)))) | |
64 | 65 | (('info (? string? r)) (update-sense sense #:infos (cons r (sense-infos sense)))) | |
65 | 66 | ((? source? s) (update-sense sense #:sources (cons s (sense-sources sense)))) | |
66 | - | (('tag (? string? r)) (update-sense sense #:tags (cons r (sense-tags sense)))) | |
67 | + | (('tag (? integer? r)) (update-sense sense #:tags (cons r (sense-tags sense)))) | |
67 | 68 | (('gloss (? string? r)) (update-sense sense #:glosses (cons r (sense-glosses sense)))) | |
68 | 69 | (('lang (? string? l)) (update-sense sense #:language l)) | |
69 | 70 | ((? list? l) (loop sense l)) | |
… | |||
98 | 99 | (('re_restr r) `(limit ,r)) | |
99 | 100 | (('re_inf r) `(info ,r)) | |
100 | 101 | (('r_ele lst ...) (sxml->reading lst)) | |
101 | - | (('ant pos) `(tag ,pos)) | |
102 | - | (('dial pos) `(tag ,pos)) | |
103 | - | (('field pos) `(tag ,pos)) | |
104 | - | (('misc pos) `(tag ,pos)) | |
105 | - | (('pos pos) `(tag ,pos)) | |
102 | + | (('ant pos) `(ref ,pos)) | |
103 | + | (('dial pos) `(tag ,(get-entity pos))) | |
104 | + | (('field pos) `(tag ,(get-entity pos))) | |
105 | + | (('misc pos) `(tag ,(get-entity pos))) | |
106 | + | (('pos pos) `(tag ,(get-entity pos))) | |
106 | 107 | (('xref pos) `(ref ,pos)) | |
107 | 108 | (('g_type _ ...) "") | |
108 | 109 | (('gloss (? string? g)) xml) |
modules/nani/result.scm
76 | 76 | (limits sense-limits) ; string-list | |
77 | 77 | (infos sense-infos) ; string-list | |
78 | 78 | (sources sense-sources) ; source-list | |
79 | - | (tags sense-tags) ; string-list | |
79 | + | (tags sense-tags) ; integer-list | |
80 | 80 | (glosses sense-glosses) ; string-list | |
81 | 81 | (language sense-language)) ; string | |
82 | 82 |