jmdict: use ssax and serialize entry count

Julien LepillerWed Jun 03 20:43:15+0200 2020

62e94b4

jmdict: use ssax and serialize entry count

modules/nani/jmdict/entities.scm

11
(define-module (nani jmdict entities)
2-
  #:export (get-entity))
3-
4-
(define entities
5-
  `(("MA" . 1)
6-
    ("X" . 2)
7-
    ("abbr" . 3)
8-
    ("adj-i" . 4)
9-
    ("adj-ix" . 5)
10-
    ("adj-na" . 6)
11-
    ("adj-no" . 7)
12-
    ("adj-pn" . 8)
13-
    ("adj-t" . 9)
14-
    ("adj-f" . 10)
15-
    ("adv" . 11)
16-
    ("adv-to" . 12)
17-
    ("arch" . 13)
18-
    ("ateji" . 14)
19-
    ("aux" . 15)
20-
    ("aux-v" . 16)
21-
    ("aux-adj" . 17)
22-
    ("Buddh" . 18)
23-
    ("chem" . 19)
24-
    ("chn" . 20)
25-
    ("col" . 21)
26-
    ("comp" . 22)
27-
    ("conj" . 23)
28-
    ("cop" . 24)
29-
    ("ctr" . 25)
30-
    ("derog" . 26)
31-
    ("eK" . 27)
32-
    ("ek" . 28)
33-
    ("exp" . 29)
34-
    ("fam" . 30)
35-
    ("fem" . 31)
36-
    ("food" . 32)
37-
    ("geom" . 33)
38-
    ("gikun" . 34)
39-
    ("hon" . 35)
40-
    ("hum" . 36)
41-
    ("iK" . 37)
42-
    ("id" . 38)
43-
    ("ik" . 39)
44-
    ("int" . 40)
45-
    ("io" . 41)
46-
    ("iv" . 42)
47-
    ("ling" . 43)
48-
    ("m-sl" . 44)
49-
    ("male" . 45)
50-
    ("male-sl" . 46)
51-
    ("math" . 47)
52-
    ("mil" . 48)
53-
    ("n" . 49)
54-
    ("n-adv" . 50)
55-
    ("n-suf" . 51)
56-
    ("n-pref" . 52)
57-
    ("n-t" . 53)
58-
    ("num" . 54)
59-
    ("oK" . 55)
60-
    ("obs" . 56)
61-
    ("obsc" . 57)
62-
    ("ok" . 58)
63-
    ("oik" . 59)
64-
    ("on-mim" . 60)
65-
    ("pn" . 61)
66-
    ("poet" . 62)
67-
    ("pol" . 63)
68-
    ("pref" . 64)
69-
    ("proverb" . 65)
70-
    ("prt" . 66)
71-
    ("physics" . 67)
72-
    ("quote" . 68)
73-
    ("rare" . 69)
74-
    ("sens" . 70)
75-
    ("sl" . 71)
76-
    ("suf" . 72)
77-
    ("uK" . 73)
78-
    ("uk" . 74)
79-
    ("unc" . 75)
80-
    ("yoji" . 76)
81-
    ("v1" . 77)
82-
    ("v1-s" . 78)
83-
    ("v2a-s" . 79)
84-
    ("v4h" . 80)
85-
    ("v4r" . 81)
86-
    ("v5aru" . 82)
87-
    ("v5b" . 83)
88-
    ("v5g" . 84)
89-
    ("v5k" . 85)
90-
    ("v5k-s" . 86)
91-
    ("v5m" . 87)
92-
    ("v5n" . 88)
93-
    ("v5r" . 89)
94-
    ("v5r-i" . 90)
95-
    ("v5s" . 91)
96-
    ("v5t" . 92)
97-
    ("v5u" . 93)
98-
    ("v5u-s" . 94)
99-
    ("v5uru" . 95)
100-
    ("vz" . 96)
101-
    ("vi" . 97)
102-
    ("vk" . 98)
103-
    ("vn" . 99)
104-
    ("vr" . 100)
105-
    ("vs" . 101)
106-
    ("vs-c" . 102)
107-
    ("vs-s" . 103)
108-
    ("vs-i" . 104)
109-
    ("kyb" . 105)
110-
    ("osb" . 106)
111-
    ("ksb" . 107)
112-
    ("ktb" . 108)
113-
    ("tsb" . 109)
114-
    ("thb" . 110)
115-
    ("tsug" . 111)
116-
    ("kyu" . 112)
117-
    ("rkb" . 113)
118-
    ("nab" . 114)
119-
    ("hob" . 115)
120-
    ("vt" . 116)
121-
    ("vulg" . 117)
122-
    ("adj-kari" . 118)
123-
    ("adj-ku" . 119)
124-
    ("adj-shiku" . 120)
125-
    ("adj-nari" . 121)
126-
    ("n-pr" . 122)
127-
    ("v-unspec" . 123)
128-
    ("v4k" . 124)
129-
    ("v4g" . 125)
130-
    ("v4s" . 126)
131-
    ("v4t" . 127)
132-
    ("v4n" . 128)
133-
    ("v4b" . 129)
134-
    ("v4m" . 130)
135-
    ("v2k-k" . 131)
136-
    ("v2g-k" . 132)
137-
    ("v2t-k" . 133)
138-
    ("v2d-k" . 134)
139-
    ("v2h-k" . 135)
140-
    ("v2b-k" . 136)
141-
    ("v2m-k" . 137)
142-
    ("v2y-k" . 138)
143-
    ("v2r-k" . 139)
144-
    ("v2k-s" . 140)
145-
    ("v2g-s" . 141)
146-
    ("v2s-s" . 142)
147-
    ("v2z-s" . 143)
148-
    ("v2t-s" . 144)
149-
    ("v2d-s" . 145)
150-
    ("v2n-s" . 146)
151-
    ("v2h-s" . 147)
152-
    ("v2b-s" . 148)
153-
    ("v2m-s" . 149)
154-
    ("v2y-s" . 150)
155-
    ("v2r-s" . 151)
156-
    ("v2w-s" . 152)
157-
    ("archit" . 153)
158-
    ("astron" . 154)
159-
    ("baseb" . 155)
160-
    ("biol" . 156)
161-
    ("bot" . 157)
162-
    ("bus" . 158)
163-
    ("econ" . 159)
164-
    ("engr" . 160)
165-
    ("finc" . 161)
166-
    ("geol" . 162)
167-
    ("law" . 163)
168-
    ("mahj" . 164)
169-
    ("med" . 165)
170-
    ("music" . 166)
171-
    ("Shinto" . 167)
172-
    ("shogi" . 168)
173-
    ("sports" . 169)
174-
    ("sumo" . 170)
175-
    ("zool" . 171)
176-
    ("joc" . 172)
177-
    ("anat" . 173)
178-
    ("Christn" . 174)
179-
    ("net-sl" . 175)
180-
    ("dated" . 176)
181-
    ("hist" . 177)
182-
    ("lit" . 178)
183-
    ("litf" . 179)
184-
    ("surname" . 180)
185-
    ("place" . 181)
186-
    ("unclass" . 182)
187-
    ("company" . 183)
188-
    ("product" . 184)
189-
    ("work" . 185)
190-
    ("person" . 186)
191-
    ("given" . 187)
192-
    ("station" . 188)
193-
    ("organization" . 189)
194-
    ("frq500" . 190)
195-
    ("frq1000" . 191)
196-
    ("frq2000" . 192)
197-
    ("frq5000" . 193)
198-
    ("frq10000" . 194)
199-
    ("frq20000" . 195)))
200-
201-
(define (get-entity ent)
202-
  (let ((val (assoc-ref entities ent)))
203-
    (if val val (begin (pk 'unknown-entity-will-cause-error ent) #f))))
2+
  #:use-module (nani result)
3+
  #:use-module (nani tags)
4+
  #:re-export (get-tag get-points))

modules/nani/jmdict/serialize.scm

2121
  #:use-module (nani result)
2222
  #:use-module (nani serialize)
2323
  #:use-module (nani trie)
24+
  #:use-module (ice-9 binary-ports)
2425
  #:use-module (rnrs bytevectors)
25-
  #:export (serialize-jmdict))
26+
  #:export (serialize-jmdict
27+
            jmdict-entry-count))
2628
2729
(define (serialize-pointer ptr pos bv)
2830
  (bytevector-u8-set! bv pos (car ptr))

164166
         (huffman-size (+ reading-huffman-size sense-huffman-size kanji-huffman-size))
165167
         (results-bv (make-bytevector (+ header-size 12 kanji-huffman-size
166168
                                         reading-huffman-size sense-huffman-size
167-
                                         results-size 0))))
169+
                                         results-size 4))))
168170
    (format #t "Number of nodes in kanjis: ~a~%"
169171
      (trie-node-size kanji-trie))
170172
    (format #t "Number of nodes in readings: ~a~%"

173175
      (trie-node-size sense-trie))
174176
    (serialize-list results serialize-result (+ header-size 12 huffman-size)
175177
                    results-bv #:size? #f)
178+
    ;; number of entries
179+
    (serialize-int (length results) (+ header-size 12 huffman-size results-size)
180+
                   results-bv)
176181
    (let* ((results (list->array 1 results))
177182
           (pos (bytevector-length results-bv))
178183
           (kanji-bvs (serialize-trie kanji-trie results pos))

200205
      (format #t "senses is ~a bytes long~%" (apply + (map bytevector-length (cdr meaning-bvs))))
201206
      (merge-bvs (append (list results-bv) (cdr kanji-bvs) (cdr reading-bvs)
202207
                         (cdr meaning-bvs))))))
208+
209+
(define (jmdict-entry-count file)
210+
  (call-with-input-file file
211+
    (lambda (port)
212+
      (let* ((header (utf8->string (get-bytevector-n port 14)))
213+
             (pointers (get-bytevector-n port 12))
214+
             (kanji-pos (bytevector-u32-ref pointers 0 (endianness big))))
215+
        (seek port (- kanji-pos 4) SEEK_SET)
216+
        (bytevector-u32-ref (get-bytevector-n port 4) 0 (endianness big))))))

modules/nani/jmdict/xml.scm

1919
(define-module (nani jmdict xml)
2020
  #:use-module (ice-9 match)
2121
  #:use-module (ice-9 rdelim)
22-
  #:use-module (sxml fold)
22+
  #:use-module (sxml ssax)
2323
  #:use-module (nani frequency)
2424
  #:use-module (sxml simple)
2525
  #:use-module (nani result)
2626
  #:use-module (nani jmdict entities)
27-
  #:export (load-dic sxml->results))
27+
  #:export (load-dic xml->results))
2828
2929
(define (load-dic file)
3030
  (xml->sxml (call-with-input-file file read-string)))

3838
          (('reading r) (update-reading reading #:readings (cons r (reading-readings reading))))
3939
          (('info r) (update-reading reading #:info (cons r (reading-info reading))))
4040
          (('limit r) (update-reading reading #:kanjis (cons r (reading-kanjis reading))))
41+
          (((? symbol? s) v) (throw 'unknown-content s v))
4142
          ((? string? _) reading))
4243
        (cdr lst)))))
4344

4849
      (loop
4950
        (match (car lst)
5051
          (('content c) (update-source source #:content (cons c (source-content source))))
51-
          (('wasei _) (update-source source #:wasei? #t))
52-
          (('type t) (update-source source #:type t))
53-
          (('lang l) (update-source source #:lang l))
52+
          (('ls_wasei _) (update-source source #:wasei? #t))
53+
          (('ls_type t) (update-source source #:type t))
54+
          ((('xml . 'lang) l) (update-source source #:lang l))
55+
          (((? symbol? s) v) (throw 'unknown-content s v))
5456
          ((? list? l) (loop source l))
5557
          ((? string? _) source))
5658
        (cdr lst)))))

6870
          (('tag (? integer? r)) (update-sense sense #:tags (cons r (sense-tags sense))))
6971
          (('gloss (? string? r)) (update-sense sense #:glosses (cons r (sense-glosses sense))))
7072
          (('lang (? string? l)) (update-sense sense #:language l))
73+
          (((? symbol? s) v) (throw 'unknown-content s v))
7174
          ((? list? l) (loop sense l))
7275
          ((? string? _) sense))
7376
        (cdr lst)))))

9497
           (sense1 (car (result-senses result)))
9598
           (sense1 (update-sense sense1 #:tags (cons entity (sense-tags sense1))))
9699
           (senses (cons sense1 (cdr (result-senses result)))))
97-
      (update-result result #:points (get-points (update-result result #:senses senses))))))
100+
      (update-result result
101+
        #:points (get-points (update-result result #:senses senses))
102+
        #:senses senses))))
98103
99-
(define (get-points result)
100-
  (let ((tags (apply append (map sense-tags (result-senses result)))))
101-
    (apply + (map (lambda (tag)
102-
                    (cond
103-
                      ;; frequency
104-
                      ((eq? tag (get-entity "frq500"))
105-
                       32)
106-
                      ((eq? tag (get-entity "frq1000"))
107-
                       16)
108-
                      ((eq? tag (get-entity "frq2000"))
109-
                       8)
110-
                      ((eq? tag (get-entity "frq5000"))
111-
                       4)
112-
                      ((eq? tag (get-entity "frq10000"))
113-
                       2)
114-
                      ((eq? tag (get-entity "frq20000"))
115-
                       1)
116-
                      ;; archaic term
117-
                      ((eq? tag (get-entity "arch"))
118-
                       -3)
119-
                      ;; children language
120-
                      ((eq? tag (get-entity "chn"))
121-
                       -1)
122-
                      ;; colloquialism
123-
                      ((eq? tag (get-entity "col"))
124-
                       5)
125-
                      ;; expression
126-
                      ((eq? tag (get-entity "exp"))
127-
                       1)
128-
                      ;; sonkeigo (honorific or respectful)
129-
                      ((eq? tag (get-entity "hon"))
130-
                       -1)
131-
                      ;; kenjougo (humble)
132-
                      ((eq? tag (get-entity "hum"))
133-
                       -1)
134-
                      ;; teineigo (polite)
135-
                      ((eq? tag (get-entity "pol"))
136-
                       -1)
137-
                      ;; irregular kanji
138-
                      ((eq? tag (get-entity "iK"))
139-
                       -2)
140-
                      ;; idiomatic expression
141-
                      ((eq? tag (get-entity "id"))
142-
                       3)
143-
                      ;; slang
144-
                      ((member tag (list
145-
                                     (get-entity "sl")
146-
                                     (get-entity "m-sl")
147-
                                     (get-entity "male-sl")))
148-
                       -4)
149-
                      ;; outdated kanji
150-
                      ((eq? tag (get-entity "oK"))
151-
                       -2)
152-
                      ;; obsolete term
153-
                      ((eq? tag (get-entity "obs"))
154-
                       -5)
155-
                      ;; obscure term
156-
                      ((eq? tag (get-entity "obsc"))
157-
                       -10)
158-
                      ;; onomatopea
159-
                      ((eq? tag (get-entity "on-mim"))
160-
                       1)
161-
                      ;; poetical term
162-
                      ((eq? tag (get-entity "poet"))
163-
                       -1)
164-
                      ((eq? tag (get-entity "proverb"))
165-
                       1)
166-
                      ((eq? tag (get-entity "rare"))
167-
                       -3)
168-
                      ;; sensitive
169-
                      ((eq? tag (get-entity "sens"))
170-
                       -2)
171-
                      ;; dialect
172-
                      ((member tag (list
173-
                                     (get-entity "kyb")
174-
                                     (get-entity "osb")
175-
                                     (get-entity "ksb")
176-
                                     (get-entity "ktb")
177-
                                     (get-entity "tsb")
178-
                                     (get-entity "thb")
179-
                                     (get-entity "tsug")
180-
                                     (get-entity "kyu")
181-
                                     (get-entity "rkb")
182-
                                     (get-entity "nab")
183-
                                     (get-entity "hob")))
184-
                       -1)
185-
                      (else 0)))
186-
                  tags))))
187-
      
188-
(define (sxml->results sxml frq)
189-
  (sort
190-
    (foldt
191-
      (lambda (xml)
192-
        (match xml
193-
         (('ent_seq _) "")
194-
         (('ke_pri _) "")
195-
         (('re_pri _) "")
196-
         (('pri _) "")
197-
         (('keb kanji) `(kanji ,kanji))
198-
         (('ke_inf _) "")
199-
         (('k_ele lst ...) (car (filter list? lst)))
200-
         (('reb reading) `(reading ,reading))
201-
         (('re_nokanji _ ...) "")
202-
         (('re_restr r) `(limit ,r))
203-
         (('re_inf r) `(info ,r))
204-
         (('r_ele lst ...) (sxml->reading lst))
205-
         (('ant pos) `(ref ,pos))
206-
         (('dial pos) `(tag ,(get-entity pos)))
207-
         (('field pos) `(tag ,(get-entity pos)))
208-
         (('misc pos) `(tag ,(get-entity pos)))
209-
         (('pos pos) `(tag ,(get-entity pos)))
210-
         (('xref pos) `(ref ,pos))
211-
         (('g_type _ ...) "")
212-
         (('gloss (? string? g)) xml)
213-
         (('gloss (? list? g)) g)
214-
         (('gloss attr g) (cons `(gloss ,g) attr))
215-
         (('stagk k) `(limit ,k))
216-
         (('stagr r) `(limit ,r))
217-
         (('s_inf r) `(info ,r))
218-
         (('lsource lst ...) (sxml->source lst))
219-
         (('sense lst ...) (sxml->sense lst))
220-
         (('entry lst ...) (sxml->result lst frq))
221-
         (('JMdict lst ...) lst)
222-
         (('xml:lang l) `(lang ,l))
223-
         (('ls_wasei _) '(wasei #t))
224-
         (('ls_type t) `(type ,t))
225-
         (('@ lst ...) lst)
226-
         (('*TOP* _ l) (filter result? l))
227-
         (('*PI* _ ...) #f)))
228-
      (lambda (a) a)
229-
      sxml)
230-
    ;; reverse order: bigger score first
231-
    (lambda (a b) (> (result-points a) (result-points b)))))
104+
(define (sxml->element lst elem frq)
105+
  (match elem
106+
    ('ent_seq "")
107+
    ('ke_pri "")
108+
    ('re_nokanji "")
109+
    ('re_pri "")
110+
    ('ke_inf "")
111+
    ('misc (if (and (= (length lst) 1) (string? (car lst)))
112+
               `(tag ,(get-tag (car lst)))
113+
               (throw 'invalid-misc lst)))
114+
    ('re_restr (if (and (= (length lst) 1) (string? (car lst)))
115+
                   `(limit ,(car lst))
116+
                   (throw 'invalid-re_restr lst)))
117+
    ('keb (if (and (= (length lst) 1) (string? (car lst)))
118+
              `(kanji ,(car lst))
119+
              (throw 'invalid-keb lst)))
120+
    ('s_inf (if (and (= (length lst) 1) (string? (car lst)))
121+
                `(info ,(car lst))
122+
                (throw 'invalid-s_inf lst)))
123+
    ('dial (if (and (= (length lst) 1) (string? (car lst)))
124+
               `(tag ,(get-tag (car lst)))
125+
               (throw 'invalid-dial lst)))
126+
    ('re_inf (if (and (= (length lst) 1) (string? (car lst)))
127+
                 `(info ,(car lst))
128+
                 (throw 'invalid-re_inf lst)))
129+
    ('stagk (if (and (= (length lst) 1) (string? (car lst)))
130+
                `(limit ,(car lst))
131+
                (throw 'invalid-stagk lst)))
132+
    ('stagr (if (and (= (length lst) 1) (string? (car lst)))
133+
                `(limit ,(car lst))
134+
                (throw 'invalid-stagr lst)))
135+
    ('field (if (and (= (length lst) 1) (string? (car lst)))
136+
                `(tag ,(get-tag (car lst)))
137+
                (throw 'invalid-field lst)))
138+
    ('ant (if (and (= (length lst) 1) (string? (car lst)))
139+
              `(ref ,(car lst))
140+
              (throw 'invalid-ant lst)))
141+
    ('reb `(reading ,lst))
142+
    ('r_ele (sxml->reading lst))
143+
    ('k_ele (car (filter list? lst)))
144+
    ('pos (if (and (= (length lst) 1) (string? (car lst)))
145+
              `(tag ,(get-tag (car lst)))
146+
              (throw 'invalid-pos lst)))
147+
    ('xref (if (and (= (length lst) 1) (string? (car lst)))
148+
               `(ref ,(car lst))
149+
               (throw 'invalid-xref lst)))
150+
    ('gloss (append
151+
              (filter list? lst)
152+
              (map (lambda (v) `(gloss ,v)) (filter string? lst))))
153+
    ('lsource (sxml->source lst))
154+
    ('sense (sxml->sense lst))
155+
    ('entry (sxml->result lst frq))))
156+
157+
(define (create-parser frq)
158+
  (ssax:make-parser
159+
    NEW-LEVEL-SEED
160+
    (lambda (elem-gi attributes namespaces expected-content seed)
161+
      (map
162+
        (match-lambda
163+
          ((k . v) (list k v)))
164+
        (filter
165+
          (match-lambda
166+
            ((k . v) (not (member k '(g_type)))))
167+
          attributes)))
168+
169+
    FINISH-ELEMENT
170+
    (lambda (elem-gi attributes namespaces parent-seed seed)
171+
      (if (equal? elem-gi 'JMdict)
172+
          seed
173+
          (let* ((seed (reverse seed))
174+
                 (element (sxml->element seed elem-gi frq)))
175+
            (cons element parent-seed))))
176+
177+
    CHAR-DATA-HANDLER
178+
    (lambda (string1 string2 seed)
179+
      (cons (string-append string1 string2) seed))))
180+
181+
(define (xml->results port frq)
182+
  (let ((results (filter result? ((create-parser frq) port '()))))
183+
    (sort results (lambda (a b) (> (result-points a) (result-points b))))))

modules/nani/tags.scm unknown status 1

1+
(define-module (nani tags)
2+
  #:use-module (nani result)
3+
  #:export (get-tag get-points))
4+
5+
(define tags
6+
  '("MA" "X" "abbr" "adj-i" "adj-ix" "adj-na" "adj-no" "adj-pn" "adj-t" "adj-f"
7+
    "adv" "adv-to" "arch" "ateji" "aux" "aux-v" "aux-adj" "Buddh" "chem" "chn"
8+
    "col" "comp" "conj" "cop" "ctr" "derog" "eK" "ek" "exp" "fam" "fem" "food"
9+
    "geom" "gikun" "hon" "hum" "iK" "id" "ik" "int" "io" "iv" "ling" "m-sl"
10+
    "male" "male-sl" "math" "meteor" "mil" "n" "n-adv" "n-suf" "n-pref" "n-t"
11+
    "num" "oK" "obs" "obsc" "ok" "oik" "on-mim" "pn" "poet" "pol" "pref" "proverb"
12+
    "prt" "physics" "quote" "rare" "sens" "sl" "suf" "uK" "uk" "unc" "yoji"
13+
    "v1" "v1-s" "v2a-s" "v4h" "v4r" "v5aru" "v5b" "v5g" "v5k" "v5k-s" "v5m"
14+
    "v5n" "v5r" "v5r-i" "v5s" "v5t" "v5u" "v5u-s" "v5uru" "vz" "vi" "vk" "vn"
15+
    "vr" "vs" "vs-c" "vs-s" "vs-i" "kyb" "osb" "ksb" "ktb" "tsb" "thb" "tsug"
16+
    "kyu" "rkb" "nab" "hob" "vt" "vulg" "adj-kari" "adj-ku" "adj-shiku"
17+
    "adj-nari" "n-pr" "v-unspec" "v4k" "v4g" "v4s" "v4t" "v4n" "v4b" "v4m"
18+
    "v2k-k" "v2g-k" "v2t-k" "v2d-k" "v2h-k" "v2b-k" "v2m-k" "v2y-k" "v2r-k"
19+
    "v2k-s" "v2g-s" "v2s-s" "v2z-s" "v2t-s" "v2d-s" "v2n-s" "v2h-s" "v2b-s"
20+
    "v2m-s" "v2y-s" "v2r-s" "v2w-s" "archit" "astron" "baseb" "biol" "bot"
21+
    "bus" "econ" "engr" "finc" "geol" "law" "mahj" "med" "music" "Shinto"
22+
    "shogi" "sports" "sumo" "zool" "joc" "anat" "Christn" "net-sl" "dated"
23+
    "hist" "litf" "surname" "place" "unclass" "company" "product" "work"
24+
    "person" "given" "station" "organization"
25+
    
26+
    ;; wadoku special
27+
    "young" "thief" "secret" "baby" "stud" "sail" "fashion" "archeo"
28+
    "rel" "color" "psy" "print" "aero" "politics" "anth" "biblio"
29+
    "game" "agri" "alco" "demo" "train" "philo" "ethno" "photo" "trans"
30+
    "telecom" "build" "sci" "school" "art" "hobby"
31+
32+
    "season-spring" "season-summer" "season-winter" "season-autumn"
33+
    
34+
    "frq500" "frq1000" "frq2000" "frq5000" "frq10000" "frq20000"))
35+
36+
(define (get-tag tag)
37+
  (let ((val (member tag tags)))
38+
    (if val (- (length val) 1) (throw 'unknown-tag tag))))
39+
40+
(define (get-points result)
41+
  (define (tag-point tag)
42+
    (cond
43+
      ;; frequency
44+
      ((eq? tag (get-tag "frq500"))
45+
       32)
46+
      ((eq? tag (get-tag "frq1000"))
47+
       16)
48+
      ((eq? tag (get-tag "frq2000"))
49+
       8)
50+
      ((eq? tag (get-tag "frq5000"))
51+
       4)
52+
      ((eq? tag (get-tag "frq10000"))
53+
       2)
54+
      ((eq? tag (get-tag "frq20000"))
55+
       1)
56+
      ;; archaic term
57+
      ((eq? tag (get-tag "arch"))
58+
       -3)
59+
      ;; children language
60+
      ((eq? tag (get-tag "chn"))
61+
       -1)
62+
      ;; colloquialism
63+
      ((eq? tag (get-tag "col"))
64+
       5)
65+
      ;; expression
66+
      ((eq? tag (get-tag "exp"))
67+
       1)
68+
      ;; sonkeigo (honorific or respectful)
69+
      ((eq? tag (get-tag "hon"))
70+
       -1)
71+
      ;; kenjougo (humble)
72+
      ((eq? tag (get-tag "hum"))
73+
       -1)
74+
      ;; teineigo (polite)
75+
      ((eq? tag (get-tag "pol"))
76+
       -1)
77+
      ;; irregular kanji
78+
      ((eq? tag (get-tag "iK"))
79+
       -2)
80+
      ;; idiomatic expression
81+
      ((eq? tag (get-tag "id"))
82+
       3)
83+
      ;; slang
84+
      ((member tag (list
85+
                     (get-tag "sl")
86+
                     (get-tag "m-sl")
87+
                     (get-tag "male-sl")))
88+
       -4)
89+
      ;; outdated kanji
90+
      ((eq? tag (get-tag "oK"))
91+
       -2)
92+
      ;; obsolete term
93+
      ((eq? tag (get-tag "obs"))
94+
       -5)
95+
      ;; obscure term
96+
      ((eq? tag (get-tag "obsc"))
97+
       -10)
98+
      ;; onomatopea
99+
      ((eq? tag (get-tag "on-mim"))
100+
       1)
101+
      ;; poetical term
102+
      ((eq? tag (get-tag "poet"))
103+
       -1)
104+
      ((eq? tag (get-tag "proverb"))
105+
       1)
106+
      ((eq? tag (get-tag "rare"))
107+
       -3)
108+
      ;; sensitive
109+
      ((eq? tag (get-tag "sens"))
110+
       -2)
111+
      ;; dialect
112+
      ((member tag (list
113+
                     (get-tag "kyb")
114+
                     (get-tag "osb")
115+
                     (get-tag "ksb")
116+
                     (get-tag "ktb")
117+
                     (get-tag "tsb")
118+
                     (get-tag "thb")
119+
                     (get-tag "tsug")
120+
                     (get-tag "kyu")
121+
                     (get-tag "rkb")
122+
                     (get-tag "nab")
123+
                     (get-tag "hob")))
124+
       -1)
125+
      (else 0)))
126+
  (let ((tags (apply append (map sense-tags (result-senses result)))))
127+
    (apply + (map tag-point tags))))