;;; nani/website/tools/jmdict.scm

;;; jmdict.scm

1
(use-modules (nani jmdict trie))
2
(use-modules (nani jmdict serialize))
3
(use-modules (nani jmdict xml))
4
(use-modules (nani trie))
5
(use-modules (nani result))
6
(use-modules (ice-9 match))
7
(use-modules (ice-9 binary-ports))
8
9
;; Load the dictionary file INPUT with `load-dic' and dump the value it
;; returns to the file OUTPUT using `write', i.e. as a datum that `read'
;; can parse back.
;;
;; INPUT and OUTPUT are file names.  OUTPUT is created or overwritten.
(define (convert input output)
  (let ((sxml (load-dic input)))
    (call-with-output-file output
      (lambda (port)
        (write sxml port)))))
14
15
;; Build the serialized dictionary file OUTPUT from INPUT.
;;
;; INPUT is a file name.  When it ends in "xml" it is parsed with
;; `load-dic'; otherwise the file is opened and a single datum is `read'
;; from it (the form that `convert' writes).
;;
;; SENSE-FILTER is a one-argument predicate applied to every sense of
;; every result: senses for which it returns #f are dropped, and results
;; left without any sense are dropped as well.
;;
;; The remaining results, together with the compressed kanji, reading and
;; meaning tries built from them, are passed to `serialize-jmdict' and the
;; resulting bytevector is written to OUTPUT.  The number of kept results
;; is printed on the current output port.
(define (compile input sense-filter output)
  (let* ((sxml (if (string-suffix? "xml" input)
                   (load-dic input)
                   ;; `read' needs a port, not a file name.
                   (call-with-input-file input read)))
         (results (sxml->results sxml))
         ;; Keep only the senses accepted by SENSE-FILTER...
         (results (map (lambda (result)
                         (update-result
                           result
                           #:senses (filter sense-filter
                                            (result-senses result))))
                       results))
         ;; ...then discard the results that ended up with none.
         (results (filter (lambda (result)
                            (not (null? (result-senses result))))
                          results))
         (kanji-trie (compress-trie (make-kanji-trie results)))
         (reading-trie (compress-trie (make-reading-trie results)))
         (meaning-trie (compress-trie (make-meaning-trie results))))
    (format #t "Number of entries in ~a: ~a~%" output (length results))
    (call-with-output-file output
      (lambda (port)
        (put-bytevector port
          (serialize-jmdict results kanji-trie reading-trie meaning-trie))))))
36
37
;; Command-line entry point:  jmdict.scm CMD INPUT LANG OUTPUT
;;
;;   build    compile INPUT into OUTPUT.  When LANG is "e" every sense is
;;            kept; otherwise only senses whose `sense-language' equals
;;            LANG are kept.
;;   convert  dump INPUT to OUTPUT with `convert' (LANG is required
;;            positionally but ignored).
;;
;; An unrecognized CMD prints a message on the current output port.  Any
;; other argument count prints a usage line on the error port and exits
;; with status 1 instead of failing with a pattern-match error.
(match (command-line)
  ((_ "build" input lang output)
   (compile input
            (if (equal? lang "e")
                (const #t)
                (lambda (sense) (equal? (sense-language sense) lang)))
            output))
  ((_ "convert" input _ output)
   (convert input output))
  ((_ cmd _ _ _)
   (format #t "Unknown cmd ~a.~%" cmd))
  (_
   (format (current-error-port)
           "Usage: ~a build|convert INPUT LANG OUTPUT~%"
           (car (command-line)))
   (exit 1)))
47