jmdict.scm
1 | ;;; Nani Project website |
2 | ;;; Copyright © 2019 Julien Lepiller <julien@lepiller.eu> |
3 | ;;; |
4 | ;;; This file is part of the Nani Project website. |
5 | ;;; |
6 | ;;; The Nani Project website is free software; you can redistribute it and/or modify it |
7 | ;;; under the terms of the GNU Affero General Public License as published by |
8 | ;;; the Free Software Foundation; either version 3 of the License, or (at |
9 | ;;; your option) any later version. |
10 | ;;; |
11 | ;;; The Nani Project website is distributed in the hope that it will be useful, but |
12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | ;;; GNU Affero General Public License for more details. |
15 | ;;; |
16 | ;;; You should have received a copy of the GNU Affero General Public License |
17 | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. |
18 | |
19 | (use-modules (nani jmdict trie)) |
20 | (use-modules (nani jmdict serialize)) |
21 | (use-modules (nani jmdict xml)) |
22 | (use-modules (nani frequency)) |
23 | (use-modules (nani trie)) |
24 | (use-modules (nani result)) |
25 | (use-modules (ice-9 match)) |
26 | (use-modules (ice-9 binary-ports)) |
27 | |
;; Parsing lives in its own procedure, separate from the filtering done in
;; get-results, to try and let the GC reclaim the big intermediate objects
;; once each step is finished.
(define (get-results1 input frq)
  ;; Open the file INPUT for reading and return whatever xml->results
  ;; produces from that port together with FRQ.
  (define (parse port)
    (xml->results port frq))
  (call-with-input-file input parse))
33 | |
(define (get-results input sense-filter frq)
  ;; Read the results from INPUT (using FRQ), keep in each result only the
  ;; senses accepted by the predicate SENSE-FILTER, and return the list of
  ;; results that still have at least one sense, in their original order.
  (define (keep-matching-senses result)
    ;; Copy of RESULT whose senses are restricted to those passing SENSE-FILTER.
    (update-result result
                   #:senses (filter sense-filter (result-senses result))))
  (define (has-senses? result)
    (not (null? (result-senses result))))
  (filter has-senses?
          (map keep-matching-senses
               (get-results1 input frq))))
45 | |
(define (compile input sense-filter output)
  ;; Build a dictionary from INPUT, keeping only senses accepted by
  ;; SENSE-FILTER, and write its serialized form to the file OUTPUT.
  ;; The number of retained entries is reported on standard output.
  (define frequency (load-frequency "dictionaries/frequency.tsv"))
  (define entries (get-results input sense-filter frequency))
  ;; One compressed trie per lookup key, all built from the same entries.
  (define kanji (compress-trie (make-kanji-trie entries)))
  (define readings (compress-trie (make-reading-trie entries)))
  (define meanings (compress-trie (make-meaning-trie entries)))
  (define (write-dictionary port)
    ;; Serialization happens here, once the output port is already open.
    (put-bytevector port
                    (serialize-jmdict entries kanji readings meanings)))
  (format #t "Number of entries in ~a: ~a~%" output (length entries))
  (call-with-output-file output write-dictionary))
57 | |
;; Placeholder for the "print" command: WORD and DICT are accepted but
;; ignored, and the procedure always returns #t.
;; NOTE(review): presumably meant to look WORD up in the dictionary file
;; DICT and display it -- not implemented here.
(define print
  (lambda (word dict)
    #t))
60 | |
;; Command-line entry point.
;;
;;   build INPUT LANG OUTPUT  compile INPUT into OUTPUT.  When LANG is "e"
;;                            every sense is kept; otherwise only the senses
;;                            whose sense-language equals LANG are kept.
;;   print WORD INPUT         call print with WORD and INPUT.
;;
;; Any other argument list prints a usage message on the error port and
;; exits with status 1, instead of failing with an unhandled match error.
(match (command-line)
  ((_ "build" input lang output)
   (let ((sense-filter
          (if (equal? lang "e")
              (const #t)
              (lambda (sense) (equal? (sense-language sense) lang)))))
     (compile input sense-filter output)))
  ;; Four arguments but an unrecognised command: message kept as before.
  ((_ cmd input lang output)
   (format #t "Unknown cmd ~a.~%" cmd))
  ((_ "print" word input)
   (print word input))
  ;; Wrong number of arguments, or an unknown three-argument command.
  ((program . _)
   (format (current-error-port)
           "Usage: ~a build INPUT LANG OUTPUT~%       ~a print WORD INPUT~%"
           program program)
   (exit 1)))
71 |