wadoku.scm
1 | ;;; Nani Project website |
2 | ;;; Copyright © 2020 Julien Lepiller <julien@lepiller.eu> |
3 | ;;; |
4 | ;;; This file is part of the Nani Project website. |
5 | ;;; |
6 | ;;; The Nani Project website is free software; you can redistribute it and/or modify it |
7 | ;;; under the terms of the GNU Affero General Public License as published by |
8 | ;;; the Free Software Foundation; either version 3 of the License, or (at |
9 | ;;; your option) any later version. |
10 | ;;; |
11 | ;;; The Nani Project website is distributed in the hope that it will be useful, but |
12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | ;;; GNU Affero General Public License for more details. |
15 | ;;; |
16 | ;;; You should have received a copy of the GNU Affero General Public License |
17 | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. |
18 | |
19 | (use-modules (nani jmdict trie)) |
20 | (use-modules (nani jmdict serialize)) |
21 | (use-modules (nani wadoku xml)) |
22 | (use-modules (nani wadoku pitch)) |
23 | (use-modules (nani frequency)) |
24 | (use-modules (nani trie)) |
25 | (use-modules (nani result)) |
26 | (use-modules (ice-9 match)) |
27 | (use-modules (ice-9 binary-ports)) |
28 | |
;; Parsing lives in its own procedure, apart from the filtering done in
;; `get-results', to try and let the GC reclaim the big intermediate objects.
(define (get-results1 input frq)
  ;; Open the file INPUT for reading and return whatever `xml->results'
  ;; produces from the resulting port and the frequency data FRQ.
  (define (parse port)
    (xml->results port frq))
  (call-with-input-file input parse))
34 | |
(define (get-results input sense-filter frq)
  ;; Parse INPUT (using the frequency data FRQ), keep in every result only
  ;; the senses accepted by the predicate SENSE-FILTER, and return the list
  ;; of results that still have at least one sense afterwards.

  ;; Return RESULT with its senses restricted to those SENSE-FILTER accepts.
  (define (keep-accepted-senses result)
    (update-result result
                   #:senses (filter sense-filter (result-senses result))))

  ;; True when RESULT has a non-empty list of senses.
  (define (has-senses? result)
    (not (null? (result-senses result))))

  ;; The intermediate lists are never bound to a name, so nothing keeps them
  ;; alive once the next stage has consumed them.
  (filter has-senses?
          (map keep-accepted-senses
               (get-results1 input frq))))
46 | |
(define (compile input sense-filter output)
  ;; Build the dictionary file OUTPUT from the XML file INPUT.
  ;;
  ;; INPUT is parsed with the frequency data loaded from
  ;; "dictionaries/frequency.tsv" (resolved relative to the current working
  ;; directory), and only the senses accepted by the predicate SENSE-FILTER
  ;; are kept.  Three compressed tries (kanji, reading, meaning) are built
  ;; from the remaining results, the number of entries is printed on the
  ;; current output port, and the serialized dictionary is written to OUTPUT.
  (let* ((results (get-results input sense-filter
                               (load-frequency "dictionaries/frequency.tsv")))
         (kanji-trie (compress-trie (make-kanji-trie results)))
         (reading-trie (compress-trie (make-reading-trie results)))
         (meaning-trie (compress-trie (make-meaning-trie results))))
    (format #t "Number of entries in ~a: ~a~%" output (length results))
    ;; The payload is raw bytes (it is handed to `put-bytevector'), so open
    ;; the port in binary mode rather than relying on the default text mode.
    (call-with-output-file output
      (lambda (port)
        (put-bytevector port
                        (serialize-jmdict results kanji-trie reading-trie
                                          meaning-trie)))
      #:binary #t)))
58 | |
(define (get-pitch input)
  ;; Open the file INPUT for reading and return whatever `xml->pitch'
  ;; extracts from the resulting port.
  (let ((read-pitch (lambda (port)
                      (xml->pitch port))))
    (call-with-input-file input read-pitch)))
63 | |
(define (pitch input output)
  ;; Build the pitch file OUTPUT from the XML file INPUT.
  ;;
  ;; The number of results read from INPUT is printed on the current output
  ;; port, then the serialized results are written to OUTPUT.
  (let ((results (get-pitch input)))
    ;; End the message with a newline, like the one printed by `compile', so
    ;; later output does not end up on the same line.
    (format #t "~a results.~%" (length results))
    ;; The payload is raw bytes (it is handed to `put-bytevector'), so open
    ;; the port in binary mode rather than relying on the default text mode.
    (call-with-output-file output
      (lambda (port)
        (put-bytevector port
                        (serialize-pitch results)))
      #:binary #t)))
71 | |
;; Command-line entry point: PROGRAM (build|pitch) INPUT OUTPUT.
;;
;;   build  compile the dictionary in INPUT to OUTPUT, keeping every sense
;;   pitch  write the pitch data found in INPUT to OUTPUT
;;
;; An unknown command or a wrong number of arguments is reported on the
;; current error port and the script exits with status 1, instead of
;; returning success or failing with an unhandled match error.
(match (command-line)
  ((_ "build" input output)
   (compile input (const #t) output))
  ((_ "pitch" input output)
   (pitch input output))
  ((_ cmd input output)
   (format (current-error-port) "Unknown cmd ~a.~%" cmd)
   (exit 1))
  (args
   ;; Wrong number of arguments: show how the script is meant to be called.
   (format (current-error-port)
           "Usage: ~a (build|pitch) INPUT OUTPUT~%"
           (if (pair? args) (car args) "wadoku.scm"))
   (exit 1)))
80 |