Add wadoku, separate i18n, build dictionary list

Julien LepillerWed Jun 03 20:45:27+0200 2020

d0d7b29

Add wadoku, separate i18n, build dictionary list

pages/blog.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages blog)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:use-module (haunt builder blog)

pages/data.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages data)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:export (page-data))

pages/documentation.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages documentation)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:export (page-documentation))

pages/e404.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages e404)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:export (page404))

pages/feeds.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages feeds)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:export (page-feeds))

pages/index.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages index)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:export (page-index))

pages/mentions.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (pages mentions)
20+
  #:use-module (tools haunt-i18n)
2021
  #:use-module (tools i18n)
2122
  #:use-module (tools theme)
2223
  #:export (page-mentions))

tools/haunt-i18n.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (tools haunt-i18n)
20+
  #:use-module (haunt builder blog)
21+
  #:use-module (haunt html)
22+
  #:use-module (haunt page)
23+
  #:use-module (sxml simple)
24+
  #:use-module (ice-9 match)
25+
  #:use-module (tools i18n)
26+
  #:export (internationalize))
27+
28+
(define (make-my-page lang title filename body layout)
29+
  (lambda (site posts)
30+
    (define (my-body)
31+
      (site-locale lang)
32+
      (tr body))
33+
    (make-page (if (equal? lang "")
34+
                 (string-append filename ".html")
35+
                 (string-append filename "." lang ".html"))
36+
               (with-layout (layout lang filename) site title (my-body))
37+
               sxml->html)))
38+
39+
(define (internationalize title filename body layout)
40+
  (map (lambda (lang) (make-my-page lang title filename body layout)) languages))

tools/i18n.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (tools i18n)
20-
  #:use-module (haunt builder blog)
21-
  #:use-module (haunt html)
22-
  #:use-module (haunt page)
2320
  #:use-module (sxml simple)
2421
  #:use-module (ice-9 match)
25-
  #:export (internationalize languages _ site-locale))
22+
  #:export (languages tr _ site-locale translate))
2623
2724
(define language-map
2825
  `(("" . "en_US.UTF-8")

4643
(define (_ tr . args)
4744
  (let ((res (apply __ tr args)))
4845
    (match res
49-
      ((*TOP* (tr rest ...)) rest))))
46+
      ((*TOP* (tr rest ...)) (car rest)))))
5047
5148
(define (tr body)
5249
  (match body
50+
    (('_ rest) (_ rest))
5351
    (('_ rest ...) (apply _ rest))
5452
    ((tag ('@ opt ...) rest ...) (apply list tag (apply list '@ opt) (map tr rest)))
5553
    ((tag rest ...) (apply list tag (map tr rest)))
5654
    (foo foo)))
5755
58-
(define (make-my-page lang title filename body layout)
59-
  (lambda (site posts)
60-
    (define (my-body)
61-
      (site-locale lang)
62-
      (tr body))
63-
    (make-page (if (equal? lang "")
64-
                 (string-append filename ".html")
65-
                 (string-append filename "." lang ".html"))
66-
               (with-layout (layout lang filename) site title (my-body))
67-
               sxml->html)))
68-
69-
(define (internationalize title filename body layout)
70-
  (map (lambda (lang) (make-my-page lang title filename body layout)) languages))
56+
(define (translate str lang)
57+
  (site-locale lang)
58+
  (tr str))

tools/jmdict.scm

3333
3434
;; Break these steps to try and let the GC reclaim these big objects
3535
(define (get-results1 input frq)
36-
  (let ((sxml (if (equal? (substring input (- (string-length input) 4)) ".xml")
37-
                 (load-dic input)
38-
                 (call-with-input-file input read))))
39-
    (format #t "Read xml~%")
40-
    (sxml->results sxml frq)))
36+
  (call-with-input-file input
37+
    (lambda (port)
38+
      (xml->results port frq))))
4139
4240
(define (get-results input sense-filter frq)
4341
  (let* ((results (get-results1 input frq))

tools/list.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2020 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(use-modules (tools i18n))
20+
(use-modules (nani radk))
21+
(use-modules (nani jmdict serialize))
22+
(use-modules (gcrypt hash))
23+
(use-modules (ice-9 match))
24+
(use-modules (ice-9 format))
25+
(use-modules (ice-9 binary-ports))
26+
27+
(define* (description dico #:key (long? #f) (lang "en"))
28+
  (define radk-synopsis
29+
    `(_ "Radical to Kanji dictionary from the Electronic Dictionary Research and Development Group."))
30+
  (define radk-description
31+
    `(_ "This dictionary allows you to enter kanji by selecting some of its
32+
    components.  Tap the water component button on the bottom of the screen to
33+
    access the kanji selection by component view"))
34+
35+
  (define (jmdict-synopsis lang)
36+
    (match lang
37+
      ("e" `(_ "Japanese/English dictionary from the Electronic Dictionary Research and Development Group."))
38+
      ("dut" `(_ "Japanese/Dutch dictionary from the Electronic Dictionary Research and Development Group."))
39+
      ("fre" `(_ "Japanese/French dictionary from the Electronic Dictionary Research and Development Group."))
40+
      ("ger" `(_ "Japanese/German dictionary from the Electronic Dictionary Research and Development Group."))
41+
      ("hun" `(_ "Japanese/Hungarian dictionary from the Electronic Dictionary Research and Development Group."))
42+
      ("rus" `(_ "Japanese/Russian dictionary from the Electronic Dictionary Research and Development Group."))
43+
      ("slv" `(_ "Japanese/Slovenian dictionary from the Electronic Dictionary Research and Development Group."))
44+
      ("spa" `(_ "Japanese/Spanish dictionary from the Electronic Dictionary Research and Development Group."))
45+
      ("swe" `(_ "Japanese/Swedish dictionary from the Electronic Dictionary Research and Development Group."))))
46+
  (define (jmdict-description lang)
47+
    `(_ "This dictionary allows you to do searches on the main view of this app.
48+
        Failing to download one of these dictionaries will make the app unusable
49+
        as you can't search for anything.  This dictionary can be searched for by
50+
        kanji, reading (kana) and by meaning in the languages you selected."))
51+
52+
  (let* ((english
53+
          (cond
54+
            ((equal? (dico-type dico) "radk")
55+
             (if long?
56+
                 radk-description
57+
                 radk-synopsis))
58+
            ((equal? (dico-type dico) "jmdict")
59+
             (let ((dico-lang (substring dico 7)))
60+
               (if long?
61+
                   (jmdict-description dico-lang)
62+
                   (jmdict-synopsis dico-lang))))))
63+
         (translated (translate english lang)))
64+
    (if (and (equal? english translated) (not (equal? lang "en")))
65+
        #f
66+
        translated)))
67+
68+
(define (filesize file)
69+
  (stat:size (stat file)))
70+
71+
(define (sha256 file)
72+
  (define hash (file-sha256 file))
73+
  (apply
74+
    string-append
75+
    (map
76+
      (lambda (n)
77+
        (format #f "~2,'0x" n))
78+
      (array->list hash))))
79+
80+
(define (dico-type file)
81+
  (cond
82+
    ((equal? file "radicals") "radk")
83+
    ((and (> (string-length file) 6) (equal? (substring file 0 6) "JMdict"))
84+
     "jmdict")))
85+
86+
(define (entries file)
87+
  (cond
88+
    ((equal? (dico-type (dico-name file)) "radk")
89+
     (kanji-count file))
90+
    ((equal? (dico-type (dico-name file)) "jmdict")
91+
     (jmdict-entry-count file))))
92+
93+
(define (dico-name file)
94+
  (basename file ".nani"))
95+
96+
(match (command-line)
97+
  ((_ output dicos ...)
98+
   (with-output-to-file output
99+
     (lambda _
100+
       (for-each
101+
         (lambda (dico)
102+
           (let* ((sha256 (sha256 dico))
103+
                  (size (filesize dico))
104+
                  (name (dico-name dico))
105+
                  (type (dico-type name))
106+
                  (entry-count (entries dico)))
107+
             (format #t "[~a]~%" name)
108+
             (for-each
109+
               (lambda (lang)
110+
                 (let ((synopsis (description name #:lang lang))
111+
                       (description (description name #:lang lang #:long? #t)))
112+
                   (when synopsis
113+
                     (format #t "synopsis=~a=~a~%" lang synopsis))
114+
                   (when description
115+
                     (format #t "description=~a=~a~%" lang description))))
116+
               (filter (lambda (lang) (not (equal? lang ""))) languages))
117+
             (format #t "sha256=~a~%" sha256)
118+
             (format #t "size=~a~%" size)
119+
             (format #t "type=~a~%" type)
120+
             (format #t "entries=~a~%" entry-count)
121+
             (format #t "url=~a~%" (string-append "https://nani.lepiller.eu/" dico))
122+
             (format #t "~%")))
123+
         dicos)))))

tools/theme.scm

2020
  #:use-module (haunt builder blog)
2121
  #:use-module (haunt post)
2222
  #:use-module (haunt site)
23+
  #:use-module (tools haunt-i18n)
2324
  #:use-module (tools i18n)
2425
  #:use-module (srfi srfi-1)
2526
  #:export (nani-theme))

tools/wadoku.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(use-modules (nani jmdict trie))
20+
(use-modules (nani jmdict serialize))
21+
(use-modules (nani wadoku xml))
22+
(use-modules (nani frequency))
23+
(use-modules (nani trie))
24+
(use-modules (nani result))
25+
(use-modules (ice-9 match))
26+
(use-modules (ice-9 binary-ports))
27+
28+
;; Break these steps to try and let the GC reclaim these big objects
29+
(define (get-results1 input frq)
30+
  (call-with-input-file input
31+
    (lambda (port)
32+
      (xml->results port frq))))
33+
34+
(define (get-results input sense-filter frq)
35+
  (let* ((results (get-results1 input frq))
36+
         (results (map (lambda (result)
37+
                         (update-result
38+
                           result
39+
                           #:senses (filter sense-filter
40+
                                            (result-senses result))))
41+
                       results))
42+
         (results (filter (lambda (result) (not (null? (result-senses result))))
43+
                          results)))
44+
    (pk (car results))
45+
    (let ((readings (apply append (map result-readings results))))
46+
      (pk (apply append (map reading-readings readings))))
47+
    results))
48+
49+
(define (compile input sense-filter output)
50+
  (let* ((results (get-results input sense-filter
51+
                               (load-frequency "dictionaries/frequency.tsv")))
52+
         (kanji-trie (compress-trie (make-kanji-trie results)))
53+
         (reading-trie (compress-trie (make-reading-trie results)))
54+
         (meaning-trie (compress-trie (make-meaning-trie results))))
55+
    (format #t "Number of entries in ~a: ~a~%" output (length results))
56+
    (call-with-output-file output
57+
      (lambda (port)
58+
        (put-bytevector port
59+
          (serialize-jmdict results kanji-trie reading-trie meaning-trie))))))
60+
61+
(define (print word dict)
62+
  #t)
63+
64+
(match (command-line)
65+
  ((_ cmd input lang output)
66+
   (cond
67+
    ((equal? cmd "build")
68+
     (if (equal? lang "e")
69+
       (compile input (const #t) output)
70+
       (compile input (lambda (sense) (equal? (sense-language sense) lang)) output)))
71+
    ((equal? cmd "convert")
72+
     (convert input output))
73+
    (else (format #t "Unknown cmd ~a.~%" cmd))))
74+
  ((_ "print" word input)
75+
   (print word input)))