Add kanjivg

Julien LepillerTue Aug 03 14:23:18+0200 2021

411647a

Add kanjivg

Makefile

1212
include jibiki.mk
1313
include jmdict.mk
1414
include kanjidic.mk
15+
include kanjivg.mk
1516
include radicals.mk
1617
include wadoku.mk
1718

kanjivg.mk unknown status 1

1+
DICOS+=dicos/kanjivg.nani
2+
DOWNLOADS+=dictionaries/kanjivg
3+
4+
dictionaries/kanjivg:
5+
	git clone --depth 1 https://github.com/KanjiVG/kanjivg $@
6+
7+
dicos/kanjivg.nani: dictionaries/kanjivg tools/kanjivg.scm $(DICO_MODULES)
8+
	guile -L modules tools/kanjivg.scm build $< $@

modules/nani/kanji/kanjivg.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2021 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani kanji kanjivg)
20+
  #:use-module (ice-9 binary-ports)
21+
  #:use-module (ice-9 match)
22+
  #:use-module (nani encoding huffman)
23+
  #:use-module (nani encoding serialize)
24+
  #:use-module (nani encoding trie)
25+
  #:use-module (rnrs bytevectors)
26+
  #:use-module (srfi srfi-9)
27+
  #:use-module (sxml simple)
28+
  #:export (make-kanji
29+
            kanji?
30+
            kanji-position kanji-position-set!
31+
            kanji-kanji
32+
            kanji-elements
33+
            kanji-strokes
34+
35+
            xml->kanji
36+
            serialize-stroke stroke-size
37+
            serialize-kanji kanji-size
38+
            serialize-kanjivg
39+
            kanjivg-entry-count))
40+
41+
(define-record-type <kanji>
42+
  (make-kanji position kanji elements strokes)
43+
  kanji?
44+
  (position kanji-position kanji-position-set!)
45+
  (kanji    kanji-kanji)
46+
  (elements kanji-elements)
47+
  (strokes  kanji-strokes))
48+
49+
(define-record-type <stroke>
50+
  (make-stroke x y command)
51+
  stroke?
52+
  (x       stroke-x)
53+
  (y       stroke-y)
54+
  (command stroke-command))
55+
56+
(define (strokes->elements strokes)
57+
  "collect only top-level elements"
58+
  (match strokes
59+
    (() '())
60+
    ((('http://www.w3.org/2000/svg:g ('@ attr ...) _ ...) strokes ...)
61+
     (let loop ((attr attr))
62+
       (match attr
63+
         (() (strokes->elements strokes))
64+
         ((('kvg:element element) _ ...)
65+
          (cons element (strokes->elements strokes)))
66+
         ((_ attr ...) (loop attr)))))
67+
    (((_ ...) strokes ...)
68+
     (strokes->elements strokes))))
69+
70+
(define (find-stroke kvg id)
71+
  (match kvg
72+
    (() #f)
73+
    ((('http://www.w3.org/2000/svg:path ('@ attr ...)) kvg ...)
74+
     (if (equal? (car (assoc-ref attr 'id)) id)
75+
         (car (assoc-ref attr 'd))
76+
         (find-stroke kvg id)))
77+
    ((('http://www.w3.org/2000/svg:g ('@ _ ...) content ...) kvg ...)
78+
     (or (find-stroke content id) (find-stroke kvg id)))
79+
    (((? string? _) kvg ...)
80+
     (find-stroke kvg id))))
81+
82+
(define (get-strokes strokes label id)
83+
  (match label
84+
    (() '())
85+
    ((('http://www.w3.org/2000/svg:text ('@ ('transform transform)) num) label ...) 
86+
     (let* ((xy (substring transform 15 (- (string-length transform) 1)))
87+
            (xy (string-split xy #\ ))
88+
            (x (car xy))
89+
            (y (cadr xy)))
90+
       (cons (make-stroke x y (find-stroke strokes (string-append id "-s" num)))
91+
             (get-strokes strokes label id))))
92+
    (((? string? _) label ...)
93+
     (get-strokes strokes label id))))
94+
95+
(define (kvg-ref kvg elem)
96+
  (assoc-ref kvg (symbol-append 'http://www.w3.org/2000/svg: elem)))
97+
98+
(define (xml->kanji port)
99+
  (match (xml->sxml port #:namespaces '((kvg . "kvg")))
100+
    (('*TOP* _ ('http://www.w3.org/2000/svg:svg ('@ _ ...) svg ...))
101+
     (match (filter pair? svg)
102+
       ((('http://www.w3.org/2000/svg:g ('@ _ ...) strokes ...)
103+
         ('http://www.w3.org/2000/svg:g ('@ _ ...) labels ...))
104+
        (let* ((elements (strokes->elements (filter pair? (kvg-ref strokes 'g))))
105+
               (kanji (assoc-ref (assoc-ref (kvg-ref strokes 'g) '@) 'kvg:element))
106+
               (kanji (if kanji (car kanji) #f))
107+
               (id (car (assoc-ref (assoc-ref (kvg-ref strokes 'g) '@) 'id)))
108+
               (strokes (get-strokes strokes labels id)))
109+
          (make-kanji 0 kanji elements strokes)))))))
110+
111+
(define (serialize-stroke command-huffman-code)
112+
  (lambda (stroke pos bv)
113+
    (when (not (stroke? stroke)) (throw 'not-stroke stroke))
114+
    (let* ((pos ((serialize-huffman-string command-huffman-code) (stroke-command stroke) pos bv))
115+
           (pos ((serialize-huffman-string command-huffman-code) (stroke-x stroke) pos bv))
116+
           (pos ((serialize-huffman-string command-huffman-code) (stroke-y stroke) pos bv)))
117+
      pos)))
118+
(define (stroke-size command-huffman-code)
119+
  (lambda (stroke)
120+
    (when (not (stroke? stroke)) (throw 'not-stroke stroke))
121+
    (+ ((huffman-string-size command-huffman-code) (stroke-command stroke))
122+
       ((huffman-string-size command-huffman-code) (stroke-x stroke))
123+
       ((huffman-string-size command-huffman-code) (stroke-y stroke)))))
124+
125+
(define (serialize-kanji command-huffman-code)
126+
  (lambda (kanji pos bv)
127+
    (when (not (kanji? kanji)) (throw 'not-kanji kanji))
128+
    (kanji-position-set! kanji pos)
129+
    (let* ((pos ((serialize-list serialize-string) (kanji-elements kanji) pos bv))
130+
           (pos ((serialize-list (serialize-stroke command-huffman-code))
131+
                 (kanji-strokes kanji) pos bv)))
132+
      pos)))
133+
(define (kanji-size command-huffman-code)
134+
  (lambda (kanji)
135+
    (when (not (kanji? kanji)) (throw 'not-kanji kanji))
136+
    (+ ((list-size string-size) (kanji-elements kanji))
137+
       ((list-size (stroke-size command-huffman-code)) (kanji-strokes kanji)))))
138+
139+
(define (update-trie-pos! trie kanji)
140+
  (let* ((vals (trie-vals trie))
141+
         (vals (match vals
142+
                 ((pos) (kanji-position (array-ref kanji pos)))
143+
                 ((pos1 pos2) ; alias between ??? and ???, maybe others?
144+
                  (kanji-position (array-ref kanji pos1)))
145+
                 (() 0))))
146+
    (trie-vals-set! trie vals))
147+
  (for-each
148+
    (match-lambda
149+
      ((char . child)
150+
       (update-trie-pos! child kanji)))
151+
    (trie-transitions trie)))
152+
153+
(define (make-key key)
154+
  (apply append
155+
    (map (lambda (c)
156+
           (list (quotient c 16) (modulo c 16)))
157+
         (bytevector->u8-list (string->utf8 key)))))
158+
159+
(define (make-kanji-trie kanji)
160+
  (let ((trie (make-empty-trie)))
161+
    (let loop ((kanji kanji) (i 0))
162+
      (if (null? kanji)
163+
          (compress-trie trie)
164+
          (begin
165+
            (add-to-trie! trie (make-key (kanji-kanji (car kanji))) i)
166+
            (loop (cdr kanji) (+ i 1)))))))
167+
168+
(define (serialize-kanjivg results)
169+
  (define command-huffman
170+
    (let* ((strokes (apply append (map kanji-strokes results)))
171+
           (commands (map stroke-command strokes))
172+
           (x (map stroke-x strokes))
173+
           (y (map stroke-y strokes)))
174+
      (create-huffman (append commands x y))))
175+
  (define command-huffman-code (huffman->code command-huffman))
176+
177+
  (let* ((header (string->utf8 "NANI_KANJIVG001"))
178+
         (header-size (bytevector-length header))
179+
         (command-huffman-bv (serialize-huffman command-huffman))
180+
         (command-huffman-size (bytevector-length command-huffman-bv))
181+
         (serialize-trie (serialize-trie-single serialize-int int-size))
182+
         (trie-size (trie-size-single int-size))
183+
         (kanji-trie (make-kanji-trie results))
184+
         (kanji-trie-size (trie-size kanji-trie))
185+
         (results-size
186+
           ((list-size (kanji-size command-huffman-code)
187+
                       #:size? #f)
188+
            results))
189+
         (bv (make-bytevector (+ header-size 4 command-huffman-size
190+
                                 kanji-trie-size results-size))))
191+
    (format #t "Number of kanji: ~a~%" (length results))
192+
    ((serialize-list (serialize-kanji command-huffman-code)
193+
                     #:size? #f)
194+
     results (+ header-size 4 command-huffman-size kanji-trie-size) bv)
195+
    (let ((results (list->array 1 results)))
196+
      (update-trie-pos! kanji-trie results))
197+
    (bytevector-copy! header 0 bv 0 header-size)
198+
    (serialize-int (length results) (+ header-size) bv)
199+
    (bytevector-copy! command-huffman-bv 0 bv (+ header-size 4)
200+
                      command-huffman-size)
201+
    (serialize-trie kanji-trie (+ header-size 4 command-huffman-size) bv)
202+
    bv))
203+
204+
(define (kanjivg-entry-count file)
205+
  (call-with-input-file file
206+
    (lambda (port)
207+
      (let* ((header (get-bytevector-n port 15))
208+
             (size (get-bytevector-n port 4)))
209+
        (bytevector-u32-ref size 0 (endianness big))))))

po/nani.pot

88
msgstr ""
99
"Project-Id-Version: PACKAGE VERSION\n"
1010
"Report-Msgid-Bugs-To: \n"
11-
"POT-Creation-Date: 2021-07-21 14:25+0200\n"
11+
"POT-Creation-Date: 2021-07-29 21:50+0200\n"
1212
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
1313
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
1414
"Language-Team: LANGUAGE <LL@li.org>\n"

102102
msgid "JMdict"
103103
msgstr ""
104104
105-
#: tools/list.scm:64
105+
#: tools/list.scm:71
106106
msgid ""
107107
"Japanese/Dutch dictionary from the Electronic Dictionary Research and "
108108
"Development Group."
109109
msgstr ""
110110
111-
#: tools/list.scm:63
111+
#: tools/list.scm:70
112112
msgid ""
113113
"Japanese/English dictionary from the Electronic Dictionary Research and "
114114
"Development Group."
115115
msgstr ""
116116
117-
#: tools/list.scm:65
117+
#: tools/list.scm:72
118118
msgid ""
119119
"Japanese/French dictionary from the Electronic Dictionary Research and "
120120
"Development Group."
121121
msgstr ""
122122
123-
#: tools/list.scm:54
123+
#: tools/list.scm:61
124124
msgid "Japanese/French dictionary from the Jibiki project."
125125
msgstr ""
126126
127-
#: tools/list.scm:38
127+
#: tools/list.scm:45
128128
msgid "Japanese/German dictionary from Wadoku."
129129
msgstr ""
130130
131-
#: tools/list.scm:66
131+
#: tools/list.scm:73
132132
msgid ""
133133
"Japanese/German dictionary from the Electronic Dictionary Research and "
134134
"Development Group."
135135
msgstr ""
136136
137-
#: tools/list.scm:67
137+
#: tools/list.scm:74
138138
msgid ""
139139
"Japanese/Hungarian dictionary from the Electronic Dictionary Research and "
140140
"Development Group."
141141
msgstr ""
142142
143-
#: tools/list.scm:68
143+
#: tools/list.scm:75
144144
msgid ""
145145
"Japanese/Russian dictionary from the Electronic Dictionary Research and "
146146
"Development Group."
147147
msgstr ""
148148
149-
#: tools/list.scm:69
149+
#: tools/list.scm:76
150150
msgid ""
151151
"Japanese/Slovenian dictionary from the Electronic Dictionary Research and "
152152
"Development Group."
153153
msgstr ""
154154
155-
#: tools/list.scm:70
155+
#: tools/list.scm:77
156156
msgid ""
157157
"Japanese/Spanish dictionary from the Electronic Dictionary Research and "
158158
"Development Group."
159159
msgstr ""
160160
161-
#: tools/list.scm:71
161+
#: tools/list.scm:78
162162
msgid ""
163163
"Japanese/Swedish dictionary from the Electronic Dictionary Research and "
164164
"Development Group."
165165
msgstr ""
166166
167-
#: tools/list.scm:80
167+
#: tools/list.scm:87
168168
msgid "Kanji dictionary with English meanings."
169169
msgstr ""
170170
171-
#: tools/list.scm:82
171+
#: tools/list.scm:89
172172
msgid "Kanji dictionary with French meanings."
173173
msgstr ""
174174
175-
#: tools/list.scm:83
175+
#: tools/list.scm:90
176176
msgid "Kanji dictionary with Portuguese meanings."
177177
msgstr ""
178178
179-
#: tools/list.scm:81
179+
#: tools/list.scm:88
180180
msgid "Kanji dictionary with Spanish meanings."
181181
msgstr ""
182182
183+
#: tools/list.scm:39
184+
msgid "Kanji writing visual help by the Kanjivg project."
185+
msgstr ""
186+
183187
#: tools/theme.scm:59
184188
msgid "Language"
185189
msgstr ""

229233
msgid "Phone: "
230234
msgstr ""
231235
232-
#: tools/list.scm:46
236+
#: tools/list.scm:53
233237
msgid "Pitch accent dictionary from Wadoku."
234238
msgstr ""
235239
236-
#: tools/list.scm:31
240+
#: tools/list.scm:32
237241
msgid ""
238242
"Radical to Kanji dictionary from the Electronic Dictionary Research and "
239243
"Development Group."

331335
"In the following sections we will see how to use them."
332336
msgstr ""
333337
334-
#: tools/list.scm:48
338+
#: tools/list.scm:55
335339
msgid ""
336340
"This dictionary allows you to augment search results on the main view\n"
337341
"         with pitch accent (pronunciation) information.  Japanese is not "

341345
"         words better, with a standard Japanese pitch accent."
342346
msgstr ""
343347
344-
#: tools/list.scm:56
348+
#: tools/list.scm:63
345349
msgid ""
346350
"This dictionary allows you to do searches on the main view of this app.\n"
347351
"\tFailing to download one of these dictionaries will make the app unusable\n"

349353
"\tby kanji, reading (kana) and by French translation."
350354
msgstr ""
351355
352-
#: tools/list.scm:40
356+
#: tools/list.scm:47
353357
msgid ""
354358
"This dictionary allows you to do searches on the main view of this app.\n"
355359
"        Failing to download one of these dictionaries will make the app "

359363
"        by kanji, reading (kana) and by German translation."
360364
msgstr ""
361365
362-
#: tools/list.scm:73
366+
#: tools/list.scm:80
363367
msgid ""
364368
"This dictionary allows you to do searches on the main view of this app.\n"
365369
"        Failing to download one of these dictionaries will make the app "

369373
"        kanji, reading (kana) and by meaning in the languages you selected."
370374
msgstr ""
371375
372-
#: tools/list.scm:33
376+
#: tools/list.scm:34
373377
msgid ""
374378
"This dictionary allows you to enter kanji by selecting some of its\n"
375379
"    components.  Tap the water component button on the bottom of the screen "

377381
"    access the kanji selection by component view"
378382
msgstr ""
379383
380-
#: tools/list.scm:85
384+
#: tools/list.scm:92
381385
msgid ""
382386
"This dictionary allows you to search for kanji and view kanji information\n"
383387
"        such as number of strokes, pronunciations and meanings."
384388
msgstr ""
385389
390+
#: tools/list.scm:41
391+
msgid ""
392+
"This dictionary allows you to see how a kanji is written, what it is\n"
393+
"composed of, and the order in which strokes are written."
394+
msgstr ""
395+
386396
#: pages/documentation.scm:30
387397
msgid ""
388398
"This documentation will guide you in some of the most important\n"

tools/kanjivg.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2021 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(use-modules (nani kanji kanjivg))
20+
(use-modules (ice-9 match))
21+
(use-modules (ice-9 binary-ports))
22+
23+
(define (get-files directory)
24+
  (let loop ((dir (opendir (string-append directory "/kanji/"))) (svgs '()))
25+
    (let ((entry (readdir dir)))
26+
      (cond
27+
        ((eof-object? entry) svgs)
28+
        ((string-contains entry "-") (loop dir svgs))
29+
        ((string-prefix? "000" entry) (loop dir svgs))
30+
        ((string-suffix? ".svg" entry)
31+
         (loop dir (cons (string-append directory "/kanji/" entry) svgs)))
32+
        (else (loop dir svgs))))))
33+
34+
(match (command-line)
35+
  ((_ cmd kanjivg-dir output)
36+
   (cond
37+
    ((equal? cmd "build")
38+
     (let* ((results
39+
              (map
40+
                (lambda (file)
41+
                  (call-with-input-file file
42+
                    (lambda (port)
43+
                      (let ((r (xml->kanji port)))
44+
                        (unless (kanji-kanji r)
45+
                          (pk 'no-kanji file))
46+
                        r))))
47+
                (sort (get-files kanjivg-dir) string<?))))
48+
       (call-with-output-file output
49+
         (lambda (port)
50+
           (put-bytevector port
51+
             (serialize-kanjivg (filter
52+
                                  (lambda (r) (kanji-kanji r))
53+
                                  results)))))))
54+
    (else (format #t "Unknown cmd ~a.~%" cmd)))))

tools/list.scm

1818
1919
(use-modules (tools i18n))
2020
(use-modules (nani kanji kanjidic))
21-
(use-modules (nani kanji radk))
21+
(use-modules ((nani kanji radk) #:prefix radk:))
22+
(use-modules (nani kanji kanjivg))
2223
(use-modules (nani result result))
2324
(use-modules (nani pitch pitch))
2425
(use-modules (gcrypt hash))

3435
    components.  Tap the water component button on the bottom of the screen to
3536
    access the kanji selection by component view"))
3637
38+
  (define ksvg-synopsis
39+
    `(_ "Kanji writing visual help by the Kanjivg project."))
40+
  (define ksvg-description
41+
    `(_ "This dictionary allows you to see how a kanji is written, what it is
42+
composed of, and the order in which strokes are written."))
43+
3744
  (define wadoku-synopsis
3845
    `(_ "Japanese/German dictionary from Wadoku."))
3946
  (define wadoku-description

9198
             (if long?
9299
                 radk-description
93100
                 radk-synopsis))
101+
            ((equal? (dico-type dico) "ksvg")
102+
             (if long?
103+
                 ksvg-description
104+
                 ksvg-synopsis))
94105
            ((equal? (dico-type dico) "kanjidic")
95106
             (let ((dico-lang (substring dico 9)))
96107
               (if long?

133144
(define (dico-type file)
134145
  (cond
135146
    ((equal? file "radicals") "radk")
147+
    ((equal? file "kanjivg") "ksvg")
136148
    ((and (> (string-length file) 8) (equal? (substring file 0 8) "kanjidic"))
137149
     "kanjidic")
138150
    ((and (> (string-length file) 6) (equal? (substring file 0 6) "JMdict"))

144156
(define (entries file)
145157
  (cond
146158
    ((equal? (dico-type (dico-name file)) "radk")
147-
     (kanji-count file))
159+
     (radk:kanji-count file))
160+
    ((equal? (dico-type (dico-name file)) "ksvg")
161+
     (kanjivg-entry-count file))
148162
    ((equal? (dico-type (dico-name file)) "kanjidic")
149163
     (kanjidic-entry-count file))
150164
    ((member (dico-type (dico-name file)) '("jmdict" "wadoku" "jibiki"))

158172
(define (dico-lang name)
159173
  (cond
160174
    ((equal? name "radicals") "")
161-
    ((equal? name "kanjidic") "")
175+
    ((equal? name "kanjivg") "")
162176
    ((equal? name "wadoku_pitch") "")
177+
    ((and (> (string-length name) 8) (equal? (substring name 0 8) "kanjidic"))
178+
     (substring name 9))
163179
    ((and (> (string-length name) 6) (equal? (substring name 0 6) "JMdict"))
164180
     (let ((lang (substring name 7)))
165181
       (match lang