Generate pitch accent dictionary
modules/nani/huffman.scm
23 | 23 | huffman->code | |
24 | 24 | huffman-encode | |
25 | 25 | huffman-decode | |
26 | - | serialize-huffman)) | |
26 | + | serialize-huffman | |
27 | + | serialize-huffman-string huffman-string-size)) | |
27 | 28 | ||
28 | 29 | (define (add-occurence occ char) | |
29 | 30 | (let* ((o (assoc-ref occ char)) | |
… | |||
141 | 142 | (((? char? char) . weight) | |
142 | 143 | (append (bytevector->u8-list (string->utf8 (list->string (list char)))) '(0))))) | |
143 | 144 | (u8-list->bytevector (serialize huffman))) | |
145 | + | ||
146 | + | (define (serialize-huffman-string huffman-code) | |
147 | + | (lambda (str pos bv) | |
148 | + | (let ((sbv (huffman-encode huffman-code str))) | |
149 | + | (bytevector-copy! sbv 0 bv pos (bytevector-length sbv)) | |
150 | + | (+ pos (bytevector-length sbv))))) | |
151 | + | ||
152 | + | (define (huffman-string-size huffman-code) | |
153 | + | (lambda (str) | |
154 | + | (let ((sbv (huffman-encode huffman-code str))) | |
155 | + | (+ (bytevector-length sbv))))) |
modules/nani/jmdict/serialize.scm
77 | 77 | (create-huffman glosses))) | |
78 | 78 | (define sense-huffman-code (huffman->code sense-huffman)) | |
79 | 79 | ||
80 | - | (define (serialize-huffman-string huffman-code) | |
81 | - | (lambda (str pos bv) | |
82 | - | (let ((sbv (huffman-encode huffman-code str))) | |
83 | - | (bytevector-copy! sbv 0 bv pos (bytevector-length sbv)) | |
84 | - | (+ pos (bytevector-length sbv))))) | |
85 | - | (define (huffman-string-size huffman-code) | |
86 | - | (lambda (str) | |
87 | - | (let ((sbv (huffman-encode huffman-code str))) | |
88 | - | (+ (bytevector-length sbv))))) | |
89 | - | ||
90 | 80 | (define (serialize-source source pos bv) | |
91 | 81 | (when (not (source? source)) (throw 'not-source source)) | |
92 | 82 | (let* ((pos (serialize-list (source-content source) serialize-string pos bv)) |
modules/nani/trie.scm
17 | 17 | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | 18 | ||
19 | 19 | (define-module (nani trie) | |
20 | + | #:use-module (nani serialize) | |
21 | + | #:use-module (rnrs bytevectors) | |
20 | 22 | #:use-module (srfi srfi-9) | |
21 | 23 | #:export (make-trie | |
22 | 24 | trie? | |
… | |||
29 | 31 | ||
30 | 32 | make-empty-trie | |
31 | 33 | add-to-trie! | |
32 | - | compress-trie)) | |
34 | + | compress-trie | |
35 | + | ||
36 | + | serialize-trie | |
37 | + | trie-size)) | |
33 | 38 | ||
34 | 39 | (define-record-type trie | |
35 | 40 | (make-trie position vals transitions) | |
… | |||
83 | 88 | (trie-transitions trie))))) | |
84 | 89 | (convert-trie-transitions! trie) | |
85 | 90 | (compress-aux trie)) | |
91 | + | ||
92 | + | (define (pointer-size ptr) | |
93 | + | 5) | |
94 | + | ||
95 | + | (define (serialize-pointer ptr pos bv) | |
96 | + | (bytevector-u8-set! bv pos (car ptr)) | |
97 | + | (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big)) | |
98 | + | (+ pos 5)) | |
99 | + | ||
100 | + | (define (serialize-trie trie serialize size results pos) | |
101 | + | (define (serialize-trie-aux transitions pos) | |
102 | + | (let loop ((pos pos) | |
103 | + | (trs transitions) | |
104 | + | (bvs '())) | |
105 | + | (if (null? trs) | |
106 | + | (cons pos bvs) | |
107 | + | (let* ((next-trie (cdr (car trs))) | |
108 | + | (bv (get-trie-bv next-trie results pos)) | |
109 | + | (pos (car bv)) | |
110 | + | (bv (cdr bv))) | |
111 | + | (loop pos (cdr trs) (append bvs bv)))))) | |
112 | + | ||
113 | + | (define (get-trie-bv trie results pos) | |
114 | + | (trie-position-set! trie pos) | |
115 | + | (let* ((vals-sz (size (trie-vals trie))) | |
116 | + | (trs-sz (list-size (trie-transitions trie) (const 5) #:size? #f)) | |
117 | + | (sz (+ vals-sz 1 trs-sz)) | |
118 | + | (bv (make-bytevector sz 0))) | |
119 | + | (serialize (trie-vals trie) 0 bv) | |
120 | + | (let* ((bvs (serialize-trie-aux | |
121 | + | (trie-transitions trie) | |
122 | + | (+ pos sz))) | |
123 | + | (next-pos (car bvs)) | |
124 | + | (bvs (cdr bvs))) | |
125 | + | (bytevector-u8-set! bv vals-sz (length (trie-transitions trie))) | |
126 | + | (serialize-list (trie-transitions trie) serialize-pointer | |
127 | + | (+ vals-sz 1) bv #:size? #f) | |
128 | + | (cons next-pos (cons bv bvs))))) | |
129 | + | ||
130 | + | (let* ((trie-bv (get-trie-bv trie results pos)) | |
131 | + | (new-pos (car trie-bv)) | |
132 | + | (trie-bv (merge-bvs (cdr trie-bv)))) | |
133 | + | (bytevector-copy! trie-bv 0 results pos (bytevector-length trie-bv)) | |
134 | + | new-pos)) | |
135 | + | ||
136 | + | (define (trie-size trie size) | |
137 | + | (apply + | |
138 | + | (size (trie-vals trie)) | |
139 | + | 1 | |
140 | + | (list-size (trie-transitions trie) pointer-size #:size? #f) | |
141 | + | (map (lambda (trie) (trie-size trie size)) | |
142 | + | (map cdr (trie-transitions trie))))) |
modules/nani/wadoku/pitch.scm unknown status 1
1 | + | ;;; Nani Project website | |
2 | + | ;;; Copyright © 2019 Julien Lepiller <julien@lepiller.eu> | |
3 | + | ;;; | |
4 | + | ;;; This file is part of the Nani Project website. | |
5 | + | ;;; | |
6 | + | ;;; The Nani Project website is free software; you can redistribute it and/or modify it | |
7 | + | ;;; under the terms of the GNU Affero General Public License as published by | |
8 | + | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | + | ;;; your option) any later version. | |
10 | + | ;;; | |
11 | + | ;;; The Nani Project website is distributed in the hope that it will be useful, but | |
12 | + | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + | ;;; GNU Affero General Public License for more details. | |
15 | + | ;;; | |
16 | + | ;;; You should have received a copy of the GNU Affero General Public License | |
17 | + | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
18 | + | ||
19 | + | (define-module (nani wadoku pitch) | |
20 | + | #:use-module (ice-9 binary-ports) | |
21 | + | #:use-module (ice-9 match) | |
22 | + | #:use-module (ice-9 rdelim) | |
23 | + | #:use-module (nani frequency) | |
24 | + | #:use-module (nani huffman) | |
25 | + | #:use-module (nani result) | |
26 | + | #:use-module (nani trie) | |
27 | + | #:use-module (nani wadoku entities) | |
28 | + | #:use-module (srfi srfi-1) | |
29 | + | #:use-module (srfi srfi-9) | |
30 | + | #:use-module (sxml ssax) | |
31 | + | #:use-module (rnrs bytevectors) | |
32 | + | #:export (xml->pitch serialize-pitch pitch-entry-count)) | |
33 | + | ||
34 | + | (define-record-type pitch | |
35 | + | (make-pitch kanjis accents) | |
36 | + | pitch? | |
37 | + | (kanjis pitch-kanjis) | |
38 | + | (accents pitch-accents)) | |
39 | + | ||
40 | + | (define (sxml->element lst elem) | |
41 | + | (let ((elem (match elem | |
42 | + | ((_ . elem) elem) | |
43 | + | (_ elem)))) | |
44 | + | (match elem | |
45 | + | ('accent `(pitch ,(car lst))) | |
46 | + | ('orth (let ((kanji (filter string? lst))) | |
47 | + | (if (null? kanji) #f `(kanji . ,(car kanji))))) | |
48 | + | ('form | |
49 | + | `(form . ,(append-map (lambda (a) (if (list? a) a (list a))) lst))) | |
50 | + | ('reading (filter pair? lst)) | |
51 | + | ('entry | |
52 | + | (let loop ((lst lst) (kanjis '()) (accents '())) | |
53 | + | (if (null? lst) | |
54 | + | (if (or (null? kanjis) (null? accents)) | |
55 | + | #f | |
56 | + | (make-pitch kanjis accents)) | |
57 | + | (match (car lst) | |
58 | + | (('form . f) (loop (append f (cdr lst)) kanjis accents)) | |
59 | + | (('pitch . pitch) (loop (cdr lst) kanjis (cons pitch accents))) | |
60 | + | (('kanji . kanji) (loop (cdr lst) (cons kanji kanjis) accents)) | |
61 | + | (_ (loop (cdr lst) kanjis accents)))))) | |
62 | + | (_ #f)))) | |
63 | + | ||
64 | + | (define parser | |
65 | + | (ssax:make-parser | |
66 | + | NEW-LEVEL-SEED | |
67 | + | (lambda (elem-gi attributes namespaces expected-content seed) | |
68 | + | attributes) | |
69 | + | ||
70 | + | FINISH-ELEMENT | |
71 | + | (lambda (elem-gi attributes namespaces parent-seed seed) | |
72 | + | (if (equal? elem-gi 'entries) | |
73 | + | seed | |
74 | + | (let* ((seed (reverse seed)) | |
75 | + | (element (sxml->element seed elem-gi))) | |
76 | + | (cons element parent-seed)))) | |
77 | + | ||
78 | + | CHAR-DATA-HANDLER | |
79 | + | (lambda (string1 string2 seed) | |
80 | + | (cons (string-append string1 string2) seed)))) | |
81 | + | ||
82 | + | (define (xml->pitch port) | |
83 | + | (filter pitch? (parser port '()))) | |
84 | + | ||
85 | + | (define (make-trie-key key) | |
86 | + | (append-map | |
87 | + | (lambda (c) | |
88 | + | (list (quotient c 16) (modulo c 16))) | |
89 | + | (bytevector->u8-list (string->utf8 key)))) | |
90 | + | ||
91 | + | (define (get-pitch-trie pitches) | |
92 | + | (let ((trie (make-empty-trie))) | |
93 | + | (for-each | |
94 | + | (lambda (pitch) | |
95 | + | (for-each | |
96 | + | (lambda (kanji) | |
97 | + | (for-each | |
98 | + | (lambda (accent) | |
99 | + | (cond | |
100 | + | ((string? accent) | |
101 | + | (add-to-trie! trie (make-trie-key kanji) accent)) | |
102 | + | ((list? accent) | |
103 | + | (add-to-trie! trie (make-trie-key kanji) (car accent))) | |
104 | + | (else | |
105 | + | (throw 'accent accent)))) | |
106 | + | (pitch-accents pitch))) | |
107 | + | (pitch-kanjis pitch))) | |
108 | + | pitches) | |
109 | + | (compress-trie trie))) | |
110 | + | ||
111 | + | (define (collapse-vals! trie) | |
112 | + | (let ((transitions (trie-transitions trie)) | |
113 | + | (vals (trie-vals trie))) | |
114 | + | (trie-vals-set! trie (string-join vals ", ")) | |
115 | + | (for-each collapse-vals! (map cdr transitions)))) | |
116 | + | ||
117 | + | (define (collect-vals trie) | |
118 | + | (let ((transitions (trie-transitions trie)) | |
119 | + | (vals (trie-vals trie))) | |
120 | + | (cons vals (append-map collect-vals (map cdr transitions))))) | |
121 | + | ||
122 | + | (define (entry-number trie) | |
123 | + | (let ((transitions (trie-transitions trie)) | |
124 | + | (vals (trie-vals trie))) | |
125 | + | (apply + (if (string-null? vals) 0 1) | |
126 | + | (map entry-number (map cdr transitions))))) | |
127 | + | ||
128 | + | (define (serialize-pitch pitches) | |
129 | + | (let ((trie (get-pitch-trie pitches))) | |
130 | + | (collapse-vals! trie) | |
131 | + | (let* ((huffman (create-huffman (collect-vals trie))) | |
132 | + | (code (huffman->code huffman)) | |
133 | + | (entries (entry-number trie))) | |
134 | + | (let* ((header (string->utf8 "NANI_PITCH001")) | |
135 | + | (header-size (bytevector-length header)) | |
136 | + | (huffman-bv (serialize-huffman huffman)) | |
137 | + | (huffman-size (bytevector-length huffman-bv)) | |
138 | + | (trie-size (trie-size trie (huffman-string-size code))) | |
139 | + | (result (make-bytevector (+ header-size 4 huffman-size trie-size)))) | |
140 | + | (bytevector-copy! header 0 result 0 header-size) | |
141 | + | (bytevector-u32-set! result header-size entries (endianness big)) | |
142 | + | (bytevector-copy! huffman-bv 0 result (+ header-size 4) huffman-size) | |
143 | + | (serialize-trie trie (serialize-huffman-string code) | |
144 | + | (huffman-string-size code) | |
145 | + | result (+ header-size 4 huffman-size)) | |
146 | + | result)))) | |
147 | + | ||
148 | + | (define (pitch-entry-count file) | |
149 | + | (call-with-input-file file | |
150 | + | (lambda (port) | |
151 | + | ;; header | |
152 | + | (get-bytevector-n port 13) | |
153 | + | ;; size | |
154 | + | (bytevector-u32-ref (get-bytevector-n port 4) 0 (endianness big))))) |
po/fr.po
7 | 7 | msgstr "" | |
8 | 8 | "Project-Id-Version: PACKAGE VERSION\n" | |
9 | 9 | "Report-Msgid-Bugs-To: \n" | |
10 | - | "POT-Creation-Date: 2020-06-04 00:05+0200\n" | |
10 | + | "POT-Creation-Date: 2020-06-05 15:21+0200\n" | |
11 | 11 | "PO-Revision-Date: 2019-04-16 18:08+0200\n" | |
12 | 12 | "Last-Translator: root <julien@lepiller.eu>\n" | |
13 | 13 | "Language-Team: French\n" | |
… | |||
105 | 105 | msgid "JMdict" | |
106 | 106 | msgstr "JMdict" | |
107 | 107 | ||
108 | - | #: tools/list.scm:46 | |
108 | + | #: tools/list.scm:55 | |
109 | 109 | msgid "" | |
110 | 110 | "Japanese/Dutch dictionary from the Electronic Dictionary Research and " | |
111 | 111 | "Development Group." | |
… | |||
113 | 113 | "Dictionnaire japonais/anglais de l’Electronic Dictionary Research and " | |
114 | 114 | "Development Group." | |
115 | 115 | ||
116 | - | #: tools/list.scm:45 | |
116 | + | #: tools/list.scm:54 | |
117 | 117 | #, fuzzy | |
118 | 118 | msgid "" | |
119 | 119 | "Japanese/English dictionary from the Electronic Dictionary Research and " | |
… | |||
122 | 122 | "Dictionnaire japonais/néerlandais de l’Electronic Dictionary Research and " | |
123 | 123 | "Development Group." | |
124 | 124 | ||
125 | - | #: tools/list.scm:47 | |
125 | + | #: tools/list.scm:56 | |
126 | 126 | msgid "" | |
127 | 127 | "Japanese/French dictionary from the Electronic Dictionary Research and " | |
128 | 128 | "Development Group." | |
… | |||
130 | 130 | "Dictionnaire japonais/français de l’Electronic Dictionary Research and " | |
131 | 131 | "Development Group." | |
132 | 132 | ||
133 | - | #: tools/list.scm:36 | |
133 | + | #: tools/list.scm:37 | |
134 | 134 | msgid "Japanese/German dictionary from Wadoku." | |
135 | 135 | msgstr "Dictionnaire japonais/allemand de Wadoku." | |
136 | 136 | ||
137 | - | #: tools/list.scm:48 | |
137 | + | #: tools/list.scm:57 | |
138 | 138 | msgid "" | |
139 | 139 | "Japanese/German dictionary from the Electronic Dictionary Research and " | |
140 | 140 | "Development Group." | |
… | |||
142 | 142 | "Dictionnaire japonais/allemand de l’Electronic Dictionary Research and " | |
143 | 143 | "Development Group." | |
144 | 144 | ||
145 | - | #: tools/list.scm:49 | |
145 | + | #: tools/list.scm:58 | |
146 | 146 | msgid "" | |
147 | 147 | "Japanese/Hungarian dictionary from the Electronic Dictionary Research and " | |
148 | 148 | "Development Group." | |
… | |||
150 | 150 | "Dictionnaire japonais/hongrois de l’Electronic Dictionary Research and " | |
151 | 151 | "Development Group." | |
152 | 152 | ||
153 | - | #: tools/list.scm:50 | |
153 | + | #: tools/list.scm:59 | |
154 | 154 | msgid "" | |
155 | 155 | "Japanese/Russian dictionary from the Electronic Dictionary Research and " | |
156 | 156 | "Development Group." | |
… | |||
158 | 158 | "Dictionnaire japonais/russe de l’Electronic Dictionary Research and " | |
159 | 159 | "Development Group." | |
160 | 160 | ||
161 | - | #: tools/list.scm:51 | |
161 | + | #: tools/list.scm:60 | |
162 | 162 | msgid "" | |
163 | 163 | "Japanese/Slovenian dictionary from the Electronic Dictionary Research and " | |
164 | 164 | "Development Group." | |
… | |||
166 | 166 | "Dictionnaire japonais/slovène de l’Electronic Dictionary Research and " | |
167 | 167 | "Development Group." | |
168 | 168 | ||
169 | - | #: tools/list.scm:52 | |
169 | + | #: tools/list.scm:61 | |
170 | 170 | msgid "" | |
171 | 171 | "Japanese/Spanish dictionary from the Electronic Dictionary Research and " | |
172 | 172 | "Development Group." | |
… | |||
174 | 174 | "Dictionnaire japonais/espagnol de l’Electronic Dictionary Research and " | |
175 | 175 | "Development Group." | |
176 | 176 | ||
177 | - | #: tools/list.scm:53 | |
177 | + | #: tools/list.scm:62 | |
178 | 178 | msgid "" | |
179 | 179 | "Japanese/Swedish dictionary from the Electronic Dictionary Research and " | |
180 | 180 | "Development Group." | |
… | |||
237 | 237 | msgid "Phone: " | |
238 | 238 | msgstr "Téléphone : " | |
239 | 239 | ||
240 | - | #: tools/list.scm:29 | |
240 | + | #: tools/list.scm:45 | |
241 | + | #, fuzzy | |
242 | + | msgid "Pitch accent dictionary from Wadoku." | |
243 | + | msgstr "Dictionnaire d'accent de hauteur de Wadoku." | |
244 | + | ||
245 | + | #: tools/list.scm:30 | |
241 | 246 | msgid "" | |
242 | 247 | "Radical to Kanji dictionary from the Electronic Dictionary Research and " | |
243 | 248 | "Development Group." | |
… | |||
362 | 367 | "l'application. Dans les sections suivantes, nous verrons comment les " | |
363 | 368 | "utiliser." | |
364 | 369 | ||
365 | - | #: tools/list.scm:38 | |
370 | + | #: tools/list.scm:47 | |
371 | + | msgid "" | |
372 | + | "This dictionary allows you to augment search results on the main view\n" | |
373 | + | " with pitch accent (pronounciation) information. Japanese is not " | |
374 | + | "flat,\n" | |
375 | + | " and this dictionary will add information that will help you " | |
376 | + | "pronounce\n" | |
377 | + | " words better, with a standard Japanese pitch accent." | |
378 | + | msgstr "" | |
379 | + | "Ce dictionnaire vous permet d'améliorer les résultats de recherche de la\n" | |
380 | + | " vue principale avec des informations sur l'accent de hauteur (la\n" | |
381 | + | " prononciation). Le japonais n'est pas plat, et ce dictionnaire vous\n" | |
382 | + | " aidera à mieux prononcer les mots, avec l'accent de hauteur du japonais\n" | |
383 | + | " standard." | |
384 | + | ||
385 | + | #: tools/list.scm:39 | |
366 | 386 | msgid "" | |
367 | 387 | "This dictionary allows you to do searches on the main view of this app.\n" | |
368 | 388 | " Failing to download on of these dictionaries will make the app " | |
… | |||
377 | 397 | " dictionnaire permet d’effectuer des recherches par kanji, par\n" | |
378 | 398 | " prononciation (kana) et par traduction allemande." | |
379 | 399 | ||
380 | - | #: tools/list.scm:55 | |
400 | + | #: tools/list.scm:64 | |
381 | 401 | msgid "" | |
382 | 402 | "This dictionary allows you to do searches on the main view of this app.\n" | |
383 | 403 | " Failing to download one of these dictionaries will make the app " | |
… | |||
393 | 413 | " prononciation (kana) et par signification dans les langues que vous\n" | |
394 | 414 | " aurez téléchargées." | |
395 | 415 | ||
396 | - | #: tools/list.scm:31 | |
416 | + | #: tools/list.scm:32 | |
397 | 417 | msgid "" | |
398 | 418 | "This dictionary allows you to enter kanji by selecting some of its\n" | |
399 | 419 | " components. Tap the water component button on the bottom of the screen " |
po/nani.pot
8 | 8 | msgstr "" | |
9 | 9 | "Project-Id-Version: PACKAGE VERSION\n" | |
10 | 10 | "Report-Msgid-Bugs-To: \n" | |
11 | - | "POT-Creation-Date: 2020-06-04 00:05+0200\n" | |
11 | + | "POT-Creation-Date: 2020-06-05 15:21+0200\n" | |
12 | 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" | |
13 | 13 | "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" | |
14 | 14 | "Language-Team: LANGUAGE <LL@li.org>\n" | |
… | |||
97 | 97 | msgid "JMdict" | |
98 | 98 | msgstr "" | |
99 | 99 | ||
100 | - | #: tools/list.scm:46 | |
100 | + | #: tools/list.scm:55 | |
101 | 101 | msgid "" | |
102 | 102 | "Japanese/Dutch dictionary from the Electronic Dictionary Research and " | |
103 | 103 | "Development Group." | |
104 | 104 | msgstr "" | |
105 | 105 | ||
106 | - | #: tools/list.scm:45 | |
106 | + | #: tools/list.scm:54 | |
107 | 107 | msgid "" | |
108 | 108 | "Japanese/English dictionary from the Electronic Dictionary Research and " | |
109 | 109 | "Development Group." | |
110 | 110 | msgstr "" | |
111 | 111 | ||
112 | - | #: tools/list.scm:47 | |
112 | + | #: tools/list.scm:56 | |
113 | 113 | msgid "" | |
114 | 114 | "Japanese/French dictionary from the Electronic Dictionary Research and " | |
115 | 115 | "Development Group." | |
116 | 116 | msgstr "" | |
117 | 117 | ||
118 | - | #: tools/list.scm:36 | |
118 | + | #: tools/list.scm:37 | |
119 | 119 | msgid "Japanese/German dictionary from Wadoku." | |
120 | 120 | msgstr "" | |
121 | 121 | ||
122 | - | #: tools/list.scm:48 | |
122 | + | #: tools/list.scm:57 | |
123 | 123 | msgid "" | |
124 | 124 | "Japanese/German dictionary from the Electronic Dictionary Research and " | |
125 | 125 | "Development Group." | |
126 | 126 | msgstr "" | |
127 | 127 | ||
128 | - | #: tools/list.scm:49 | |
128 | + | #: tools/list.scm:58 | |
129 | 129 | msgid "" | |
130 | 130 | "Japanese/Hungarian dictionary from the Electronic Dictionary Research and " | |
131 | 131 | "Development Group." | |
132 | 132 | msgstr "" | |
133 | 133 | ||
134 | - | #: tools/list.scm:50 | |
134 | + | #: tools/list.scm:59 | |
135 | 135 | msgid "" | |
136 | 136 | "Japanese/Russian dictionary from the Electronic Dictionary Research and " | |
137 | 137 | "Development Group." | |
138 | 138 | msgstr "" | |
139 | 139 | ||
140 | - | #: tools/list.scm:51 | |
140 | + | #: tools/list.scm:60 | |
141 | 141 | msgid "" | |
142 | 142 | "Japanese/Slovenian dictionary from the Electronic Dictionary Research and " | |
143 | 143 | "Development Group." | |
144 | 144 | msgstr "" | |
145 | 145 | ||
146 | - | #: tools/list.scm:52 | |
146 | + | #: tools/list.scm:61 | |
147 | 147 | msgid "" | |
148 | 148 | "Japanese/Spanish dictionary from the Electronic Dictionary Research and " | |
149 | 149 | "Development Group." | |
150 | 150 | msgstr "" | |
151 | 151 | ||
152 | - | #: tools/list.scm:53 | |
152 | + | #: tools/list.scm:62 | |
153 | 153 | msgid "" | |
154 | 154 | "Japanese/Swedish dictionary from the Electronic Dictionary Research and " | |
155 | 155 | "Development Group." | |
… | |||
200 | 200 | msgid "Phone: " | |
201 | 201 | msgstr "" | |
202 | 202 | ||
203 | - | #: tools/list.scm:29 | |
203 | + | #: tools/list.scm:45 | |
204 | + | msgid "Pitch accent dictionary from Wadoku." | |
205 | + | msgstr "" | |
206 | + | ||
207 | + | #: tools/list.scm:30 | |
204 | 208 | msgid "" | |
205 | 209 | "Radical to Kanji dictionary from the Electronic Dictionary Research and " | |
206 | 210 | "Development Group." | |
… | |||
297 | 301 | "In the following sections we will see how to use them." | |
298 | 302 | msgstr "" | |
299 | 303 | ||
300 | - | #: tools/list.scm:38 | |
304 | + | #: tools/list.scm:47 | |
305 | + | msgid "" | |
306 | + | "This dictionary allows you to augment search results on the main view\n" | |
307 | + | " with pitch accent (pronounciation) information. Japanese is not " | |
308 | + | "flat,\n" | |
309 | + | " and this dictionary will add information that will help you " | |
310 | + | "pronounce\n" | |
311 | + | " words better, with a standard Japanese pitch accent." | |
312 | + | msgstr "" | |
313 | + | ||
314 | + | #: tools/list.scm:39 | |
301 | 315 | msgid "" | |
302 | 316 | "This dictionary allows you to do searches on the main view of this app.\n" | |
303 | 317 | " Failing to download on of these dictionaries will make the app " | |
… | |||
307 | 321 | " by kanji, reading (kana) and by German translation." | |
308 | 322 | msgstr "" | |
309 | 323 | ||
310 | - | #: tools/list.scm:55 | |
324 | + | #: tools/list.scm:64 | |
311 | 325 | msgid "" | |
312 | 326 | "This dictionary allows you to do searches on the main view of this app.\n" | |
313 | 327 | " Failing to download one of these dictionaries will make the app " | |
… | |||
317 | 331 | " kanji, reading (kana) and by meaning in the languages you selected." | |
318 | 332 | msgstr "" | |
319 | 333 | ||
320 | - | #: tools/list.scm:31 | |
334 | + | #: tools/list.scm:32 | |
321 | 335 | msgid "" | |
322 | 336 | "This dictionary allows you to enter kanji by selecting some of its\n" | |
323 | 337 | " components. Tap the water component button on the bottom of the screen " |
tools/list.scm
19 | 19 | (use-modules (tools i18n)) | |
20 | 20 | (use-modules (nani radk)) | |
21 | 21 | (use-modules (nani jmdict serialize)) | |
22 | + | (use-modules (nani wadoku pitch)) | |
22 | 23 | (use-modules (gcrypt hash)) | |
23 | 24 | (use-modules (ice-9 match)) | |
24 | 25 | (use-modules (ice-9 format)) | |
… | |||
40 | 41 | as you can't search for anything. This dictionary can be searched for | |
41 | 42 | by kanji, reading (kana) and by German translation.")) | |
42 | 43 | ||
44 | + | (define wadoku-pitch-synopsis | |
45 | + | `(_ "Pitch accent dictionary from Wadoku.")) | |
46 | + | (define wadoku-pitch-description | |
47 | + | `(_ "This dictionary allows you to augment search results on the main view | |
48 | + | with pitch accent (pronounciation) information. Japanese is not flat, | |
49 | + | and this dictionary will add information that will help you pronounce | |
50 | + | words better, with a standard Japanese pitch accent.")) | |
51 | + | ||
43 | 52 | (define (jmdict-synopsis lang) | |
44 | 53 | (match lang | |
45 | 54 | ("e" `(_ "Japanese/English dictionary from the Electronic Dictionary Research and Development Group.")) | |
… | |||
67 | 76 | (if long? | |
68 | 77 | wadoku-description | |
69 | 78 | wadoku-synopsis)) | |
79 | + | ((equal? (dico-type dico) "wadoku_pitch") | |
80 | + | (if long? | |
81 | + | wadoku-pitch-description | |
82 | + | wadoku-pitch-synopsis)) | |
70 | 83 | ((equal? (dico-type dico) "jmdict") | |
71 | 84 | (let ((dico-lang (substring dico 7))) | |
72 | 85 | (if long? | |
… | |||
94 | 107 | ((equal? file "radicals") "radk") | |
95 | 108 | ((and (> (string-length file) 6) (equal? (substring file 0 6) "JMdict")) | |
96 | 109 | "jmdict") | |
97 | - | ((equal? file "wadoku_ger") "wadoku"))) | |
110 | + | ((equal? file "wadoku_ger") "wadoku") | |
111 | + | ((equal? file "wadoku_pitch") "wadoku_pitch"))) | |
98 | 112 | ||
99 | 113 | (define (entries file) | |
100 | 114 | (cond | |
101 | 115 | ((equal? (dico-type (dico-name file)) "radk") | |
102 | 116 | (kanji-count file)) | |
103 | 117 | ((member (dico-type (dico-name file)) '("jmdict" "wadoku")) | |
104 | - | (jmdict-entry-count file)))) | |
118 | + | (jmdict-entry-count file)) | |
119 | + | ((equal? (dico-type (dico-name file)) "wadoku_pitch") | |
120 | + | (pitch-entry-count file)))) | |
105 | 121 | ||
106 | 122 | (define (dico-name file) | |
107 | 123 | (basename file ".nani")) |
tools/wadoku.scm
1 | 1 | ;;; Nani Project website | |
2 | - | ;;; Copyright © 2019 Julien Lepiller <julien@lepiller.eu> | |
2 | + | ;;; Copyright © 2020 Julien Lepiller <julien@lepiller.eu> | |
3 | 3 | ;;; | |
4 | 4 | ;;; This file is part of the Nani Project website. | |
5 | 5 | ;;; | |
… | |||
19 | 19 | (use-modules (nani jmdict trie)) | |
20 | 20 | (use-modules (nani jmdict serialize)) | |
21 | 21 | (use-modules (nani wadoku xml)) | |
22 | + | (use-modules (nani wadoku pitch)) | |
22 | 23 | (use-modules (nani frequency)) | |
23 | 24 | (use-modules (nani trie)) | |
24 | 25 | (use-modules (nani result)) | |
… | |||
55 | 56 | (put-bytevector port | |
56 | 57 | (serialize-jmdict results kanji-trie reading-trie meaning-trie)))))) | |
57 | 58 | ||
58 | - | (define (print word dict) | |
59 | - | #t) | |
59 | + | (define (get-pitch input) | |
60 | + | (call-with-input-file input | |
61 | + | (lambda (port) | |
62 | + | (xml->pitch port)))) | |
63 | + | ||
64 | + | (define (pitch input output) | |
65 | + | (let ((results (get-pitch input))) | |
66 | + | (format #t "~a results." (length results)) | |
67 | + | (call-with-output-file output | |
68 | + | (lambda (port) | |
69 | + | (put-bytevector port | |
70 | + | (serialize-pitch results)))))) | |
60 | 71 | ||
61 | 72 | (match (command-line) | |
62 | - | ((_ cmd input lang output) | |
73 | + | ((_ cmd input output) | |
63 | 74 | (cond | |
64 | 75 | ((equal? cmd "build") | |
65 | - | (if (equal? lang "e") | |
66 | - | (compile input (const #t) output) | |
67 | - | (compile input (lambda (sense) (equal? (sense-language sense) lang)) output))) | |
68 | - | (else (format #t "Unknown cmd ~a.~%" cmd)))) | |
69 | - | ((_ "print" word input) | |
70 | - | (print word input))) | |
76 | + | (compile input (const #t) output)) | |
77 | + | ((equal? cmd "pitch") | |
78 | + | (pitch input output)) | |
79 | + | (else (format #t "Unknown cmd ~a.~%" cmd))))) |
wadoku.mk
1 | 1 | WADOKU_TMP_DIR=dictionaries/wadoku-tmp | |
2 | - | DICOS+=dicos/wadoku_ger.nani | |
2 | + | DICOS+=dicos/wadoku_ger.nani dicos/wadoku_pitch.nani | |
3 | 3 | DOWNLOADS+=dictionaries/wadoku.xml | |
4 | 4 | ||
5 | 5 | dictionaries/wadoku.xml: | |
… | |||
14 | 14 | rm -rf $(WADOKU_TMP_DIR) | |
15 | 15 | ||
16 | 16 | dicos/wadoku_ger.nani: dictionaries/wadoku.xml tools/wadoku.scm dictionaries/frequency.tsv $(DICO_MODULES) | |
17 | - | guile -L modules tools/wadoku.scm build \ | |
18 | - | $< $(shell echo $@ | sed 's|^.*_\([^.]*\)\..*$$|\1|g') $@ | |
17 | + | guile -L modules tools/wadoku.scm build $< $@ | |
18 | + | ||
19 | + | dicos/wadoku_pitch.nani: dictionaries/wadoku.xml tools/wadoku.scm | |
20 | + | guile -L modules tools/wadoku.scm pitch $< $@ |