Generat pitch accent dictionary

Julien LepillerSat Jun 06 00:51:58+0200 2020

a3c71d2

Generat pitch accent dictionary

modules/nani/huffman.scm

2323
            huffman->code
2424
            huffman-encode
2525
            huffman-decode
26-
            serialize-huffman))
26+
            serialize-huffman
27+
            serialize-huffman-string huffman-string-size))
2728
2829
(define (add-occurence occ char)
2930
  (let* ((o (assoc-ref occ char))

141142
      (((? char? char) . weight)
142143
       (append (bytevector->u8-list (string->utf8 (list->string (list char)))) '(0)))))
143144
  (u8-list->bytevector (serialize huffman)))
145+
146+
(define (serialize-huffman-string huffman-code)
147+
  (lambda (str pos bv)
148+
    (let ((sbv (huffman-encode huffman-code str)))
149+
      (bytevector-copy! sbv 0 bv pos (bytevector-length sbv))
150+
      (+ pos (bytevector-length sbv)))))
151+
152+
(define (huffman-string-size huffman-code)
153+
  (lambda (str)
154+
    (let ((sbv (huffman-encode huffman-code str)))
155+
      (+ (bytevector-length sbv)))))

modules/nani/jmdict/serialize.scm

7777
      (create-huffman glosses)))
7878
  (define sense-huffman-code (huffman->code sense-huffman))
7979
80-
  (define (serialize-huffman-string huffman-code)
81-
    (lambda (str pos bv)
82-
      (let ((sbv (huffman-encode huffman-code str)))
83-
        (bytevector-copy! sbv 0 bv pos (bytevector-length sbv))
84-
        (+ pos (bytevector-length sbv)))))
85-
  (define (huffman-string-size huffman-code)
86-
    (lambda (str)
87-
      (let ((sbv (huffman-encode huffman-code str)))
88-
        (+ (bytevector-length sbv)))))
89-
  
9080
  (define (serialize-source source pos bv)
9181
    (when (not (source? source)) (throw 'not-source source))
9282
    (let* ((pos (serialize-list (source-content source) serialize-string pos bv))

modules/nani/trie.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (nani trie)
20+
  #:use-module (nani serialize)
21+
  #:use-module (rnrs bytevectors)
2022
  #:use-module (srfi srfi-9)
2123
  #:export (make-trie
2224
            trie?

2931
            
3032
            make-empty-trie
3133
            add-to-trie!
32-
            compress-trie))
34+
            compress-trie
35+
            
36+
            serialize-trie
37+
            trie-size))
3338
3439
(define-record-type trie
3540
  (make-trie position vals transitions)

8388
          (trie-transitions trie)))))
8489
  (convert-trie-transitions! trie)
8590
  (compress-aux trie))
91+
92+
(define (pointer-size ptr)
93+
  5)
94+
95+
(define (serialize-pointer ptr pos bv)
96+
  (bytevector-u8-set! bv pos (car ptr))
97+
  (bytevector-u32-set! bv (+ pos 1) (trie-position (cdr ptr)) (endianness big))
98+
  (+ pos 5))
99+
100+
(define (serialize-trie trie serialize size results pos)
101+
  (define (serialize-trie-aux transitions pos)
102+
    (let loop ((pos pos)
103+
               (trs transitions)
104+
               (bvs '()))
105+
      (if (null? trs)
106+
        (cons pos bvs)
107+
        (let* ((next-trie (cdr (car trs)))
108+
               (bv (get-trie-bv next-trie results pos))
109+
               (pos (car bv))
110+
               (bv (cdr bv)))
111+
          (loop pos (cdr trs) (append bvs bv))))))
112+
113+
  (define (get-trie-bv trie results pos)
114+
    (trie-position-set! trie pos)
115+
    (let* ((vals-sz (size (trie-vals trie)))
116+
           (trs-sz (list-size (trie-transitions trie) (const 5) #:size? #f))
117+
           (sz (+ vals-sz 1 trs-sz))
118+
           (bv (make-bytevector sz 0)))
119+
      (serialize (trie-vals trie) 0 bv)
120+
      (let* ((bvs (serialize-trie-aux
121+
                    (trie-transitions trie)
122+
                    (+ pos sz)))
123+
             (next-pos (car bvs))
124+
             (bvs (cdr bvs)))
125+
        (bytevector-u8-set! bv vals-sz (length (trie-transitions trie)))
126+
        (serialize-list (trie-transitions trie) serialize-pointer
127+
                        (+ vals-sz 1) bv #:size? #f)
128+
        (cons next-pos (cons bv bvs)))))
129+
  
130+
  (let* ((trie-bv (get-trie-bv trie results pos))
131+
         (new-pos (car trie-bv))
132+
         (trie-bv (merge-bvs (cdr trie-bv))))
133+
    (bytevector-copy! trie-bv 0 results pos (bytevector-length trie-bv))
134+
    new-pos))
135+
 
136+
(define (trie-size trie size)
137+
  (apply +
138+
    (size (trie-vals trie))
139+
    1
140+
    (list-size (trie-transitions trie) pointer-size #:size? #f)
141+
    (map (lambda (trie) (trie-size trie size))
142+
         (map cdr (trie-transitions trie)))))

modules/nani/wadoku/pitch.scm unknown status 1

1+
;;; Nani Project website
2+
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
3+
;;;
4+
;;; This file is part of the Nani Project website.
5+
;;;
6+
;;; The Nani Project website is free software; you can redistribute it and/or modify it
7+
;;; under the terms of the GNU Affero General Public License as published by
8+
;;; the Free Software Foundation; either version 3 of the License, or (at
9+
;;; your option) any later version.
10+
;;;
11+
;;; The Nani Project website is distributed in the hope that it will be useful, but
12+
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13+
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14+
;;; GNU Affero General Public License for more details.
15+
;;;
16+
;;; You should have received a copy of the GNU Affero General Public License
17+
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
18+
19+
(define-module (nani wadoku pitch)
20+
  #:use-module (ice-9 binary-ports)
21+
  #:use-module (ice-9 match)
22+
  #:use-module (ice-9 rdelim)
23+
  #:use-module (nani frequency)
24+
  #:use-module (nani huffman)
25+
  #:use-module (nani result)
26+
  #:use-module (nani trie)
27+
  #:use-module (nani wadoku entities)
28+
  #:use-module (srfi srfi-1)
29+
  #:use-module (srfi srfi-9)
30+
  #:use-module (sxml ssax)
31+
  #:use-module (rnrs bytevectors)
32+
  #:export (xml->pitch serialize-pitch pitch-entry-count))
33+
34+
(define-record-type pitch
35+
  (make-pitch kanjis accents)
36+
  pitch?
37+
  (kanjis  pitch-kanjis)
38+
  (accents pitch-accents))
39+
40+
(define (sxml->element lst elem)
41+
  (let ((elem (match elem
42+
                ((_ . elem) elem)
43+
                (_ elem))))
44+
    (match elem
45+
      ('accent `(pitch ,(car lst)))
46+
      ('orth (let ((kanji (filter string? lst)))
47+
               (if (null? kanji) #f `(kanji . ,(car kanji)))))
48+
      ('form
49+
       `(form . ,(append-map (lambda (a) (if (list? a) a (list a))) lst)))
50+
      ('reading (filter pair? lst))
51+
      ('entry
52+
        (let loop ((lst lst) (kanjis '()) (accents '()))
53+
          (if (null? lst)
54+
              (if (or (null? kanjis) (null? accents))
55+
                  #f
56+
                  (make-pitch kanjis accents))
57+
              (match (car lst)
58+
                (('form . f) (loop (append f (cdr lst)) kanjis accents))
59+
                (('pitch . pitch) (loop (cdr lst) kanjis (cons pitch accents)))
60+
                (('kanji . kanji) (loop (cdr lst) (cons kanji kanjis) accents))
61+
                (_ (loop (cdr lst) kanjis accents))))))
62+
      (_ #f))))
63+
64+
(define parser
65+
  (ssax:make-parser
66+
    NEW-LEVEL-SEED
67+
    (lambda (elem-gi attributes namespaces expected-content seed)
68+
      attributes)
69+
    
70+
    FINISH-ELEMENT
71+
    (lambda (elem-gi attributes namespaces parent-seed seed)
72+
      (if (equal? elem-gi 'entries)
73+
          seed
74+
          (let* ((seed (reverse seed))
75+
                 (element (sxml->element seed elem-gi)))
76+
            (cons element parent-seed))))
77+
    
78+
    CHAR-DATA-HANDLER
79+
    (lambda (string1 string2 seed)
80+
      (cons (string-append string1 string2) seed))))
81+
82+
(define (xml->pitch port)
83+
  (filter pitch? (parser port '())))
84+
85+
(define (make-trie-key key)
86+
  (append-map
87+
    (lambda (c)
88+
      (list (quotient c 16) (modulo c 16)))
89+
    (bytevector->u8-list (string->utf8 key))))
90+
91+
(define (get-pitch-trie pitches)
92+
  (let ((trie (make-empty-trie)))
93+
    (for-each
94+
      (lambda (pitch)
95+
        (for-each
96+
          (lambda (kanji)
97+
            (for-each
98+
              (lambda (accent)
99+
                (cond
100+
                  ((string? accent)
101+
                   (add-to-trie! trie (make-trie-key kanji) accent))
102+
                  ((list? accent)
103+
                   (add-to-trie! trie (make-trie-key kanji) (car accent)))
104+
                  (else
105+
                    (throw 'accent accent))))
106+
              (pitch-accents pitch)))
107+
          (pitch-kanjis pitch)))
108+
      pitches)
109+
    (compress-trie trie)))
110+
111+
(define (collapse-vals! trie)
112+
  (let ((transitions (trie-transitions trie))
113+
        (vals (trie-vals trie)))
114+
    (trie-vals-set! trie (string-join vals ", "))
115+
    (for-each collapse-vals! (map cdr transitions))))
116+
117+
(define (collect-vals trie)
118+
  (let ((transitions (trie-transitions trie))
119+
        (vals (trie-vals trie)))
120+
    (cons vals (append-map collect-vals (map cdr transitions)))))
121+
122+
(define (entry-number trie)
123+
  (let ((transitions (trie-transitions trie))
124+
        (vals (trie-vals trie)))
125+
    (apply + (if (string-null? vals) 0 1) 
126+
           (map entry-number (map cdr transitions)))))
127+
128+
(define (serialize-pitch pitches)
129+
  (let ((trie (get-pitch-trie pitches)))
130+
    (collapse-vals! trie)
131+
    (let* ((huffman (create-huffman (collect-vals trie)))
132+
           (code (huffman->code huffman))
133+
           (entries (entry-number trie)))
134+
      (let* ((header (string->utf8 "NANI_PITCH001"))
135+
             (header-size (bytevector-length header))
136+
             (huffman-bv (serialize-huffman huffman))
137+
             (huffman-size (bytevector-length huffman-bv))
138+
             (trie-size (trie-size trie (huffman-string-size code)))
139+
             (result (make-bytevector (+ header-size 4 huffman-size trie-size))))
140+
        (bytevector-copy! header 0 result 0 header-size)
141+
        (bytevector-u32-set! result header-size entries (endianness big))
142+
        (bytevector-copy! huffman-bv 0 result (+ header-size 4) huffman-size)
143+
        (serialize-trie trie (serialize-huffman-string code)
144+
                        (huffman-string-size code)
145+
                        result (+ header-size 4 huffman-size))
146+
        result))))
147+
148+
(define (pitch-entry-count file)
149+
  (call-with-input-file file
150+
    (lambda (port)
151+
      ;; header
152+
      (get-bytevector-n port 13)
153+
      ;; size
154+
      (bytevector-u32-ref (get-bytevector-n port 4) 0 (endianness big)))))

po/fr.po

77
msgstr ""
88
"Project-Id-Version: PACKAGE VERSION\n"
99
"Report-Msgid-Bugs-To: \n"
10-
"POT-Creation-Date: 2020-06-04 00:05+0200\n"
10+
"POT-Creation-Date: 2020-06-05 15:21+0200\n"
1111
"PO-Revision-Date: 2019-04-16 18:08+0200\n"
1212
"Last-Translator: root <julien@lepiller.eu>\n"
1313
"Language-Team: French\n"

105105
msgid "JMdict"
106106
msgstr "JMdict"
107107
108-
#: tools/list.scm:46
108+
#: tools/list.scm:55
109109
msgid ""
110110
"Japanese/Dutch dictionary from the Electronic Dictionary Research and "
111111
"Development Group."

113113
"Dictionnaire japonais/anglais de l???Electronic Dictionary Research and "
114114
"Development Group."
115115
116-
#: tools/list.scm:45
116+
#: tools/list.scm:54
117117
#, fuzzy
118118
msgid ""
119119
"Japanese/English dictionary from the Electronic Dictionary Research and "

122122
"Dictionnaire japonais/n??erlandais de l???Electronic Dictionary Research and "
123123
"Development Group."
124124
125-
#: tools/list.scm:47
125+
#: tools/list.scm:56
126126
msgid ""
127127
"Japanese/French dictionary from the Electronic Dictionary Research and "
128128
"Development Group."

130130
"Dictionnaire japonais/fran??ais de l???Electronic Dictionary Research and "
131131
"Development Group."
132132
133-
#: tools/list.scm:36
133+
#: tools/list.scm:37
134134
msgid "Japanese/German dictionary from Wadoku."
135135
msgstr "Dictionnaire japonais/allemand de Wadoku."
136136
137-
#: tools/list.scm:48
137+
#: tools/list.scm:57
138138
msgid ""
139139
"Japanese/German dictionary from the Electronic Dictionary Research and "
140140
"Development Group."

142142
"Dictionnaire japonais/allemand de l???Electronic Dictionary Research and "
143143
"Development Group."
144144
145-
#: tools/list.scm:49
145+
#: tools/list.scm:58
146146
msgid ""
147147
"Japanese/Hungarian dictionary from the Electronic Dictionary Research and "
148148
"Development Group."

150150
"Dictionnaire japonais/hongrois de l???Electronic Dictionary Research and "
151151
"Development Group."
152152
153-
#: tools/list.scm:50
153+
#: tools/list.scm:59
154154
msgid ""
155155
"Japanese/Russian dictionary from the Electronic Dictionary Research and "
156156
"Development Group."

158158
"Dictionnaire japonais/russe de l???Electronic Dictionary Research and "
159159
"Development Group."
160160
161-
#: tools/list.scm:51
161+
#: tools/list.scm:60
162162
msgid ""
163163
"Japanese/Slovenian dictionary from the Electronic Dictionary Research and "
164164
"Development Group."

166166
"Dictionnaire japonais/slov??ne de l???Electronic Dictionary Research and "
167167
"Development Group."
168168
169-
#: tools/list.scm:52
169+
#: tools/list.scm:61
170170
msgid ""
171171
"Japanese/Spanish dictionary from the Electronic Dictionary Research and "
172172
"Development Group."

174174
"Dictionnaire japonais/espagnol de l???Electronic Dictionary Research and "
175175
"Development Group."
176176
177-
#: tools/list.scm:53
177+
#: tools/list.scm:62
178178
msgid ""
179179
"Japanese/Swedish dictionary from the Electronic Dictionary Research and "
180180
"Development Group."

237237
msgid "Phone: "
238238
msgstr "T??l??phone : "
239239
240-
#: tools/list.scm:29
240+
#: tools/list.scm:45
241+
#, fuzzy
242+
msgid "Pitch accent dictionary from Wadoku."
243+
msgstr "Dictionnaire d'accent de hauteur de Wadoku."
244+
245+
#: tools/list.scm:30
241246
msgid ""
242247
"Radical to Kanji dictionary from the Electronic Dictionary Research and "
243248
"Development Group."

362367
"l'application. Dans les sections suivantes, nous verrons comment les "
363368
"utiliser."
364369
365-
#: tools/list.scm:38
370+
#: tools/list.scm:47
371+
msgid ""
372+
"This dictionary allows you to augment search results on the main view\n"
373+
"         with pitch accent (pronounciation) information.  Japanese is not "
374+
"flat,\n"
375+
"         and this dictionary will add information that will help you "
376+
"pronounce\n"
377+
"         words better, with a standard Japanese pitch accent."
378+
msgstr ""
379+
"Ce dictionnaire vous permet d'am??liorer les r??sultats de recherche de la\n"
380+
"    vue principale avec des informations sur l'accent de hauteur (la\n"
381+
"    prononciation).  Le japonais n'est pas plat, et ce dictionnaire vous\n"
382+
"    aidera ?? mieux prononcer les mots, avec l'accent de hauteur du japonais\n"
383+
"    standard."
384+
385+
#: tools/list.scm:39
366386
msgid ""
367387
"This dictionary allows you to do searches on the main view of this app.\n"
368388
"        Failing to download on of these dictionaries will make the app "

377397
"    dictionnaire permet d???effectuer des recherches par kanji, par\n"
378398
"    prononciation (kana) et par traduction allemande."
379399
380-
#: tools/list.scm:55
400+
#: tools/list.scm:64
381401
msgid ""
382402
"This dictionary allows you to do searches on the main view of this app.\n"
383403
"        Failing to download one of these dictionaries will make the app "

393413
"    prononciation (kana) et par signification dans les langues que vous\n"
394414
"    aurez t??l??charg??es."
395415
396-
#: tools/list.scm:31
416+
#: tools/list.scm:32
397417
msgid ""
398418
"This dictionary allows you to enter kanji by selecting some of its\n"
399419
"    components.  Tap the water component button on the bottom of the screen "

po/nani.pot

88
msgstr ""
99
"Project-Id-Version: PACKAGE VERSION\n"
1010
"Report-Msgid-Bugs-To: \n"
11-
"POT-Creation-Date: 2020-06-04 00:05+0200\n"
11+
"POT-Creation-Date: 2020-06-05 15:21+0200\n"
1212
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
1313
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
1414
"Language-Team: LANGUAGE <LL@li.org>\n"

9797
msgid "JMdict"
9898
msgstr ""
9999
100-
#: tools/list.scm:46
100+
#: tools/list.scm:55
101101
msgid ""
102102
"Japanese/Dutch dictionary from the Electronic Dictionary Research and "
103103
"Development Group."
104104
msgstr ""
105105
106-
#: tools/list.scm:45
106+
#: tools/list.scm:54
107107
msgid ""
108108
"Japanese/English dictionary from the Electronic Dictionary Research and "
109109
"Development Group."
110110
msgstr ""
111111
112-
#: tools/list.scm:47
112+
#: tools/list.scm:56
113113
msgid ""
114114
"Japanese/French dictionary from the Electronic Dictionary Research and "
115115
"Development Group."
116116
msgstr ""
117117
118-
#: tools/list.scm:36
118+
#: tools/list.scm:37
119119
msgid "Japanese/German dictionary from Wadoku."
120120
msgstr ""
121121
122-
#: tools/list.scm:48
122+
#: tools/list.scm:57
123123
msgid ""
124124
"Japanese/German dictionary from the Electronic Dictionary Research and "
125125
"Development Group."
126126
msgstr ""
127127
128-
#: tools/list.scm:49
128+
#: tools/list.scm:58
129129
msgid ""
130130
"Japanese/Hungarian dictionary from the Electronic Dictionary Research and "
131131
"Development Group."
132132
msgstr ""
133133
134-
#: tools/list.scm:50
134+
#: tools/list.scm:59
135135
msgid ""
136136
"Japanese/Russian dictionary from the Electronic Dictionary Research and "
137137
"Development Group."
138138
msgstr ""
139139
140-
#: tools/list.scm:51
140+
#: tools/list.scm:60
141141
msgid ""
142142
"Japanese/Slovenian dictionary from the Electronic Dictionary Research and "
143143
"Development Group."
144144
msgstr ""
145145
146-
#: tools/list.scm:52
146+
#: tools/list.scm:61
147147
msgid ""
148148
"Japanese/Spanish dictionary from the Electronic Dictionary Research and "
149149
"Development Group."
150150
msgstr ""
151151
152-
#: tools/list.scm:53
152+
#: tools/list.scm:62
153153
msgid ""
154154
"Japanese/Swedish dictionary from the Electronic Dictionary Research and "
155155
"Development Group."

200200
msgid "Phone: "
201201
msgstr ""
202202
203-
#: tools/list.scm:29
203+
#: tools/list.scm:45
204+
msgid "Pitch accent dictionary from Wadoku."
205+
msgstr ""
206+
207+
#: tools/list.scm:30
204208
msgid ""
205209
"Radical to Kanji dictionary from the Electronic Dictionary Research and "
206210
"Development Group."

297301
"In the following sections we will see how to use them."
298302
msgstr ""
299303
300-
#: tools/list.scm:38
304+
#: tools/list.scm:47
305+
msgid ""
306+
"This dictionary allows you to augment search results on the main view\n"
307+
"         with pitch accent (pronounciation) information.  Japanese is not "
308+
"flat,\n"
309+
"         and this dictionary will add information that will help you "
310+
"pronounce\n"
311+
"         words better, with a standard Japanese pitch accent."
312+
msgstr ""
313+
314+
#: tools/list.scm:39
301315
msgid ""
302316
"This dictionary allows you to do searches on the main view of this app.\n"
303317
"        Failing to download on of these dictionaries will make the app "

307321
"        by kanji, reading (kana) and by German translation."
308322
msgstr ""
309323
310-
#: tools/list.scm:55
324+
#: tools/list.scm:64
311325
msgid ""
312326
"This dictionary allows you to do searches on the main view of this app.\n"
313327
"        Failing to download one of these dictionaries will make the app "

317331
"        kanji, reading (kana) and by meaning in the languages you selected."
318332
msgstr ""
319333
320-
#: tools/list.scm:31
334+
#: tools/list.scm:32
321335
msgid ""
322336
"This dictionary allows you to enter kanji by selecting some of its\n"
323337
"    components.  Tap the water component button on the bottom of the screen "

tools/list.scm

1919
(use-modules (tools i18n))
2020
(use-modules (nani radk))
2121
(use-modules (nani jmdict serialize))
22+
(use-modules (nani wadoku pitch))
2223
(use-modules (gcrypt hash))
2324
(use-modules (ice-9 match))
2425
(use-modules (ice-9 format))

4041
        as you can't search for anything.  This dictionary can be searched for
4142
        by kanji, reading (kana) and by German translation."))
4243
44+
  (define wadoku-pitch-synopsis
45+
    `(_ "Pitch accent dictionary from Wadoku."))
46+
  (define wadoku-pitch-description
47+
    `(_ "This dictionary allows you to augment search results on the main view
48+
         with pitch accent (pronounciation) information.  Japanese is not flat,
49+
         and this dictionary will add information that will help you pronounce
50+
         words better, with a standard Japanese pitch accent."))
51+
4352
  (define (jmdict-synopsis lang)
4453
    (match lang
4554
      ("e" `(_ "Japanese/English dictionary from the Electronic Dictionary Research and Development Group."))

6776
             (if long?
6877
                 wadoku-description
6978
                 wadoku-synopsis))
79+
            ((equal? (dico-type dico) "wadoku_pitch")
80+
             (if long?
81+
                 wadoku-pitch-description
82+
                 wadoku-pitch-synopsis))
7083
            ((equal? (dico-type dico) "jmdict")
7184
             (let ((dico-lang (substring dico 7)))
7285
               (if long?

94107
    ((equal? file "radicals") "radk")
95108
    ((and (> (string-length file) 6) (equal? (substring file 0 6) "JMdict"))
96109
     "jmdict")
97-
    ((equal? file "wadoku_ger") "wadoku")))
110+
    ((equal? file "wadoku_ger") "wadoku")
111+
    ((equal? file "wadoku_pitch") "wadoku_pitch")))
98112
99113
(define (entries file)
100114
  (cond
101115
    ((equal? (dico-type (dico-name file)) "radk")
102116
     (kanji-count file))
103117
    ((member (dico-type (dico-name file)) '("jmdict" "wadoku"))
104-
     (jmdict-entry-count file))))
118+
     (jmdict-entry-count file))
119+
    ((equal? (dico-type (dico-name file)) "wadoku_pitch")
120+
     (pitch-entry-count file))))
105121
106122
(define (dico-name file)
107123
  (basename file ".nani"))

tools/wadoku.scm

11
;;; Nani Project website
2-
;;; Copyright ?? 2019 Julien Lepiller <julien@lepiller.eu>
2+
;;; Copyright ?? 2020 Julien Lepiller <julien@lepiller.eu>
33
;;;
44
;;; This file is part of the Nani Project website.
55
;;;

1919
(use-modules (nani jmdict trie))
2020
(use-modules (nani jmdict serialize))
2121
(use-modules (nani wadoku xml))
22+
(use-modules (nani wadoku pitch))
2223
(use-modules (nani frequency))
2324
(use-modules (nani trie))
2425
(use-modules (nani result))

5556
        (put-bytevector port
5657
          (serialize-jmdict results kanji-trie reading-trie meaning-trie))))))
5758
58-
(define (print word dict)
59-
  #t)
59+
(define (get-pitch input)
60+
  (call-with-input-file input
61+
    (lambda (port)
62+
      (xml->pitch port))))
63+
64+
(define (pitch input output)
65+
  (let ((results (get-pitch input)))
66+
    (format #t "~a results." (length results))
67+
    (call-with-output-file output
68+
      (lambda (port)
69+
        (put-bytevector port
70+
          (serialize-pitch results))))))
6071
6172
(match (command-line)
62-
  ((_ cmd input lang output)
73+
  ((_ cmd input output)
6374
   (cond
6475
    ((equal? cmd "build")
65-
     (if (equal? lang "e")
66-
       (compile input (const #t) output)
67-
       (compile input (lambda (sense) (equal? (sense-language sense) lang)) output)))
68-
    (else (format #t "Unknown cmd ~a.~%" cmd))))
69-
  ((_ "print" word input)
70-
   (print word input)))
76+
     (compile input (const #t) output))
77+
    ((equal? cmd "pitch")
78+
     (pitch input output))
79+
    (else (format #t "Unknown cmd ~a.~%" cmd)))))

wadoku.mk

11
WADOKU_TMP_DIR=dictionaries/wadoku-tmp
2-
DICOS+=dicos/wadoku_ger.nani
2+
DICOS+=dicos/wadoku_ger.nani dicos/wadoku_pitch.nani
33
DOWNLOADS+=dictionaries/wadoku.xml
44
55
dictionaries/wadoku.xml:

1414
	rm -rf $(WADOKU_TMP_DIR)
1515
1616
dicos/wadoku_ger.nani: dictionaries/wadoku.xml tools/wadoku.scm dictionaries/frequency.tsv $(DICO_MODULES)
17-
	guile -L modules tools/wadoku.scm build \
18-
        $< $(shell echo $@ | sed 's|^.*_\([^.]*\)\..*$$|\1|g') $@
17+
	guile -L modules tools/wadoku.scm build $< $@
18+
19+
dicos/wadoku_pitch.nani: dictionaries/wadoku.xml tools/wadoku.scm
20+
	guile -L modules tools/wadoku.scm pitch $< $@