Add make to build dictionaries
.gitignore
1 | 1 | *.swp | |
2 | + | *~ | |
3 | + | *.back | |
4 | + | *.mo | |
5 | + | dicos | |
6 | + | dictionaries | |
7 | + | site | |
8 | + | public | |
9 | + | all |
Makefile unknown status 1
# Keep translation catalogs and everything under dictionaries/ even when make
# produced them only as intermediate files of a rule chain.
.PRECIOUS: po/%.po dictionaries/%

# Language codes for which a per-language dictionary is built.
JMDICT_LANGS := dut fre ger hun rus slv spa swe

# Every dictionary file to build: JMdict_e plus one file per language above.
DICOS := dicos/JMdict_e.nani $(addprefix dicos/JMdict_, $(addsuffix .nani, $(JMDICT_LANGS)))

# Page sources, relative to pages/.
PAGES := blog.scm e404.scm feeds.scm index.scm mentions.scm

# Scheme sources of the site; also the input of the po/nani.pot extraction.
HAUNT_FILES := haunt.scm $(addprefix pages/, $(PAGES)) \
               tools/i18n.scm tools/theme.scm

# Everything the site build depends on.
# Simple (:=) assignment so that find runs once at parse time instead of on
# every expansion, and -type f so that directories (whose mtime changes with
# their contents) do not end up in the prerequisite list.
WEB_FILES := $(HAUNT_FILES) \
             $(shell find css -type f) $(shell find images -type f) $(DICOS)

# Guile modules listed as prerequisites of the dictionary build rules.
DICO_MODULES := modules/nani/trie.scm modules/nani/result.scm modules/nani/jmdict/trie.scm \
                modules/nani/jmdict/serialize.scm modules/nani/jmdict/xml.scm \
                modules/nani/jmdict/entities.scm
18 | + | ||
# Build the translations, the dictionaries and the web site, then swap the
# freshly generated site/ directory into place as public/, keeping the
# previous public/ as public.bak. The file "all" is touched as a stamp so the
# recipe only re-runs when a prerequisite changes.
all: po/fr/LC_MESSAGES/nani.mo po/eo/LC_MESSAGES/nani.mo $(WEB_FILES)
	haunt build
	rm -rf public.bak
	# On the very first build there is no public/ to back up yet.
	if [ -d public ]; then mv public public.bak; fi
	mv site public
	touch $@
25 | + | ||
# Throw away every file in dictionaries/ and fetch the two source
# dictionaries again. The file names must match the ones the .sxml and .nani
# rules are derived from (JMdict_e and JMdict).
.PHONY: download
download:
	@rm -f dictionaries/*
	@$(MAKE) dictionaries/JMdict_e.xml dictionaries/JMdict.xml
29 | + | ||
# Download and unpack one dictionary, then rewrite its entity references.
# The rule has no prerequisites, so only $@ and $* are meaningful here ($< is
# empty). Work happens on $@.tmp and the result is renamed at the end, so a
# failed download or sed never leaves a truncated target behind.
# NOTE(review): assumes the archive on the server is named <stem>.gz
# (e.g. JMdict_e.gz) -- confirm against the server listing.
dictionaries/%.xml:
	@mkdir -p $(@D)
	wget http://ftp.monash.edu/pub/nihongo/$*.gz -O $@.tmp.gz
	gunzip -f $@.tmp.gz
	# Double up &lt; and &gt; so that the next pass, which strips the "&" and
	# ";" around every entity reference, turns them back into &lt; / &gt;.
	sed -i -e 's|&lt;|\&\&lt;;|g' -e 's|&gt;|\&\&gt;;|g' $@.tmp
	# Replace every remaining "&name;" reference by its bare name.
	sed -i -e 's|&\([^;]\+\);|\1|g' $@.tmp
	mv -f $@.tmp $@
36 | + | ||
# Dump the parsed form of an XML dictionary to an .sxml file using the
# "convert" command of tools/jmdict.scm. The script expects a language
# argument in third position but does not use it for "convert"; "nolang" is
# only a placeholder.
dictionaries/%.sxml: dictionaries/%.xml tools/jmdict.scm
	guile -L modules $(word 2,$^) convert $< nolang $@
39 | + | ||
# Build the dictionary for one language from the full JMdict dump.
# The pattern stem ($*) is the language code (e.g. "fre" for
# dicos/JMdict_fre.nani), so there is no need to re-extract it from $@ with a
# shell pipeline.
dicos/JMdict_%.nani: dictionaries/JMdict.sxml tools/jmdict.scm $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< $* $@
43 | + | ||
# Explicit rule for the JMdict_e dictionary: unlike the per-language pattern
# rule it reads dictionaries/JMdict_e.sxml and passes the language code "e".
dicos/JMdict_e.nani: dictionaries/JMdict_e.sxml \
                     tools/jmdict.scm \
                     $(DICO_MODULES)
	guile -L modules $(word 2,$^) build $< e $@
46 | + | ||
# Compile the catalog of one locale (po/<locale>.po) into the binary .mo file
# at po/<locale>/LC_MESSAGES/nani.mo, creating the directory when needed.
po/%/LC_MESSAGES/nani.mo: po/%.po
	@mkdir -p $(@D)
	msgfmt -o $@ $<
50 | + | ||
# Create or refresh the catalog of one locale from the template.
# An existing catalog is merged with the new template; a missing one is
# initialised for the locale named by the pattern stem ($*, e.g. "fr" for
# po/fr.po).
po/%.po: po/nani.pot
	if [ -f $@ ]; then \
		msgmerge --update $@ $< && touch $@ ;\
	else \
		msginit --input=$< --locale=$* --output=$@ ;\
	fi
# The touch matters: msgmerge --update leaves an already up-to-date catalog
# untouched, which would keep it older than the template and make this rule
# run again on every build.
57 | + | ||
# Extract the translatable strings (calls to "_") from the Scheme sources of
# the site into the template catalog.
po/nani.pot: $(HAUNT_FILES)
	xgettext --keyword=_ --language=scheme --add-comments --sort-output \
		--from-code=UTF-8 --output=$@ $^
tools/jmdict.scm unknown status 1
1 | + | (use-modules (nani jmdict trie)) | |
2 | + | (use-modules (nani jmdict serialize)) | |
3 | + | (use-modules (nani jmdict xml)) | |
4 | + | (use-modules (nani trie)) | |
5 | + | (use-modules (nani result)) | |
6 | + | (use-modules (ice-9 match)) | |
7 | + | (use-modules (ice-9 binary-ports)) | |
8 | + | ||
;; Load the dictionary file INPUT with load-dic and write the resulting
;; datum to the file OUTPUT using `write'.  INPUT is loaded completely
;; before OUTPUT is opened.
(define (convert input output)
  (define parsed (load-dic input))
  (with-output-to-file output
    (lambda ()
      (write parsed))))
14 | + | ||
;; Build a serialized dictionary from INPUT and write it to the file OUTPUT.
;;
;; INPUT is a file name.  A name ending in ".xml" is loaded with load-dic;
;; any other file (such as the ".sxml" dumps written by `convert') is expected
;; to contain a single datum, which is read back with `read'.
;; SENSE-FILTER is a one-argument predicate applied to every sense of every
;; result; results left without any sense are dropped.
;; The number of remaining entries is printed to the current output port.
(define (compile input sense-filter output)
  (let* (;; Test for the full ".xml" extension: a bare "xml" suffix would also
         ;; match ".sxml" files and send them to the XML loader.
         (sxml (if (string-suffix? ".xml" input)
                   (load-dic input)
                   ;; `read' needs a port, not a file name.
                   (call-with-input-file input read)))
         (results (sxml->results sxml))
         ;; Keep only the senses accepted by SENSE-FILTER.
         (results (map (lambda (result)
                         (update-result
                           result
                           #:senses (filter sense-filter
                                            (result-senses result))))
                       results))
         ;; Drop the entries that have no sense left.
         (results (filter (lambda (result) (not (null? (result-senses result))))
                          results))
         (kanji-trie (compress-trie (make-kanji-trie results)))
         (reading-trie (compress-trie (make-reading-trie results)))
         (meaning-trie (compress-trie (make-meaning-trie results))))
    (format #t "Number of entries in ~a: ~a~%" output (length results))
    (call-with-output-file output
      (lambda (port)
        (put-bytevector port
          (serialize-jmdict results kanji-trie reading-trie meaning-trie))))))
36 | + | ||
;; Command-line entry point: SCRIPT CMD INPUT LANG OUTPUT.
;;
;; "build" compiles INPUT into OUTPUT, keeping every sense when LANG is "e"
;; and only the senses whose language equals LANG otherwise.
;; "convert" dumps INPUT to OUTPUT; LANG is ignored.
;; An unknown command or a wrong number of arguments is reported on the
;; error port and ends the process with status 1, so that a calling make
;; notices the failure.
(match (command-line)
  ((_ "build" input lang output)
   (compile input
            (if (equal? lang "e")
                (const #t)
                (lambda (sense) (equal? (sense-language sense) lang)))
            output))
  ((_ "convert" input _ output)
   (convert input output))
  ((_ cmd _ _ _)
   (format (current-error-port) "Unknown cmd ~a.~%" cmd)
   (exit 1))
  ((program . _)
   (format (current-error-port)
           "Usage: ~a build|convert INPUT LANG OUTPUT~%" program)
   (exit 1)))