Add make to build dictionaries

Julien LepillerWed Apr 17 11:29:09+0200 2019

07cd3b0

Add make to build dictionaries

.gitignore

11
*.swp
2+
*~
3+
*.back
4+
*.mo
5+
dicos
6+
dictionaries
7+
site
8+
public
9+
all

Makefile unknown status 1

1+
# Never let make delete these files automatically, neither as intermediates
# of a pattern-rule chain nor when a recipe is interrupted.
.PRECIOUS: po/%.po dictionaries/%

# Language codes (in addition to English) for which a dictionary is built.
JMDICT_LANGS := dut fre ger hun rus slv spa swe

# Compiled dictionaries: the English one plus one per entry of JMDICT_LANGS.
DICOS := dicos/JMdict_e.nani \
        $(addprefix dicos/JMdict_, $(addsuffix .nani, $(JMDICT_LANGS)))

# Page sources, relative to pages/.
PAGES := blog.scm e404.scm feeds.scm index.scm mentions.scm

# Scheme sources of the site; also the inputs of string extraction
# (po/nani.pot).
HAUNT_FILES := haunt.scm $(addprefix pages/, $(PAGES)) \
        tools/i18n.scm tools/theme.scm

# Everything the `all` rule depends on besides the message catalogues.
# Simply expanded (:=) so that `find` runs exactly once, at parse time.
# Note: no trailing backslash after the last word -- a dangling continuation
# would swallow whatever line comes next.
WEB_FILES := $(HAUNT_FILES) \
        $(shell find css) $(shell find images) $(DICOS)

# Guile modules whose modification must trigger a dictionary rebuild.
DICO_MODULES := modules/nani/trie.scm modules/nani/result.scm \
        modules/nani/jmdict/trie.scm \
        modules/nani/jmdict/serialize.scm modules/nani/jmdict/xml.scm \
        modules/nani/jmdict/entities.scm
18+
19+
# Build the site and publish it into public/.
#
# `all` is a real stamp file (touched on the last line), so the site is only
# rebuilt when a catalogue or one of $(WEB_FILES) is newer than the stamp.
# The previous public/ is kept as public.bak; the freshly built site/
# directory (presumably the output of `haunt build` -- confirm against
# haunt.scm) then takes its place.
all: po/fr/LC_MESSAGES/nani.mo po/eo/LC_MESSAGES/nani.mo $(WEB_FILES)
	haunt build
	rm -rf public.bak
	# On the very first build there is no public/ yet; a bare `mv` would
	# fail here and abort before the new site is published.
	if [ -e public ]; then mv public public.bak; fi
	mv site public
	touch $@
25+
26+
# Throw away every previously downloaded/converted dictionary and fetch fresh
# copies.  The file names must match the ones the conversion rules depend on
# (dictionaries/JMdict_e.sxml and dictionaries/JMdict.sxml are produced from
# the .xml files of the same name).
.PHONY: download
download:
	@rm -f dictionaries/*
	@$(MAKE) dictionaries/JMdict_e.xml dictionaries/JMdict.xml
29+
30+
# Download dictionaries/<name>.xml from <name>.gz on the Monash server.
#
# The rule has no prerequisites, so $< is empty here; everything is expressed
# with the target ($@) and the stem ($*) instead.  The archive is saved as
# $@.gz so that gunzip leaves the result directly under the target name.
#
# The two sed passes post-process XML entity references:
#   1. "&lt;" and "&gt;" are temporarily rewritten to "&&lt;;" / "&&gt;;";
#   2. every "&name;" loses its "&" and ";" and becomes the bare name, which
#      also turns the doubled forms from step 1 back into "&lt;" / "&gt;".
dictionaries/%.xml:
	@mkdir -p $(@D)
	wget http://ftp.monash.edu/pub/nihongo/$*.gz -O $@.gz
	gunzip $@.gz
	sed -i -e 's|&lt;|\&\&lt;;|g' -e 's|&gt;|\&\&gt;;|g' $@
	sed -i -e 's|&\([^;]\+\);|\1|g' $@
36+
37+
# Convert a downloaded XML dictionary into an s-expression dump.
# tools/jmdict.scm expects four positional arguments; "nolang" only fills the
# language slot, which the `convert` command does not use.
dictionaries/%.sxml: dictionaries/%.xml tools/jmdict.scm
	guile -L modules tools/jmdict.scm convert $< nolang $@

# Build the dictionary for one language out of the multilingual dump.
# The stem ($*) is the language code, e.g. dicos/JMdict_fre.nani -> "fre".
# dicos/ is created on demand because it does not exist in a fresh checkout.
dicos/JMdict_%.nani: dictionaries/JMdict.sxml tools/jmdict.scm $(DICO_MODULES)
	@mkdir -p $(@D)
	guile -L modules tools/jmdict.scm build $< $* $@

# The English dictionary is built from its own dump; being an explicit rule
# it takes precedence over the pattern rule above for this target.
dicos/JMdict_e.nani: dictionaries/JMdict_e.sxml tools/jmdict.scm $(DICO_MODULES)
	@mkdir -p $(@D)
	guile -L modules tools/jmdict.scm build $< e $@
46+
47+
# Compile the catalogue po/<locale>.po into po/<locale>/LC_MESSAGES/nani.mo,
# creating the locale directory first if needed.
po/%/LC_MESSAGES/nani.mo: po/%.po
	@mkdir -p $(@D)
	msgfmt --output-file=$@ $<
50+
51+
# Bring po/<locale>.po in line with the template.
# An existing catalogue is merged with the new template; a missing one is
# initialised for the locale named by the file (po/fr.po -> "fr").  The
# locale is computed by make itself rather than by piping the file name
# through printf (which would treat it as a format string) and sed.
po/%.po: po/nani.pot
	if [ -f $@ ]; then \
	  msgmerge --update $@ $< ; \
	else \
	  msginit --input=$< --locale=$(basename $(@F)) --output=$@ ; \
	fi
57+
58+
# Extract the translatable strings (calls to `_`) from every Scheme source of
# the site into the template po/nani.pot.
po/nani.pot: $(HAUNT_FILES)
	xgettext --keyword=_ --language=scheme --add-comments --sort-output \
	  --from-code UTF-8 -o $@ $^

tools/jmdict.scm unknown status 1

1+
(use-modules (nani jmdict trie))
2+
(use-modules (nani jmdict serialize))
3+
(use-modules (nani jmdict xml))
4+
(use-modules (nani trie))
5+
(use-modules (nani result))
6+
(use-modules (ice-9 match))
7+
(use-modules (ice-9 binary-ports))
8+
9+
(define (convert input output)
  "Load the dictionary file INPUT with load-dic and write the resulting
datum to the file OUTPUT using `write'."
  ;; The dictionary is loaded before OUTPUT is opened, so a failing load
  ;; never truncates an existing output file.
  (define sxml (load-dic input))
  (define (dump port)
    (write sxml port))
  (call-with-output-file output dump))
14+
15+
(define (compile input sense-filter output)
  "Build the serialized dictionary OUTPUT from the dictionary file INPUT.

INPUT is parsed with load-dic when its name ends in \".xml\"; any other file
is taken to hold a single datum (such as the one `convert' writes) and is
read back with `read'.  SENSE-FILTER is a predicate applied to every sense of
every entry: senses for which it returns #f are removed, and entries left
without any sense are dropped.  The number of remaining entries is printed
on standard output."
  ;; The suffix test includes the dot: \"JMdict.sxml\" also ends in \"xml\"
  ;; and must not be handed to the XML loader.  `read' needs a port, not a
  ;; file name, hence call-with-input-file.
  (let* ((sxml (if (string-suffix? ".xml" input)
                 (load-dic input)
                 (call-with-input-file input read)))
         (results (sxml->results sxml))
         ;; Keep only the senses accepted by SENSE-FILTER...
         (results (map (lambda (result)
                         (update-result
                           result
                           #:senses (filter sense-filter
                                            (result-senses result))))
                       results))
         ;; ...and forget entries that have no sense left.
         (results (filter (lambda (result) (not (null? (result-senses result))))
                          results))
         ;; One compressed trie per kind of lookup key.
         (kanji-trie (compress-trie (make-kanji-trie results)))
         (reading-trie (compress-trie (make-reading-trie results)))
         (meaning-trie (compress-trie (make-meaning-trie results))))
    (format #t "Number of entries in ~a: ~a~%" output (length results))
    (call-with-output-file output
      (lambda (port)
        (put-bytevector port
          (serialize-jmdict results kanji-trie reading-trie meaning-trie))))))
36+
37+
;; Entry point: tools/jmdict.scm CMD INPUT LANG OUTPUT
;;   build    compile INPUT into the dictionary OUTPUT, keeping every sense
;;            when LANG is "e" and only the senses whose language is LANG
;;            otherwise;
;;   convert  dump INPUT as a datum into OUTPUT (LANG is ignored).
;; Any other invocation is reported and ends with a non-zero status, so that
;; a calling Makefile does not take the target for successfully built.
(match (command-line)
  ((_ "build" input lang output)
   (compile input
            (if (equal? lang "e")
              (const #t)
              (lambda (sense) (equal? (sense-language sense) lang)))
            output))
  ((_ "convert" input _ output)
   (convert input output))
  ((_ cmd _ _ _)
   (format #t "Unknown cmd ~a.~%" cmd)
   (exit 1))
  ((program . _)
   (format (current-error-port)
           "Usage: ~a build|convert INPUT LANG OUTPUT~%" program)
   (exit 1)))