Add make to build dictionaries
.gitignore
| 1 | 1 | *.swp | |
| 2 | + | *~ | |
| 3 | + | *.back | |
| 4 | + | *.mo | |
| 5 | + | dicos | |
| 6 | + | dictionaries | |
| 7 | + | site | |
| 8 | + | public | |
| 9 | + | all |
Makefile unknown status 1
| 1 | + | .PRECIOUS: po/%.po dictionaries/% | |
| 2 | + | # Language codes expanded into the dicos/JMdict_<lang>.nani targets below. | |
| 3 | + | JMDICT_LANGS=dut fre ger hun rus slv spa swe | |
| 4 | + | # Every compiled dictionary: the JMdict_e one plus one per code in JMDICT_LANGS. | |
| 5 | + | DICOS=dicos/JMdict_e.nani $(addprefix dicos/JMdict_, $(addsuffix .nani, $(JMDICT_LANGS))) | |
| 6 | + | # Page sources, located under pages/. | |
| 7 | + | PAGES=blog.scm e404.scm feeds.scm index.scm mentions.scm | |
| 8 | + | # Scheme sources: prerequisites of po/nani.pot and part of WEB_FILES. | |
| 9 | + | HAUNT_FILES= haunt.scm $(addprefix pages/, $(PAGES)) \ | |
| 10 | + | tools/i18n.scm tools/theme.scm | |
| 11 | + | # Prerequisites of `all`; simply expanded so each find runs once, when the file is parsed. | |
| 12 | + | WEB_FILES := $(HAUNT_FILES) \ | |
| 13 | + | $(shell find css) $(shell find images) $(DICOS) | |
| 14 | + | # Guile modules listed as prerequisites of the dicos/*.nani rules. | |
| 15 | + | DICO_MODULES=modules/nani/trie.scm modules/nani/result.scm modules/nani/jmdict/trie.scm \ | |
| 16 | + | modules/nani/jmdict/serialize.scm modules/nani/jmdict/xml.scm \ | |
| 17 | + | modules/nani/jmdict/entities.scm | |
| 18 | + | # Run haunt, then move site/ to public/, keeping the previous public/ as public.bak. | |
| 19 | + | all: po/fr/LC_MESSAGES/nani.mo po/eo/LC_MESSAGES/nani.mo $(WEB_FILES) | |
| 20 | + | haunt build | |
| 21 | + | rm -rf public.bak | |
| 22 | + | if [ -e public ]; then mv public public.bak; fi # nothing to back up on a first build | |
| 23 | + | mv site public | |
| 24 | + | touch all # `all` is a real stamp file, so an up-to-date tree is not rebuilt | |
| 25 | + | .PHONY: download # always runs: empties dictionaries/ and rebuilds both XML sources | |
| 26 | + | download: | |
| 27 | + | @rm -f dictionaries/* | |
| 28 | + | @$(MAKE) dictionaries/JMdict_e.xml dictionaries/JMdict.xml | |
| 29 | + | # Download the stem's .gz archive and unpack it in place as the target; no prerequisites, so $@ and $* are used. | |
| 30 | + | dictionaries/%.xml: | |
| 31 | + | mkdir -p $(@D) | |
| 32 | + | wget http://ftp.monash.edu/pub/nihongo/$*.gz -O $@.gz | |
| 33 | + | gunzip -f $@.gz | |
| 34 | + | sed -i -e 's|<|\&\<;|g' -e 's|>|\&\>;|g' $@ | |
| 35 | + | sed -i -e 's|&\([^;]\+\);|\1|g' $@ | |
| 36 | + | # Run the jmdict script in convert mode on the XML dictionary to produce the .sxml file. | |
| 37 | + | dictionaries/%.sxml: dictionaries/%.xml tools/jmdict.scm | |
| 38 | + | guile -L modules $(word 2,$^) convert $< nolang $@ | |
| 39 | + | # Build one per-language dictionary; the pattern stem is the language code given to the script. | |
| 40 | + | dicos/JMdict_%.nani: dictionaries/JMdict.sxml tools/jmdict.scm $(DICO_MODULES) | |
| 41 | + | guile -L modules tools/jmdict.scm build \ | |
| 42 | + | $< $* $@ | |
| 43 | + | # Explicit rule for the JMdict_e dictionary: built from its own source with language "e". | |
| 44 | + | dicos/JMdict_e.nani: dictionaries/JMdict_e.sxml tools/jmdict.scm $(DICO_MODULES) | |
| 45 | + | guile -L modules $(word 2,$^) build $< e $@ | |
| 46 | + | # Compile a .po catalog with msgfmt into nani.mo, creating the target directory first. | |
| 47 | + | po/%/LC_MESSAGES/nani.mo: po/%.po | |
| 48 | + | @mkdir -p $(@D) | |
| 49 | + | msgfmt -o $@ $< | |
| 50 | + | # Merge the template into an existing catalog, or create a new one whose locale is the pattern stem. | |
| 51 | + | po/%.po: po/nani.pot | |
| 52 | + | if [ -f $@ ]; then \ | |
| 53 | + | msgmerge --update $@ $< ;\ | |
| 54 | + | else \ | |
| 55 | + | msginit --input=$< --locale=$(notdir $*) --output=$@ ;\ | |
| 56 | + | fi | |
| 57 | + | # Extract strings marked with the _ keyword from the Scheme sources into the template. | |
| 58 | + | po/nani.pot: $(HAUNT_FILES) | |
| 59 | + | xgettext --language=scheme --from-code=UTF-8 --keyword=_ --add-comments --sort-output --output=$@ $^ |
tools/jmdict.scm unknown status 1
| 1 | + | (use-modules (nani jmdict trie)) | |
| 2 | + | (use-modules (nani jmdict serialize)) | |
| 3 | + | (use-modules (nani jmdict xml)) | |
| 4 | + | (use-modules (nani trie)) | |
| 5 | + | (use-modules (nani result)) | |
| 6 | + | (use-modules (ice-9 match)) | |
| 7 | + | (use-modules (ice-9 binary-ports)) | |
| 8 | + | ;; Parse INPUT with load-dic, then write the resulting datum to the file OUTPUT. | |
| 9 | + | (define (convert input output) | |
| 10 | + | (define parsed (load-dic input)) ; parsed before OUTPUT is opened | |
| 11 | + | (with-output-to-file output | |
| 12 | + | (lambda () | |
| 13 | + | (write parsed)))) | |
| 14 | + | ;; Build the serialized dictionary OUTPUT from INPUT, keeping only the senses accepted by SENSE-FILTER. | |
| 15 | + | (define (compile input sense-filter output) | |
| 16 | + | (let* ((sxml (if (string-suffix? ".xml" input) ; raw XML only; a .sxml name must not match | |
| 17 | + | (load-dic input) | |
| 18 | + | (call-with-input-file input read))) ; stored datum: read needs a port, not a file name | |
| 19 | + | (results (sxml->results sxml)) | |
| 20 | + | (results (map (lambda (result) | |
| 21 | + | (update-result | |
| 22 | + | result | |
| 23 | + | #:senses (filter sense-filter | |
| 24 | + | (result-senses result)))) | |
| 25 | + | results)) | |
| 26 | + | (results (filter (lambda (result) (not (null? (result-senses result)))) ; drop entries left with no sense | |
| 27 | + | results)) | |
| 28 | + | (kanji-trie (compress-trie (make-kanji-trie results))) | |
| 29 | + | (reading-trie (compress-trie (make-reading-trie results))) | |
| 30 | + | (meaning-trie (compress-trie (make-meaning-trie results)))) | |
| 31 | + | (format #t "Number of entries in ~a: ~a~%" output (length results)) | |
| 32 | + | (call-with-output-file output | |
| 33 | + | (lambda (port) | |
| 34 | + | (put-bytevector port | |
| 35 | + | (serialize-jmdict results kanji-trie reading-trie meaning-trie)))))) | |
| 36 | + | ;; Entry point: the command line must be the script name followed by cmd, input, lang and output. | |
| 37 | + | (match (command-line) | |
| 38 | + | ((_ cmd input lang output) | |
| 39 | + | (match cmd | |
| 40 | + | ("build" | |
| 41 | + | (if (equal? lang "e") | |
| 42 | + | (compile input (const #t) output) | |
| 43 | + | (compile input (lambda (sense) (equal? (sense-language sense) lang)) output))) | |
| 44 | + | ("convert" | |
| 45 | + | (convert input output)) | |
| 46 | + | (_ (format #t "Unknown cmd ~a.~%" cmd))))) |