Move jmdict stuff to its own makefile.
Makefile
2 | 2 | ||
3 | 3 | all: site | |
4 | 4 | ||
5 | - | JMDICT_LANGS=dut fre ger hun rus slv spa swe | |
6 | - | ||
7 | - | DICOS=dicos/JMdict_e.nani $(addprefix dicos/JMdict_, $(addsuffix .nani, $(JMDICT_LANGS))) | |
8 | - | ||
9 | - | DOWNLOADS=dictionaries/JMdict_e.xml dictionaries/JMdict.xml dictionaries/frequency.tsv | |
5 | + | DICOS= | |
6 | + | DOWNLOADS= | |
10 | 7 | ||
11 | 8 | include radicals.mk | |
12 | 9 | include wadoku.mk | |
10 | + | include jmdict.mk | |
13 | 11 | ||
14 | 12 | PAGES=blog.scm data.scm documentation.scm e404.scm feeds.scm index.scm mentions.scm | |
15 | 13 | ||
… | |||
39 | 37 | @rm -f dictionaries/* | |
40 | 38 | @$(MAKE) $(DOWNLOADS) | |
41 | 39 | ||
42 | - | # Download JMdict dictionaries from EDRDG | |
43 | - | dictionaries/%.xml: | |
44 | - | dl_filename="$(shell basename "$@" | rev | cut -c5- | rev)"; \ | |
45 | - | wget ftp://ftp.edrdg.org/pub/Nihongo/"$$dl_filename".gz -O "$$dl_filename.gz"; \ | |
46 | - | gunzip "$$dl_filename.gz"; \ | |
47 | - | sed -i -e 's|<|\&\<;|g' -e 's|>|\&\>;|g' "$$dl_filename"; \ | |
48 | - | sed -i -e 's|&\([^;]\+\);|\1|g' "$$dl_filename"; \ | |
49 | - | mv "$$dl_filename" "$@" | |
50 | - | ||
51 | - | # Download frequency analysis run on Wikipedia in 2015 | |
52 | - | # https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Japanese2015_10000 | |
53 | - | dictionaries/frequency.tsv: | |
54 | - | wget --no-check-certificate \ | |
55 | - | https://namakajiri.net/data/wikipedia-20150422-lemmas.tsv -O $@.tmp | |
56 | - | sed -i 's| ||g' $@.tmp | |
57 | - | head -n20000 $@.tmp > $@ | |
58 | - | rm $@.tmp | |
59 | - | ||
60 | - | dicos/JMdict_%.nani: dictionaries/JMdict.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES) | |
61 | - | guile -L modules tools/jmdict.scm build \ | |
62 | - | $< $(shell echo $@ | sed 's|^.*_\([^.]*\)\..*$$|\1|g') $@ | |
63 | - | ||
64 | - | dicos/JMdict_e.nani: dictionaries/JMdict_e.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES) | |
65 | - | guile -L modules tools/jmdict.scm build $< e $@ | |
66 | - | ||
67 | 40 | po/%/LC_MESSAGES/nani.mo: po/%.po | |
68 | 41 | @mkdir -p $$(dirname $@) | |
69 | 42 | msgfmt --output-file=$@ $< |
jmdict.mk unknown status 1
1 | + | JMDICT_LANGS=dut fre ger hun rus slv spa swe | |
2 | + | DICOS+=dicos/JMdict_e.nani $(addprefix dicos/JMdict_, $(addsuffix .nani, $(JMDICT_LANGS))) | |
3 | + | DOWNLOADS+=dictionaries/JMdict_e.xml dictionaries/JMdict.xml dictionaries/frequency.tsv | |
4 | + | ||
5 | + | # Download JMdict dictionaries from EDRDG | |
6 | + | dictionaries/%.xml: | |
7 | + | dl_filename="$(shell basename "$@" | rev | cut -c5- | rev)"; \ | |
8 | + | wget ftp://ftp.edrdg.org/pub/Nihongo/"$$dl_filename".gz -O "$$dl_filename.gz"; \ | |
9 | + | gunzip "$$dl_filename.gz"; \ | |
10 | + | sed -i -e 's|<|\&\<;|g' -e 's|>|\&\>;|g' "$$dl_filename"; \ | |
11 | + | sed -i -e 's|&\([^;]\+\);|\1|g' "$$dl_filename"; \ | |
12 | + | mv "$$dl_filename" "$@" | |
13 | + | ||
14 | + | # Download the lemma frequency list computed from Wikipedia in 2015 (only the first 20000 lines are kept) | |
15 | + | # https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Japanese2015_10000 | |
16 | + | dictionaries/frequency.tsv: | |
17 | + | wget --no-check-certificate \ | |
18 | + | https://namakajiri.net/data/wikipedia-20150422-lemmas.tsv -O $@.tmp | |
19 | + | sed -i 's| ||g' $@.tmp | |
20 | + | head -n20000 $@.tmp > $@ | |
21 | + | rm $@.tmp | |
22 | + | ||
23 | + | dicos/JMdict_%.nani: dictionaries/JMdict.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES) | |
24 | + | guile -L modules tools/jmdict.scm build \ | |
25 | + | $< $(shell echo $@ | sed 's|^.*_\([^.]*\)\..*$$|\1|g') $@ | |
26 | + | ||
27 | + | dicos/JMdict_e.nani: dictionaries/JMdict_e.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES) | |
28 | + | guile -L modules tools/jmdict.scm build $< e $@ |
manifest.scm unknown status 1
1 | + | (specifications->manifest | |
2 | + | '("make" "guile" "haunt" | |
3 | + | ||
4 | + | ; for download | |
5 | + | "unzip" "libiconv" | |
6 | + | ||
7 | + | ; for all | |
8 | + | "gettext")) |
radicals.mk
6 | 6 | wget ftp://ftp.monash.edu/pub/nihongo/kradzip.zip -O dictionaries/kradzip.zip | |
7 | 7 | unzip dictionaries/kradzip.zip radkfilex -d dictionaries | |
8 | 8 | iconv -f euc-jp -t utf-8 dictionaries/radkfilex > $@ | |
9 | - | rm radkfilex | |
9 | + | rm dictionaries/radkfilex | |
10 | 10 | ||
11 | 11 | dictionaries/kanjidic2.xml: | |
12 | 12 | wget http://www.edrdg.org/kanjidic/kanjidic2.xml.gz -O $@.gz |