# jmdict.mk
# Language codes for which a dicos/JMdict_<code>.nani dictionary is built, in
# addition to the English one (JMdict_e), which is listed explicitly below.
JMDICT_LANGS = dut fre ger hun rus slv spa swe

# Append the JMdict dictionaries to DICOS: the English one, then one per
# entry of JMDICT_LANGS.
DICOS += dicos/JMdict_e.nani $(patsubst %,dicos/JMdict_%.nani,$(JMDICT_LANGS))

# Append the files that the rules in this file fetch from the network.
DOWNLOADS += dictionaries/JMdict_e.xml dictionaries/JMdict.xml dictionaries/frequency.tsv

# Download a JMdict dictionary from EDRDG and prepare it for the build tools.
#
# The file is fetched gzip-compressed, unpacked, and its XML entity
# references are flattened: "&name;" becomes plain "name".  Only &lt; and
# &gt; survive as entities: the first sed pass wraps them as "&&lt;;" and
# "&&gt;;" so that the flattening pass turns them back into "&lt;" / "&gt;".
#
# All work happens on $@.gz and $@.tmp; the steps are chained with && and the
# target is moved into place last, so a failed step never leaves a broken
# target behind.  The sed invocations rely on GNU sed extensions.
#
# NOTE(review): &amp; is flattened to "amp" like any other entity -- confirm
# that this is intended.
dictionaries/%.xml:
	mkdir -p "$(@D)" && \
	wget "ftp://ftp.edrdg.org/pub/Nihongo/$(basename $(@F)).gz" -O "$@.gz" && \
	gunzip -c "$@.gz" > "$@.tmp" && \
	$(RM) "$@.gz" && \
	sed -i -e 's|&lt;|\&\&lt;;|g' -e 's|&gt;|\&\&gt;;|g' "$@.tmp" && \
	sed -i -e 's|&\([^;]\+\);|\1|g' "$@.tmp" && \
	mv -f "$@.tmp" "$@"

# Download the frequency analysis run on Wikipedia in 2015, remove every
# space character from it and keep its first 20000 lines.
# https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Japanese2015_10000
#
# The list is assembled in temporary files next to the target and moved into
# place last, so an interrupted run cannot leave a truncated frequency.tsv
# that looks up to date.
#
# NOTE(review): --no-check-certificate disables TLS verification of the
# download; drop it if the server certificate validates.
dictionaries/frequency.tsv:
	mkdir -p "$(@D)"
	wget --no-check-certificate \
	    https://namakajiri.net/data/wikipedia-20150422-lemmas.tsv -O "$@.tmp"
	sed -i 's| ||g' "$@.tmp"
	head -n20000 "$@.tmp" > "$@.part"
	mv -f "$@.part" "$@"
	$(RM) "$@.tmp"

# Build dicos/JMdict_<code>.nani from dictionaries/JMdict.xml.
#
# tools/jmdict.scm is run as "build <xml> <code> <output>", where <code> is
# the pattern stem ($*), i.e. the part of the target name between "JMdict_"
# and ".nani" (presumably a language code from JMDICT_LANGS).
# The script, the frequency list and $(DICO_MODULES) are prerequisites, so a
# change to any of them triggers a rebuild; only the XML file ($<) is passed
# on the command line.
dicos/JMdict_%.nani: dictionaries/JMdict.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< $* $@

# Build the English dictionary from its own download, dictionaries/JMdict_e.xml,
# passing "e" as the second argument of the build command.  Being an explicit
# rule, it is used for this target instead of the dicos/JMdict_%.nani pattern.
dicos/JMdict_e.nani: dictionaries/JMdict_e.xml tools/jmdict.scm \
                     dictionaries/frequency.tsv $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build \
	    $< e $@