# JMdict dictionary rules.
#
# Appends to DICOS and DOWNLOADS and reads DICO_MODULES; all three are
# presumably defined/consumed by the makefile that includes this fragment
# (TODO confirm).

# Languages, other than English, for which a JMdict dictionary is built.
JMDICT_LANGS := dut fre ger hun rus slv spa swe

# The English dictionary plus one dictionary per language listed above.
DICOS += dicos/JMdict_e.nani $(JMDICT_LANGS:%=dicos/JMdict_%.nani)
DOWNLOADS += dictionaries/JMdict_e.xml dictionaries/JMdict.xml dictionaries/frequency.tsv

# Download JMdict dictionaries from EDRDG.
#
# $* is the pattern stem (e.g. "JMdict_e"), which is also the name of the
# gzipped file on the server. All scratch files are derived from $@ so that
# nothing is written outside dictionaries/ and parallel downloads cannot
# collide; the target itself only appears once fully processed.
#
# The sed pass (GNU sed: -i and \+) first rewrites every "<" and ">" as
# "&<;" and "&>;", then replaces every "&name;" sequence with the bare
# "name" -- which also undoes the wrapping done by the first two expressions.
# NOTE(review): as written the first two expressions therefore have no net
# effect and "&lt;"/"&gt;" end up as "lt"/"gt". They look like they were meant
# to protect "&lt;" and "&gt;" from the entity stripping (i.e.
# 's|&lt;|\&\&lt;;|g'); confirm against upstream before changing them.
dictionaries/%.xml:
	mkdir -p "$(@D)"
	wget ftp://ftp.edrdg.org/pub/Nihongo/"$*".gz -O "$@.gz"
	gunzip -c "$@.gz" > "$@.tmp"
	rm -f "$@.gz"
	sed -i -e 's|<|\&\<;|g' -e 's|>|\&\>;|g' \
		-e 's|&\([^;]\+\);|\1|g' "$@.tmp"
	mv "$@.tmp" "$@"

# Download frequency analysis run on Wikipedia in 2015
# https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Japanese2015_10000
#
# Every space character is removed from the list and only its first 20000
# lines are kept. The result is assembled in a scratch file and moved into
# place last, so a failed step never leaves a truncated target behind.
# NOTE(review): --no-check-certificate disables TLS verification for this
# download; drop it if the server's certificate validates.
dictionaries/frequency.tsv:
	mkdir -p "$(@D)"
	wget --no-check-certificate \
		https://namakajiri.net/data/wikipedia-20150422-lemmas.tsv -O $@.tmp
	sed -i 's| ||g' $@.tmp
	head -n20000 $@.tmp > $@.new
	rm -f $@.tmp
	mv $@.new $@

# Build the dictionary for one language from the multilingual JMdict.xml.
# The stem $* is the language code passed to the tool (e.g. "fre" for
# dicos/JMdict_fre.nani). The remaining prerequisites are listed so that a
# change to the tool, the frequency list or the modules triggers a rebuild.
dicos/JMdict_%.nani: dictionaries/JMdict.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< $* $@

# The English dictionary is built from its own source file, JMdict_e.xml.
# This explicit rule takes precedence over the pattern rule above.
dicos/JMdict_e.nani: dictionaries/JMdict_e.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< e $@