jmdict.mk
# Language codes for which a dicos/JMdict_<code>.nani dictionary is built,
# in addition to the dicos/JMdict_e.nani one registered below.
JMDICT_LANGS = dut fre ger hun rus slv spa swe

# Add the JMdict dictionaries to the list of dictionaries: JMdict_e first,
# then one entry per language code.
DICOS += dicos/JMdict_e.nani
DICOS += $(patsubst %,dicos/JMdict_%.nani,$(JMDICT_LANGS))

# Files the rules in this file download.
DOWNLOADS += dictionaries/JMdict_e.xml \
             dictionaries/JMdict.xml \
             dictionaries/frequency.tsv
| 4 | |
# Download a JMdict dictionary from EDRDG and flatten its custom entities.
#
# $* is the pattern stem (e.g. "JMdict_e" for dictionaries/JMdict_e.xml); the
# server publishes the matching file as <stem>.gz.
#
# After decompression, every entity reference "&name;" is replaced by the
# bare "name". The predefined XML entities (lt, gt, amp, quot, apos) must
# survive that pass, so they are first rewritten to "&&name;;": the stripping
# expression then consumes the outer "&...;" pair and leaves "&name;" intact.
#
# All scratch files are derived from $@ and the steps are chained with && so
# that a failed download or decompression stops the recipe, and the target
# only appears once it is complete.
dictionaries/%.xml:
	mkdir -p "$(@D)"
	wget "ftp://ftp.edrdg.org/pub/Nihongo/$*.gz" -O "$@.tmp.gz" && \
	gunzip -f "$@.tmp.gz" && \
	sed -i -e 's/&\(lt\|gt\|amp\|quot\|apos\);/\&\&\1;;/g' \
	       -e 's|&\([^;]\+\);|\1|g' "$@.tmp" && \
	mv "$@.tmp" "$@"
| 13 | |
# Fetch the lemma frequency list computed from Wikipedia in 2015; see
# https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Japanese2015_10000
#
# The download goes to a scratch file, every space character is removed from
# it, and only its first 20000 lines are kept as the final target.
#
# NOTE(review): --no-check-certificate turns off TLS verification for this
# download; confirm whether the host's certificate still requires it.
dictionaries/frequency.tsv:
	wget --no-check-certificate -O $@.tmp \
		https://namakajiri.net/data/wikipedia-20150422-lemmas.tsv
	sed -i -e 's| ||g' $@.tmp
	head -n 20000 $@.tmp > $@
	rm $@.tmp
| 22 | |
# Build the dictionary for one language from the multilingual JMdict.xml.
#
# The pattern stem ($*) is the part of the target name between "JMdict_" and
# ".nani", i.e. the language code (one of $(JMDICT_LANGS)). It is passed to
# tools/jmdict.scm between the input file ($<) and the output file ($@).
# The script, the frequency list and $(DICO_MODULES) are listed only so that
# a change to any of them triggers a rebuild.
dicos/JMdict_%.nani: dictionaries/JMdict.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< $* $@
| 26 | |
# Build the "e" dictionary from its dedicated JMdict_e.xml download instead
# of the multilingual JMdict.xml. The first prerequisite ($<) is the input
# file and $@ is the output; the remaining prerequisites only trigger
# rebuilds when they change.
dicos/JMdict_e.nani: dictionaries/JMdict_e.xml \
                     tools/jmdict.scm \
                     dictionaries/frequency.tsv \
                     $(DICO_MODULES)
	guile -L modules tools/jmdict.scm \
		build $< e $@
| 29 |