# jmdict.mk (nani/website/jmdict.mk)

# Language codes for which a translated JMdict dictionary is built, in
# addition to the English one (JMdict_e).
JMDICT_LANGS := dut fre ger hun rus slv spa swe

# Register the dictionaries to build: the English dictionary plus one
# dicos/JMdict_<lang>.nani per entry of JMDICT_LANGS.
DICOS += dicos/JMdict_e.nani $(JMDICT_LANGS:%=dicos/JMdict_%.nani)

# Register the files that have to be fetched before building.
DOWNLOADS += dictionaries/JMdict_e.xml \
             dictionaries/JMdict.xml \
             dictionaries/frequency.tsv
4
5
# Download JMdict dictionaries from EDRDG.
#
# The pattern stem ($*) is the archive's base name on the server, so
# dictionaries/JMdict_e.xml is fetched from .../Nihongo/JMdict_e.gz.
#
# Scratch files are named after the target ($@.tmp.gz, $@.tmp) instead of
# being dropped in the current directory, and the target is only moved into
# place once every step has succeeded (each recipe line aborts the rule on
# failure).
#
# Entity handling, done in a single sed pass over the unpacked file:
#   1. The entity references for "lt" and "gt" are protected by doubling the
#      ampersand and the semicolon around them.
#   2. Every remaining "ampersand NAME semicolon" reference is collapsed to
#      the bare NAME; the protected references from step 1 come out of this
#      step in their original form.
# The first expression spells the entity names through a capture group so
# that no literal entity ever appears in this file, where HTML-aware tooling
# could decode it and silently turn the rule into one that rewrites every
# angle bracket.
# Requires GNU sed (-i, \+ and \| are GNU extensions).
dictionaries/%.xml:
	mkdir -p "$(@D)"
	wget ftp://ftp.edrdg.org/pub/Nihongo/"$*".gz -O "$@.tmp.gz"
	gunzip -f "$@.tmp.gz"
	sed -i -e 's/&\(lt\|gt\);/\&\&\1;;/g' -e 's/&\([^;]\+\);/\1/g' "$@.tmp"
	mv -f "$@.tmp" "$@"
13
14
# Fetch the word-frequency list (frequency analysis run on Wikipedia in 2015).
# Reference:
# https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Japanese2015_10000
#
# The raw download is staged in $@.tmp, every space character is removed from
# it, and only its first 20000 lines are kept as the final target; the staging
# file is deleted afterwards.
# NOTE(review): --no-check-certificate turns off TLS certificate verification
# for this download -- confirm it is still needed.
dictionaries/frequency.tsv:
	wget --no-check-certificate -O $@.tmp \
		https://namakajiri.net/data/wikipedia-20150422-lemmas.tsv
	sed -i -e 's| ||g' $@.tmp
	head -n 20000 $@.tmp > $@
	rm $@.tmp
22
23
# Build one translated dictionary per language code.
#
# The stem ($*) is the part of the target matched by %, i.e. the language
# code (dicos/JMdict_fre.nani -> fre). It is passed straight to the tool
# instead of being re-derived from $@ with basename and sed in a subshell.
# Command line: build, then the first prerequisite (dictionaries/JMdict.xml),
# the language code, and the output file.
# The script, the frequency list and $(DICO_MODULES) are listed as
# prerequisites so that a change to any of them triggers a rebuild.
dicos/JMdict_%.nani: dictionaries/JMdict.xml tools/jmdict.scm dictionaries/frequency.tsv $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< $* $@
26
27
# Build the English dictionary. Unlike the per-language pattern rule, it reads
# the dedicated dictionaries/JMdict_e.xml download and passes the fixed
# language code "e".
dicos/JMdict_e.nani: dictionaries/JMdict_e.xml \
                     tools/jmdict.scm \
                     dictionaries/frequency.tsv \
                     $(DICO_MODULES)
	guile -L modules tools/jmdict.scm build $< e $@
29