moses.scm
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2018 Julien Lepiller <julien@lepiller.eu> |
3 | ;;; |
4 | ;;; This file is part of GNU Guix. |
5 | ;;; |
6 | ;;; GNU Guix is free software; you can redistribute it and/or modify it |
7 | ;;; under the terms of the GNU General Public License as published by |
8 | ;;; the Free Software Foundation; either version 3 of the License, or (at |
9 | ;;; your option) any later version. |
10 | ;;; |
11 | ;;; GNU Guix is distributed in the hope that it will be useful, but |
12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | ;;; GNU General Public License for more details. |
15 | ;;; |
16 | ;;; You should have received a copy of the GNU General Public License |
17 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. |
18 | |
19 | (define-module (more packages moses) |
20 | #:use-module ((guix licenses) #:prefix license:) |
21 | #:use-module (gnu packages) |
22 | #:use-module (gnu packages boost) |
23 | #:use-module (gnu packages compression) |
24 | #:use-module (guix packages) |
25 | #:use-module (guix download) |
26 | #:use-module (guix git-download) |
27 | #:use-module (guix utils) |
28 | #:use-module (guix build-system cmake) |
29 | #:use-module (guix build-system gnu)) |
30 | |
;; CMPH: C library for building and querying minimal perfect hash functions.
;; Fetched as a release tarball from SourceForge and built with the stock
;; GNU build system phases (no custom arguments).
(define-public cmph
  (package
    (name "cmph")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       ;; Expands to .../cmph/cmph/cmph-<version>.tar.gz on the mirror.
       (uri (string-append "mirror://sourceforge/cmph/cmph/"
                           name "-" version ".tar.gz"))
       (sha256
        (base32 "0xms1hii88wlihrr4766qmk26kvzhzcw3h6a489gp89xzxsrlv5d"))))
    (build-system gnu-build-system)
    (home-page "http://cmph.sourceforge.net/")
    (synopsis "C Minimal Perfect Hashing Library")
    (description "Perfect hash functions map a static set of n keys into a set
of m integer numbers without collisions, where m is greater than or equal to
n. If m is equal to n, the function is called minimal.

Minimal perfect hash functions are widely used for memory efficient storage
and fast retrieval of items from static sets, such as words in natural
languages, reserved words in programming languages or interactive systems,
universal resource locations (URLs) in Web search engines, or item sets in
data mining techniques. Therefore, there are applications for minimal perfect
hash functions in information retrieval systems, database systems, language
translation systems, electronic commerce systems, compilers, operating
systems, among others.")
    ;; Two licenses are listed for this package.
    (license (list license:lgpl2.1+
                   license:mpl1.1))))
58 | |
;; Moses statistical machine translation decoder.  The package is driven by
;; the `./bjam' script found in the source tree instead of the usual
;; configure/make pair, so the `configure' phase is dropped and the `build'
;; and `install' phases are replaced.
(define-public moses
  (package
    (name "moses")
    (version "4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/moses-smt/mosesdecoder/"
                                  "archive/RELEASE-" version ".tar.gz"))
              ;; The archive is named after the tag only ("RELEASE-4.0");
              ;; give the store item a name that identifies the package.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "13wvxizbvzrklswf1s8751r0vqd71xfn55biy76ifni2pg6pcwrm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; Tests run during build
       ;; These are handed verbatim to ./bjam by the replaced `build' and
       ;; `install' phases below; they are never passed to `make'.
       #:make-flags
       (list (string-append "--with-boost=" (assoc-ref %build-inputs "boost"))
             (string-append "--with-cmph=" (assoc-ref %build-inputs "cmph"))
             "--with-mm" "--with-probing-pt" "--no-xmlrpc-c" "-q" "link=shared"
             (string-append "--prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-bin-sh
           (lambda _
             ;; Replace the hard-coded "/bin/sh" string literal in the jam
             ;; engine with the absolute file name of `sh' found in PATH.
             (substitute* "jam-files/engine/execunix.c"
               (("\"/bin/sh\"") (string-append "\"" (which "sh") "\"")))
             ;; Return #t explicitly, like the other custom phases in this
             ;; file; `substitute*' has an unspecified return value.
             #t))
         (replace 'build
           (lambda* (#:key make-flags #:allow-other-keys)
             ;; Point JAMSHELL at the same `sh' before running the build.
             (setenv "JAMSHELL" (string-append (which "sh") " -c"))
             (apply invoke "./bjam" make-flags)))
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             ;; Same flags as the build, so --prefix selects the output.
             (apply invoke "./bjam" "install" make-flags))))))
    (inputs
     `(("boost" ,boost)
       ("cmph" ,cmph)
       ("zlib" ,zlib)))
    (home-page "http://www.statmt.org/moses")
    (synopsis "Statistical machine translation")
    (description "Moses is an implementation of the statistical (or data-driven)
approach to machine translation (MT). In statistical machine translation
(SMT), translation systems are trained on large quantities of parallel data
(from which the systems learn how to translate small segments), as well as
even larger quantities of monolingual data (from which the systems learn what
the target language should look like). Parallel data is a collection of
sentences in two different languages, which is sentence-aligned, in that
each sentence in one language is matched with its corresponding translated
sentence in the other language.")
    ;; Upstream distributes Moses under the GNU LGPL, version 2.1 or later.
    (license license:lgpl2.1+)))
108 | |
;; MGIZA word alignment tool, built with CMake from a pinned git commit.
;; The build is run from the "mgizapp" sub-directory of the checkout.
(define-public mgiza
  (package
    (name "mgiza")
    (version "0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/moses-smt/mgiza")
                    (commit "d643960de98565d208114780ba8025799208afa7")))
              ;; Give the checkout a descriptive store name instead of
              ;; relying on the generic default for version-control origins.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1zvs18fxdw9frhlxlrmq3pjzm9b9chcjvppmn2yljqdxpzmqimc6"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'chdir
           (lambda _
             ;; Enter the sub-directory before the `configure' phase runs.
             (chdir "mgizapp")
             #t)))))
    (inputs
     `(("boost" ,boost)))
    (home-page "https://github.com/moses-smt/mgiza")
    (synopsis "Word alignment tool")
    (description "Mgiza is a word alignment tool based on the famous GIZA++,
extended to support multi-threading, resume training and incremental training.")
    (license license:gpl2+)))
137 |