moses.scm
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2018 Julien Lepiller <julien@lepiller.eu> |
3 | ;;; |
4 | ;;; This file is part of GNU Guix. |
5 | ;;; |
6 | ;;; GNU Guix is free software; you can redistribute it and/or modify it |
7 | ;;; under the terms of the GNU General Public License as published by |
8 | ;;; the Free Software Foundation; either version 3 of the License, or (at |
9 | ;;; your option) any later version. |
10 | ;;; |
11 | ;;; GNU Guix is distributed in the hope that it will be useful, but |
12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | ;;; GNU General Public License for more details. |
15 | ;;; |
16 | ;;; You should have received a copy of the GNU General Public License |
17 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. |
18 | |
19 | (define-module (more packages moses) |
20 | #:use-module ((guix licenses) #:prefix license:) |
21 | #:use-module (gnu packages) |
22 | #:use-module (gnu packages boost) |
23 | #:use-module (gnu packages compression) |
24 | #:use-module (guix packages) |
25 | #:use-module (guix download) |
26 | #:use-module (guix git-download) |
27 | #:use-module (guix utils) |
28 | #:use-module (guix build-system gnu)) |
29 | |
30 | (define-public cmph |
31 | (package |
32 | (name "cmph") |
33 | (version "2.0") |
34 | (source (origin |
35 | (method url-fetch) |
36 | (uri (string-append "mirror://sourceforge/cmph/cmph/cmph-" |
37 | version ".tar.gz")) |
38 | (sha256 |
39 | (base32 |
40 | "0xms1hii88wlihrr4766qmk26kvzhzcw3h6a489gp89xzxsrlv5d")))) |
41 | (build-system gnu-build-system) |
42 | (home-page "http://cmph.sourceforge.net/") |
43 | (synopsis "C Minimal Perfect Hashing Library") |
44 | (description "Perfect hash functions map a static set of n keys into a set |
45 | of m integer numbers without collisions, where m is greater than or equal to |
46 | n. If m is equal to n, the function is called minimal. |
47 | |
48 | Minimal perfect hash functions are widely used for memory efficient storage |
49 | and fast retrieval of items from static sets, such as words in natural |
50 | languages, reserved words in programming languages or interactive systems, |
51 | universal resource locations (URLs) in Web search engines, or item sets in |
52 | data mining techniques. Therefore, there are applications for minimal perfect |
53 | hash functions in information retrieval systems, database systems, language |
54 | translation systems, electronic commerce systems, compilers, operating |
55 | systems, among others.") |
56 | (license (list license:lgpl2.1+ license:mpl1.1)))) |
57 | |
58 | (define-public moses |
59 | (package |
60 | (name "moses") |
61 | (version "4.0") |
62 | (source (origin |
63 | (method url-fetch) |
64 | (uri (string-append "https://github.com/moses-smt/mosesdecoder/" |
65 | "archive/RELEASE-" version ".tar.gz")) |
66 | (sha256 |
67 | (base32 |
68 | "13wvxizbvzrklswf1s8751r0vqd71xfn55biy76ifni2pg6pcwrm")))) |
69 | (build-system gnu-build-system) |
70 | (arguments |
71 | `(#:phases |
72 | (modify-phases %standard-phases |
73 | (delete 'configure) |
74 | (add-before 'build 'patch-bin-sh |
75 | (lambda _ |
76 | (substitute* "jam-files/engine/execunix.c" |
77 | (("\"/bin/sh\"") (string-append "\"" (which "sh") "\""))))) |
78 | (replace 'build |
79 | (lambda* (#:key inputs #:allow-other-keys) |
80 | (setenv "JAMSHELL" (string-append (which "sh") " -c")) |
81 | (invoke "./bjam" (string-append "--with-boost=" (assoc-ref inputs "boost")) |
82 | (string-append "--with-cmph=" (assoc-ref inputs "cmph")) |
83 | "--with-mm" "--with-probing-pt" "--no-xmlrpc-c" "-q")))))) |
84 | (inputs |
85 | `(("boost" ,boost) |
86 | ("cmph" ,cmph) |
87 | ("zlib" ,zlib))) |
88 | (home-page "http://www.statmt.org/moses") |
89 | (synopsis "Statistical machine translation") |
90 | (description "Moses is an implementation of the statistical (or data-driven) |
91 | approach to machine translation (MT). In statistical machine translation |
92 | (SMT), translation systems are trained on large quantities of parallel data |
93 | (from which the systems learn how to translate small segments), as well as |
94 | even larger quantities of monolingual data (from which the systems learn what |
95 | the target language should look like). Parallel data is a collection of |
96 | sentences in two different languages, which is sentence-aligned, in that |
97 | each sentence in one language is matched with its corresponding translated |
98 | sentence in the other language.") |
99 | (license license:asl2.0))) |
100 |