guix-more/more/packages/moses.scm

moses.scm

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2018 Julien Lepiller <julien@lepiller.eu>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

;; Package definitions for the Moses machine translation system, the cmph
;; library it takes as an input, and the MGIZA word aligner.
(define-module (more packages moses)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (gnu packages)
  #:use-module (gnu packages boost)
  #:use-module (gnu packages compression)
  #:use-module (guix packages)
  #:use-module (guix download)
  #:use-module (guix git-download)
  #:use-module (guix utils)
  #:use-module (guix build-system cmake)
  #:use-module (guix build-system gnu))

;; CMPH, a minimal perfect hashing library.  It is listed as an input of the
;; `moses' package in this module.
(define-public cmph
  (package
    (name "cmph")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/cmph/cmph/cmph-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0xms1hii88wlihrr4766qmk26kvzhzcw3h6a489gp89xzxsrlv5d"))))
    ;; No custom arguments: the standard phases of the GNU build system
    ;; are used unchanged.
    (build-system gnu-build-system)
    (home-page "http://cmph.sourceforge.net/")
    (synopsis "C Minimal Perfect Hashing Library")
    (description "Perfect hash functions map a static set of n keys into a set
of m integer numbers without collisions, where m is greater than or equal to
n.  If m is equal to n, the function is called minimal.

Minimal perfect hash functions are widely used for memory efficient storage
and fast retrieval of items from static sets, such as words in natural
languages, reserved words in programming languages or interactive systems,
universal resource locations (URLs) in Web search engines, or item sets in
data mining techniques.  Therefore, there are applications for minimal perfect
hash functions in information retrieval systems, database systems, language
translation systems, electronic commerce systems, compilers, operating
systems, among others.")
    (license (list license:lgpl2.1+ license:mpl1.1))))

;; Moses statistical machine translation system, built with the bundled
;; ./bjam driver instead of the usual configure/make sequence.
(define-public moses
  (package
    (name "moses")
    (version "4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/moses-smt/mosesdecoder/"
                                  "archive/RELEASE-" version ".tar.gz"))
              ;; The downloaded archive is called "RELEASE-<version>.tar.gz";
              ;; give the store item a name that identifies the package.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "13wvxizbvzrklswf1s8751r0vqd71xfn55biy76ifni2pg6pcwrm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; Tests run during build
       ;; These flags are not given to `make': the replaced 'build and
       ;; 'install phases below hand them to ./bjam.
       #:make-flags
       (list (string-append "--with-boost=" (assoc-ref %build-inputs "boost"))
             (string-append "--with-cmph=" (assoc-ref %build-inputs "cmph"))
             "--with-mm" "--with-probing-pt" "--no-xmlrpc-c" "-q" "link=shared"
             (string-append "--prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; The standard 'configure phase is not run for this package.
         (delete 'configure)
         (add-before 'build 'patch-bin-sh
           (lambda _
             ;; Replace the hard-coded "/bin/sh" in the jam engine with the
             ;; shell found in PATH during the build.
             (substitute* "jam-files/engine/execunix.c"
               (("\"/bin/sh\"") (string-append "\"" (which "sh") "\"")))
             ;; Return an explicit success value, like the other phases.
             #t))
         (replace 'build
           (lambda* (#:key make-flags #:allow-other-keys)
             ;; JAMSHELL is set to "<sh> -c" before bjam is started.
             (setenv "JAMSHELL" (string-append (which "sh") " -c"))
             (apply invoke "./bjam" make-flags)))
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             ;; Same flags as the build, so --prefix points at the output.
             (apply invoke "./bjam" "install" make-flags))))))
    (inputs
     `(("boost" ,boost)
       ("cmph" ,cmph)
       ("zlib" ,zlib)))
    (home-page "http://www.statmt.org/moses")
    (synopsis "Statistical machine translation")
    (description "Moses is an implementation of the statistical (or data-driven)
approach to machine translation (MT).  In statistical machine translation
(SMT), translation systems are trained on large quantities of parallel data
(from which the systems learn how to translate small segments), as well as
even larger quantities of monolingual data (from which the systems learn what
the target language should look like).  Parallel data is a collection of
sentences in two different languages, which is sentence-aligned, in that
each sentence in one language is matched with its corresponding translated
sentence in the other language.")
    ;; NOTE(review): upstream's COPYING file appears to be LGPL 2.1 rather
    ;; than the Apache License -- confirm against the source tarball.
    (license license:asl2.0)))

;; MGIZA word alignment tool, built with CMake from a pinned git commit.
(define-public mgiza
  (package
    (name "mgiza")
    ;; The source is pinned to a commit rather than to a release tag.
    (version "0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                     (url "https://github.com/moses-smt/mgiza")
                     (commit "d643960de98565d208114780ba8025799208afa7")))
              ;; Name the checkout after the package and version.
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1zvs18fxdw9frhlxlrmq3pjzm9b9chcjvppmn2yljqdxpzmqimc6"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ; no tests
       #:phases
       (modify-phases %standard-phases
         ;; Enter the "mgizapp" subdirectory before the 'configure phase
         ;; runs.
         (add-before 'configure 'chdir
           (lambda _
             (chdir "mgizapp")
             #t)))))
    (inputs
     `(("boost" ,boost)))
    (home-page "https://github.com/moses-smt/mgiza")
    (synopsis "Word alignment tool")
    (description "Mgiza is a word alignment tool based on the famous GIZA++,
extended to support multi-threading, resume training and incremental training.")
    (license license:gpl2+)))