Add mecab and wp2txt

Julien Lepiller, Fri Apr 19 15:39:25 +0200 2019

40b62d4

Add mecab and wp2txt

mecab-variable-param.patch unknown status 1

1+
From 2396e90056706ef897acab3aaa081289c7336483 Mon Sep 17 00:00:00 2001
2+
From: LEPILLER Julien <julien.lepiller@irisa.fr>
3+
Date: Fri, 19 Apr 2019 11:48:39 +0200
4+
Subject: [PATCH] Allow variable parameters
5+
6+
---
7+
 src/param.cpp | 6 +++++-
8+
 1 file changed, 5 insertions(+), 1 deletion(-)
9+
10+
diff --git a/src/param.cpp b/src/param.cpp
11+
index 65328a2..006b1b5 100644
12+
--- a/src/param.cpp
13+
+++ b/src/param.cpp
14+
@@ -79,8 +79,12 @@ bool Param::load(const char *filename) {
15+
     size_t s1, s2;
16+
     for (s1 = pos+1; s1 < line.size() && isspace(line[s1]); s1++);
17+
     for (s2 = pos-1; static_cast<long>(s2) >= 0 && isspace(line[s2]); s2--);
18+
-    const std::string value = line.substr(s1, line.size() - s1);
19+
+    std::string value = line.substr(s1, line.size() - s1);
20+
     const std::string key   = line.substr(0, s2 + 1);
21+
+
22+
+    if(value.find('$') == 0) {
23+
+        value = std::getenv(value.substr(1).c_str());
24+
+    }
25+
     set<std::string>(key.c_str(), value, false);
26+
   }
27+
 
28+
-- 
29+
2.20.1
30+

more/packages/games.scm

353353
lightweight & fast actor implementations, pattern matching for messages,
354354
network transparent messaging, and more.")
355355
    (license (list license:boost1.0 license:bsd-3))))
356+
357+
(define-public mecab
358+
  (package
359+
    (name "mecab")
360+
    (version "0.996")
361+
    (source (origin
362+
              (method url-fetch)
363+
              (uri "https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7cENtOXlicTFaRUE")
364+
              (file-name (string-append name "-" version ".tar.gz"))
365+
              (sha256
366+
               (base32
367+
                "0ncwlqxl1hdn1x4v4kr2sn1sbbcgnhdphp0lcvk74nqkhdbk4wz0"))
368+
              (patches
369+
                (search-patches
370+
                  "mecab-variable-param.patch"))))
371+
    (build-system gnu-build-system)
372+
    (search-paths
373+
      (list (search-path-specification
374+
              (variable "MECAB_DICDIR")
375+
              (separator #f)
376+
              (files '("lib/mecab/dic")))))
377+
    (arguments
378+
     `(#:phases
379+
       (modify-phases %standard-phases
380+
         (add-before 'build 'add-mecab-dicdir-variable
381+
           (lambda _
382+
             (substitute* "mecabrc.in"
383+
               (("dicdir = .*")
384+
                "dicdir = $MECAB_DICDIR"))
385+
             (substitute* "mecab-config.in"
386+
               (("echo @libdir@/mecab/dic")
387+
                "if [ -z \"$MECAB_DICDIR\" ]; then
388+
  echo @libdir@/mecab/dic
389+
else
390+
  echo \"$MECAB_DICDIR\"
391+
fi"))
392+
             #t)))))
393+
    (inputs
394+
     `(("libiconv" ,libiconv)))
395+
    (home-page "https://taku910.github.io/mecab")
396+
    (synopsis "Morphological analysis engine for texts")
397+
    (description "Mecab is a morphological analysis engine developed as a
398+
collaboration between Kyoto University and Nippon Telegraph and Telephone
399+
Corporation.  The engine is independent of any language, dictionary or corpus.
400+
")
401+
    (license (list license:gpl2+ license:lgpl2.1+ license:bsd-3))))
402+
403+
(define-public mecab-ipadic
404+
  (package
405+
    (name "mecab-ipadic")
406+
    (version "2.7.0")
407+
    (source (origin
408+
              (method url-fetch)
409+
              (uri "https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7MWVlSDBCSXZMTXM")
410+
              (file-name (string-append name "-" version ".tar.gz"))
411+
              (sha256
412+
               (base32
413+
                "08rmkvj0f0x6jq0axrjw2y5nam0mavv6x77dp9v4al0wi1ym4bxn"))))
414+
    (build-system gnu-build-system)
415+
    (arguments
416+
     `(#:configure-flags
417+
       (list (string-append "--with-dicdir=" (assoc-ref %outputs "out")
418+
                            "/lib/mecab/dic")
419+
             "--with-charset=utf8")
420+
       #:phases
421+
       (modify-phases %standard-phases
422+
         (add-before 'configure 'set-mecab-dir
423+
           (lambda* (#:key outputs #:allow-other-keys)
424+
             (setenv "MECAB_DICDIR" (string-append (assoc-ref outputs "out")
425+
                                                   "/lib/mecab/dic"))
426+
             #t)))))
427+
    (native-inputs
428+
     `(("mecab" ,mecab))); for mecab-config
429+
    (home-page "")
430+
    (synopsis "")
431+
    (description "")
432+
    (license (license:non-copyleft "COPYING"))))

more/packages/ruby.scm

552552
")
553553
    (home-page "http://www.rubocop.org/")
554554
    (license license:expat)))
555+
556+
(define-public ruby-trollop
557+
  (package
558+
    (name "ruby-trollop")
559+
    (version "2.9.9")
560+
    (source
561+
      (origin
562+
        (method url-fetch)
563+
        (uri (rubygems-uri "trollop" version))
564+
        (sha256
565+
          (base32
566+
            "074h7lns72kg1dl5gvz5apl3xz1i0axbnbc01pf2kbw4q0lkpnp4"))))
567+
    (build-system ruby-build-system)
568+
    (arguments
569+
     `(#:tests? #f))
570+
    (synopsis
571+
      "Trollop is a commandline option parser for Ruby that just gets out of your way.")
572+
    (description
573+
      "Trollop is a commandline option parser for Ruby that just gets out of your way.")
574+
    (home-page "")
575+
    (license license:expat)))
576+
577+
(define-public ruby-htmlentities
578+
  (package
579+
    (name "ruby-htmlentities")
580+
    (version "4.3.4")
581+
    (source
582+
      (origin
583+
        (method url-fetch)
584+
        (uri (rubygems-uri "htmlentities" version))
585+
        (sha256
586+
          (base32
587+
            "1nkklqsn8ir8wizzlakncfv42i32wc0w9hxp00hvdlgjr7376nhj"))))
588+
    (build-system ruby-build-system)
589+
    (arguments
590+
     `(#:tests? #f))
591+
    (synopsis
592+
      "A module for encoding and decoding (X)HTML entities.")
593+
    (description
594+
      "This package provides a module for encoding and decoding (X)HTML entities.")
595+
    (home-page
596+
      "https://github.com/threedaymonk/htmlentities")
597+
    (license license:expat)))
598+
599+
(define-public ruby-wp2txt
600+
  (package
601+
    (name "ruby-wp2txt")
602+
    (version "0.9.1")
603+
    (source
604+
      (origin
605+
        (method url-fetch)
606+
        (uri (rubygems-uri "wp2txt" version))
607+
        (sha256
608+
          (base32
609+
            "01l0r83ma3fp9zb94km4lqasvxpml2azd4dj36qzpm71c2pyhng4"))))
610+
    (build-system ruby-build-system)
611+
    (arguments
612+
     `(#:tests? #f))
613+
    (propagated-inputs
614+
      `(("ruby-htmlentities" ,ruby-htmlentities)
615+
        ("ruby-nokogiri" ,ruby-nokogiri)
616+
        ("ruby-parallel" ,ruby-parallel)
617+
        ("ruby-trollop" ,ruby-trollop)))
618+
    (synopsis
619+
      "WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.")
620+
    (description
621+
      "WP2TXT extracts plain text data from Wikipedia dump file (encoded in XML/compressed with Bzip2) stripping all the MediaWiki markups and other metadata.")
622+
    (home-page "http://github.com/yohasebe/wp2txt")
623+
    (license #f)))