[112694] trunk/dports/textproc

29 Oct 2013

Revision: 112694
          https://trac.macports.org/changeset/112694
Author:   hum@macports.org
Date:     2013-10-29 08:14:23 -0700 (Tue, 29 Oct 2013)
Log Message:
-----------
New port: cicada @ 0.3.0 - a hypergraph-based toolkit for statistical machine translation.

Added Paths:
-----------
    trunk/dports/textproc/cicada/
    trunk/dports/textproc/cicada/Portfile
    trunk/dports/textproc/cicada/files/
    trunk/dports/textproc/cicada/files/patch-configure.diff
    trunk/dports/textproc/cicada/files/patch-samples.diff

Added: trunk/dports/textproc/cicada/Portfile
===================================================================

--- trunk/dports/textproc/cicada/Portfile	                        (rev 0)
+++ trunk/dports/textproc/cicada/Portfile	2013-10-29 15:14:23 UTC (rev 112694)
@@ -0,0 +1,66 @@
+# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
+# $Id$
+
+PortSystem          1.0
+
+name                cicada
+version             0.3.0
+categories          textproc
+maintainers         hum openmaintainer
+
+description         a hypergraph-based toolkit for statistical machine translation
+long_description    ${description}
+
+homepage            http://www2.nict.go.jp/univ-com/multi_trans/cicada
+platforms           darwin
+license             LGPL
+
+master_sites        ${homepage}
+checksums           rmd160  895cc333fa335ec74fb1808b1cea72e01c556179 \
+                    sha256  dd2102812953a6d1093fe23688ddf7f80fe86e98cd0e9f217ad650a5beb5b524
+
+depends_lib         port:expgram \
+                    port:boost \
+                    port:openmpi \
+                    port:icu
+
+depends_build       port:py27-docutils \
+                    port:py27-pygments
+
+patchfiles          patch-configure.diff \
+                    patch-samples.diff
+
+post-build {
+    system -W ${worksrcpath}/doc "${build.cmd} index.html"
+}
+
+post-destroot {
+    # install additional documents.
+    set dest_doc ${destroot}${prefix}/share/doc/${name}
+    xinstall -d ${dest_doc}
+    xinstall -m 644 -W ${worksrcpath} \
+        COPYING.GPL COPYING.LGPL FAQ LICENSE NEWS.rst README.rst TODO.rst \
+        doc/index.html \
+        ${dest_doc}
+    foreach rst [glob ${worksrcpath}/doc/*.rst] {
+        xinstall -m 644 ${rst} ${dest_doc}
+    }
+    # install sample data and scripts.
+    xinstall -d ${destroot}${prefix}/share/examples
+    set dest_ex ${destroot}${prefix}/share/examples/${name}
+    copy ${worksrcpath}/samples ${dest_ex}
+    foreach f {Makefile Makefile.am Makefile.in} {
+        delete ${dest_ex}/${f}
+    }
+    foreach sh [glob ${dest_ex}/*/*.sh ${dest_ex}/*/*/*.sh ${dest_ex}/*/*/*/*.sh] {
+        reinplace "s|@PREFIX@|${prefix}|g" ${sh}
+    }
+}
+
+# Note:
+# To run kftt sample scripts in ${prefix}/share/examples/${name}/kftt.30k,
+# please install the following ports: mecab, cabocha, stanford-parser.
+
+livecheck.type      regex
+livecheck.url       [lindex ${master_sites} 0]
+livecheck.regex     ${name}-(\[0-9.\]+)\\.tar


Property changes on: trunk/dports/textproc/cicada/Portfile
___________________________________________________________________
Added: svn:keywords
   + Id
Added: svn:eol-style
   + native

Added: trunk/dports/textproc/cicada/files/patch-configure.diff
===================================================================
--- trunk/dports/textproc/cicada/files/patch-configure.diff	                        (rev 0)
+++ trunk/dports/textproc/cicada/files/patch-configure.diff	2013-10-29 15:14:23 UTC (rev 112694)
@@ -0,0 +1,20 @@
+--- configure.orig	2013-10-26 06:39:46.000000000 +0900
++++ configure	2013-10-26 18:58:40.000000000 +0900
+@@ -16576,7 +16576,7 @@
+ 
+ # Check for binaries...
+ # Extract the first word of "rst2man.py", so it can be a program name with args.
+-set dummy rst2man.py; ac_word=$2
++set dummy rst2man-2.7.py; ac_word=$2
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+ $as_echo_n "checking for $ac_word... " >&6; }
+ if ${ac_cv_path_RST2MAN+:} false; then :
+@@ -16626,7 +16626,7 @@
+ 
+ 
+ # Extract the first word of "rst2html.py", so it can be a program name with args.
+-set dummy rst2html.py; ac_word=$2
++set dummy rst2html-2.7.py; ac_word=$2
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+ $as_echo_n "checking for $ac_word... " >&6; }
+ if ${ac_cv_path_RST2HTML+:} false; then :

Added: trunk/dports/textproc/cicada/files/patch-samples.diff
===================================================================
--- trunk/dports/textproc/cicada/files/patch-samples.diff	                        (rev 0)
+++ trunk/dports/textproc/cicada/files/patch-samples.diff	2013-10-29 15:14:23 UTC (rev 112694)
@@ -0,0 +1,581 @@
+--- samples/kftt.30k/alignment/alignment.sh.orig	2013-07-30 14:14:40.000000000 +0900
++++ samples/kftt.30k/alignment/alignment.sh	2013-10-27 18:22:32.000000000 +0900
+@@ -1,11 +1,11 @@
+ #!/bin/sh
+ 
+-cicada=../../..
++bindir=@PREFIX@/bin
+ 
+ ## We perform symmetized posterior constrained training, and
+ ## perform smoothing by naive Bayes.
+ 
+-exec ${cicada}/scripts/cicada-alignment.py \
++exec ${bindir}/cicada-alignment.py \
+ 	--f ../data/train.ja.bz2 \
+ 	--e ../data/train.en.bz2 \
+ 	--symmetric \
+--- samples/kftt.30k/data/preprocess.sh.orig	2013-07-29 12:46:08.000000000 +0900
++++ samples/kftt.30k/data/preprocess.sh	2013-10-27 18:22:09.000000000 +0900
+@@ -1,6 +1,6 @@
+ #!/bin/sh
+ 
+-cicada=../../..
++bindir=@PREFIX@/bin
+ 
+ ### Here, we will generate reference data suitable for cicada
+ ### The format is simply:
+@@ -13,5 +13,5 @@
+ ### so that multiple reference translations are merged into a single file.
+ 
+ for data in tune dev; do
+-  $cicada/progs/cicada_filter_refset $data.en --output $data.en.ref
++  ${bindir}/cicada_filter_refset $data.en --output $data.en.ref
+ done
+--- samples/kftt.30k/ngram/expgram.sh.orig	2013-07-30 14:14:34.000000000 +0900
++++ samples/kftt.30k/ngram/expgram.sh	2013-10-27 18:23:31.000000000 +0900
+@@ -1,17 +1,12 @@
+ #!/bin/sh
+ 
+-expgram=
+-
+-if test "$expgram" = ""; then
+-  echo "where is your expgram?"
+-  exit 1
+-fi
++bindir=@PREFIX@/bin
+ 
+ ### Following is a quick example of LM estimation on a small data set.
+ 
+-$expgram/progs/expgram_counts_extract --corpus ../data/train.en.bz2 --output ngram.5.en.counts --order 5 --threads 4
++${bindir}/expgram_counts_extract --corpus ../data/train.en.bz2 --output ngram.5.en.counts --order 5 --threads 4
+ 
+-$expgram/progs/expgram_counts_estimate --ngram ngram.5.en.counts --output ngram.5.en.lm --shard 4
++${bindir}/expgram_counts_estimate --ngram ngram.5.en.counts --output ngram.5.en.lm --shard 4
+ 
+ ### A standard way is:
+-# $expgram/scripts/expgram.py --corpus ../data/train.en --output ngram.5.en --threads 4
++# ${bindir}/expgram.py --corpus ../data/train.en --output ngram.5.en --threads 4
+--- samples/kftt.30k/s2t/data/preprocess.sh.orig	2013-08-02 10:22:41.000000000 +0900
++++ samples/kftt.30k/s2t/data/preprocess.sh	2013-10-27 19:52:56.000000000 +0900
+@@ -1,12 +1,8 @@
+ #!/bin/sh
+ 
+-stanford=
+-cicada=../../../..
+-
+-if test "$stanford" = ""; then
+-  echo "where is your stanford parser?"
+-  exit 1
+-fi
++stanford=@PREFIX@/share/java/stanford-parser
++version=3.2.0
++bindir=@PREFIX@/bin
+ 
+ # Here, we use stanford-parser to parse training data in English
+ # cicada_filter_penntreebank to transform into hypergraph.
+@@ -18,7 +14,7 @@
+ bzcat ../../data/train.en.bz2 | \
+ java \
+     -mx12g \
+-    -cp $stanford/stanford-parser.jar:$stanford/stanford-parser-3.2.0-models.jar \
++    -cp $stanford/stanford-parser.jar:$stanford/stanford-parser-${version}-models.jar \
+     -tLPP edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams \
+     -tokenized -sentences newline \
+     -escaper edu.stanford.nlp.process.PTBEscapingProcessor \
+@@ -29,10 +25,10 @@
+     -outputFormatOptions includePunctuationDependencies \
+     edu/stanford/nlp/models/lexparser/englishFactored.ser.gz \
+     - | \
+-$cicada/progs/cicada_filter_penntreebank \
++${bindir}/cicada_filter_penntreebank \
+     --map ../../data/train.en.bz2 \
+     --normalize | \
+-$cicada/progs/cicada \
++${bindir}/cicada \
+     --input-forest \
+     --threads 8 \
+     --operation binarize:direction=left,order=2 \
+--- samples/kftt.30k/s2t/model/extract.sh.orig	2013-07-31 09:19:40.000000000 +0900
++++ samples/kftt.30k/s2t/model/extract.sh	2013-10-27 19:53:42.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-extract.py \
++exec $bindir/cicada-extract.py \
+ 	--f ../../data/train.ja.bz2 \
+ 	--e ../../data/train.en.bz2 \
+ 	--a ../../alignment/model/aligned.posterior-itg \
+--- samples/kftt.30k/s2t/model/index.sh.orig	2013-07-31 09:19:37.000000000 +0900
++++ samples/kftt.30k/s2t/model/index.sh	2013-10-27 19:54:07.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-index.py \
++exec $bindir/cicada-index.py \
+ 	--model-dir . \
+ 	\
+ 	--ghkm \
+--- samples/kftt.30k/s2t/test/test.sh.orig	2013-08-02 10:23:48.000000000 +0900
++++ samples/kftt.30k/s2t/test/test.sh	2013-10-27 19:55:42.000000000 +0900
+@@ -1,16 +1,11 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
+-weights=
+-
+-if test "$weights" = ""; then
+-  echo "where is your weights? (for instance ../tune/learn.10.weights)"
+-  exit 1
+-fi
++bindir=@PREFIX@/bin
++weights=../tune/learn.10.weights
+ 
+ ### generate config file
+ 
+-$cicada/progs/cicada_filter_config \
++$bindir/cicada_filter_config \
+   --input ../tune/cicada.config \
+   --output cicada.config \
+   --weights "weights=$weights" \
+@@ -18,9 +13,9 @@
+   --file "file=-"
+ 
+ ### perform translation
+-$cicada/progs/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en
++$bindir/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en
+ 
+ ### evaluation
+-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
+ 
+ 
+--- samples/kftt.30k/s2t/tune/config.sh.orig	2013-08-02 10:24:29.000000000 +0900
++++ samples/kftt.30k/s2t/tune/config.sh	2013-10-27 19:56:16.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-config.py \
++exec $bindir/cicada-config.py \
+ 	--tree-grammar ../model/ghkm-index \
+ 	--max-span 10 \
+ 	--goal '[ROOT]' \
+--- samples/kftt.30k/s2t/tune/tune.sh.orig	2013-09-19 17:38:55.000000000 +0900
++++ samples/kftt.30k/s2t/tune/tune.sh	2013-10-27 19:56:32.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../../
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-learn.py \
++exec $bindir/cicada-learn.py \
+ 	--srcset ../../data/tune.ja \
+ 	--refset ../../data/tune.en.ref \
+ 	--config cicada.config \
+--- samples/kftt.30k/scfg/model/extract.sh.orig	2013-07-30 14:14:16.000000000 +0900
++++ samples/kftt.30k/scfg/model/extract.sh	2013-10-27 19:57:17.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-extract.py \
++exec $bindir/cicada-extract.py \
+ 	--f ../../data/train.ja.bz2 \
+ 	--e ../../data/train.en.bz2 \
+ 	--a ../../alignment/model/aligned.posterior-itg \
+--- samples/kftt.30k/scfg/model/index.sh.orig	2013-07-30 14:14:21.000000000 +0900
++++ samples/kftt.30k/scfg/model/index.sh	2013-10-27 19:57:11.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-index.py \
++exec $bindir/cicada-index.py \
+ 	--model-dir . \
+ 	\
+ 	--scfg \
+--- samples/kftt.30k/scfg/test/test.sh.orig	2013-07-30 13:00:31.000000000 +0900
++++ samples/kftt.30k/scfg/test/test.sh	2013-10-27 19:57:55.000000000 +0900
+@@ -1,16 +1,11 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
+-weights=
+-
+-if test "$weights" = ""; then
+-  echo "where is your weights? (for instance ../tune/learn.10.weights)"
+-  exit 1
+-fi
++bindir=@PREFIX@/bin
++weights=../tune/learn.10.weights
+ 
+ ### generate config file
+ 
+-$cicada/progs/cicada_filter_config \
++$bindir/cicada_filter_config \
+   --input ../tune/cicada.config \
+   --output cicada.config \
+   --weights "weights=$weights" \
+@@ -18,9 +13,9 @@
+   --file "file=-"
+ 
+ ### perform translation
+-$cicada/progs/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en
++$bindir/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en
+ 
+ ### evaluation
+-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
+ 
+ 
+--- samples/kftt.30k/scfg/tune/config.sh.orig	2013-07-29 13:01:44.000000000 +0900
++++ samples/kftt.30k/scfg/tune/config.sh	2013-10-27 19:58:21.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-config.py \
++exec $bindir/cicada-config.py \
+ 	--grammar ../model/scfg-index \
+ 	--max-span 15 \
+ 	--straight \
+--- samples/kftt.30k/scfg/tune/tune.sh.orig	2013-09-19 17:38:30.000000000 +0900
++++ samples/kftt.30k/scfg/tune/tune.sh	2013-10-27 19:58:32.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-learn.py \
++exec $bindir/cicada-learn.py \
+ 	--srcset ../../data/tune.ja \
+ 	--refset ../../data/tune.en.ref \
+ 	--config cicada.config \
+--- samples/kftt.30k/t2s/data/preprocess.sh.orig	2013-07-30 15:15:17.000000000 +0900
++++ samples/kftt.30k/t2s/data/preprocess.sh	2013-10-27 20:00:35.000000000 +0900
+@@ -4,20 +4,9 @@
+ # Note that the Japanese side was segmented by mecab (https://code.google.com/p/mecab/).
+ # Thus, it is a natural choice to use cabocha (https://code.google.com/p/cabocha/).
+ 
+-cicada=../../../..
+-mecab=
+-cabocha=
+-
+-if test "$mecab" = ""; then
+-  echo "where is your mecab?"
+-  exit 1
+-fi
+-
+-if test "$cabocha" = ""; then
+-  echo "where is your cabocha?"
+-  exit 1
+-fi
+-
++bindir=@PREFIX@/bin
++mecab=$bindir/mecab
++cabocha=$bindir/cabocha
+ 
+ for data in train dev tune; do
+ 
+@@ -34,12 +23,12 @@
+   awk '{for (i=1;i<=NF;++i) {printf "%s\t*\n", $i } print "EOS";}' | \
+   $mecab -p | \
+   $cabocha -f1 -I 1 | \
+-  $cicada/progs/cicada_filter_dependency \
++  $bindir/cicada_filter_dependency \
+ 	--cabocha \
+ 	--func \
+ 	--forest \
+ 	--head | \
+-  $cicada/progs/cicada \
++  $bindir/cicada \
+ 	--input-forest \
+ 	--threads 4 \
+ 	--operation binarize:direction=cyk,order=1 \
+--- samples/kftt.30k/t2s/model/extract.sh.orig	2013-07-30 15:26:16.000000000 +0900
++++ samples/kftt.30k/t2s/model/extract.sh	2013-10-27 20:01:00.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-extract.py \
++exec $bindir/cicada-extract.py \
+ 	--f ../../data/train.ja.bz2 \
+ 	--e ../../data/train.en.bz2 \
+ 	--a ../../alignment/model/aligned.posterior-itg \
+--- samples/kftt.30k/t2s/model/index.sh.orig	2013-07-30 14:14:12.000000000 +0900
++++ samples/kftt.30k/t2s/model/index.sh	2013-10-27 20:01:08.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-index.py \
++exec $bindir/cicada-index.py \
+ 	--root-dir . \
+ 	--model-dir . \
+ 	\
+--- samples/kftt.30k/t2s/test/test.sh.orig	2013-07-30 13:00:41.000000000 +0900
++++ samples/kftt.30k/t2s/test/test.sh	2013-10-27 20:01:41.000000000 +0900
+@@ -1,16 +1,11 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
+-weights=
+-
+-if test "$weights" = ""; then
+-  echo "where is your weights? (for instance ../tune/learn.10.weights)"
+-  exit 1
+-fi
++bindir=@PREFIX@/bin
++weights=../tune/learn.10.weights
+ 
+ ### generate config file
+ 
+-$cicada/progs/cicada_filter_config \
++$bindir/cicada_filter_config \
+   --input ../tune/cicada.config \
+   --output cicada.config \
+   --weights "weights=$weights" \
+@@ -18,9 +13,9 @@
+   --file "file=-"
+ 
+ ### perform translation
+-$cicada/progs/cicada --config cicada.config --threads 4 --debug --input ../data/dev.forest.ja.gz > dev.ja-en
++$bindir/cicada --config cicada.config --threads 4 --debug --input ../data/dev.forest.ja.gz > dev.ja-en
+ 
+ ### evaluation
+-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
+ 
+ 
+--- samples/kftt.30k/t2s/tune/config.sh.orig	2013-07-29 13:52:34.000000000 +0900
++++ samples/kftt.30k/t2s/tune/config.sh	2013-10-27 20:02:02.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-config.py \
++exec $bindir/cicada-config.py \
+ 	--tree-grammar ../model/ghkm-index \
+ 	--goal '[x]' \
+ 	--glue '[x]' \
+--- samples/kftt.30k/t2s/tune/tune.sh.orig	2013-09-19 17:38:49.000000000 +0900
++++ samples/kftt.30k/t2s/tune/tune.sh	2013-10-27 20:02:19.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-learn.py \
++exec $bindir/cicada-learn.py \
+ 	--srcset ../data/tune.forest.ja.gz \
+ 	--refset ../../data/tune.en.ref \
+ 	--config cicada.config \
+--- samples/kftt.30k/t2t/model/extract.sh.orig	2013-08-01 08:37:16.000000000 +0900
++++ samples/kftt.30k/t2t/model/extract.sh	2013-10-27 20:02:40.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-extract.py \
++exec $bindir/cicada-extract.py \
+ 	--f ../../data/train.ja \
+ 	--e ../../data/train.en \
+ 	--a ../../alignment/model/aligned.posterior-itg \
+--- samples/kftt.30k/t2t/model/index.sh.orig	2013-07-31 09:36:12.000000000 +0900
++++ samples/kftt.30k/t2t/model/index.sh	2013-10-27 20:02:49.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-index.py \
++exec $bindir/cicada-index.py \
+ 	--model-dir . \
+ 	\
+ 	--tree \
+--- samples/kftt.30k/t2t/test/test.sh.orig	2013-07-31 09:35:38.000000000 +0900
++++ samples/kftt.30k/t2t/test/test.sh	2013-10-27 20:03:19.000000000 +0900
+@@ -1,16 +1,11 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
+-weights=
+-
+-if test "$weights" = ""; then
+-  echo "where is your weights? (for instance ../tune/learn.10.weights)"
+-  exit 1
+-fi
++bindir=@PREFIX@/bin
++weights=../tune/learn.10.weights
+ 
+ ### generate config file
+ 
+-$cicada/progs/cicada_filter_config \
++$bindir/cicada_filter_config \
+   --input ../tune/cicada.config \
+   --output cicada.config \
+   --weights "weights=$weights" \
+@@ -18,9 +13,9 @@
+   --file "file=-"
+ 
+ ### perform translation
+-$cicada/progs/cicada --config cicada.config --threads 4 --debug --input ../../t2s/data/dev.forest.ja.gz > dev.ja-en
++$bindir/cicada --config cicada.config --threads 4 --debug --input ../../t2s/data/dev.forest.ja.gz > dev.ja-en
+ 
+ ### evaluation
+-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref
+ 
+ 
+--- samples/kftt.30k/t2t/tune/config.sh.orig	2013-07-31 09:52:14.000000000 +0900
++++ samples/kftt.30k/t2t/tune/config.sh	2013-10-27 20:03:41.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-config.py \
++exec $bindir/cicada-config.py \
+ 	--tree-grammar ../model/tree-index \
+    	--goal '[ROOT]' \
+         --glue '[x]' \
+--- samples/kftt.30k/t2t/tune/tune.sh.orig	2013-09-19 17:38:41.000000000 +0900
++++ samples/kftt.30k/t2t/tune/tune.sh	2013-10-27 20:03:55.000000000 +0900
+@@ -1,8 +1,8 @@
+ #!/bin/sh
+ 
+-cicada=../../../..
++bindir=@PREFIX@/bin
+ 
+-exec $cicada/scripts/cicada-learn.py \
++exec $bindir/cicada-learn.py \
+ 	--srcset ../../t2s/data/tune.forest.ja.gz \
+ 	--refset ../../data/tune.en.ref \
+ 	--config cicada.config \
+--- samples/s2t/sample.sh.orig	2013-08-03 10:37:31.000000000 +0900
++++ samples/s2t/sample.sh	2013-10-27 20:07:14.000000000 +0900
+@@ -1,6 +1,7 @@
+ #!/bin/sh
+ 
+-cicada=../..
++bindir=@PREFIX@/bin
++exdir=@PREFIX@/share/examples/cicada
+ 
+ ##
+ ##
+@@ -18,16 +19,16 @@
+ ##      apply features
+ ##      output kbests 
+ 
+-exec $cicada/progs/cicada \
+-      --input $cicada/samples/scfg/input.txt \
++exec $bindir/cicada \
++      --input $exdir/scfg/input.txt \
+       --goal '[ROOT]' \
+-      --tree-grammar $cicada/samples/s2t/grammar.bin:max-span=10 \
++      --tree-grammar $exdir/s2t/grammar.bin:max-span=10 \
+       --tree-grammar "glue:goal-source=[ROOT],goal-target=[ROOT],non-terminal-source=[x],non-terminal-target=[x],straight=true,invert=false" \
+       --grammar "insertion:non-terminal=[x]" \
+-      --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \
++      --feature-function "ngram:file=$exdir/scfg/ngram.bin" \
+       --feature-function word-penalty \
+       --feature-function rule-penalty \
+       --feature-function glue-tree-penalty \
+       --operation compose-tree-cky \
+-      --operation apply:prune=true,size=100,weights=$cicada/samples/s2t/weights \
+-      --operation output:file=-,kbest=10,weights=$cicada/samples/s2t/weights
++      --operation apply:prune=true,size=100,weights=$exdir/s2t/weights \
++      --operation output:file=-,kbest=10,weights=$exdir/s2t/weights
+--- samples/scfg/sample.sh.orig	2013-07-30 14:00:13.000000000 +0900
++++ samples/scfg/sample.sh	2013-10-27 20:07:02.000000000 +0900
+@@ -1,15 +1,16 @@
+ #!/bin/sh
+ 
+-cicada=../..
++bindir=@PREFIX@/bin
++exdir=@PREFIX@/share/examples/cicada
+ 
+-exec $cicada/progs/cicada \
+-      --input $cicada/samples/scfg/input.txt \
+-      --grammar $cicada/samples/scfg/grammar.bin \
++exec $bindir/cicada \
++      --input $exdir/scfg/input.txt \
++      --grammar $exdir/scfg/grammar.bin \
+       --grammar "glue:straight=true,inverted=false,non-terminal=[x],goal=[s]" \
+       --grammar "insertion:non-terminal=[x]" \
+-      --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \
++      --feature-function "ngram:file=$exdir/scfg/ngram.bin" \
+       --feature-function word-penalty \
+       --feature-function rule-penalty \
+       --operation compose-cky \
+-      --operation apply:prune=true,size=100,weights=$cicada/samples/scfg/weights \
+-      --operation output:file=-,kbest=10,weights=$cicada/samples/scfg/weights
++      --operation apply:prune=true,size=100,weights=$exdir/scfg/weights \
++      --operation output:file=-,kbest=10,weights=$exdir/scfg/weights
+--- samples/t2s/sample.sh.orig	2013-08-19 16:18:37.000000000 +0900
++++ samples/t2s/sample.sh	2013-10-27 20:08:28.000000000 +0900
+@@ -1,6 +1,7 @@
+ #!/bin/sh
+ 
+-cicada=../..
++bindir=@PREFIX@/bin
++exdir=@PREFIX@/share/examples/cicada
+ 
+ ##
+ ## 1. pentreebank is converted into hypergraph
+@@ -20,20 +21,20 @@
+ ##      apply features
+ ##      output kbests 
+ 
+-$cicada/progs/cicada_filter_penntreebank \
+-      --input $cicada/samples/t2s/input.txt \
++$bindir/cicada_filter_penntreebank \
++      --input $exdir/t2s/input.txt \
+       --normalize \
+ | \
+-$cicada/progs/cicada \
++$bindir/cicada \
+       --input - \
+       --input-forest \
+       --goal '[ROOT]' \
+-      --tree-grammar $cicada/samples/t2s/grammar.bin \
++      --tree-grammar $exdir/t2s/grammar.bin \
+       --tree-grammar fallback \
+-      --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \
++      --feature-function "ngram:file=$exdir/scfg/ngram.bin" \
+       --feature-function word-penalty \
+       --feature-function rule-penalty \
+       --operation binarize:direction=cyk,order=1 \
+       --operation compose-tree \
+-      --operation apply:prune=true,size=1000,weights=$cicada/samples/t2s/weights \
+-      --operation output:file=-,kbest=10,weights=$cicada/samples/t2s/weights
++      --operation apply:prune=true,size=1000,weights=$exdir/t2s/weights \
++      --operation output:file=-,kbest=10,weights=$exdir/t2s/weights

    

hum@macports.org

tags

participants (1)