Revision: 112694 https://trac.macports.org/changeset/112694 Author: hum@macports.org Date: 2013-10-29 08:14:23 -0700 (Tue, 29 Oct 2013) Log Message: ----------- New port: cicada @ 0.3.0 - a hypergraph-based toolkit for statistical machine translation. Added Paths: ----------- trunk/dports/textproc/cicada/ trunk/dports/textproc/cicada/Portfile trunk/dports/textproc/cicada/files/ trunk/dports/textproc/cicada/files/patch-configure.diff trunk/dports/textproc/cicada/files/patch-samples.diff Added: trunk/dports/textproc/cicada/Portfile =================================================================== --- trunk/dports/textproc/cicada/Portfile (rev 0) +++ trunk/dports/textproc/cicada/Portfile 2013-10-29 15:14:23 UTC (rev 112694) @@ -0,0 +1,66 @@ +# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4 +# $Id$ + +PortSystem 1.0 + +name cicada +version 0.3.0 +categories textproc +maintainers hum openmaintainer + +description a hypergraph-based toolkit for statistical machine translation +long_description ${description} + +homepage http://www2.nict.go.jp/univ-com/multi_trans/cicada +platforms darwin +license LGPL + +master_sites ${homepage} +checksums rmd160 895cc333fa335ec74fb1808b1cea72e01c556179 \ + sha256 dd2102812953a6d1093fe23688ddf7f80fe86e98cd0e9f217ad650a5beb5b524 + +depends_lib port:expgram \ + port:boost \ + port:openmpi \ + port:icu + +depends_build port:py27-docutils \ + port:py27-pygments + +patchfiles patch-configure.diff \ + patch-samples.diff + +post-build { + system -W ${worksrcpath}/doc "${build.cmd} index.html" +} + +post-destroot { + # install additional documents. + set dest_doc ${destroot}${prefix}/share/doc/${name} + xinstall -d ${dest_doc} + xinstall -m 644 -W ${worksrcpath} \ + COPYING.GPL COPYING.LGPL FAQ LICENSE NEWS.rst README.rst TODO.rst \ + doc/index.html \ + ${dest_doc} + foreach rst [glob ${worksrcpath}/doc/*.rst] { + xinstall -m 644 ${rst} ${dest_doc} + } + # install sample data and scripts. + xinstall -d ${destroot}${prefix}/share/examples + set dest_ex ${destroot}${prefix}/share/examples/${name} + copy ${worksrcpath}/samples ${dest_ex} + foreach f {Makefile Makefile.am Makefile.in} { + delete ${dest_ex}/${f} + } + foreach sh [glob ${dest_ex}/*/*.sh ${dest_ex}/*/*/*.sh ${dest_ex}/*/*/*/*.sh] { + reinplace "s|@PREFIX@|${prefix}|g" ${sh} + } +} + +# Note: +# To run kftt sample scripts in ${prefix}/share/examples/${name}/kftt.30k, +# please install the following ports: mecab, cabocha, stanford-parser. + +livecheck.type regex +livecheck.url [lindex ${master_sites} 0] +livecheck.regex ${name}-(\[0-9.\]+)\\.tar Property changes on: trunk/dports/textproc/cicada/Portfile ___________________________________________________________________ Added: svn:keywords + Id Added: svn:eol-style + native Added: trunk/dports/textproc/cicada/files/patch-configure.diff =================================================================== --- trunk/dports/textproc/cicada/files/patch-configure.diff (rev 0) +++ trunk/dports/textproc/cicada/files/patch-configure.diff 2013-10-29 15:14:23 UTC (rev 112694) @@ -0,0 +1,20 @@ +--- configure.orig 2013-10-26 06:39:46.000000000 +0900 ++++ configure 2013-10-26 18:58:40.000000000 +0900 +@@ -16576,7 +16576,7 @@ + + # Check for binaries... + # Extract the first word of "rst2man.py", so it can be a program name with args. +-set dummy rst2man.py; ac_word=$2 ++set dummy rst2man-2.7.py; ac_word=$2 + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 + $as_echo_n "checking for $ac_word... " >&6; } + if ${ac_cv_path_RST2MAN+:} false; then : +@@ -16626,7 +16626,7 @@ + + + # Extract the first word of "rst2html.py", so it can be a program name with args. +-set dummy rst2html.py; ac_word=$2 ++set dummy rst2html-2.7.py; ac_word=$2 + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 + $as_echo_n "checking for $ac_word... " >&6; } + if ${ac_cv_path_RST2HTML+:} false; then : Added: trunk/dports/textproc/cicada/files/patch-samples.diff =================================================================== --- trunk/dports/textproc/cicada/files/patch-samples.diff (rev 0) +++ trunk/dports/textproc/cicada/files/patch-samples.diff 2013-10-29 15:14:23 UTC (rev 112694) @@ -0,0 +1,581 @@ +--- samples/kftt.30k/alignment/alignment.sh.orig 2013-07-30 14:14:40.000000000 +0900 ++++ samples/kftt.30k/alignment/alignment.sh 2013-10-27 18:22:32.000000000 +0900 +@@ -1,11 +1,11 @@ + #!/bin/sh + +-cicada=../../.. ++bindir=@PREFIX@/bin + + ## We perform symmetized posterior constrained training, and + ## perform smoothing by naive Bayes. + +-exec ${cicada}/scripts/cicada-alignment.py \ ++exec ${bindir}/cicada-alignment.py \ + --f ../data/train.ja.bz2 \ + --e ../data/train.en.bz2 \ + --symmetric \ +--- samples/kftt.30k/data/preprocess.sh.orig 2013-07-29 12:46:08.000000000 +0900 ++++ samples/kftt.30k/data/preprocess.sh 2013-10-27 18:22:09.000000000 +0900 +@@ -1,6 +1,6 @@ + #!/bin/sh + +-cicada=../../.. ++bindir=@PREFIX@/bin + + ### Here, we will generate reference data suitable for cicada + ### The format is simply: +@@ -13,5 +13,5 @@ + ### so that multiple reference translations are merged into a single file. + + for data in tune dev; do +- $cicada/progs/cicada_filter_refset $data.en --output $data.en.ref ++ ${bindir}/cicada_filter_refset $data.en --output $data.en.ref + done +--- samples/kftt.30k/ngram/expgram.sh.orig 2013-07-30 14:14:34.000000000 +0900 ++++ samples/kftt.30k/ngram/expgram.sh 2013-10-27 18:23:31.000000000 +0900 +@@ -1,17 +1,12 @@ + #!/bin/sh + +-expgram= +- +-if test "$expgram" = ""; then +- echo "where is your expgram?" +- exit 1 +-fi ++bindir=@PREFIX@/bin + + ### Following is a quick example of LM estimation on a small data set. + +-$expgram/progs/expgram_counts_extract --corpus ../data/train.en.bz2 --output ngram.5.en.counts --order 5 --threads 4 ++${bindir}/expgram_counts_extract --corpus ../data/train.en.bz2 --output ngram.5.en.counts --order 5 --threads 4 + +-$expgram/progs/expgram_counts_estimate --ngram ngram.5.en.counts --output ngram.5.en.lm --shard 4 ++${bindir}/expgram_counts_estimate --ngram ngram.5.en.counts --output ngram.5.en.lm --shard 4 + + ### A standard way is: +-# $expgram/scripts/expgram.py --corpus ../data/train.en --output ngram.5.en --threads 4 ++# ${bindir}/expgram.py --corpus ../data/train.en --output ngram.5.en --threads 4 +--- samples/kftt.30k/s2t/data/preprocess.sh.orig 2013-08-02 10:22:41.000000000 +0900 ++++ samples/kftt.30k/s2t/data/preprocess.sh 2013-10-27 19:52:56.000000000 +0900 +@@ -1,12 +1,8 @@ + #!/bin/sh + +-stanford= +-cicada=../../../.. +- +-if test "$stanford" = ""; then +- echo "where is your stanford parser?" +- exit 1 +-fi ++stanford=@PREFIX@/share/java/stanford-parser ++version=3.2.0 ++bindir=@PREFIX@/bin + + # Here, we use stanford-parser to parse training data in English + # cicada_filter_penntreebank to transform into hypergraph. +@@ -18,7 +14,7 @@ + bzcat ../../data/train.en.bz2 | \ + java \ + -mx12g \ +- -cp $stanford/stanford-parser.jar:$stanford/stanford-parser-3.2.0-models.jar \ ++ -cp $stanford/stanford-parser.jar:$stanford/stanford-parser-${version}-models.jar \ + -tLPP edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams \ + -tokenized -sentences newline \ + -escaper edu.stanford.nlp.process.PTBEscapingProcessor \ +@@ -29,10 +25,10 @@ + -outputFormatOptions includePunctuationDependencies \ + edu/stanford/nlp/models/lexparser/englishFactored.ser.gz \ + - | \ +-$cicada/progs/cicada_filter_penntreebank \ ++${bindir}/cicada_filter_penntreebank \ + --map ../../data/train.en.bz2 \ + --normalize | \ +-$cicada/progs/cicada \ ++${bindir}/cicada \ + --input-forest \ + --threads 8 \ + --operation binarize:direction=left,order=2 \ +--- samples/kftt.30k/s2t/model/extract.sh.orig 2013-07-31 09:19:40.000000000 +0900 ++++ samples/kftt.30k/s2t/model/extract.sh 2013-10-27 19:53:42.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-extract.py \ ++exec $bindir/cicada-extract.py \ + --f ../../data/train.ja.bz2 \ + --e ../../data/train.en.bz2 \ + --a ../../alignment/model/aligned.posterior-itg \ +--- samples/kftt.30k/s2t/model/index.sh.orig 2013-07-31 09:19:37.000000000 +0900 ++++ samples/kftt.30k/s2t/model/index.sh 2013-10-27 19:54:07.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-index.py \ ++exec $bindir/cicada-index.py \ + --model-dir . \ + \ + --ghkm \ +--- samples/kftt.30k/s2t/test/test.sh.orig 2013-08-02 10:23:48.000000000 +0900 ++++ samples/kftt.30k/s2t/test/test.sh 2013-10-27 19:55:42.000000000 +0900 +@@ -1,16 +1,11 @@ + #!/bin/sh + +-cicada=../../../.. +-weights= +- +-if test "$weights" = ""; then +- echo "where is your weights? (for instance ../tune/learn.10.weights)" +- exit 1 +-fi ++bindir=@PREFIX@/bin ++weights=../tune/learn.10.weights + + ### generate config file + +-$cicada/progs/cicada_filter_config \ ++$bindir/cicada_filter_config \ + --input ../tune/cicada.config \ + --output cicada.config \ + --weights "weights=$weights" \ +@@ -18,9 +13,9 @@ + --file "file=-" + + ### perform translation +-$cicada/progs/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en ++$bindir/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en + + ### evaluation +-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref ++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref + + +--- samples/kftt.30k/s2t/tune/config.sh.orig 2013-08-02 10:24:29.000000000 +0900 ++++ samples/kftt.30k/s2t/tune/config.sh 2013-10-27 19:56:16.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-config.py \ ++exec $bindir/cicada-config.py \ + --tree-grammar ../model/ghkm-index \ + --max-span 10 \ + --goal '[ROOT]' \ +--- samples/kftt.30k/s2t/tune/tune.sh.orig 2013-09-19 17:38:55.000000000 +0900 ++++ samples/kftt.30k/s2t/tune/tune.sh 2013-10-27 19:56:32.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../../ ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-learn.py \ ++exec $bindir/cicada-learn.py \ + --srcset ../../data/tune.ja \ + --refset ../../data/tune.en.ref \ + --config cicada.config \ +--- samples/kftt.30k/scfg/model/extract.sh.orig 2013-07-30 14:14:16.000000000 +0900 ++++ samples/kftt.30k/scfg/model/extract.sh 2013-10-27 19:57:17.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-extract.py \ ++exec $bindir/cicada-extract.py \ + --f ../../data/train.ja.bz2 \ + --e ../../data/train.en.bz2 \ + --a ../../alignment/model/aligned.posterior-itg \ +--- samples/kftt.30k/scfg/model/index.sh.orig 2013-07-30 14:14:21.000000000 +0900 ++++ samples/kftt.30k/scfg/model/index.sh 2013-10-27 19:57:11.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-index.py \ ++exec $bindir/cicada-index.py \ + --model-dir . \ + \ + --scfg \ +--- samples/kftt.30k/scfg/test/test.sh.orig 2013-07-30 13:00:31.000000000 +0900 ++++ samples/kftt.30k/scfg/test/test.sh 2013-10-27 19:57:55.000000000 +0900 +@@ -1,16 +1,11 @@ + #!/bin/sh + +-cicada=../../../.. +-weights= +- +-if test "$weights" = ""; then +- echo "where is your weights? (for instance ../tune/learn.10.weights)" +- exit 1 +-fi ++bindir=@PREFIX@/bin ++weights=../tune/learn.10.weights + + ### generate config file + +-$cicada/progs/cicada_filter_config \ ++$bindir/cicada_filter_config \ + --input ../tune/cicada.config \ + --output cicada.config \ + --weights "weights=$weights" \ +@@ -18,9 +13,9 @@ + --file "file=-" + + ### perform translation +-$cicada/progs/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en ++$bindir/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en + + ### evaluation +-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref ++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref + + +--- samples/kftt.30k/scfg/tune/config.sh.orig 2013-07-29 13:01:44.000000000 +0900 ++++ samples/kftt.30k/scfg/tune/config.sh 2013-10-27 19:58:21.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-config.py \ ++exec $bindir/cicada-config.py \ + --grammar ../model/scfg-index \ + --max-span 15 \ + --straight \ +--- samples/kftt.30k/scfg/tune/tune.sh.orig 2013-09-19 17:38:30.000000000 +0900 ++++ samples/kftt.30k/scfg/tune/tune.sh 2013-10-27 19:58:32.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-learn.py \ ++exec $bindir/cicada-learn.py \ + --srcset ../../data/tune.ja \ + --refset ../../data/tune.en.ref \ + --config cicada.config \ +--- samples/kftt.30k/t2s/data/preprocess.sh.orig 2013-07-30 15:15:17.000000000 +0900 ++++ samples/kftt.30k/t2s/data/preprocess.sh 2013-10-27 20:00:35.000000000 +0900 +@@ -4,20 +4,9 @@ + # Note that the Japanese side was segmented by mecab (https://code.google.com/p/mecab/). + # Thus, it is a natural choice to use cabocha (https://code.google.com/p/cabocha/). + +-cicada=../../../.. +-mecab= +-cabocha= +- +-if test "$mecab" = ""; then +- echo "where is your mecab?" +- exit 1 +-fi +- +-if test "$cabocha" = ""; then +- echo "where is your cabocha?" +- exit 1 +-fi +- ++bindir=@PREFIX@/bin ++mecab=$bindir/mecab ++cabocha=$bindir/cabocha + + for data in train dev tune; do + +@@ -34,12 +23,12 @@ + awk '{for (i=1;i<=NF;++i) {printf "%s\t*\n", $i } print "EOS";}' | \ + $mecab -p | \ + $cabocha -f1 -I 1 | \ +- $cicada/progs/cicada_filter_dependency \ ++ $bindir/cicada_filter_dependency \ + --cabocha \ + --func \ + --forest \ + --head | \ +- $cicada/progs/cicada \ ++ $bindir/cicada \ + --input-forest \ + --threads 4 \ + --operation binarize:direction=cyk,order=1 \ +--- samples/kftt.30k/t2s/model/extract.sh.orig 2013-07-30 15:26:16.000000000 +0900 ++++ samples/kftt.30k/t2s/model/extract.sh 2013-10-27 20:01:00.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-extract.py \ ++exec $bindir/cicada-extract.py \ + --f ../../data/train.ja.bz2 \ + --e ../../data/train.en.bz2 \ + --a ../../alignment/model/aligned.posterior-itg \ +--- samples/kftt.30k/t2s/model/index.sh.orig 2013-07-30 14:14:12.000000000 +0900 ++++ samples/kftt.30k/t2s/model/index.sh 2013-10-27 20:01:08.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-index.py \ ++exec $bindir/cicada-index.py \ + --root-dir . \ + --model-dir . \ + \ +--- samples/kftt.30k/t2s/test/test.sh.orig 2013-07-30 13:00:41.000000000 +0900 ++++ samples/kftt.30k/t2s/test/test.sh 2013-10-27 20:01:41.000000000 +0900 +@@ -1,16 +1,11 @@ + #!/bin/sh + +-cicada=../../../.. +-weights= +- +-if test "$weights" = ""; then +- echo "where is your weights? (for instance ../tune/learn.10.weights)" +- exit 1 +-fi ++bindir=@PREFIX@/bin ++weights=../tune/learn.10.weights + + ### generate config file + +-$cicada/progs/cicada_filter_config \ ++$bindir/cicada_filter_config \ + --input ../tune/cicada.config \ + --output cicada.config \ + --weights "weights=$weights" \ +@@ -18,9 +13,9 @@ + --file "file=-" + + ### perform translation +-$cicada/progs/cicada --config cicada.config --threads 4 --debug --input ../data/dev.forest.ja.gz > dev.ja-en ++$bindir/cicada --config cicada.config --threads 4 --debug --input ../data/dev.forest.ja.gz > dev.ja-en + + ### evaluation +-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref ++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref + + +--- samples/kftt.30k/t2s/tune/config.sh.orig 2013-07-29 13:52:34.000000000 +0900 ++++ samples/kftt.30k/t2s/tune/config.sh 2013-10-27 20:02:02.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-config.py \ ++exec $bindir/cicada-config.py \ + --tree-grammar ../model/ghkm-index \ + --goal '[x]' \ + --glue '[x]' \ +--- samples/kftt.30k/t2s/tune/tune.sh.orig 2013-09-19 17:38:49.000000000 +0900 ++++ samples/kftt.30k/t2s/tune/tune.sh 2013-10-27 20:02:19.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-learn.py \ ++exec $bindir/cicada-learn.py \ + --srcset ../data/tune.forest.ja.gz \ + --refset ../../data/tune.en.ref \ + --config cicada.config \ +--- samples/kftt.30k/t2t/model/extract.sh.orig 2013-08-01 08:37:16.000000000 +0900 ++++ samples/kftt.30k/t2t/model/extract.sh 2013-10-27 20:02:40.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-extract.py \ ++exec $bindir/cicada-extract.py \ + --f ../../data/train.ja \ + --e ../../data/train.en \ + --a ../../alignment/model/aligned.posterior-itg \ +--- samples/kftt.30k/t2t/model/index.sh.orig 2013-07-31 09:36:12.000000000 +0900 ++++ samples/kftt.30k/t2t/model/index.sh 2013-10-27 20:02:49.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-index.py \ ++exec $bindir/cicada-index.py \ + --model-dir . \ + \ + --tree \ +--- samples/kftt.30k/t2t/test/test.sh.orig 2013-07-31 09:35:38.000000000 +0900 ++++ samples/kftt.30k/t2t/test/test.sh 2013-10-27 20:03:19.000000000 +0900 +@@ -1,16 +1,11 @@ + #!/bin/sh + +-cicada=../../../.. +-weights= +- +-if test "$weights" = ""; then +- echo "where is your weights? (for instance ../tune/learn.10.weights)" +- exit 1 +-fi ++bindir=@PREFIX@/bin ++weights=../tune/learn.10.weights + + ### generate config file + +-$cicada/progs/cicada_filter_config \ ++$bindir/cicada_filter_config \ + --input ../tune/cicada.config \ + --output cicada.config \ + --weights "weights=$weights" \ +@@ -18,9 +13,9 @@ + --file "file=-" + + ### perform translation +-$cicada/progs/cicada --config cicada.config --threads 4 --debug --input ../../t2s/data/dev.forest.ja.gz > dev.ja-en ++$bindir/cicada --config cicada.config --threads 4 --debug --input ../../t2s/data/dev.forest.ja.gz > dev.ja-en + + ### evaluation +-$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref ++$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref + + +--- samples/kftt.30k/t2t/tune/config.sh.orig 2013-07-31 09:52:14.000000000 +0900 ++++ samples/kftt.30k/t2t/tune/config.sh 2013-10-27 20:03:41.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-config.py \ ++exec $bindir/cicada-config.py \ + --tree-grammar ../model/tree-index \ + --goal '[ROOT]' \ + --glue '[x]' \ +--- samples/kftt.30k/t2t/tune/tune.sh.orig 2013-09-19 17:38:41.000000000 +0900 ++++ samples/kftt.30k/t2t/tune/tune.sh 2013-10-27 20:03:55.000000000 +0900 +@@ -1,8 +1,8 @@ + #!/bin/sh + +-cicada=../../../.. ++bindir=@PREFIX@/bin + +-exec $cicada/scripts/cicada-learn.py \ ++exec $bindir/cicada-learn.py \ + --srcset ../../t2s/data/tune.forest.ja.gz \ + --refset ../../data/tune.en.ref \ + --config cicada.config \ +--- samples/s2t/sample.sh.orig 2013-08-03 10:37:31.000000000 +0900 ++++ samples/s2t/sample.sh 2013-10-27 20:07:14.000000000 +0900 +@@ -1,6 +1,7 @@ + #!/bin/sh + +-cicada=../.. ++bindir=@PREFIX@/bin ++exdir=@PREFIX@/share/examples/cicada + + ## + ## +@@ -18,16 +19,16 @@ + ## apply features + ## output kbests + +-exec $cicada/progs/cicada \ +- --input $cicada/samples/scfg/input.txt \ ++exec $bindir/cicada \ ++ --input $exdir/scfg/input.txt \ + --goal '[ROOT]' \ +- --tree-grammar $cicada/samples/s2t/grammar.bin:max-span=10 \ ++ --tree-grammar $exdir/s2t/grammar.bin:max-span=10 \ + --tree-grammar "glue:goal-source=[ROOT],goal-target=[ROOT],non-terminal-source=[x],non-terminal-target=[x],straight=true,invert=false" \ + --grammar "insertion:non-terminal=[x]" \ +- --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \ ++ --feature-function "ngram:file=$exdir/scfg/ngram.bin" \ + --feature-function word-penalty \ + --feature-function rule-penalty \ + --feature-function glue-tree-penalty \ + --operation compose-tree-cky \ +- --operation apply:prune=true,size=100,weights=$cicada/samples/s2t/weights \ +- --operation output:file=-,kbest=10,weights=$cicada/samples/s2t/weights ++ --operation apply:prune=true,size=100,weights=$exdir/s2t/weights \ ++ --operation output:file=-,kbest=10,weights=$exdir/s2t/weights +--- samples/scfg/sample.sh.orig 2013-07-30 14:00:13.000000000 +0900 ++++ samples/scfg/sample.sh 2013-10-27 20:07:02.000000000 +0900 +@@ -1,15 +1,16 @@ + #!/bin/sh + +-cicada=../.. ++bindir=@PREFIX@/bin ++exdir=@PREFIX@/share/examples/cicada + +-exec $cicada/progs/cicada \ +- --input $cicada/samples/scfg/input.txt \ +- --grammar $cicada/samples/scfg/grammar.bin \ ++exec $bindir/cicada \ ++ --input $exdir/scfg/input.txt \ ++ --grammar $exdir/scfg/grammar.bin \ + --grammar "glue:straight=true,inverted=false,non-terminal=[x],goal=[s]" \ + --grammar "insertion:non-terminal=[x]" \ +- --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \ ++ --feature-function "ngram:file=$exdir/scfg/ngram.bin" \ + --feature-function word-penalty \ + --feature-function rule-penalty \ + --operation compose-cky \ +- --operation apply:prune=true,size=100,weights=$cicada/samples/scfg/weights \ +- --operation output:file=-,kbest=10,weights=$cicada/samples/scfg/weights ++ --operation apply:prune=true,size=100,weights=$exdir/scfg/weights \ ++ --operation output:file=-,kbest=10,weights=$exdir/scfg/weights +--- samples/t2s/sample.sh.orig 2013-08-19 16:18:37.000000000 +0900 ++++ samples/t2s/sample.sh 2013-10-27 20:08:28.000000000 +0900 +@@ -1,6 +1,7 @@ + #!/bin/sh + +-cicada=../.. ++bindir=@PREFIX@/bin ++exdir=@PREFIX@/share/examples/cicada + + ## + ## 1. pentreebank is converted into hypergraph +@@ -20,20 +21,20 @@ + ## apply features + ## output kbests + +-$cicada/progs/cicada_filter_penntreebank \ +- --input $cicada/samples/t2s/input.txt \ ++$bindir/cicada_filter_penntreebank \ ++ --input $exdir/t2s/input.txt \ + --normalize \ + | \ +-$cicada/progs/cicada \ ++$bindir/cicada \ + --input - \ + --input-forest \ + --goal '[ROOT]' \ +- --tree-grammar $cicada/samples/t2s/grammar.bin \ ++ --tree-grammar $exdir/t2s/grammar.bin \ + --tree-grammar fallback \ +- --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \ ++ --feature-function "ngram:file=$exdir/scfg/ngram.bin" \ + --feature-function word-penalty \ + --feature-function rule-penalty \ + --operation binarize:direction=cyk,order=1 \ + --operation compose-tree \ +- --operation apply:prune=true,size=1000,weights=$cicada/samples/t2s/weights \ +- --operation output:file=-,kbest=10,weights=$cicada/samples/t2s/weights ++ --operation apply:prune=true,size=1000,weights=$exdir/t2s/weights \ ++ --operation output:file=-,kbest=10,weights=$exdir/t2s/weights
participants (1)
-
hum@macports.org