Revision: 81543 http://trac.macports.org/changeset/81543 Author: hum@macports.org Date: 2011-08-01 06:55:15 -0700 (Mon, 01 Aug 2011) Log Message: ----------- added a new port for dualist version 0.1; #29984 Added Paths: ----------- trunk/dports/java/dualist/ trunk/dports/java/dualist/Portfile trunk/dports/java/dualist/files/ trunk/dports/java/dualist/files/Makefile trunk/dports/java/dualist/files/SimpleMecabPipe.java trunk/dports/java/dualist/files/dualist trunk/dports/java/dualist/files/dualist-mecab trunk/dports/java/dualist/files/patch-mecab.diff Added: trunk/dports/java/dualist/Portfile =================================================================== --- trunk/dports/java/dualist/Portfile (rev 0) +++ trunk/dports/java/dualist/Portfile 2011-08-01 13:55:15 UTC (rev 81543) @@ -0,0 +1,104 @@ +# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4 +# $Id$ + +PortSystem 1.0 + +name dualist +version 0.1 +categories java textproc +platforms darwin +maintainers hum openmaintainer +license Apache-2.0 + +description An interactive machine learning system for building classifiers quickly. + +long_description DUALIST is an interactive machine learning system for building classifiers \ + quickly. It does so by asking \"questions\" of the user in the form of both data \ + instances (e.g., text documents) and features (e.g., words or phrases). It \ + utilizes active and semi-supervised learning to quickly train a multinomial \ + naive Bayes classifier for this setting. + +homepage http://code.google.com/p/dualist/ +master_sites googlecode:${name} + +checksums sha1 7a2fc1fd432c71e2575a1514a946bbf68652fdfa \ + rmd160 8b4ca3fcd3ec42eeda43235c246390f07a849998 + +depends_run port:play + +use_zip yes +worksrcdir ${name} + +set java_home /System/Library/Frameworks/JavaVM.framework/Versions/1.6/Home + +pre-configure { + if {![file exists ${java_home}]} { + ui_error "Java 1.6 is required, but not located at ${java_home}" + return -code error "Java 1.6 missing" + } +} + +use_configure no +universal_variant no + +build.cmd true + +set share_java_dir ${prefix}/share/java +set dualist_dir ${share_java_dir}/${worksrcdir} + +destroot { + # Copy the distribution. + xinstall -m 755 -d ${destroot}${share_java_dir} + copy ${worksrcpath} ${destroot}${share_java_dir} + + # Configure working directories. + xinstall -m 1777 -d ${destroot}${dualist_dir}/tmp + system "chmod 1777 ${destroot}${dualist_dir}/public/results" + destroot.keepdirs ${destroot}${dualist_dir}/tmp \ + ${destroot}${dualist_dir}/public/results + + # Install an extra script for this port. + xinstall -m 755 ${filespath}/dualist ${destroot}${prefix}/bin + reinplace "s|@prefix@|${prefix}|g" ${destroot}${prefix}/bin/dualist + reinplace "s|@java_home@|${java_home}|g" ${destroot}${prefix}/bin/dualist + + notes "To try DUALIST, run 'dualist' and open http://localhost:9000/." +} + +post-deactivate { + ui_msg "To revert the system after uninstalling the port," + ui_msg "delete the dualist directory:" + ui_msg " $ sudo rm -rf ${dualist_dir}" +} + +variant mecab description {Use MeCab to analyze Japanese texts} { + depends_lib-append port:mecab-java + patchfiles-append patch-mecab.diff + + post-extract { + xinstall -m 644 -W ${filespath} \ + Makefile \ + SimpleMecabPipe.java \ + ${worksrcpath} + system "ln -s ${prefix}/share/java/mecab.jar ${worksrcpath}/lib/" + } + + post-patch { + reinplace "s|@java_home@|${java_home}|g" ${worksrcpath}/Makefile + } + + build.cmd make + + post-destroot { + # Install an extra script for this variant. + xinstall -m 755 ${filespath}/dualist-mecab ${destroot}${prefix}/bin + reinplace "s|@prefix@|${prefix}|g" ${destroot}${prefix}/bin/dualist-mecab + reinplace "s|@java_home@|${java_home}|g" ${destroot}${prefix}/bin/dualist-mecab + + notes-append "For Japanese texts, run 'dualist-mecab' instead of 'dualist'." + } +} + +livecheck.type regex +livecheck.url http://code.google.com/p/dualist/downloads/list +livecheck.regex ${name}-(\[0-9.\]+)\.zip Property changes on: trunk/dports/java/dualist/Portfile ___________________________________________________________________ Added: svn:keywords + Id Added: svn:eol-style + native Added: trunk/dports/java/dualist/files/Makefile =================================================================== --- trunk/dports/java/dualist/files/Makefile (rev 0) +++ trunk/dports/java/dualist/files/Makefile 2011-08-01 13:55:15 UTC (rev 81543) @@ -0,0 +1,16 @@ +JAVA_HOME=@java_home@ + +JAVAC=$(JAVA_HOME)/bin/javac -encoding UTF-8 -cp lib/mallet.jar:lib/mecab.jar +JAR=$(JAVA_HOME)/bin/jar + +TARGETS=lib/dualist-ja.jar + +all : $(TARGETS) + +$(TARGETS) : SimpleMecabPipe.java + -mkdir -p classes + $(JAVAC) -d classes $< + $(JAR) cfv $@ -C classes . + +clean: + -rm -rf $(TARGETS) classes Added: trunk/dports/java/dualist/files/SimpleMecabPipe.java =================================================================== --- trunk/dports/java/dualist/files/SimpleMecabPipe.java (rev 0) +++ trunk/dports/java/dualist/files/SimpleMecabPipe.java 2011-08-01 13:55:15 UTC (rev 81543) @@ -0,0 +1,47 @@ +package dualist.ja; + +import cc.mallet.pipe.Pipe; +import cc.mallet.extract.StringSpan; +import cc.mallet.extract.StringTokenization; +import cc.mallet.types.Instance; +import cc.mallet.types.TokenSequence; + +import org.chasen.mecab.Tagger; +import org.chasen.mecab.Node; + +public class SimpleMecabPipe extends Pipe +{ + static { + try { + System.loadLibrary("mecab-java"); + } catch (UnsatisfiedLinkError e) { + System.err.println("ERROR: Failed to load mecab-java native code."); + System.err.println(e); + System.exit(1); + } + } + + public Instance pipe (Instance carrier) + { + CharSequence input = (CharSequence) carrier.getData(); + String string = input.toString(); + Tagger tagger = new Tagger(); + Node node = tagger.parseToNode(string); + int cursor = 0; + TokenSequence ts = new StringTokenization(input); + while (node != null) { + node = node.getNext(); + if (node == null) break; + String[] f = node.getFeature().split(","); + if (f[0].equals("名詞") && + !f[1].equals("数") && !f[1].equals("サ変接続") && !f[1].equals("接尾") || + f[0].equals("未知語")) { + String surface = node.getSurface(); + cursor = string.indexOf(surface, cursor); + ts.add (new StringSpan(input, cursor, cursor + surface.length())); + } + } + carrier.setData(ts); + return carrier; + } +} Added: trunk/dports/java/dualist/files/dualist =================================================================== --- trunk/dports/java/dualist/files/dualist (rev 0) +++ trunk/dports/java/dualist/files/dualist 2011-08-01 13:55:15 UTC (rev 81543) @@ -0,0 +1,9 @@ +#!/bin/sh +cmd="$1" +if [ -z $cmd ]; then + cmd=run +fi +shift +JAVA_HOME=@java_home@ +cd @prefix@/share/java/dualist +@prefix@/bin/play $cmd $@ Added: trunk/dports/java/dualist/files/dualist-mecab =================================================================== --- trunk/dports/java/dualist/files/dualist-mecab (rev 0) +++ trunk/dports/java/dualist/files/dualist-mecab 2011-08-01 13:55:15 UTC (rev 81543) @@ -0,0 +1,9 @@ +#!/bin/sh +cmd="$1" +if [ -z $cmd ]; then + cmd=run +fi +shift +JAVA_HOME=@java_home@ +cd @prefix@/share/java/dualist +@prefix@/bin/play $cmd -Ddualist.lang=ja -Djava.library.path=@prefix@/lib -Dfile.encoding=UTF-8 $@ Added: trunk/dports/java/dualist/files/patch-mecab.diff =================================================================== --- trunk/dports/java/dualist/files/patch-mecab.diff (rev 0) +++ trunk/dports/java/dualist/files/patch-mecab.diff 2011-08-01 13:55:15 UTC (rev 81543) @@ -0,0 +1,21 @@ +--- app/guts/pipes/DocumentPipe.java.orig 2011-05-30 22:31:39.000000000 +0900 ++++ app/guts/pipes/DocumentPipe.java 2011-06-30 22:46:50.000000000 +0900 +@@ -16,6 +16,8 @@ + import cc.mallet.types.Instance; + import cc.mallet.util.CharSequenceLexer; + ++import dualist.ja.SimpleMecabPipe; ++ + public class DocumentPipe extends Pipe { + + private Pipe myPipe = new SerialPipes(new Pipe[] { +@@ -27,6 +29,9 @@ + new CharSequenceReplace(Pattern.compile("&(.*?);"), ""), + new CharSequenceReplace(Pattern.compile("[0-9]+"), "00"), + new CharSequenceLowercase(), ++ (System.getProperty("dualist.lang") != null && ++ System.getProperty("dualist.lang").equals("ja")) ? ++ new SimpleMecabPipe() : + new CharSequence2TokenSequence(CharSequenceLexer.LEX_WORD_CLASSES), + new TokenSequenceRemoveStopwords(), + new TokenSequence2FeatureSequence(),
participants (1)
-
hum@macports.org