[83880] trunk/dports/java/apache-solr

Mon Sep 12 07:50:29 PDT 2011

Revision: 83880
          http://trac.macports.org/changeset/83880
Author:   hum at macports.org
Date:     2011-09-12 07:50:27 -0700 (Mon, 12 Sep 2011)
Log Message:
-----------
apache-solr: enhance the Portfile; add ja variant for Japanese texts; see #31155.

Modified Paths:
--------------
    trunk/dports/java/apache-solr/Portfile
    trunk/dports/java/apache-solr/files/solr.in

Added Paths:
-----------
    trunk/dports/java/apache-solr/files/patch-solr-ja.diff
    trunk/dports/java/apache-solr/files/solr-ja.xml
    trunk/dports/java/apache-solr/files/velocity.properties

Modified: trunk/dports/java/apache-solr/Portfile
===================================================================

--- trunk/dports/java/apache-solr/Portfile	2011-09-12 14:46:02 UTC (rev 83879)
+++ trunk/dports/java/apache-solr/Portfile	2011-09-12 14:50:27 UTC (rev 83880)
@@ -5,48 +5,89 @@
 
 name                apache-solr
 version             3.3.0
-categories          java search
+revision            1
+categories          java textproc
 platforms           darwin
-maintainers         gmail.com:haya10.ito openmaintainer
+maintainers         gmail.com:haya10.ito hum openmaintainer
 license             Apache-2.0
 
-description         The open source enterprise search platform
-
+homepage            http://lucene.apache.org/solr/
+description         An open source enterprise search platform.
 long_description    Solr is the popular, blazing fast open source enterprise \
                     search platform from the Apache Lucene project.
 
-homepage            http://lucene.apache.org/solr/
 master_sites        apache:lucene/solr/${version}/
-
+extract.suffix      .tgz
 checksums           sha1    e6017419051e5eda9cb222e3b17006eeed682db9 \
                     rmd160  ef2c2e58d44a9d8786c2fc5b2f19f59bf9e1f70c
 
-extract.suffix      .tgz
+# set the destination paths.
+set java_basepath   ${prefix}/share/java
+set solr_destpath   ${java_basepath}/${distname}
+set solr_path       ${solr_destpath}/example
+set solr_home       ${solr_path}/solr
 
+post-patch {
+    # expand relative paths into absolute ones.
+    reinplace "s|\"\.\./\.\./|\"${solr_destpath}/|g" \
+        ${worksrcpath}/example/solr/conf/solrconfig.xml
+}
+
 use_configure       no
 supported_archs     noarch
 
 build {}
 
-set java_home       /System/Library/Frameworks/JavaVM.framework/Versions/1.6/Home
+destroot {
+    # copy the distribution.
+    xinstall -d         ${destroot}${java_basepath}
+    copy ${worksrcpath} ${destroot}${java_basepath}
+    # install the solr script.
+    xinstall -m 755 ${filespath}/solr.in     ${destroot}${prefix}/bin/solr
+    reinplace "s|@solr_path@|${solr_path}|g" ${destroot}${prefix}/bin/solr
+    reinplace "s|@solr_home@|${solr_home}|g" ${destroot}${prefix}/bin/solr
+}
 
-pre-configure {
-    if {![file exists ${java_home}]} {
-        ui_error "Java 1.6 is required, but not located at ${java_home}"
-        return -code error "Java 1.6 missing"
+notes "
+To try apache-solr, run 'sudo solr' and open http://localhost:8983/solr/browse.
+To store sample documents, run 'cd ${solr_path}/exampledocs && ./post.sh *.xml'."
+
+# solr home for Japanese configurations.
+set solr_home_ja    ${solr_home}-ja
+
+variant ja description {Add Japanese settings with lucene-gosen} {
+    depends_run-append  port:lucene-gosen
+    # create Japanese solr home 'solr-ja'.
+    post-extract {
+        copy ${worksrcpath}/example/solr ${worksrcpath}/example/solr-ja
     }
-}
+    patchfiles-append   patch-solr-ja.diff
+    post-patch {
+        # expand relative paths into absolute ones.
+        reinplace "s|\"\.\./\.\./|\"${solr_destpath}/|g" \
+            ${worksrcpath}/example/solr-ja/conf/solrconfig.xml
+    }
+    post-destroot {
+        # substitute the lucene-gosen path into the config files.
+        foreach config {schema.xml solrconfig.xml} {
+            reinplace "s|@gosen_path@|${java_basepath}/lucene-gosen|g" \
+                ${destroot}${solr_home_ja}/conf/${config}
+        }
+        # install a property file for UTF-8 encoding.
+        copy ${filespath}/velocity.properties ${destroot}${solr_home_ja}/conf
+        # copy a sample Japanese doc for testing.
+        copy ${filespath}/solr-ja.xml ${destroot}${solr_path}/exampledocs
+        # install the solr-ja script.
+        xinstall -m 755 ${filespath}/solr.in        ${destroot}${prefix}/bin/solr-ja
+        reinplace "s|@solr_path@|${solr_path}|g"    ${destroot}${prefix}/bin/solr-ja
+        reinplace "s|@solr_home@|${solr_home_ja}|g" ${destroot}${prefix}/bin/solr-ja
+    }
+    notes-append "
 
-set target          ${prefix}/share/java/${name}-${version}
-
-destroot {
-    xinstall -d ${destroot}[file dirname ${target}]
-    copy ${worksrcpath}/example ${destroot}${target}
-    xinstall -m 755 ${filespath}/solr.in ${destroot}${prefix}/bin/solr
-    reinplace "s|@TARGET@|${target}|g" ${destroot}${prefix}/bin/solr
-    reinplace "s|@JAVA_HOME@|${java_home}|g" ${destroot}${prefix}/bin/solr
+For Japanese texts, please run 'sudo solr-ja' instead of 'sudo solr'.
+See ${solr_home_ja}."
 }
 
 livecheck.type      regex
-livecheck.url       http://mirrors.ibiblio.org/pub/mirrors/apache/lucene/solr/
-livecheck.regex     {href="([0-9.]+)"}
+livecheck.url       http://www.apache.org/dist/lucene/solr/
+livecheck.regex     (\[0-9.\]+)\/

Added: trunk/dports/java/apache-solr/files/patch-solr-ja.diff
===================================================================
--- trunk/dports/java/apache-solr/files/patch-solr-ja.diff	                        (rev 0)
+++ trunk/dports/java/apache-solr/files/patch-solr-ja.diff	2011-09-12 14:50:27 UTC (rev 83880)
@@ -0,0 +1,141 @@
+--- example/solr-ja/conf/schema.xml.orig	2011-09-03 23:57:07.000000000 +0900
++++ example/solr-ja/conf/schema.xml	2011-09-05 23:56:02.000000000 +0900
+@@ -467,6 +467,92 @@
+     See http://wiki.apache.org/solr/SpatialSearch
+    -->
+     <fieldtype name="geohash" class="solr.GeoHashField"/>
++
++    <!-- configuration for japanese text, using a morphological analyzer
++      Most possibilities for customization are specified here in the schema.
++
++      Note: you can set the default query operator to be OR, AND, or PHRASE:
++       OR: Use these defaults (autoGeneratePhraseQueries="false", <solrQueryParser defaultOperator="OR"/>
++           In this case Solr works like it does with the English language. The default query is OR,
++           but documents that contain more of the query terms get a special boost. You can probably
++           use a less aggressive stopwords/stoptags in this case, and it's probably a good idea to use
++           enablePositionIncrements=true, so that if a user puts a query in quotes, they get a much more
++           exact phrase query.
++       AND: Set autoGeneratePhraseQueries=false, but set <solrQueryParser defaultOperator="AND"/> in
++           your schema.xml. Note if you do this, you should use a more aggressive stopwords/stoptags
++           list (at least at query-time), otherwise a document might not match simply because it does
++           not contain a prefix or particle. As in the above case, it's probably a good idea to use
++           enablePositionIncrements=true for explicit phrase queries from the user.
++       PHRASE: Set autoGeneratePhraseQueries=true. If you do this, you should probably use both a very
++           aggressive stopwords list, and you should probably also set enablePositionIncrements=false
++           everywhere.  Otherwise, even documents that contain the query's phrase in exact order will
++           not match because of slightly different grammatical structure.
++    -->
++    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
++      <analyzer>
++        <!-- map characters before the tokenizer:
++             Optionally, instead of the JapaneseWidthFilterFactory, you can choose to do the width
++             mappings before the text is sent to the tokenizer.
++        <charFilter class="solr.MappingCharFilterFactory" mapping="@gosen_path@/conf/mapping-japanese.txt"/>
++        -->
++
++        <!-- morphological tokenizer: sets the SURFACE form as the token, but also sets these attributes:
++             BasicFormAttribute, ConjugationAttribute, PartOfSpeechAttribute, PronunciationsAttribute,
++             ReadingsAttribute, and SentenceStartAttribute.
++        -->
++        <tokenizer class="solr.JapaneseTokenizerFactory"/>
++
++        <!-- normalizes CJK width differences:
++             1. Folds fullwidth ASCII variants into the equivalent basic latin
++             2. Folds halfwidth Katakana variants into the equivalent kana
++
++             Note: alternatively you can use a MappingCharFilter before the tokenizer for this, but please note
++             that mapping characters can change how Sen tokenizes text.
++        -->
++        <filter class="solr.JapaneseWidthFilterFactory"/>
++
++        <!-- the punctuation filter removes all-punctuation tokens based on Unicode properties.
++             punctuation tokens are tagged as "unknown", and it's better to do this than to remove
++             tokens with an unknown pos (as they might be valuable!). Because this punctuation 
++             usually signifies a phrase or sentence boundary, enablePositionIncrements can be
++             used to prevent phrase queries from matching across natural phrase/sentence boundaries -->
++        <filter class="solr.JapanesePunctuationFilterFactory" enablePositionIncrements="true"/>
++
++        <!-- this is a part-of-speech based stopfilter, it removes any tokens that have a certain part
++             of speech. you can set enablePositionIncrements for tighter phrase queries -->
++        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="@gosen_path@/conf/stoptags_ja.txt" enablePositionIncrements="true"/>
++        
++        <!-- a standard stopfilter, to specify specific stopwords. -->
++        <filter class="solr.StopFilterFactory" ignoreCase="true" words="@gosen_path@/conf/stopwords_ja.txt" enablePositionIncrements="true"/>
++
++        <!-- alternatively, instead of using a part-of-speech based stopfilter, you can use a 
++             part-of-speech based keepfilter: specifying only the parts of speech you wish to index.
++             anything else will be removed. HOWEVER: this could be a little dangerous, because if
++             we upgrade ipadic they might add some new tags (the tags are fairly specific), and suddenly
++             things that you were indexing before are no longer being indexed. It's recommended to
++             use the part-of-speech based stopfilter above if at all possible, for safety.
++        <filter class="solr.JapanesePartOfSpeechKeepFilterFactory" tags="@gosen_path@/conf/keeptags_ja.txt" enablePositionIncrements="true"/>
++        -->
++
++        <!-- before any stemming/lemmatization, you can protect words from being modified by specifying
++             a protwords.txt.
++        <filter class="solr.KeywordMarkerFilterFactory" protected="@gosen_path@/conf/protwords_ja.txt" ignoreCase="false"/>
++
++             or you can also supply a custom stem dictionary for inflected forms (tab separated). No
++             further stemming/lemmatization will modify this.
++        <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
++        -->
++
++        <!-- the basic form filter converts inflected verbs and adjectives to their dictionary citation form. -->
++        <filter class="solr.JapaneseBasicFormFilterFactory"/>
++
++        <!-- this filter heuristically normalizes katakana forms with a final prolonged sound mark -->
++        <filter class="solr.JapaneseKatakanaStemFilterFactory"/>
++
++        <!-- you might want to lowercase for any english text content you have -->
++        <filter class="solr.LowerCaseFilterFactory"/>
++      </analyzer>
++    </fieldType>
+  </types>
+ 
+ 
+@@ -533,7 +619,7 @@
+ 
+    <!-- catchall field, containing all other searchable text fields (implemented
+         via copyField further on in this schema  -->
+-   <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
++   <field name="text" type="text_ja" indexed="true" stored="false" multiValued="true"/>
+ 
+    <!-- catchall text field that indexes tokens both normally and in reverse for efficient
+         leading wildcard queries. -->
+--- example/solr-ja/conf/solrconfig.xml.orig	2011-09-04 00:16:51.000000000 +0900
++++ example/solr-ja/conf/solrconfig.xml	2011-09-06 00:05:40.000000000 +0900
+@@ -80,6 +80,7 @@
+        is found that matches, it will be ignored
+     -->
+   <lib dir="../../contrib/clustering/lib/" />
++  <lib dir="@gosen_path@/lib/" />
+   <lib dir="/total/crap/dir/ignored" /> 
+   <!-- an exact path can be used to specify a specific file.  This
+        will cause a serious error to be logged if it can't be loaded.
+@@ -780,6 +781,7 @@
+        <str name="wt">velocity</str>
+ 
+        <str name="v.template">browse</str>
++       <str name="v.properties">velocity.properties</str>
+        <str name="v.layout">layout</str>
+        <str name="title">Solritas</str>
+ 
+--- example/solr-ja/conf/velocity/head.vm.orig	2011-09-03 23:57:07.000000000 +0900
++++ example/solr-ja/conf/velocity/head.vm	2011-09-05 23:56:41.000000000 +0900
+@@ -32,7 +32,7 @@
+            extraParams:{
+              'terms.prefix': function() { return $("\#q").val();},
+              'terms.sort': 'count',
+-             'terms.fl': 'name',
++             'terms.fl': 'text',
+              'wt': 'velocity',
+              'v.template': 'suggest'
+            }
+--- example/solr-ja/conf/velocity/suggest.vm.orig	2011-09-03 23:57:07.000000000 +0900
++++ example/solr-ja/conf/velocity/suggest.vm	2011-09-05 23:57:16.000000000 +0900
+@@ -1,3 +1,3 @@
+-#foreach($t in $response.response.terms.name)
++#foreach($t in $response.response.terms.text)
+ $t.key
+ #end
+\ No newline at end of file

Added: trunk/dports/java/apache-solr/files/solr-ja.xml
===================================================================
--- trunk/dports/java/apache-solr/files/solr-ja.xml	                        (rev 0)
+++ trunk/dports/java/apache-solr/files/solr-ja.xml	2011-09-12 14:50:27 UTC (rev 83880)
@@ -0,0 +1,14 @@
+<add>
+<doc>
+  <field name="id">SOLR2000</field>
+  <field name="name">Solr(ソーラ), オープンソースの全文検索システム</field>
+  <field name="manu">Apacheソフトウェア財団</field>
+  <field name="cat">ソフトウェア</field>
+  <field name="cat">検索</field>
+  <field name="features">Luceneを使った先進的な全文検索機能</field>
+  <field name="price">0</field>
+  <field name="popularity">10</field>
+  <field name="inStock">true</field>
+  <field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
+</doc>
+</add>

Modified: trunk/dports/java/apache-solr/files/solr.in
===================================================================
--- trunk/dports/java/apache-solr/files/solr.in	2011-09-12 14:46:02 UTC (rev 83879)
+++ trunk/dports/java/apache-solr/files/solr.in	2011-09-12 14:50:27 UTC (rev 83880)
@@ -2,16 +2,22 @@
 
 CMDNAME=`basename "$0"`
 
-export JAVA_HOME=@JAVA_HOME@
-SOLR=@TARGET@
-
 usage() {
-    echo "Usage: ${CMDNAME} path/to/config/dir" 1>&2
+    echo "Usage: ${CMDNAME} [-h|--help] [solr_home]" 1>&2
     exit 1
 }
 
-if [ -z "$1" ]; then
-    usage
-else
-    cd ${SOLR} && exec ${JAVA_HOME}/bin/java -Dsolr.solr.home="$1" -jar ${SOLR}/start.jar
-fi
+SOLR_PATH=@solr_path@
+SOLR_HOME=@solr_home@
+
+while test -n "$1"; do
+    case "$1" in
+        -h|--help) usage ;;
+        *)         SOLR_HOME="$1";    shift ;;
+    esac
+done
+
+java -Dsolr.clustering.enabled=true \
+     -Dsolr.solr.home="${SOLR_HOME}" \
+     -Djetty.home="${SOLR_PATH}" \
+     -jar ${SOLR_PATH}/start.jar

Added: trunk/dports/java/apache-solr/files/velocity.properties
===================================================================
--- trunk/dports/java/apache-solr/files/velocity.properties	                        (rev 0)
+++ trunk/dports/java/apache-solr/files/velocity.properties	2011-09-12 14:50:27 UTC (rev 83880)
@@ -0,0 +1,2 @@
+input.encoding=UTF-8
+output.encoding=UTF-8
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macports-changes/attachments/20110912/f8c1e334/attachment.html>