[91889] trunk/dports/java/apache-solr

Thu Apr 12 09:20:31 PDT 2012

Revision: 91889
          https://trac.macports.org/changeset/91889
Author:   hum at macports.org
Date:     2012-04-12 09:20:30 -0700 (Thu, 12 Apr 2012)
Log Message:
-----------
apache-solr: update to 3.6.0; remove the dependency on lucene-gosen in ja variant.

Modified Paths:
--------------
    trunk/dports/java/apache-solr/Portfile
    trunk/dports/java/apache-solr/files/patch-solr-ja.diff

Modified: trunk/dports/java/apache-solr/Portfile
===================================================================

--- trunk/dports/java/apache-solr/Portfile	2012-04-12 14:47:47 UTC (rev 91888)
+++ trunk/dports/java/apache-solr/Portfile	2012-04-12 16:20:30 UTC (rev 91889)
@@ -4,7 +4,7 @@
 PortSystem          1.0
 
 name                apache-solr
-version             3.5.0
+version             3.6.0
 categories          java textproc
 platforms           darwin
 maintainers         gmail.com:haya10.ito hum openmaintainer
@@ -17,8 +17,8 @@
 
 master_sites        apache:lucene/solr/${version}/
 extract.suffix      .tgz
-checksums           rmd160  365d4b27753375ea3a39b9d42c06b80dce474731 \
-                    sha256  804f3ba9d1296f81388605a79538b7362355693fbdd03b7b2dbf9a706bf1d1d0
+checksums           rmd160  f54aee9e0d0196e7e96408b40ae025fc69478229 \
+                    sha256  3acac4323ba3dbfa153d8ef01f156bab9b0eccf1b1f1f03e91b8b6739d3dc6c6
 
 # set the destination paths.
 set java_basepath   ${prefix}/share/java
@@ -54,8 +54,7 @@
 # solr home for Japanese configurations.
 set solr_home_ja    ${solr_home}-ja
 
-variant ja description {Add Japanese settings with lucene-gosen} {
-    depends_run-append  port:lucene-gosen
+variant ja description {Add Japanese settings} {
     # create Japanese solr home 'solr-ja'.
     post-extract {
         copy ${worksrcpath}/example/solr ${worksrcpath}/example/solr-ja
@@ -67,11 +66,6 @@
             ${worksrcpath}/example/solr-ja/conf/solrconfig.xml
     }
     post-destroot {
-        # set the lucene-gosen configuration path to config files.
-        foreach config {schema.xml solrconfig.xml} {
-            reinplace "s|@gosen_path@|${java_basepath}/lucene-gosen|g" \
-                ${destroot}${solr_home_ja}/conf/${config}
-        }
         # install a property file for UTF-8 encoding.
         copy ${filespath}/velocity.properties ${destroot}${solr_home_ja}/conf
         # copy a sample Japanese doc for testing.

Modified: trunk/dports/java/apache-solr/files/patch-solr-ja.diff
===================================================================
--- trunk/dports/java/apache-solr/files/patch-solr-ja.diff	2012-04-12 14:47:47 UTC (rev 91888)
+++ trunk/dports/java/apache-solr/files/patch-solr-ja.diff	2012-04-12 16:20:30 UTC (rev 91889)
@@ -1,100 +1,7 @@
---- example/solr-ja/conf/schema.xml.orig	2011-11-22 22:02:40.000000000 +0900
-+++ example/solr-ja/conf/schema.xml	2011-11-27 00:08:15.000000000 +0900
-@@ -469,6 +469,92 @@
-     See http://wiki.apache.org/solr/SpatialSearch
-    -->
-     <fieldtype name="geohash" class="solr.GeoHashField"/>
-+
-+    <!-- configuration for japanese text, using a morphological analyzer
-+      Most possibilities for customization are specified here in the schema.
-+
-+      Note: you can set the default query operator to be OR, AND, or PHRASE:
-+       OR: Use these defaults (autoGeneratePhraseQueries="false", <solrQueryParser defaultOperator="OR"/>
-+           In this case Solr works like it does with the English language. The default query is OR,
-+           but documents that contain more of the query terms get a special boost. You can probably
-+           use a less aggressive stopwords/stoptags in this case, and its probably a good idea to use
-+           enablePositionIncrements=true, so that if a user puts a query in quotes, they get a much more
-+           exact phrase query.
-+       AND: Set autoGeneratePhraseQueries=false, but set <solrQueryParser defaultOperator="AND"/> in
-+           your schema.xml. Note if you do this, you should use a more aggressive stopwords/stoptags
-+           list (at least at query-time), otherwise a document might not match simply because it does
-+           not contain a prefix or particle. As in the above case, its probably a good idea to use
-+           enablePositionIncrements=true for explicit phrase queries from the user.
-+       PHRASE: Set autoGeneratePhraseQueries=true. If you do this, you should probably use both a very
-+           aggressive stopwords list, and you should probably also set enablePositionIncrements=false
-+           everywhere.  Otherwise, even documents that contain the query's phrase in exact order will
-+           not match because of slightly different grammatical structure.
-+    -->
-+    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
-+      <analyzer>
-+        <!-- map characters before the tokenizer:
-+             Optionally, instead of the JapaneseWidthFactory, you can choose to do the width
-+             mappings before the text is sent to the tokenizer.
-+        <charFilter class="solr.MappingCharFilterFactory" mapping="@gosen_path@/conf/mapping-japanese.txt"/>
-+        -->
-+
-+        <!-- morphological tokenizer: sets the SURFACE form as the token, but also sets these attributes:
-+             BasicFormAttribute, ConjugationAttribute, PartOfSpeechAttribute, PronunciationsAttribute,
-+             ReadingsAttribute, and SentenceStartAttribute.
-+        -->
-+        <tokenizer class="solr.JapaneseTokenizerFactory"/>
-+
-+        <!-- normalizes CJK width differences:
-+             1. Folds fullwidth ASCII variants into the equivalent basic latin
-+             2. Folds halfwidth Katakana variants into the equivalent kana
-+
-+             Note: alternatively you can use a MappingCharFilter before the tokenizer for this, but please note
-+             that mapping characters can change how Sen tokenizes text.
-+        -->
-+        <filter class="solr.JapaneseWidthFilterFactory"/>
-+
-+        <!-- the punctuation filter removes all-punctuation tokens base on Unicode properties.
-+             punctuation tokens are tagged as "unknown", and its better to do this than to remove
-+             tokens with an unknown pos (as they might be valuable!). Because this punctuation 
-+             usually signifies a phrase or sentence boundary, enablePositionIncrements can be
-+             used to prevent phrase queries from matching across natural phrase/sentence boundaries -->
-+        <filter class="solr.JapanesePunctuationFilterFactory" enablePositionIncrements="true"/>
-+
-+        <!-- this is a part-of-speech based stopfilter, it removes any tokens that have a certain
-+             of speech. you can set enablePositionIncrements for tighter phrase queries -->
-+        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="@gosen_path@/conf/stoptags_ja.txt" enablePositionIncrements="true"/>
-+        
-+        <!-- a standard stopfilter, to specify specific stopwords. -->
-+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="@gosen_path@/conf/stopwords_ja.txt" enablePositionIncrements="true"/>
-+
-+        <!-- alternatively, instead of using a part-of-speech based stopfilter, you can use a 
-+             part-of-speech based keepfilter: specifying only the parts of speech you wish to index.
-+             anything else will be removed. HOWEVER: this could be a little dangerous, because if
-+             we upgrade ipadic they might add some new tags (the tags are fairly specific), and suddenly
-+             things that you were indexing before are no longer being indexed. Its recommended to
-+             use the part-of-speech based stopfilter above if at all possible, for safety.
-+        <filter class="solr.JapanesePartOfSpeechKeepFilterFactory" tags="@gosen_path@/conf/keeptags_ja.txt" enablePositionIncrements="true"/>
-+        -->
-+
-+        <!-- before any stemming/lemmatization, you can protect words from being modified by specifying
-+             a protwords.txt.
-+        <filter class="solr.KeywordMarkerFilterFactory" protected="@gosen_path@/conf/protwords_ja.txt" ignoreCase="false"/>
-+
-+             or you can also supply a custom stem dictionary for inflected forms (tab separated). No
-+             further stemming/lemmatization will modify this.
-+        <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
-+        -->
-+
-+        <!-- the basic form filter converts inflected verbs and adjectives to their dictionary citation form. -->
-+        <filter class="solr.JapaneseBasicFormFilterFactory"/>
-+
-+        <!-- this filter heuristically normalizes katakana forms with a final prolonged sound mark -->
-+        <filter class="solr.JapaneseKatakanaStemFilterFactory"/>
-+
-+        <!-- you might want to lowercase for any english text content you have -->
-+        <filter class="solr.LowerCaseFilterFactory"/>
-+      </analyzer>
-+    </fieldType>
-  </types>
+--- example/solr-ja/conf/schema.xml.orig	2012-03-31 01:07:12.000000000 +0900
++++ example/solr-ja/conf/schema.xml	2012-04-13 00:51:09.000000000 +0900
+@@ -931,7 +931,7 @@
  
- 
-@@ -534,7 +620,7 @@
- 
     <!-- catchall field, containing all other searchable text fields (implemented
          via copyField further on in this schema  -->
 -   <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
@@ -102,17 +9,9 @@
  
     <!-- catchall text field that indexes tokens both normally and in reverse for efficient
          leading wildcard queries. -->
---- example/solr-ja/conf/solrconfig.xml.orig	2011-11-22 22:02:40.000000000 +0900
-+++ example/solr-ja/conf/solrconfig.xml	2011-11-27 00:08:15.000000000 +0900
-@@ -86,6 +86,7 @@
-        is found that matches, it will be ignored
-     -->
-   <lib dir="../../contrib/clustering/lib/" />
-+  <lib dir="@gosen_path@/lib/" />
-   <lib dir="/total/crap/dir/ignored" />
- 
-   <!-- an exact path can be used to specify a specific file.  This
-@@ -791,6 +792,7 @@
+--- example/solr-ja/conf/solrconfig.xml.orig	2012-03-31 01:07:12.000000000 +0900
++++ example/solr-ja/conf/solrconfig.xml	2012-04-13 00:51:09.000000000 +0900
+@@ -798,6 +798,7 @@
         <str name="wt">velocity</str>
  
         <str name="v.template">browse</str>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macports-changes/attachments/20120412/ff55a625/attachment.html>