[91889] trunk/dports/java/apache-solr
hum at macports.org
hum at macports.org
Thu Apr 12 09:20:31 PDT 2012
Revision: 91889
https://trac.macports.org/changeset/91889
Author: hum at macports.org
Date: 2012-04-12 09:20:30 -0700 (Thu, 12 Apr 2012)
Log Message:
-----------
apache-solr: update to 3.6.0; remove the dependency on lucene-gosen in ja variant.
Modified Paths:
--------------
trunk/dports/java/apache-solr/Portfile
trunk/dports/java/apache-solr/files/patch-solr-ja.diff
Modified: trunk/dports/java/apache-solr/Portfile
===================================================================
--- trunk/dports/java/apache-solr/Portfile 2012-04-12 14:47:47 UTC (rev 91888)
+++ trunk/dports/java/apache-solr/Portfile 2012-04-12 16:20:30 UTC (rev 91889)
@@ -4,7 +4,7 @@
PortSystem 1.0
name apache-solr
-version 3.5.0
+version 3.6.0
categories java textproc
platforms darwin
maintainers gmail.com:haya10.ito hum openmaintainer
@@ -17,8 +17,8 @@
master_sites apache:lucene/solr/${version}/
extract.suffix .tgz
-checksums rmd160 365d4b27753375ea3a39b9d42c06b80dce474731 \
- sha256 804f3ba9d1296f81388605a79538b7362355693fbdd03b7b2dbf9a706bf1d1d0
+checksums rmd160 f54aee9e0d0196e7e96408b40ae025fc69478229 \
+ sha256 3acac4323ba3dbfa153d8ef01f156bab9b0eccf1b1f1f03e91b8b6739d3dc6c6
# set the destination paths.
set java_basepath ${prefix}/share/java
@@ -54,8 +54,7 @@
# solr home for Japanese configurations.
set solr_home_ja ${solr_home}-ja
-variant ja description {Add Japanese settings with lucene-gosen} {
- depends_run-append port:lucene-gosen
+variant ja description {Add Japanese settings} {
# create Japanese solr home 'solr-ja'.
post-extract {
copy ${worksrcpath}/example/solr ${worksrcpath}/example/solr-ja
@@ -67,11 +66,6 @@
${worksrcpath}/example/solr-ja/conf/solrconfig.xml
}
post-destroot {
- # set the lucene-gosen configuration path to config files.
- foreach config {schema.xml solrconfig.xml} {
- reinplace "s|@gosen_path@|${java_basepath}/lucene-gosen|g" \
- ${destroot}${solr_home_ja}/conf/${config}
- }
# install a property file for UTF-8 encoding.
copy ${filespath}/velocity.properties ${destroot}${solr_home_ja}/conf
# copy a sample Japanese doc for testing.
Modified: trunk/dports/java/apache-solr/files/patch-solr-ja.diff
===================================================================
--- trunk/dports/java/apache-solr/files/patch-solr-ja.diff 2012-04-12 14:47:47 UTC (rev 91888)
+++ trunk/dports/java/apache-solr/files/patch-solr-ja.diff 2012-04-12 16:20:30 UTC (rev 91889)
@@ -1,100 +1,7 @@
---- example/solr-ja/conf/schema.xml.orig 2011-11-22 22:02:40.000000000 +0900
-+++ example/solr-ja/conf/schema.xml 2011-11-27 00:08:15.000000000 +0900
-@@ -469,6 +469,92 @@
- See http://wiki.apache.org/solr/SpatialSearch
- -->
- <fieldtype name="geohash" class="solr.GeoHashField"/>
-+
-+ <!-- configuration for japanese text, using a morphological analyzer
-+ Most possibilities for customization are specified here in the schema.
-+
-+ Note: you can set the default query operator to be OR, AND, or PHRASE:
-+ OR: Use these defaults (autoGeneratePhraseQueries="false", <solrQueryParser defaultOperator="OR"/>
-+ In this case Solr works like it does with the English language. The default query is OR,
-+ but documents that contain more of the query terms get a special boost. You can probably
-+ use a less aggressive stopwords/stoptags in this case, and its probably a good idea to use
-+ enablePositionIncrements=true, so that if a user puts a query in quotes, they get a much more
-+ exact phrase query.
-+ AND: Set autoGeneratePhraseQueries=false, but set <solrQueryParser defaultOperator="AND"/> in
-+ your schema.xml. Note if you do this, you should use a more aggressive stopwords/stoptags
-+ list (at least at query-time), otherwise a document might not match simply because it does
-+ not contain a prefix or particle. As in the above case, its probably a good idea to use
-+ enablePositionIncrements=true for explicit phrase queries from the user.
-+ PHRASE: Set autoGeneratePhraseQueries=true. If you do this, you should probably use both a very
-+ aggressive stopwords list, and you should probably also set enablePositionIncrements=false
-+ everywhere. Otherwise, even documents that contain the query's phrase in exact order will
-+ not match because of slightly different grammatical structure.
-+ -->
-+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
-+ <analyzer>
-+ <!-- map characters before the tokenizer:
-+ Optionally, instead of the JapaneseWidthFactory, you can choose to do the width
-+ mappings before the text is sent to the tokenizer.
-+ <charFilter class="solr.MappingCharFilterFactory" mapping="@gosen_path@/conf/mapping-japanese.txt"/>
-+ -->
-+
-+ <!-- morphological tokenizer: sets the SURFACE form as the token, but also sets these attributes:
-+ BasicFormAttribute, ConjugationAttribute, PartOfSpeechAttribute, PronunciationsAttribute,
-+ ReadingsAttribute, and SentenceStartAttribute.
-+ -->
-+ <tokenizer class="solr.JapaneseTokenizerFactory"/>
-+
-+ <!-- normalizes CJK width differences:
-+ 1. Folds fullwidth ASCII variants into the equivalent basic latin
-+ 2. Folds halfwidth Katakana variants into the equivalent kana
-+
-+ Note: alternatively you can use a MappingCharFilter before the tokenizer for this, but please note
-+ that mapping characters can change how Sen tokenizes text.
-+ -->
-+ <filter class="solr.JapaneseWidthFilterFactory"/>
-+
-+ <!-- the punctuation filter removes all-punctuation tokens base on Unicode properties.
-+ punctuation tokens are tagged as "unknown", and its better to do this than to remove
-+ tokens with an unknown pos (as they might be valuable!). Because this punctuation
-+ usually signifies a phrase or sentence boundary, enablePositionIncrements can be
-+ used to prevent phrase queries from matching across natural phrase/sentence boundaries -->
-+ <filter class="solr.JapanesePunctuationFilterFactory" enablePositionIncrements="true"/>
-+
-+ <!-- this is a part-of-speech based stopfilter, it removes any tokens that have a certain
-+ of speech. you can set enablePositionIncrements for tighter phrase queries -->
-+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="@gosen_path@/conf/stoptags_ja.txt" enablePositionIncrements="true"/>
-+
-+ <!-- a standard stopfilter, to specify specific stopwords. -->
-+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="@gosen_path@/conf/stopwords_ja.txt" enablePositionIncrements="true"/>
-+
-+ <!-- alternatively, instead of using a part-of-speech based stopfilter, you can use a
-+ part-of-speech based keepfilter: specifying only the parts of speech you wish to index.
-+ anything else will be removed. HOWEVER: this could be a little dangerous, because if
-+ we upgrade ipadic they might add some new tags (the tags are fairly specific), and suddenly
-+ things that you were indexing before are no longer being indexed. Its recommended to
-+ use the part-of-speech based stopfilter above if at all possible, for safety.
-+ <filter class="solr.JapanesePartOfSpeechKeepFilterFactory" tags="@gosen_path@/conf/keeptags_ja.txt" enablePositionIncrements="true"/>
-+ -->
-+
-+ <!-- before any stemming/lemmatization, you can protect words from being modified by specifying
-+ a protwords.txt.
-+ <filter class="solr.KeywordMarkerFilterFactory" protected="@gosen_path@/conf/protwords_ja.txt" ignoreCase="false"/>
-+
-+ or you can also supply a custom stem dictionary for inflected forms (tab separated). No
-+ further stemming/lemmatization will modify this.
-+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
-+ -->
-+
-+ <!-- the basic form filter converts inflected verbs and adjectives to their dictionary citation form. -->
-+ <filter class="solr.JapaneseBasicFormFilterFactory"/>
-+
-+ <!-- this filter heuristically normalizes katakana forms with a final prolonged sound mark -->
-+ <filter class="solr.JapaneseKatakanaStemFilterFactory"/>
-+
-+ <!-- you might want to lowercase for any english text content you have -->
-+ <filter class="solr.LowerCaseFilterFactory"/>
-+ </analyzer>
-+ </fieldType>
- </types>
+--- example/solr-ja/conf/schema.xml.orig 2012-03-31 01:07:12.000000000 +0900
++++ example/solr-ja/conf/schema.xml 2012-04-13 00:51:09.000000000 +0900
+@@ -931,7 +931,7 @@
-
-@@ -534,7 +620,7 @@
-
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
- <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
@@ -102,17 +9,9 @@
<!-- catchall text field that indexes tokens both normally and in reverse for efficient
leading wildcard queries. -->
---- example/solr-ja/conf/solrconfig.xml.orig 2011-11-22 22:02:40.000000000 +0900
-+++ example/solr-ja/conf/solrconfig.xml 2011-11-27 00:08:15.000000000 +0900
-@@ -86,6 +86,7 @@
- is found that matches, it will be ignored
- -->
- <lib dir="../../contrib/clustering/lib/" />
-+ <lib dir="@gosen_path@/lib/" />
- <lib dir="/total/crap/dir/ignored" />
-
- <!-- an exact path can be used to specify a specific file. This
-@@ -791,6 +792,7 @@
+--- example/solr-ja/conf/solrconfig.xml.orig 2012-03-31 01:07:12.000000000 +0900
++++ example/solr-ja/conf/solrconfig.xml 2012-04-13 00:51:09.000000000 +0900
+@@ -798,6 +798,7 @@
<str name="wt">velocity</str>
<str name="v.template">browse</str>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macports-changes/attachments/20120412/ff55a625/attachment.html>
More information about the macports-changes mailing list