Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added tika text extraction support for lucene indexing #1277

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
11 changes: 6 additions & 5 deletions build.moxie
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ dependencies:
- compile 'org.apache.wicket:wicket-extensions:${wicket.version}' :war !org.mockito
- compile 'org.apache.lucene:lucene-core:${lucene.version}' :war :fedclient
- compile 'org.apache.lucene:lucene-analyzers-common:${lucene.version}' :war :fedclient
- compile 'org.apache.lucene:lucene-highlighter:${lucene.version}' :war :fedclient !org.apache.lucene:lucene-join
- compile 'org.apache.lucene:lucene-highlighter:${lucene.version}' :war :fedclient
- compile 'org.apache.lucene:lucene-memory:${lucene.version}' :war :fedclient
- compile 'org.apache.lucene:lucene-queryparser:${lucene.version}' :war :fedclient !org.apache.lucene:lucene-spatial
- compile 'org.pegdown:pegdown:1.5.0' :war
Expand All @@ -169,17 +169,18 @@ dependencies:
- compile 'com.unboundid:unboundid-ldapsdk:2.3.8' :war
- compile 'org.apache.ivy:ivy:2.2.0' :war
- compile 'com.toedter:jcalendar:1.3.2' :authority
- compile 'org.apache.commons:commons-compress:1.4.1' :war
- compile 'commons-io:commons-io:2.2' :war
- compile 'org.apache.commons:commons-compress:1.16' :war
- compile 'commons-io:commons-io:2.6' :war
- compile 'com.force.api:force-partner-api:24.0.0' :war
- compile 'org.freemarker:freemarker:2.3.22' :war
- compile 'com.github.dblock.waffle:waffle-jna:1.7.3' :war
- compile 'org.kohsuke:libpam4j:1.8' :war
- compile 'args4j:args4j:2.0.29' :war :fedclient
- compile 'commons-codec:commons-codec:1.7' :war
- compile 'commons-codec:commons-codec:1.11' :war
- compile 'redis.clients:jedis:2.6.2' :war
- compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war
- compile 'org.apache.tika:tika-core:1.5' :war
- compile 'org.apache.tika:tika-core:1.17' :war
- compile 'org.apache.tika:tika-parsers:1.17' :war
- compile 'org.jsoup:jsoup:1.7.3' :war
- test 'junit:junit:4.12'
# Dependencies for Selenium web page testing
Expand Down
11 changes: 11 additions & 0 deletions src/main/distrib/data/defaults.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,17 @@ web.documents = readme home index changelog contributing submitting_patches copy
# SINCE 0.9.0
web.luceneIgnoreExtensions = 7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt pptx png so swf tar xcf xls xlsx zip

# File extensions whose text content is extracted with Apache Tika for Lucene indexing
#
# SPACE-DELIMITED
# SINCE 1.9.0
web.tikaExtensions = pdf doc xls xlsx docx

# If true, use AND instead of OR as the default operator between Lucene query terms
#
# SINCE 1.9.0
web.luceneDefaultOperatorAnd = false

# Registered extensions for google-code-prettify
#
# SPACE-DELIMITED
Expand Down
Loading