Elasticsearch 7.* and 8.* integration. OpenSearch integration. #469

Open · wants to merge 9 commits into base: main
api/pom.xml: 5 additions & 0 deletions
@@ -80,6 +80,11 @@
<artifactId>guava</artifactId>
<version>32.0.0-jre</version>
</dependency>
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>8.9.0</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
@@ -0,0 +1,30 @@
package edu.cornell.mannlib.vitro.webapp.searchengine.base;

import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class SearchEngineUtil {

private static final Log log = LogFactory.getLog(SearchEngineUtil.class);

public static String getSearchEngineURLProperty() {
ConfigurationProperties config = ConfigurationProperties.getInstance();
if (config.getProperty("vitro.local.searchengine.url", "").isEmpty()) {
return tryFetchLegacySolrConfiguration(config);
}

return config.getProperty("vitro.local.searchengine.url", "");
}

private static String tryFetchLegacySolrConfiguration(ConfigurationProperties config) {
String legacyConfigValue = config.getProperty("vitro.local.solr.url", "");
if (!legacyConfigValue.isEmpty()) {
log.warn(
"vitro.local.solr.url is deprecated, switch to using" +
" vitro.local.searchengine.url as soon as possible.");
}

return legacyConfigValue;
}
}
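
For orientation, a minimal caller sketch follows. It is illustrative only and not part of this diff: the example class, its method name, and the IllegalStateException handling are assumptions; only getSearchEngineURLProperty() and the two property names come from the code above.

package edu.cornell.mannlib.vitro.webapp.searchengine.base;

// Illustrative sketch only (not in this pull request): consumes the URL lookup above.
public class SearchEngineUrlExampleCaller {

    public static String requireSearchEngineUrl() {
        String url = SearchEngineUtil.getSearchEngineURLProperty();
        if (url.isEmpty()) {
            // Neither vitro.local.searchengine.url nor the deprecated
            // vitro.local.solr.url was configured.
            throw new IllegalStateException("No search engine URL configured");
        }
        return url;
    }
}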
@@ -0,0 +1,68 @@
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;

import co.elastic.clients.elasticsearch._types.query_dsl.ExistsQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.FuzzyQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.MatchPhraseQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.MatchQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.PrefixQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.Query;
import co.elastic.clients.elasticsearch._types.query_dsl.RangeQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.WildcardQuery;

public class CustomQueryBuilder {

private static final String MAX_FUZZY_EDITS = "2";

public static Query buildQuery(SearchType queryType, String field, String value) {
validateInput(field, value);

switch (queryType) {
case MATCH:
return MatchQuery.of(m -> m
.field(field)
.query(value)
)._toQuery();
case FUZZY:
return FuzzyQuery.of(m -> m
.field(field)
.value(value.replace("~", ""))
.fuzziness(MAX_FUZZY_EDITS)
)._toQuery();
case PREFIX:
return PrefixQuery.of(m -> m
.field(field)
.value(value)
)._toQuery();
case RANGE:
String[] values = value.split("TO");
return RangeQuery.of(m -> m
.field(field)
.from(values[0])
.to(values[1])
)._toQuery();
case EXISTS:
return ExistsQuery.of(m -> m
.field(field)
)._toQuery();
case WILDCARD:
return WildcardQuery.of(m -> m
.field(field)
.value(value)
)._toQuery();
default:
return MatchPhraseQuery.of(m -> m
.field(field)
.query(value.substring(1, value.length() - 1)) // Remove leading and trailing '"' character
)._toQuery();
}
}

private static void validateInput(String field, String value) {
if (field == null || field.isEmpty()) {
throw new IllegalArgumentException("Field not specified");
}
if (value == null) {
throw new IllegalArgumentException("Value not specified");
}
}
}
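
A short usage sketch, illustrative only and not part of this diff: it assumes SearchType is the enum consulted by the switch above (with at least the constants named there), and the field name "nameRaw" and sample values are placeholders.

package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;

import co.elastic.clients.elasticsearch._types.query_dsl.Query;

// Illustrative sketch only (not in this pull request).
public class CustomQueryBuilderExample {

    public static void main(String[] args) {
        // Full-text match on a single field.
        Query match = CustomQueryBuilder.buildQuery(SearchType.MATCH, "nameRaw", "Smith");

        // Fuzzy match; buildQuery strips any '~' from the value before building the query.
        Query fuzzy = CustomQueryBuilder.buildQuery(SearchType.FUZZY, "nameRaw", "Smiht~");

        System.out.println(match + " | " + fuzzy);
    }
}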
@@ -9,17 +9,21 @@
import java.util.List;
import java.util.Map;

import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPut;

import com.fasterxml.jackson.databind.ObjectMapper;

import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;

/**
* The nuts and bolts of adding a document to the Elasticsearch index
@@ -80,10 +84,19 @@ private void putToElastic(String json, String docId)
try {
String url = baseUrl + "/_doc/"
+ URLEncoder.encode(docId, "UTF8");
Response response = Request.Put(url)
.bodyString(json, ContentType.APPLICATION_JSON).execute();
HttpClient httpClient;
if (baseUrl.startsWith("https")) {
httpClient = ESHttpsBasicClientFactory.getHttpClient();
} else {
httpClient = HttpClientFactory.getHttpClient();
}

HttpPut request = new HttpPut(url);
request.addHeader("Content-Type", "application/json");
request.setEntity(new StringEntity(json));
HttpResponse response = httpClient.execute(request);
log.debug("Response from Elasticsearch: "
+ response.returnContent().asString());
+ EntityUtils.toString(response.getEntity()));
} catch (Exception e) {
throw new SearchEngineException("Failed to put to Elasticsearch",
e);
@@ -5,12 +5,16 @@
import java.util.HashMap;
import java.util.Map;

import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;

import com.fasterxml.jackson.databind.ObjectMapper;

import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;

/**
* The nuts and bolts of getting the number of documents in the Elasticsearch
@@ -26,8 +30,14 @@ public ESCounter(String baseUrl) {
public int count() throws SearchEngineException {
try {
String url = baseUrl + "/_doc/_count";
Response response = Request.Get(url).execute();
String json = response.returnContent().asString();
HttpClient httpClient;
if (baseUrl.startsWith("https")) {
httpClient = ESHttpsBasicClientFactory.getHttpClient();
} else {
httpClient = HttpClientFactory.getHttpClient();
}
HttpResponse response = httpClient.execute(new HttpGet(url));
String json = EntityUtils.toString(response.getEntity());

@SuppressWarnings("unchecked")
Map<String, Object> map = new ObjectMapper().readValue(json,
@@ -10,16 +10,20 @@
import java.util.List;
import java.util.Map;

import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;

import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
@@ -52,8 +56,15 @@ private void deleteById(String id) throws SearchEngineException {
try {
String url = baseUrl + "/_doc/"
+ URLEncoder.encode(id, "UTF8");
Response response = Request.Delete(url).execute();
String json = response.returnContent().asString();
HttpClient httpClient;
if (baseUrl.startsWith("https")) {
httpClient = ESHttpsBasicClientFactory.getHttpClient();
} else {
httpClient = HttpClientFactory.getHttpClient();
}

HttpResponse response = httpClient.execute(new HttpDelete(url));
String json = EntityUtils.toString(response.getEntity());
} catch (HttpResponseException e) {
if (e.getStatusCode() == 404) {
// Don't care if it has already been deleted.
@@ -73,12 +84,20 @@ public void deleteByQuery(String queryString) throws SearchEngineException {
String queryJson = new QueryConverter(query).asString();

try {
Response response = Request.Post(url)
.bodyString(queryJson, ContentType.APPLICATION_JSON)
.execute();
HttpClient httpClient;
if (baseUrl.startsWith("https")) {
httpClient = ESHttpsBasicClientFactory.getHttpClient();
} else {
httpClient = HttpClientFactory.getHttpClient();
}

HttpPost request = new HttpPost(url);
request.addHeader("Content-Type", "application/json");
request.setEntity(new StringEntity(queryJson));
HttpResponse response = httpClient.execute(request);

BaseResponseHandler handler = new BaseResponseHandler();
response.handleResponse(handler);
handler.handleResponse(response);
if (handler.getStatusCode() >= 400) {
log.warn(String.format(
"Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
@@ -2,12 +2,16 @@

package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;

import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;

import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;

/**
* Just does a "commit" or "flush" to the index.
@@ -29,8 +33,14 @@ public void flush(boolean wait) throws SearchEngineException {
try {
String url = baseUrl + "/_flush"
+ (wait ? "?wait_for_ongoing" : "");
Response response = Request.Get(url).execute();
String json = response.returnContent().asString();
HttpClient httpClient;
if (baseUrl.startsWith("https")) {
httpClient = ESHttpsBasicClientFactory.getHttpClient();
} else {
httpClient = HttpClientFactory.getHttpClient();
}
HttpResponse response = httpClient.execute(new HttpGet(url));
String json = EntityUtils.toString(response.getEntity());
log.debug("flush response: " + json);
} catch (Exception e) {
throw new SearchEngineException("Failed to put to Elasticsearch",
@@ -6,6 +6,8 @@
import java.net.URI;
import java.net.URISyntaxException;

import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -17,7 +19,6 @@
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;

/**
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
@@ -90,6 +91,9 @@ public ESFunkyGetRequest bodyString(String contents,

public HttpResponse execute() throws SearchEngineException {
try {
if (this.getURI().getScheme().equals("https")) {
return ESHttpsBasicClientFactory.getHttpClient().execute(this);
}
return HttpClientFactory.getHttpClient().execute(this);
} catch (IOException e) {
throw new SearchEngineException(e);
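
Editorial note on the pattern above: ESAdder, ESCounter, ESDeleter, ESFlusher, and the query executor each repeat the same scheme check to choose between ESHttpsBasicClientFactory and HttpClientFactory. A minimal sketch of how that choice could be centralized follows; it is not part of this pull request and assumes only the two factory methods already used in the diff.

package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;

import org.apache.http.client.HttpClient;

import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;

// Illustrative sketch only (not in this pull request).
public final class ESHttpClientChooser {

    private ESHttpClientChooser() {
        // Utility class; no instances.
    }

    // Return the basic-auth HTTPS client for https URLs, the plain client otherwise.
    public static HttpClient forBaseUrl(String baseUrl) {
        return baseUrl.startsWith("https")
                ? ESHttpsBasicClientFactory.getHttpClient()
                : HttpClientFactory.getHttpClient();
    }
}

With such a helper, each inline check collapses to a single call, e.g. HttpClient httpClient = ESHttpClientChooser.forBaseUrl(baseUrl);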