Skip to content

Commit

Permalink
Improve error handling on GROBID server connection issues (#7026)
Browse files Browse the repository at this point in the history
* add explicit configuration for connectTimeout in URLDownload
add information on GROBID server connection issue for user
relates to #6517 and #6891

* add CHANGELOG entry

* fix style issues detected by checkstyle

* use .toMilliseconds() is more convenient
use assertThrow is more convenient

* incorporate feedback from review
reduce detail level from user message
rework to use FetcherException instead of UncheckedIOException

* switch to debug level in order to reduce verboseness in the log
  • Loading branch information
nitram509 committed Oct 18, 2020
1 parent 5625da7 commit 4213a8d
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ inserting new citations in a OpenOffic/LibreOffice document. [#6957](https://git
- We improved the duplicate detection when identifiers like DOI or arxiv are semantiaclly the same, but just syntactically differ (e.g. with or without http(s):// prefix). [#6707](https://github.com/JabRef/jabref/issues/6707)
- We changed in the group interface "Generate groups from keywords in a BibTeX field" by "Generate groups from keywords in the following field". [#6983](https://github.com/JabRef/jabref/issues/6983)
- We changed the name of a group type from "Searching for keywords" to "Searching for a keyword". [6995](https://github.com/JabRef/jabref/pull/6995)
- We changed connect timeouts for server requests to 30 seconds in general and 5 seconds for GROBID server (special) and improved user notifications on connection issues. [7026](https://github.com/JabRef/jabref/pull/7026)

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@
import org.jabref.gui.externalfiletype.ExternalFileTypes;
import org.jabref.gui.util.BackgroundTask;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.fetcher.GrobidCitationFetcher;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.util.FileUpdateMonitor;
import org.jabref.preferences.JabRefPreferences;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class BibtexExtractorViewModel {

private static final Logger LOGGER = LoggerFactory.getLogger(BibtexExtractorViewModel.class);

private final StringProperty inputTextProperty = new SimpleStringProperty("");
private DialogService dialogService;
private GrobidCitationFetcher currentCitationfetcher;
Expand Down Expand Up @@ -58,6 +64,15 @@ public StringProperty inputTextProperty() {
public void startParsing() {
BackgroundTask.wrap(() -> currentCitationfetcher.performSearch(inputTextProperty.getValue()))
.onRunning(() -> dialogService.notify(Localization.lang("Your text is being parsed...")))
.onFailure((e) -> {
if (e instanceof FetcherException) {
String msg = Localization.lang("There are connection issues with a JabRef server. Detailed information: %0.",
e.getMessage());
dialogService.notify(msg);
} else {
LOGGER.warn("Missing exception handling.", e);
}
})
.onSuccess(parsedEntries -> {
dialogService.notify(Localization.lang("%0 entries were parsed from your query.", String.valueOf(parsedEntries.size())));
importHandler.importEntries(parsedEntries);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.SearchBasedFetcher;
Expand All @@ -20,13 +22,18 @@
public class GrobidCitationFetcher implements SearchBasedFetcher {

private static final Logger LOGGER = LoggerFactory.getLogger(GrobidCitationFetcher.class);

private static final String GROBID_URL = "http://grobid.jabref.org:8070";
private ImportFormatPreferences importFormatPreferences;
private GrobidService grobidService;

public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) {
this(importFormatPreferences, new GrobidService(GROBID_URL));
}

GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences, GrobidService grobidService) {
this.importFormatPreferences = importFormatPreferences;
this.grobidService = new GrobidService(GROBID_URL);
this.grobidService = grobidService;
}

/**
Expand All @@ -38,9 +45,14 @@ public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) {
private Optional<String> parseUsingGrobid(String plainText) {
try {
return Optional.of(grobidService.processCitation(plainText, GrobidService.ConsolidateCitations.WITH_METADATA));
} catch (SocketTimeoutException e) {
String msg = "Connection timed out.";
LOGGER.debug(msg, e);
throw new RuntimeException(msg, e);
} catch (IOException e) {
LOGGER.debug("Could not process citation", e);
return Optional.empty();
String msg = "Could not process citation. " + e.getMessage();
LOGGER.debug(msg, e);
throw new RuntimeException(msg, e);
}
}

Expand All @@ -54,20 +66,28 @@ private Optional<BibEntry> parseBibToBibEntry(String bibtexString) {
}

@Override
public List<BibEntry> performSearch(String query) {
return Arrays
.stream(query.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+"))
.map(String::trim)
.filter(str -> !str.isBlank())
.map(reference -> parseUsingGrobid(reference))
.flatMap(Optional::stream)
.map(reference -> parseBibToBibEntry(reference))
.flatMap(Optional::stream)
.collect(Collectors.toList());
public List<BibEntry> performSearch(String query) throws FetcherException {
List<BibEntry> bibEntries = null;
try {
bibEntries = Arrays
.stream(query.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+"))
.map(String::trim)
.filter(str -> !str.isBlank())
.map(this::parseUsingGrobid)
.flatMap(Optional::stream)
.map(this::parseBibToBibEntry)
.flatMap(Optional::stream)
.collect(Collectors.toList());
} catch (RuntimeException e) {
// un-wrap the wrapped exceptions
throw new FetcherException(e.getMessage(), e.getCause());
}
return bibEntries;
}

@Override
public String getName() {
return "GROBID";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.Duration;

import org.jabref.logic.net.URLDownload;

Expand Down Expand Up @@ -47,6 +48,7 @@ public String processCitation(String rawCitation, ConsolidateCitations consolida
rawCitation = URLEncoder.encode(rawCitation, StandardCharsets.UTF_8);
URLDownload urlDownload = new URLDownload(grobidServerURL
+ "/api/processCitation");
urlDownload.setConnectTimeout(Duration.ofSeconds(5));
urlDownload.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
urlDownload.setPostData("citations=" + rawCitation + "&consolidateCitations=" + consolidateCitations);
String httpResponse = urlDownload.asString();
Expand Down
16 changes: 15 additions & 1 deletion src/main/java/org/jabref/logic/net/URLDownload.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.nio.file.StandardCopyOption;
import java.security.SecureRandom;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -62,11 +63,13 @@
public class URLDownload {

public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0";

private static final Logger LOGGER = LoggerFactory.getLogger(URLDownload.class);
private static final Duration DEFAULT_CONNECT_TIMEOUT = Duration.ofSeconds(30);

private final URL source;
private final Map<String, String> parameters = new HashMap<>();
private String postData = "";
private Duration connectTimeout = DEFAULT_CONNECT_TIMEOUT;

/**
* @param source the URL to download from
Expand Down Expand Up @@ -316,6 +319,7 @@ private void copy(InputStream in, Writer out, Charset encoding) throws IOExcepti

private URLConnection openConnection() throws IOException {
URLConnection connection = this.source.openConnection();
connection.setConnectTimeout((int) connectTimeout.toMillis());
for (Entry<String, String> entry : this.parameters.entrySet()) {
connection.setRequestProperty(entry.getKey(), entry.getValue());
}
Expand Down Expand Up @@ -346,4 +350,14 @@ private URLConnection openConnection() throws IOException {

return connection;
}

public void setConnectTimeout(Duration connectTimeout) {
if (connectTimeout != null) {
this.connectTimeout = connectTimeout;
}
}

public Duration getConnectTimeout() {
return connectTimeout;
}
}
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1637,6 +1637,7 @@ User=Benutzer
Connect=Verbinden
Connection\ error=Verbindungsfehler
Connection\ to\ %0\ server\ established.=Verbindung zum %0 Server hergestellt.
There\ are\ connection\ issues\ with\ a\ JabRef\ server.\ Detailed\ information\:\ %0.=Es gibt Verbindungsprobleme mit einem JabRef Server. Detailinformation: %0.
Required\ field\ "%0"\ is\ empty.=Erforederliches Feld "%0" ist leer.
%0\ driver\ not\ available.=%0-Treiber nicht verfügbar.
The\ connection\ to\ the\ server\ has\ been\ terminated.=Verbindung zum Server wurde abgebrochen.
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,7 @@ User=User
Connect=Connect
Connection\ error=Connection error
Connection\ to\ %0\ server\ established.=Connection to %0 server established.
There\ are\ connection\ issues\ with\ a\ JabRef\ server.\ Detailed\ information\:\ %0.=There are connection issues with a JabRef server. Detailed information: %0.
Required\ field\ "%0"\ is\ empty.=Required field "%0" is empty.
%0\ driver\ not\ available.=%0 driver not available.
The\ connection\ to\ the\ server\ has\ been\ terminated.=The connection to the server has been terminated.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.util.GrobidService;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
Expand All @@ -17,7 +20,11 @@
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

@FetcherTest
public class GrobidCitationFetcherTest {
Expand Down Expand Up @@ -76,28 +83,38 @@ public static Stream<Arguments> provideInvalidInput() {

@ParameterizedTest(name = "{0}")
@MethodSource("provideExamplesForCorrectResultTest")
public void grobidPerformSearchCorrectResultTest(String testName, BibEntry expectedBibEntry, String searchQuery) {
public void grobidPerformSearchCorrectResultTest(String testName, BibEntry expectedBibEntry, String searchQuery) throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(searchQuery);
assertEquals(List.of(expectedBibEntry), entries);
}

@Test
public void grobidPerformSearchCorrectlySplitsStringTest() {
public void grobidPerformSearchCorrectlySplitsStringTest() throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(example1 + "\n\n" + example2 + "\r\n\r\n" + example3 + "\r\r" + example4);
assertEquals(List.of(example1AsBibEntry, example2AsBibEntry, example3AsBibEntry, example4AsBibEntry), entries);
}

@Test
public void grobidPerformSearchWithEmptyStringsTest() {
public void grobidPerformSearchWithEmptyStringsTest() throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(" \n ");
assertEquals(Collections.emptyList(), entries);
}

@ParameterizedTest
@MethodSource("provideInvalidInput")
public void grobidPerformSearchWithInvalidDataTest(String invalidInput) {
public void grobidPerformSearchWithInvalidDataTest(String invalidInput) throws FetcherException {
List<BibEntry> entries = grobidCitationFetcher.performSearch(invalidInput);
assertEquals(Collections.emptyList(), entries);
}

@Test
public void performSearchThrowsExceptionInCaseOfConnectionIssues() throws IOException {
GrobidService grobidServiceMock = mock(GrobidService.class);
when(grobidServiceMock.processCitation(anyString(), any())).thenThrow(new IOException("Any IO Exception"));
grobidCitationFetcher = new GrobidCitationFetcher(importFormatPreferences, grobidServiceMock);

assertThrows(FetcherException.class, () -> {
grobidCitationFetcher.performSearch("any text");
}, "performSearch should throw an FetcherException, when there are underlying IOException.");
}
}
8 changes: 8 additions & 0 deletions src/test/java/org/jabref/logic/net/URLDownloadTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,12 @@ public void testCheckConnectionFail() throws MalformedURLException {
assertThrows(UnirestException.class, nonsense::canBeReached);
}

@Test
public void connectTimeoutIsNeverNull() throws MalformedURLException {
URLDownload urlDownload = new URLDownload(new URL("http://www.example.com"));
assertNotNull(urlDownload.getConnectTimeout(), "there's a non-null default by the constructor");

urlDownload.setConnectTimeout(null);
assertNotNull(urlDownload.getConnectTimeout(), "no null value can be set");
}
}

0 comments on commit 4213a8d

Please sign in to comment.