Skip to content

Commit

Permalink
fix arxiv html download redirect
Browse files Browse the repository at this point in the history
Fixes #4913
  • Loading branch information
Siedlerchr committed Sep 19, 2024
1 parent ed0e90e commit bfe4ff9
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We fixed an exception when searching for unlinked files. [#11731](https://github.com/JabRef/jabref/issues/11731)
- We fixed an issue where two contradicting notifications were shown when cutting an entry in the main table. [#11724](https://github.com/JabRef/jabref/pull/11724)
- We fixed an issue where unescaped braces in the arXiv fetcher were not treated. [#11704](https://github.com/JabRef/jabref/issues/11704)
- We fixed an issue where an HTML page was downloaded instead of the full-text PDF when importing arXiv entries. [#4913](https://github.com/JabRef/jabref/issues/4913)

### Removed

Expand Down
33 changes: 23 additions & 10 deletions src/main/java/org/jabref/logic/net/URLDownload.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.jabref.logic.importer.FetcherServerException;
import org.jabref.logic.util.io.FileUtil;

import kong.unirest.core.HttpResponse;
import kong.unirest.core.Unirest;
import kong.unirest.core.UnirestException;
import org.slf4j.Logger;
Expand Down Expand Up @@ -94,7 +95,8 @@ public static void setSSLVerification(SSLSocketFactory socketFactory, HostnameVe
try {
HttpsURLConnection.setDefaultSSLSocketFactory(socketFactory);
HttpsURLConnection.setDefaultHostnameVerifier(verifier);
} catch (Exception e) {
} catch (
Exception e) {
LOGGER.error("A problem occurred when reset SSL verification", e);
}
}
Expand All @@ -109,7 +111,14 @@ public String getMimeType() {
String contentType;
// Try to use HEAD request to avoid downloading the whole file
try {
contentType = Unirest.head(source.toString()).asString().getHeaders().get("Content-Type").getFirst();
String urlToCheck = source.toString();
HttpResponse<String> response = Unirest.head(urlToCheck).asString();
// Check for a redirect: without following it, arXiv (for example) reports the content type HTML for the original URL
String locationHeader = response.getHeaders().getFirst("location");
if (locationHeader != null && !locationHeader.isEmpty()) {
urlToCheck = locationHeader;
}
contentType = Unirest.head(urlToCheck).asString().getHeaders().getFirst("Content-Type");
if ((contentType != null) && !contentType.isEmpty()) {
return contentType;
}
Expand Down Expand Up @@ -224,7 +233,8 @@ public static String asString(Charset encoding, URLConnection connection) throws
Writer output = new StringWriter()) {
copy(input, output, encoding);
return output.toString();
} catch (IOException e) {
} catch (
IOException e) {
throw new FetcherException("Error downloading", e);
}
}
Expand Down Expand Up @@ -341,7 +351,8 @@ public URLConnection openConnection() throws FetcherException {
URLConnection connection;
try {
connection = getUrlConnection();
} catch (IOException e) {
} catch (
IOException e) {
throw new FetcherException("Error opening connection", e);
}

Expand All @@ -350,29 +361,31 @@ public URLConnection openConnection() throws FetcherException {
try {
// this does network i/o: GET + read returned headers
status = httpURLConnection.getResponseCode();
} catch (IOException e) {
} catch (
IOException e) {
LOGGER.error("Error getting response code", e);
throw new FetcherException("Error getting response code", e);
}

if ((status == HttpURLConnection.HTTP_MOVED_TEMP)
|| (status == HttpURLConnection.HTTP_MOVED_PERM)
|| (status == HttpURLConnection.HTTP_SEE_OTHER)) {
|| (status == HttpURLConnection.HTTP_MOVED_PERM)
|| (status == HttpURLConnection.HTTP_SEE_OTHER)) {
// get redirect url from "location" header field
String newUrl = connection.getHeaderField("location");
// open the new connection again
try {
connection = new URLDownload(newUrl).openConnection();
} catch (MalformedURLException e) {
} catch (
MalformedURLException e) {
throw new FetcherException("Could not open URL Download", e);
}
} else if (status >= 400) {
// in case of an error, propagate the error message
SimpleHttpResponse httpResponse = new SimpleHttpResponse(httpURLConnection);
LOGGER.info("{}", httpResponse);
if ((status >= 400) && (status < 500)) {
if (status < 500) {
throw new FetcherClientException(this.source, httpResponse);
} else if (status >= 500) {
} else {
throw new FetcherServerException(this.source, httpResponse);
}
}
Expand Down

0 comments on commit bfe4ff9

Please sign in to comment.