Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New ADS Fetcher #5501

Merged
merged 15 commits into from
Oct 25, 2019
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- We added an option to show the preview as an extra tab in the entry editor (instead of in a split view). [#5244](https://github.com/JabRef/jabref/issues/5244)
- A custom Open/LibreOffice jstyle file now requires a layout line for the entry type `default` [#5452](https://github.com/JabRef/jabref/issues/5452)
- The entry editor is now open by default when JabRef starts up. [#5460](https://github.com/JabRef/jabref/issues/5460)
- We added a new ADS fetcher that uses the new ADS API. [#4949](https://github.com/JabRef/jabref/issues/4949)

### Fixed

Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.cleanup.MoveFieldCleanup;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
Expand All @@ -25,7 +26,6 @@
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
Expand All @@ -35,85 +35,96 @@
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
* Fetches data from the SAO/NASA Astrophysics Data System (http://www.adsabs.harvard.edu/)
*
* Search query-based: http://adsabs.harvard.edu/basic_search.html
* Entry -based: http://adsabs.harvard.edu/abstract_service.html
*
* There is also a new API (https://github.com/adsabs/adsabs-dev-api) but it returns JSON
* (or at least needs multiple calls to get BibTeX, status: September 2016)
* Fetches data from the SAO/NASA Astrophysics Data System (https://ui.adsabs.harvard.edu/)
*/
public class AstrophysicsDataSystem implements IdBasedParserFetcher, SearchBasedParserFetcher, EntryBasedParserFetcher {

// Legacy CGI endpoints: used by the getBaseUrl(...)-based query/entry request builders
// and by the DOI lookup that appends the identifier to API_DOI_URL.
private static String API_QUERY_URL = "http://adsabs.harvard.edu/cgi-bin/nph-basic_connect";
private static String API_ENTRY_URL = "http://adsabs.harvard.edu/cgi-bin/nph-abs_connect";
private static String API_DOI_URL = "http://adsabs.harvard.edu/doi/";
// Token-authenticated API endpoints: the search endpoint is queried for bibcodes,
// the export endpoint is POSTed those bibcodes and its "export" field is parsed as BibTeX.
private static final String API_SEARCH_URL = "https://api.adsabs.harvard.edu/v1/search/query";
private static final String API_EXPORT_URL = "https://api.adsabs.harvard.edu/v1/export/bibtexabs";

// Regex removing a leading "doi:" / "DOI:" prefix from an identifier.
private final String patternRemoveDOI = "^(doi:|DOI:)";
// Sent as "Authorization: Bearer <key>" on the search and export requests.
// NOTE(review): the credential is embedded in source — confirm it is meant to be a shared, public key.
private static final String API_KEY = "tDueGIu6zl96OqkcCS5LOHboWbTgEEx8yAR7Etta";
// Import settings passed to the BibtexParser created in getParser(); null is rejected by the constructor.
private final ImportFormatPreferences preferences;

/**
 * Creates a fetcher that parses results with the given import settings.
 *
 * @param preferences import format preferences; must not be {@code null}
 * @throws NullPointerException if {@code preferences} is {@code null}
 */
public AstrophysicsDataSystem(ImportFormatPreferences preferences) {
    Objects.requireNonNull(preferences);
    this.preferences = preferences;
}

/**
 * Serializes the given bibcodes into the JSON request body {@code {"bibcode": [...]}}.
 *
 * @param bibcodes collection of bibcodes for which a JSON object should be created
 * @return the JSON object rendered as a string
 */
private static String buildPostData(Collection<String> bibcodes) {
    return new JSONObject()
            .put("bibcode", bibcodes)
            .toString();
}

/**
 * Converts {@code API_EXPORT_URL} into a {@link URL}.
 *
 * @return export URL endpoint
 * @throws URISyntaxException    if the configured endpoint is not a valid URI
 * @throws MalformedURLException if the URI cannot be converted to a URL
 */
private static URL getURLforExport() throws URISyntaxException, MalformedURLException {
    URIBuilder exportEndpoint = new URIBuilder(API_EXPORT_URL);
    return exportEndpoint.build().toURL();
}

/**
 * @return the display name of this fetcher
 */
@Override
public String getName() {
    final String fetcherName = "SAO/NASA Astrophysics Data System";
    return fetcherName;
}

/**
 * Starts a request against one of the legacy endpoints with the parameters shared by
 * all such requests: {@code data_type=BIBTEXPLUS}, {@code start_nr=1} and
 * {@code nr_to_return=200}.
 *
 * @param apiUrl endpoint the request is built for
 * @return a builder that callers extend with their own search parameters
 * @throws URISyntaxException if {@code apiUrl} is not a valid URI
 */
private URIBuilder getBaseUrl(String apiUrl) throws URISyntaxException {
    return new URIBuilder(apiUrl)
            .addParameter("data_type", "BIBTEXPLUS")
            .addParameter("start_nr", String.valueOf(1))
            .addParameter("nr_to_return", String.valueOf(200));
}

/**
 * Builds the request URL for a free-text search.
 *
 * @param query query string, matching the apache solr format
 * @return URL which points to a search request for given query
 */
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
// NOTE(review): this body shows two variants back to back (diff view without +/- markers);
// everything after the first return is unreachable as rendered.
// Variant 1: legacy endpoint, query passed as "qsearch" on top of the getBaseUrl defaults.
URIBuilder uriBuilder = getBaseUrl(API_QUERY_URL);
uriBuilder.addParameter("qsearch", query);
return uriBuilder.build().toURL();
// Variant 2: search API, query passed as "q"; "fl" is set to "bibcode",
// the only field fetchBibcodes reads from each result document.
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
builder.addParameter("fl", "bibcode");
return builder.build().toURL();
}

/**
 * Builds the request URL that searches for records matching the title and/or author
 * of the given entry.
 *
 * @param entry BibEntry for which a search URL is created
 * @return URL which points to a search request for given entry
 */
@Override
public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException, FetcherException {
// NOTE(review): this body shows two variants back to back (diff view without +/- markers);
// everything after the first return is unreachable as rendered.
// Variant 1: legacy endpoint with weighted title/author parameters.
URIBuilder uriBuilder = getBaseUrl(API_ENTRY_URL);

// Search astronomy + physics + arXiv db
uriBuilder.addParameter("db_key", "AST");
uriBuilder.addParameter("db_key", "PHY");
uriBuilder.addParameter("db_key", "PRE");

// Add title search
entry.getFieldOrAlias(StandardField.TITLE).ifPresent(title -> {
uriBuilder.addParameter("ttl_logic", "OR");
uriBuilder.addParameter("title", title);
uriBuilder.addParameter("ttl_syn", "YES"); // Synonym replacement
uriBuilder.addParameter("ttl_wt", "0.3"); // Weight
uriBuilder.addParameter("ttl_wgt", "YES"); // Consider Weight
});

// Add author search
entry.getFieldOrAlias(StandardField.AUTHOR).ifPresent(author -> {
uriBuilder.addParameter("aut_logic", "OR");
uriBuilder.addParameter("author", author);
uriBuilder.addParameter("aut_syn", "YES"); // Synonym replacement
uriBuilder.addParameter("aut_wt", "1.0"); // Weight
uriBuilder.addParameter("aut_wgt", "YES"); // Consider weight
});

return uriBuilder.build().toURL();
// Variant 2: search API with a fielded query of the form
// title:"<title>" AND author:"<author>", using whichever of the two fields is present.
StringBuilder stringBuilder = new StringBuilder();

// NOTE(review): field values are placed between double quotes without escaping;
// a value containing '"' would end the phrase early — confirm whether escaping is needed.
Optional<String> title = entry.getFieldOrAlias(StandardField.TITLE).map(t -> "title:\"" + t + "\"");
Optional<String> author = entry.getFieldOrAlias(StandardField.AUTHOR).map(a -> "author:\"" + a + "\"");

if (title.isPresent()) {
// Title clause first; the author clause is appended with AND only when an author exists.
stringBuilder.append(title.get())
.append(author.map(s -> " AND " + s)
.orElse(""));
} else {
// No title: the query is the author clause alone, or empty when neither field is set.
stringBuilder.append(author.orElse(""));
}
String query = stringBuilder.toString().trim();

URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
builder.addParameter("fl", "bibcode");
// Requests 20 rows for entry-based lookups.
builder.addParameter("rows", "20");
return builder.build().toURL();
}

/**
 * Builds the request URL that looks up a single record by identifier.
 *
 * @param identifier bibcode or doi for which a search URL is created
 * @return URL which points to a search URL for given identifier
 */
@Override
// NOTE(review): two method headers and two bodies appear back to back (diff view without
// +/- markers); the first variant has no closing brace as rendered.
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
// Variant 1: strip a leading "doi:"/"DOI:" and append the rest to the legacy DOI endpoint.
String key = identifier.replaceAll(patternRemoveDOI, "");
URIBuilder uriBuilder = new URIBuilder(API_DOI_URL + key);
uriBuilder.addParameter("data_type", "BIBTEXPLUS");
return uriBuilder.build().toURL();
public URL getURLForID(String identifier) throws FetcherException, URISyntaxException, MalformedURLException {
// Variant 2: search API; the same identifier is tried both as a DOI and as a bibcode.
// NOTE(review): unlike variant 1 this does not strip a "doi:" prefix, and the identifier
// is quoted without escaping — confirm both are intended.
String query = "doi:\"" + identifier + "\" OR " + "bibcode:\"" + identifier + "\"";
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
builder.addParameter("fl", "bibcode");
return builder.build().toURL();
}

@Override
Expand All @@ -126,45 +137,143 @@ public Parser getParser() {
return new BibtexParser(preferences, new DummyFileUpdateMonitor());
}

/**
 * Tidies an entry delivered by ADS: runs the brace/newline/name formatters on abstract,
 * title and author, clears the {@code adsnote} field, moves {@code adsurl} into the URL
 * field and discards any comment text placed before the entry.
 *
 * @param entry the fetched entry, modified in place
 */
@Override
public void doPostCleanup(BibEntry entry) {
    // Abstract: braces are removed first, then newlines
    FieldFormatterCleanup abstractBraces =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveBracesFormatter());
    abstractBraces.cleanup(entry);
    FieldFormatterCleanup abstractNewlines =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveNewlinesFormatter());
    abstractNewlines.cleanup(entry);

    // Title and author
    FieldFormatterCleanup titleBraces =
            new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter());
    titleBraces.cleanup(entry);
    FieldFormatterCleanup authorNames =
            new FieldFormatterCleanup(StandardField.AUTHOR, new NormalizeNamesFormatter());
    authorNames.cleanup(entry);

    // Remove ADS note
    UnknownField adsNote = new UnknownField("adsnote");
    new FieldFormatterCleanup(adsNote, new ClearFormatter()).cleanup(entry);

    // Move adsurl to url field
    UnknownField adsUrl = new UnknownField("adsurl");
    new MoveFieldCleanup(adsUrl, StandardField.URL).cleanup(entry);

    // The fetcher adds some garbage (number of found entries etc before)
    entry.setCommentsBeforeEntry("");
}

/**
 * Looks up records matching the title and/or author of the given entry.
 *
 * @param entry entry whose title/author drive the search
 * @return the matching entries; empty when the entry has neither title nor author
 * @throws FetcherException if the search URL cannot be built or the request fails
 */
@Override
public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
    // Nothing to search for unless at least one of title/author is available
    if (!(entry.getFieldOrAlias(StandardField.TITLE).isPresent()
            || entry.getFieldOrAlias(StandardField.AUTHOR).isPresent())) {
        return Collections.emptyList();
    }

    try {
        URL searchUrl = getURLForEntry(entry);
        return performSearchByIds(fetchBibcodes(searchUrl));
    } catch (URISyntaxException uriError) {
        throw new FetcherException("Search URI is malformed", uriError);
    } catch (IOException ioError) {
        throw new FetcherException("A network error occurred", ioError);
    }
}

/**
 * Runs a free-text search.
 *
 * @param query search query; a blank query yields an empty list
 * @return the entries found for the query
 * @throws FetcherException if the search URL is malformed or the request fails
 */
@Override
public List<BibEntry> performSearch(String query) throws FetcherException {

if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try {
// NOTE(review): the next four lines (direct connection + stream parsing) and the two lines
// after them (bibcode lookup + export) look like two variants shown together by the diff
// view; braces do not balance as rendered.
URLConnection connection = getURLForQuery(query).openConnection();
connection.setRequestProperty("User-Agent", URLDownload.USER_AGENT);
try (InputStream stream = connection.getInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
// Two-step lookup: resolve the query to bibcodes, then fetch the entries for those bibcodes.
List<String> bibcodes = fetchBibcodes(getURLForQuery(query));
return performSearchByIds(bibcodes);
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
throw new FetcherException("A network error occurred", e);
}
}

/**
 * Sends an authorized request to the given search URL and collects the {@code bibcode}
 * of every document listed under {@code response.docs} in the JSON answer.
 *
 * @param url search URL for which bibcodes will be returned
 * @return list of bibcodes matching the search request; empty when there are no matches
 *         or the answer does not have the expected JSON structure
 * @throws FetcherException if the request fails with an I/O error
 */
private List<String> fetchBibcodes(URL url) throws FetcherException {
    try {
        URLDownload request = new URLDownload(url);
        request.addHeader("Authorization", "Bearer " + API_KEY);

        JSONObject answer = new JSONObject(request.asString());
        JSONArray docs = answer.getJSONObject("response").getJSONArray("docs");

        List<String> result = new ArrayList<>();
        int index = 0;
        while (index < docs.length()) {
            JSONObject doc = docs.getJSONObject(index);
            result.add(doc.getString("bibcode"));
            index++;
        }
        return result;
    } catch (IOException ioError) {
        throw new FetcherException("A network error occurred", ioError);
    } catch (JSONException unexpectedStructure) {
        // An answer without the expected structure is treated as "no results"
        return Collections.emptyList();
    }
}

/**
 * Looks up a single record by DOI or bibcode.
 *
 * The identifier is first resolved to bibcodes, then the entries for those bibcodes are
 * fetched. When several entries come back, the first one is returned and the ambiguity
 * is logged.
 *
 * @param identifier DOI or bibcode; a blank identifier yields an empty result
 * @return the first matching entry, or an empty Optional when nothing was found
 * @throws FetcherException if the search URL is malformed or the request fails
 */
@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
    if (StringUtil.isBlank(identifier)) {
        return Optional.empty();
    }

    try {
        List<String> bibcodes = fetchBibcodes(getURLForID(identifier));
        List<BibEntry> fetchedEntries = performSearchByIds(bibcodes);

        if (fetchedEntries.isEmpty()) {
            return Optional.empty();
        }
        if (fetchedEntries.size() > 1) {
            // A space now separates the fetcher name from "found"; the two words used to run together.
            LOGGER.info("Fetcher " + getName() + " found more than one result for identifier " + identifier
                    + ". We will use the first entry.");
        }
        return Optional.of(fetchedEntries.get(0));
    } catch (URISyntaxException e) {
        throw new FetcherException("Search URI is malformed", e);
    } catch (IOException e) {
        throw new FetcherException("A network error occurred", e);
    }
}

/**
 * Fetches the entries for a set of bibcodes: the non-blank bibcodes are POSTed as JSON to
 * the export endpoint, the "export" field of the answer is parsed as BibTeX and every
 * parsed entry is passed through doPostCleanup.
 *
 * @param identifiers bibcodes for which bibentries should be fetched
 * @return list of bibentries matching the bibcodes. Can be empty and differ in size to the size of requested
 *         bibcodes
 * @throws FetcherException if the export URL is malformed, the request fails or the result cannot be parsed
 */
private List<BibEntry> performSearchByIds(Collection<String> identifiers) throws FetcherException {

// Blank identifiers are dropped; with nothing left there is no request to send.
List<String> ids = identifiers.stream().filter(identifier -> !StringUtil.isBlank(identifier)).collect(Collectors.toList());
if (ids.isEmpty()) {
return Collections.emptyList();
}
try {
String postData = buildPostData(ids);
URLDownload download = new URLDownload(getURLforExport());
download.addHeader("Authorization", "Bearer " + API_KEY);
// NOTE(review): the standard HTTP header name is "Content-Type"; "ContentType" is
// probably not recognized as the content type by the server — confirm and correct.
download.addHeader("ContentType", "application/json");
download.setPostData(postData);
String content = download.asString();
JSONObject obj = new JSONObject(content);

try {
List<BibEntry> fetchedEntries = getParser().parseEntries(obj.optString("export"));
if (fetchedEntries.isEmpty()) {
return Collections.emptyList();
}
// Post-cleanup
fetchedEntries.forEach(this::doPostCleanup);

return fetchedEntries;
// NOTE(review): from here to the end, catch clauses of two variants are shown together
// (diff view without +/- markers): duplicate exception types and duplicate throw statements.
} catch (IOException e) {
throw new FetcherException("An I/O exception occurred", e);
} catch (JSONException e) {
return Collections.emptyList();
}
} catch (URISyntaxException | MalformedURLException e) {
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
throw new FetcherException("An I/O exception occurred", e);
throw new FetcherException("A network error occurred", e);
} catch (ParseException e) {
throw new FetcherException("Error occurred when parsing entry", Localization.lang("Error occurred when parsing entry"), e);
throw new FetcherException("An internal parser error occurred", e);
}
}

/**
 * Tidies an entry delivered by ADS: runs the brace/newline/name formatters on abstract,
 * title and author, clears the {@code adsnote} field, moves {@code adsurl} into the URL
 * field and discards any comment text placed before the entry.
 *
 * @param entry the fetched entry, modified in place
 */
@Override
public void doPostCleanup(BibEntry entry) {
    // Abstract: braces are removed first, then newlines
    FieldFormatterCleanup abstractBraces =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveBracesFormatter());
    abstractBraces.cleanup(entry);
    FieldFormatterCleanup abstractNewlines =
            new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveNewlinesFormatter());
    abstractNewlines.cleanup(entry);

    // Title and author
    FieldFormatterCleanup titleBraces =
            new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter());
    titleBraces.cleanup(entry);
    FieldFormatterCleanup authorNames =
            new FieldFormatterCleanup(StandardField.AUTHOR, new NormalizeNamesFormatter());
    authorNames.cleanup(entry);

    // Remove ADS note
    UnknownField adsNote = new UnknownField("adsnote");
    new FieldFormatterCleanup(adsNote, new ClearFormatter()).cleanup(entry);

    // Move adsurl to url field
    UnknownField adsUrl = new UnknownField("adsurl");
    new MoveFieldCleanup(adsUrl, StandardField.URL).cleanup(entry);

    // The fetcher adds some garbage (number of found entries etc before)
    entry.setCommentsBeforeEntry("");
}
}
Loading