Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LOBID fetcher #10135

Merged
merged 16 commits into from
Sep 18, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We added support for customizing the citation command (e.g., `[@key1,@key2]`) when [pushing to external applications](https://docs.jabref.org/cite/pushtoapplications). [#10133](https://github.com/JabRef/jabref/issues/10133)
- We added an integrity check for more special characters. [#8712](https://github.com/JabRef/jabref/issues/8712)
- We added protected terms described as "Computer science". [#10222](https://github.com/JabRef/jabref/pull/10222)
- We added a fetcher for [LOBID](https://lobid.org/resources/api) resources. [koppor#386](https://github.com/koppor/jabref/issues/386)

### Changed

Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.jabref.logic.importer.fetcher.IEEE;
import org.jabref.logic.importer.fetcher.INSPIREFetcher;
import org.jabref.logic.importer.fetcher.IacrEprintFetcher;
import org.jabref.logic.importer.fetcher.LOBIDFetcher;
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
import org.jabref.logic.importer.fetcher.MathSciNet;
import org.jabref.logic.importer.fetcher.MedlineFetcher;
Expand Down Expand Up @@ -119,6 +120,7 @@ public static SortedSet<SearchBasedFetcher> getSearchBasedFetchers(ImportFormatP
set.add(new SemanticScholar());
set.add(new ResearchGate(importFormatPreferences));
set.add(new BiodiversityLibrary(importerPreferences));
set.add(new LOBIDFetcher(importerPreferences));
return set;
}

Expand Down
203 changes: 203 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/LOBIDFetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package org.jabref.logic.importer.fetcher;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImporterPreferences;
import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.fetcher.transformers.LOBIDQueryTransformer;
import org.jabref.logic.util.OS;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.EntryType;
import org.jabref.model.entry.types.StandardEntryType;

import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONObject;
import org.apache.http.client.utils.URIBuilder;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Fetches data from the LOBID API.
 * <p>
 * Search results are requested as JSON from {@value #API_URL}; every element of the
 * response's {@code member} array is converted into one {@link BibEntry}.
 *
 * @see <a href="https://lobid.org/resources/api">API documentation</a> for more details
 */
public class LOBIDFetcher implements PagedSearchBasedParserFetcher {

    public static final String FETCHER_NAME = "LOBID";

    private static final Logger LOGGER = LoggerFactory.getLogger(LOBIDFetcher.class);

    private static final String API_URL = "https://lobid.org/resources/search";

    // Stored for parity with the other fetchers' constructors; not read anywhere in this class.
    private final ImporterPreferences importerPreferences;

    public LOBIDFetcher(ImporterPreferences importerPreferences) {
        this.importerPreferences = importerPreferences;
    }

    /**
     * Gets the query URL
     *
     * @param luceneQuery the search query
     * @param pageNumber  the number of the page indexed from 0
     * @return URL carrying the transformed query ({@code q}), the offset of the first result
     *         ({@code from} = page size * page number), the page size ({@code size}) and the
     *         response format ({@code format=json})
     */
    @Override
    public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException {
        URIBuilder uriBuilder = new URIBuilder(API_URL);
        uriBuilder.addParameter("q", new LOBIDQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // search query
        uriBuilder.addParameter("from", String.valueOf(getPageSize() * pageNumber)); // from entry number, starts indexing at 0
        uriBuilder.addParameter("size", String.valueOf(getPageSize())); // page size
        uriBuilder.addParameter("format", "json"); // response format
        return uriBuilder.build().toURL();
    }

    /**
     * Returns a parser that reads the whole response as UTF-8 JSON and converts every object
     * in its {@code member} array into a {@link BibEntry}. A response without a {@code member}
     * array yields an empty list; members that are not JSON objects are skipped.
     */
    @Override
    public Parser getParser() {
        return inputStream -> {
            // JSON is exchanged as UTF-8; do not depend on the platform default charset
            String response = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))
                    .lines()
                    .collect(Collectors.joining(OS.NEWLINE));
            JSONObject jsonObject = new JSONObject(response);

            List<BibEntry> entries = new ArrayList<>();
            JSONArray results = jsonObject.optJSONArray("member");
            if (results != null) {
                for (int i = 0; i < results.length(); i++) {
                    JSONObject jsonEntry = results.optJSONObject(i);
                    if (jsonEntry == null) {
                        // one malformed member must not discard the rest of the page
                        LOGGER.debug("Skipping member {} of the LOBID response because it is not a JSON object", i);
                        continue;
                    }
                    entries.add(parseJSONtoBibtex(jsonEntry));
                }
            }

            return entries;
        };
    }

    /**
     * Converts a single element of the {@code member} array into a {@link BibEntry}.
     * Missing, empty or unexpectedly shaped JSON values result in empty field values
     * instead of an exception.
     *
     * @param jsonEntry one search result as delivered by the API
     * @return the entry built from the JSON data
     */
    private BibEntry parseJSONtoBibtex(JSONObject jsonEntry) {
        BibEntry entry = new BibEntry();
        Field nametype = StandardField.JOURNAL;
        EntryType entryType = StandardEntryType.InCollection;

        // publication type
        JSONArray typeArray = jsonEntry.optJSONArray("type");
        String types = "";
        if (typeArray != null) {
            types = String.join(", ", getNonEmptyStrings(typeArray));
            entry.setField(StandardField.TYPE, types);
        }

        // Locale.ROOT keeps the keyword matching independent of the user's locale
        String lowerCaseTypes = types.toLowerCase(Locale.ROOT);
        if (lowerCaseTypes.contains("book")) {
            entryType = StandardEntryType.Book;
            nametype = StandardField.BOOKTITLE;
        } else if (lowerCaseTypes.contains("article")) {
            entryType = StandardEntryType.Article;
        }
        entry.setType(entryType);

        // isbn
        entry.setField(StandardField.ISBN, getFirstArrayElement(jsonEntry, "isbn"));

        // parent resource: the part of the citation in front of the first "/"
        String bibliographicCitation = jsonEntry.optString("bibliographicCitation", "");
        String[] bibSplit = bibliographicCitation.split("/");
        if (bibSplit.length > 0) {
            entry.setField(nametype, bibSplit[0].trim());
        }

        entry.setField(StandardField.ISSN, getFirstArrayElement(jsonEntry, "issn"));
        entry.setField(StandardField.TITLE, jsonEntry.optString("title", ""));
        entry.setField(StandardField.ABSTRACT, getFirstArrayElement(jsonEntry, "note"));
        entry.setField(StandardField.TITLEADDON, getFirstArrayElement(jsonEntry, "otherTitleInformation"));
        entry.setField(StandardField.EDITION, getFirstArrayElement(jsonEntry, "edition"));

        // authors
        JSONArray contributions = jsonEntry.optJSONArray("contribution");
        if (contributions != null) {
            List<String> authorNames = getAuthorNames(contributions);
            // check the extracted names, not the raw array: contributions may lack a labelled agent
            if (!authorNames.isEmpty()) {
                entry.setField(StandardField.AUTHOR, String.join(" and ", authorNames));
            }
        }

        // publication; optJSONObject(0) yields null (and thus an empty Optional) for an empty array
        Optional.ofNullable(jsonEntry.optJSONArray("publication"))
                .map(array -> array.optJSONObject(0))
                .ifPresent(publication -> {
                    entry.setField(StandardField.PUBLISHER, getFirstArrayElement(publication, "publishedBy"));
                    entry.setField(StandardField.LOCATION, getFirstArrayElement(publication, "location"));
                    String date = publication.optString("startDate");
                    entry.setField(StandardField.DATE, date);
                    entry.setField(StandardField.YEAR, date);
                });

        // url
        JSONObject describedBy = jsonEntry.optJSONObject("describedBy");
        if (describedBy != null) {
            entry.setField(StandardField.URL, describedBy.optString("id"));
        }

        // language
        JSONArray languageArray = jsonEntry.optJSONArray("language");
        if (languageArray != null) {
            List<String> languageList = IntStream.range(0, languageArray.length())
                                                 .mapToObj(languageArray::optJSONObject)
                                                 .filter(Objects::nonNull)
                                                 .map(language -> language.optString("label"))
                                                 .filter(label -> !label.isEmpty())
                                                 .toList();
            entry.setField(StandardField.LANGUAGE, String.join(" and ", languageList));
        }

        // keywords
        JSONArray keywordArray = jsonEntry.optJSONArray("subjectslabels");
        if (keywordArray != null) {
            entry.setField(StandardField.KEYWORDS, String.join(", ", getNonEmptyStrings(keywordArray)));
        }

        return entry;
    }

    /**
     * Collects the {@code agent.label} value of every contribution.
     * Contributions that are not JSON objects, have no {@code agent} object or have an
     * empty label are left out.
     *
     * @param authors the {@code contribution} array of a search result
     * @return the non-empty agent labels in their original order
     */
    private static List<String> getAuthorNames(JSONArray authors) {
        return IntStream.range(0, authors.length())
                        .mapToObj(authors::optJSONObject)
                        .filter(Objects::nonNull)
                        .map(author -> author.optJSONObject("agent"))
                        .filter(Objects::nonNull)
                        .map(agent -> agent.optString("label"))
                        .filter(label -> !label.isEmpty())
                        .toList();
    }

    /**
     * Returns all elements of the given array as strings, leaving out empty ones.
     *
     * @param array the JSON array to read
     * @return the non-empty string values in their original order
     */
    private static List<String> getNonEmptyStrings(JSONArray array) {
        return IntStream.range(0, array.length())
                        .mapToObj(array::optString)
                        .filter(value -> !value.isEmpty())
                        .toList();
    }

    /**
     * Returns the first element of the array stored under {@code key}.
     *
     * @param jsonEntry the object to read from
     * @param key       the name of the array-valued property
     * @return the first element as string, or an empty string if the property is missing,
     *         is not an array or is an empty array
     */
    private static String getFirstArrayElement(JSONObject jsonEntry, String key) {
        return Optional.ofNullable(jsonEntry.optJSONArray(key))
                       .map(array -> array.optString(0))
                       .orElse("");
    }

    @Override
    public String getName() {
        return FETCHER_NAME;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.jabref.logic.importer.fetcher.transformers;

/**
 * Query transformer producing search expressions for the LOBID resources API.
 * Author, title and journal are mapped onto the fields {@code contribution.agent.label},
 * {@code title} and {@code bibliographicCitation}; years are expressed as a range over
 * {@code publication.startDate}.
 */
public class LOBIDQueryTransformer extends AbstractQueryTransformer {

    /**
     * Builds a range clause over the publication start date.
     *
     * @param from lower bound of the range
     * @param to   upper bound of the range
     * @return the clause {@code publication.startDate:[from TO to]}
     */
    private static String startDateBetween(String from, String to) {
        return "publication.startDate:[" + from + " TO " + to + "]";
    }

    @Override
    public String getLogicalAndOperator() {
        return " AND ";
    }

    @Override
    public String getLogicalOrOperator() {
        return " OR ";
    }

    @Override
    protected String getLogicalNotOperator() {
        return "-";
    }

    @Override
    protected String handleAuthor(String author) {
        final String authorField = "contribution.agent.label";
        return createKeyValuePair(authorField, author);
    }

    @Override
    protected String handleTitle(String title) {
        final String titleField = "title";
        return createKeyValuePair(titleField, title);
    }

    @Override
    protected String handleJournal(String journalTitle) {
        final String journalField = "bibliographicCitation";
        return createKeyValuePair(journalField, journalTitle);
    }

    /**
     * A single year is expressed as a range that starts and ends at that year.
     */
    @Override
    protected String handleYear(String year) {
        return startDateBetween(year, year);
    }

    /**
     * Parses the given range and turns it into a start-date range clause. If the end year
     * is {@link Integer#MAX_VALUE} after parsing, the input is handed back unchanged.
     */
    @Override
    protected String handleYearRange(String yearRange) {
        parseYearRange(yearRange);
        return endYear != Integer.MAX_VALUE
                ? startDateBetween(String.valueOf(startYear), String.valueOf(endYear))
                : yearRange;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public void emptyStudyConstructorFillsDatabasesCorrectly() {
new StudyCatalogItem("GVK", false),
new StudyCatalogItem("IEEEXplore", true),
new StudyCatalogItem("INSPIRE", false),
new StudyCatalogItem("LOBID", false),
new StudyCatalogItem("MathSciNet", false),
new StudyCatalogItem("Medline/PubMed", false),
new StudyCatalogItem("ResearchGate", false),
Expand Down Expand Up @@ -86,6 +87,7 @@ public void studyConstructorFillsDatabasesCorrectly(@TempDir Path tempDir) {
new StudyCatalogItem("GVK", false),
new StudyCatalogItem("IEEEXplore", false),
new StudyCatalogItem("INSPIRE", false),
new StudyCatalogItem("LOBID", false),
new StudyCatalogItem("MathSciNet", false),
new StudyCatalogItem("Medline/PubMed", false),
new StudyCatalogItem("ResearchGate", false),
Expand Down
Loading
Loading