From b97cf907f06fc36ace11f431a1f4d7fb1f74a41f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20W=C3=BCrtele?= Date: Mon, 6 Mar 2023 09:46:42 +0100 Subject: [PATCH 1/7] Add fulltext fetcher for IACR eprints --- .../jabref/logic/importer/WebFetchers.java | 1 + .../importer/fetcher/IacrEprintFetcher.java | 33 ++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java index 7bde63f9678..453ac0c19db 100644 --- a/src/main/java/org/jabref/logic/importer/WebFetchers.java +++ b/src/main/java/org/jabref/logic/importer/WebFetchers.java @@ -196,6 +196,7 @@ public static Set getFullTextFetchers(ImportFormatPreferences i fetchers.add(new ArXivFetcher(importFormatPreferences)); fetchers.add(new IEEE(importFormatPreferences, importerPreferences)); fetchers.add(new ApsFetcher()); + fetchers.add(new IacrEprintFetcher(importFormatPreferences)); // Meta search // fetchers.add(new JstorFetcher(importFormatPreferences)); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java index 42777780515..375185f393b 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java @@ -1,14 +1,13 @@ package org.jabref.logic.importer.fetcher; import java.io.IOException; +import java.net.URL; +import java.util.Objects; import java.util.Optional; import java.util.function.Predicate; import java.util.regex.Pattern; -import org.jabref.logic.importer.FetcherException; -import org.jabref.logic.importer.IdBasedFetcher; -import org.jabref.logic.importer.ImportFormatPreferences; -import org.jabref.logic.importer.ParseException; +import org.jabref.logic.importer.*; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.l10n.Localization; import org.jabref.logic.net.URLDownload; @@ -17,7 +16,7 @@ import org.jabref.model.strings.StringUtil; import org.jabref.model.util.DummyFileUpdateMonitor; -public class IacrEprintFetcher implements IdBasedFetcher { +public class IacrEprintFetcher implements FulltextFetcher, IdBasedFetcher { public static final String NAME = "IACR eprints"; @@ -26,6 +25,7 @@ public class IacrEprintFetcher implements IdBasedFetcher { private static final Predicate IDENTIFIER_PREDICATE = Pattern.compile("\\d{4}/\\d{3,5}").asPredicate(); private static final String CITATION_URL_PREFIX = "https://eprint.iacr.org/"; private static final String DESCRIPTION_URL_PREFIX = "https://eprint.iacr.org/"; + private static final String FULLTEXT_URL_PREFIX = "https://eprint.iacr.org/archive/"; private static final String VERSION_URL_PREFIX = "https://eprint.iacr.org/archive/versions/"; private final ImportFormatPreferences prefs; @@ -130,4 +130,27 @@ private boolean isFromOrAfterYear2000(BibEntry entry) throws FetcherException { public String getName() { return NAME; } + + @Override + public Optional findFullText(BibEntry entry) throws IOException, FetcherException { + Objects.requireNonNull(entry); + + Optional urlField = entry.getField(StandardField.URL); + if (urlField.isPresent()) { + String descriptiveHtml = getHtml(urlField.get()); + String startOfFulltextLink = " Date: Mon, 6 Mar 2023 09:59:42 +0100 Subject: [PATCH 2/7] Add IACR full text fetcher to CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2073b82de19..707621a150c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve ### Added - We added a field showing the BibTeX/biblatex source for added and deleted entries in the "External Changes Resolver" dialog. [#9509](https://github.com/JabRef/jabref/issues/9509) - +- We added a full text fetcher for IACR eprints. From e574896c39b486b3ad75a77fe575f94892b79969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20W=C3=BCrtele?= Date: Mon, 6 Mar 2023 10:28:16 +0100 Subject: [PATCH 3/7] Add tests and fixes for IACR full text fetcher --- .../importer/fetcher/IacrEprintFetcher.java | 4 +- .../fetcher/IacrEprintFetcherTest.java | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java index 375185f393b..dadfa083864 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java @@ -25,7 +25,7 @@ public class IacrEprintFetcher implements FulltextFetcher, IdBasedFetcher { private static final Predicate IDENTIFIER_PREDICATE = Pattern.compile("\\d{4}/\\d{3,5}").asPredicate(); private static final String CITATION_URL_PREFIX = "https://eprint.iacr.org/"; private static final String DESCRIPTION_URL_PREFIX = "https://eprint.iacr.org/"; - private static final String FULLTEXT_URL_PREFIX = "https://eprint.iacr.org/archive/"; + private static final String FULLTEXT_URL_PREFIX = "https://eprint.iacr.org/"; private static final String VERSION_URL_PREFIX = "https://eprint.iacr.org/archive/versions/"; private final ImportFormatPreferences prefs; @@ -142,7 +142,7 @@ public Optional findFullText(BibEntry entry) throws IOException, FetcherExc String fulltextLinkAsInHtml = getRequiredValueBetween(startOfFulltextLink, ".pdf", descriptiveHtml); // There is an additional "\n href=\"/archive/" we have to remove - and for some reason, // getRequiredValueBetween refuses to match across the line break. - fulltextLinkAsInHtml = fulltextLinkAsInHtml.replaceFirst(".*href=\"/archive/", "").trim(); + fulltextLinkAsInHtml = fulltextLinkAsInHtml.replaceFirst(".*href=\"/", "").trim(); String fulltextLink = FULLTEXT_URL_PREFIX + fulltextLinkAsInHtml + ".pdf"; return Optional.of(new URL(fulltextLink)); } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java index bf5bf9eef3a..63d3295b806 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java @@ -1,5 +1,7 @@ package org.jabref.logic.importer.fetcher; +import java.io.IOException; +import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -36,6 +38,7 @@ public class IacrEprintFetcherTest { private IacrEprintFetcher fetcher; private BibEntry abram2017; + private BibEntry abram2017noVersion; private BibEntry beierle2016; private BibEntry delgado2017; @@ -55,6 +58,17 @@ public void setUp() { .withField(StandardField.VERSION, "20171124:064527") .withField(StandardField.YEAR, "2017"); + abram2017noVersion = new BibEntry(StandardEntryType.Misc) + .withCitationKey("cryptoeprint:2017/1118") + .withField(StandardField.ABSTRACT, "dummy") + .withField(StandardField.AUTHOR, "Ittai Abraham and Dahlia Malkhi and Kartik Nayak and Ling Ren and Alexander Spiegelman") + .withField(StandardField.DATE, "2017-11-24") + .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Paper 2017/1118") + .withField(StandardField.NOTE, "\\url{https://eprint.iacr.org/2017/1118}") + .withField(StandardField.TITLE, "Solida: A Blockchain Protocol Based on Reconfigurable Byzantine Consensus") + .withField(StandardField.URL, "https://eprint.iacr.org/2017/1118") + .withField(StandardField.YEAR, "2017"); + beierle2016 = new BibEntry(StandardEntryType.Misc) .withCitationKey("cryptoeprint:2016/119") .withField(StandardField.ABSTRACT, "dummy") @@ -185,4 +199,34 @@ private static Stream allNonWithdrawnIdsWithOldHtmlFormat() { ids.removeAll(withdrawnIds); return ids.stream(); } + + + @Test + public void getFulltextWithVersion() throws FetcherException, IOException { + Optional pdfUrl = fetcher.findFullText(abram2017); + assertTrue(pdfUrl.isPresent()); + assertEquals("https://eprint.iacr.org/archive/2017/1118/1511505927.pdf", pdfUrl.get().toString()); + } + + @Test + public void getFulltextWithoutVersion() throws FetcherException, IOException { + Optional pdfUrl = fetcher.findFullText(abram2017noVersion); + assertTrue(pdfUrl.isPresent()); + assertEquals("https://eprint.iacr.org/2017/1118.pdf", pdfUrl.get().toString()); + } + + @Test + public void getFulltextWithoutUrl() throws FetcherException, IOException { + BibEntry abram2017WithoutUrl = abram2017; + abram2017WithoutUrl.clearField(StandardField.URL); + Optional pdfUrl = fetcher.findFullText(abram2017WithoutUrl); + assertTrue(pdfUrl.isEmpty()); + } + + @Test + public void getFulltextWithNonIACRUrl() throws FetcherException, IOException { + BibEntry abram2017WithNonIACRUrl = abram2017; + abram2017WithNonIACRUrl.setField(StandardField.URL, "https://example.com"); + assertThrows(FetcherException.class, () -> fetcher.findFullText(abram2017WithNonIACRUrl)); + } } From 05c8ad35525e6711e862368eab7e3de004720424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20W=C3=BCrtele?= Date: Mon, 6 Mar 2023 10:51:04 +0100 Subject: [PATCH 4/7] Make checkstyle happy --- .../jabref/logic/importer/fetcher/IacrEprintFetcher.java | 6 +++++- .../logic/importer/fetcher/IacrEprintFetcherTest.java | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java index dadfa083864..65419881b46 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java @@ -7,7 +7,11 @@ import java.util.function.Predicate; import java.util.regex.Pattern; -import org.jabref.logic.importer.*; +import org.jabref.logic.importer.FetcherException; +import org.jabref.logic.importer.FulltextFetcher; +import org.jabref.logic.importer.IdBasedFetcher; +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.l10n.Localization; import org.jabref.logic.net.URLDownload; diff --git a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java index 63d3295b806..4432df75bcd 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java @@ -200,7 +200,6 @@ private static Stream allNonWithdrawnIdsWithOldHtmlFormat() { return ids.stream(); } - @Test public void getFulltextWithVersion() throws FetcherException, IOException { Optional pdfUrl = fetcher.findFullText(abram2017); From 1336b28469ebdc10dd0433bf59036b660bbafb92 Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Mon, 6 Mar 2023 20:37:05 +0100 Subject: [PATCH 5/7] simplified test --- .../logic/importer/fetcher/IacrEprintFetcherTest.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java index 4432df75bcd..2eb837d2364 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java @@ -203,15 +203,13 @@ private static Stream allNonWithdrawnIdsWithOldHtmlFormat() { @Test public void getFulltextWithVersion() throws FetcherException, IOException { Optional pdfUrl = fetcher.findFullText(abram2017); - assertTrue(pdfUrl.isPresent()); - assertEquals("https://eprint.iacr.org/archive/2017/1118/1511505927.pdf", pdfUrl.get().toString()); + assertEquals(Optional.of("https://eprint.iacr.org/archive/2017/1118/1511505927.pdf"), pdfUrl.map(URL::toString)); } @Test public void getFulltextWithoutVersion() throws FetcherException, IOException { Optional pdfUrl = fetcher.findFullText(abram2017noVersion); - assertTrue(pdfUrl.isPresent()); - assertEquals("https://eprint.iacr.org/2017/1118.pdf", pdfUrl.get().toString()); + assertEquals(Optional.of("https://eprint.iacr.org/2017/1118.pdf"), pdfUrl.map(URL::toString)); } @Test @@ -219,11 +217,11 @@ public void getFulltextWithoutUrl() throws FetcherException, IOException { BibEntry abram2017WithoutUrl = abram2017; abram2017WithoutUrl.clearField(StandardField.URL); Optional pdfUrl = fetcher.findFullText(abram2017WithoutUrl); - assertTrue(pdfUrl.isEmpty()); + assertEquals(Optional.empty(), pdfUrl); } @Test - public void getFulltextWithNonIACRUrl() throws FetcherException, IOException { + public void getFulltextWithNonIACRUrl() throws IOException { BibEntry abram2017WithNonIACRUrl = abram2017; abram2017WithNonIACRUrl.setField(StandardField.URL, "https://example.com"); assertThrows(FetcherException.class, () -> fetcher.findFullText(abram2017WithNonIACRUrl)); From 9cb813da4d9aa5ab6838bddc4e6e1750361bf647 Mon Sep 17 00:00:00 2001 From: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com> Date: Mon, 6 Mar 2023 20:46:52 +0100 Subject: [PATCH 6/7] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89f374d3f74..d1402953758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We added a field showing the BibTeX/biblatex source for added and deleted entries in the "External Changes Resolver" dialog. [#9509](https://github.com/JabRef/jabref/issues/9509) - We added a full text fetcher for IACR eprints. -- Add "Attach file from URL" to right-click context menu which downloads file from URL and stores it with reference library. +- We added "Attach file from URL" to right-click context menu to download and store a file with the reference library. From d8dd937bd6a6b119fec161fc2494e5fe3170b86e Mon Sep 17 00:00:00 2001 From: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com> Date: Mon, 6 Mar 2023 20:54:19 +0100 Subject: [PATCH 7/7] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1402953758..e0c698ec6cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,10 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve ### Added - We added a field showing the BibTeX/biblatex source for added and deleted entries in the "External Changes Resolver" dialog. [#9509](https://github.com/JabRef/jabref/issues/9509) -- We added a full text fetcher for IACR eprints. -- We added "Attach file from URL" to right-click context menu to download and store a file with the reference library. +- We added a full text fetcher for IACR eprints. [#9651](https://github.com/JabRef/jabref/pull/9651) +- We added "Attach file from URL" to right-click context menu to download and store a file with the reference library. [#9646](https://github.com/JabRef/jabref/issues/9646) +- We enabled updating an existing entry with data from InspireHEP. [#9351](https://github.com/JabRef/jabref/issues/9351) +