diff --git a/CHANGELOG.md b/CHANGELOG.md index d88155777dc..e1e59e5e77f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv ### Added +- We added support for offline extraction of references from PDFs following the IEEE format. [#11156](https://github.com/JabRef/jabref/pull/11156) - We added a new keyboard shortcut ctrl + , to open the preferences. [#11154](https://github.com/JabRef/jabref/pull/11154) ### Changed diff --git a/src/main/java/org/jabref/gui/actions/ActionFactory.java b/src/main/java/org/jabref/gui/actions/ActionFactory.java index 3deb6d068ef..2e0ce9b1be5 100644 --- a/src/main/java/org/jabref/gui/actions/ActionFactory.java +++ b/src/main/java/org/jabref/gui/actions/ActionFactory.java @@ -78,8 +78,11 @@ private static Label getAssociatedNode(MenuItem menuItem) { public MenuItem configureMenuItem(Action action, Command command, MenuItem menuItem) { ActionUtils.configureMenuItem(new JabRefAction(action, command, keyBindingRepository, Sources.FromMenu), menuItem); setGraphic(menuItem, action); + enableTooltips(command, menuItem); + return menuItem; + } - // Show tooltips + private static void enableTooltips(Command command, MenuItem menuItem) { if (command instanceof SimpleCommand simpleCommand) { EasyBind.subscribe( simpleCommand.statusMessageProperty(), @@ -96,8 +99,6 @@ public MenuItem configureMenuItem(Action action, Command command, MenuItem menuI } ); } - - return menuItem; } public MenuItem createMenuItem(Action action, Command command) { diff --git a/src/main/java/org/jabref/gui/actions/SimpleCommand.java b/src/main/java/org/jabref/gui/actions/SimpleCommand.java index 39e3b7ae1ad..89ef7ad100a 100644 --- a/src/main/java/org/jabref/gui/actions/SimpleCommand.java +++ b/src/main/java/org/jabref/gui/actions/SimpleCommand.java @@ -15,10 +15,6 @@ public abstract class SimpleCommand extends CommandBase { protected ReadOnlyStringWrapper statusMessage = new 
ReadOnlyStringWrapper(""); - public String getStatusMessage() { - return statusMessage.get(); - } - public ReadOnlyStringProperty statusMessageProperty() { return statusMessage.getReadOnlyProperty(); } diff --git a/src/main/java/org/jabref/gui/actions/StandardActions.java b/src/main/java/org/jabref/gui/actions/StandardActions.java index 8b4c830eeda..78ddc95db18 100644 --- a/src/main/java/org/jabref/gui/actions/StandardActions.java +++ b/src/main/java/org/jabref/gui/actions/StandardActions.java @@ -32,7 +32,8 @@ public enum StandardActions implements Action { REBUILD_FULLTEXT_SEARCH_INDEX(Localization.lang("Rebuild fulltext search index"), IconTheme.JabRefIcons.FILE), REDOWNLOAD_MISSING_FILES(Localization.lang("Redownload missing files"), IconTheme.JabRefIcons.DOWNLOAD), OPEN_EXTERNAL_FILE(Localization.lang("Open file"), IconTheme.JabRefIcons.FILE, KeyBinding.OPEN_FILE), - EXTRACT_FILE_REFERENCES(Localization.lang("Extract references from file"), IconTheme.JabRefIcons.FILE_STAR), + EXTRACT_FILE_REFERENCES_ONLINE(Localization.lang("Extract references from file (online)"), IconTheme.JabRefIcons.FILE_STAR), + EXTRACT_FILE_REFERENCES_OFFLINE(Localization.lang("Extract references from file (offline)"), IconTheme.JabRefIcons.FILE_STAR), OPEN_URL(Localization.lang("Open URL or DOI"), IconTheme.JabRefIcons.WWW, KeyBinding.OPEN_URL_OR_DOI), SEARCH_SHORTSCIENCE(Localization.lang("Search ShortScience")), MERGE_WITH_FETCHED_ENTRY(Localization.lang("Get bibliographic data from %0", "DOI/ISBN/...")), diff --git a/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java b/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java index d46854c5f55..c8d82333817 100644 --- a/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java +++ b/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java @@ -1,9 +1,13 @@ package org.jabref.gui.maintable; import java.nio.file.Path; -import java.util.LinkedList; +import java.util.Iterator; import 
java.util.List; +import java.util.Optional; +import java.util.StringJoiner; import java.util.concurrent.Callable; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.jabref.gui.DialogService; import org.jabref.gui.StateManager; @@ -13,16 +17,32 @@ import org.jabref.gui.util.BackgroundTask; import org.jabref.gui.util.TaskExecutor; import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.importer.fetcher.GrobidPreferences; +import org.jabref.logic.importer.fileformat.BibliographyFromPdfImporter; import org.jabref.logic.importer.util.GrobidService; import org.jabref.logic.l10n.Localization; import org.jabref.logic.util.io.FileUtil; +import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.LinkedFile; +import org.jabref.model.entry.field.StandardField; import org.jabref.preferences.PreferencesService; +import org.jspecify.annotations.NonNull; +import org.jspecify.annotations.Nullable; + +/** + * SIDE EFFECT: Sets the "cites" field of the entry having the linked files + * + * Mode choice A: online or offline + * Mode choice B: complete entry or single file (the latter is not implemented) + * + * The different modes should be implemented as sub classes. However, this was too complicated, thus we use variables at the constructor to parameterize this class. 
+ */ public class ExtractReferencesAction extends SimpleCommand { private final int FILES_LIMIT = 10; + private final boolean online; private final DialogService dialogService; private final StateManager stateManager; private final PreferencesService preferencesService; @@ -30,31 +50,45 @@ public class ExtractReferencesAction extends SimpleCommand { private final LinkedFile linkedFile; private final TaskExecutor taskExecutor; - public ExtractReferencesAction(DialogService dialogService, + private final BibliographyFromPdfImporter bibliographyFromPdfImporter; + + public ExtractReferencesAction(boolean online, + DialogService dialogService, StateManager stateManager, PreferencesService preferencesService, TaskExecutor taskExecutor) { - this(dialogService, stateManager, preferencesService, null, null, taskExecutor); + this(online, dialogService, stateManager, preferencesService, null, null, taskExecutor); } - public ExtractReferencesAction(DialogService dialogService, - StateManager stateManager, - PreferencesService preferencesService, - BibEntry entry, - LinkedFile linkedFile, - TaskExecutor taskExecutor) { + /** + * Can be used to bind the action on a context menu in the linked file view (future work) + * + * @param entry the entry to handle (can be null) + * @param linkedFile the linked file (can be null) + */ + private ExtractReferencesAction(boolean online, + @NonNull DialogService dialogService, + @NonNull StateManager stateManager, + @NonNull PreferencesService preferencesService, + @Nullable BibEntry entry, + @Nullable LinkedFile linkedFile, + @NonNull TaskExecutor taskExecutor) { + this.online = online; this.dialogService = dialogService; this.stateManager = stateManager; this.preferencesService = preferencesService; this.entry = entry; this.linkedFile = linkedFile; this.taskExecutor = taskExecutor; + bibliographyFromPdfImporter = new BibliographyFromPdfImporter(preferencesService.getCitationKeyPatternPreferences()); + + String text; + GrobidPreferences 
grobidPreferences = preferencesService.getGrobidPreferences(); if (this.linkedFile == null) { this.executable.bind( ActionHelper.needsEntriesSelected(stateManager) .and(ActionHelper.hasLinkedFileForSelectedEntries(stateManager)) - .and(this.preferencesService.getGrobidPreferences().grobidEnabledProperty()) ); } else { this.setExecutable(true); @@ -68,34 +102,118 @@ public void execute() { private void extractReferences() { stateManager.getActiveDatabase().ifPresent(databaseContext -> { - List selectedEntries = new LinkedList<>(); + assert online == this.preferencesService.getGrobidPreferences().isGrobidEnabled(); + + List selectedEntries; if (entry == null) { selectedEntries = stateManager.getSelectedEntries(); } else { - selectedEntries.add(entry); + selectedEntries = List.of(entry); } - List fileList = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences())); - if (fileList.size() > FILES_LIMIT) { - boolean continueOpening = dialogService.showConfirmationDialogAndWait(Localization.lang("Processing a large number of files"), - Localization.lang("You are about to process %0 files. 
Continue?", fileList.size()), - Localization.lang("Continue"), Localization.lang("Cancel")); - if (!continueOpening) { + Callable parserResultCallable; + if (online) { + Optional> parserResultCallableOnline = getParserResultCallableOnline(databaseContext, selectedEntries); + if (parserResultCallableOnline.isEmpty()) { return; } + parserResultCallable = parserResultCallableOnline.get(); + } else { + parserResultCallable = getParserResultCallableOffline(databaseContext, selectedEntries); } - - Callable parserResultCallable = () -> new ParserResult( - new GrobidService(this.preferencesService.getGrobidPreferences()).processReferences(fileList, preferencesService.getImportFormatPreferences()) - ); BackgroundTask task = BackgroundTask.wrap(parserResultCallable) .withInitialMessage(Localization.lang("Processing PDF(s)")); task.onFailure(dialogService::showErrorDialogAndWait); ImportEntriesDialog dialog = new ImportEntriesDialog(stateManager.getActiveDatabase().get(), task); - dialog.setTitle(Localization.lang("Extract References")); + String title; + if (online) { + title = Localization.lang("Extract References (online)"); + } else { + title = Localization.lang("Extract References (offline)"); + } + dialog.setTitle(title); dialogService.showCustomDialogAndWait(dialog); }); } + + private @NonNull Callable getParserResultCallableOffline(BibDatabaseContext databaseContext, List selectedEntries) { + return () -> { + BibEntry currentEntry = selectedEntries.getFirst(); + List fileList = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences())); + + // We need to have ParserResult handled at the importer, because it imports the meta data (library type, encoding, ...) 
+ ParserResult result = bibliographyFromPdfImporter.importDatabase(fileList.getFirst()); + + // subsequent files are just appended to result + Iterator fileListIterator = fileList.iterator(); + fileListIterator.next(); // skip first file + extractReferences(fileListIterator, result, currentEntry); + + // handle subsequent entries + Iterator selectedEntriesIterator = selectedEntries.iterator(); + selectedEntriesIterator.next(); // skip first entry + while (selectedEntriesIterator.hasNext()) { + currentEntry = selectedEntriesIterator.next(); + fileList = FileUtil.getListOfLinkedFiles(List.of(currentEntry), databaseContext.getFileDirectories(preferencesService.getFilePreferences())); + fileListIterator = fileList.iterator(); + extractReferences(fileListIterator, result, currentEntry); + } + + return result; + }; + } + + private void extractReferences(Iterator fileListIterator, ParserResult result, BibEntry currentEntry) { + while (fileListIterator.hasNext()) { + result.getDatabase().insertEntries(bibliographyFromPdfImporter.importDatabase(fileListIterator.next()).getDatabase().getEntries()); + } + + StringJoiner cites = new StringJoiner(","); + int count = 0; + for (BibEntry importedEntry : result.getDatabase().getEntries()) { + count++; + Optional citationKey = importedEntry.getCitationKey(); + String citationKeyToAdd; + if (citationKey.isPresent()) { + citationKeyToAdd = citationKey.get(); + } else { + // No key present -> generate one based on + // the citation key of the entry holding the files and + // the number of the current entry (extracted from the reference; fallback: current number of the entry (count variable)) + + String sourceCitationKey = currentEntry.getCitationKey().orElse("unknown"); + String newCitationKey; + // Could happen if no author and no year is present + // We use the number of the comment field (because there is no other way to get the number reliable) + Pattern pattern = Pattern.compile("^\\[(\\d+)\\]"); + Matcher matcher = 
pattern.matcher(importedEntry.getField(StandardField.COMMENT).orElse("")); + if (matcher.find()) { // find() actually runs the match; hasMatch() only reports an earlier one and is always false on a fresh Matcher + newCitationKey = sourceCitationKey + "-" + matcher.group(1); + } else { + newCitationKey = sourceCitationKey + "-" + count; + } + importedEntry.setCitationKey(newCitationKey); + citationKeyToAdd = newCitationKey; + } + cites.add(citationKeyToAdd); + } + currentEntry.setField(StandardField.CITES, cites.toString()); + } + + private Optional> getParserResultCallableOnline(BibDatabaseContext databaseContext, List selectedEntries) { + List fileList = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences())); + if (fileList.size() > FILES_LIMIT) { + boolean continueOpening = dialogService.showConfirmationDialogAndWait(Localization.lang("Processing a large number of files"), + Localization.lang("You are about to process %0 files. Continue?", fileList.size()), + Localization.lang("Continue"), Localization.lang("Cancel")); + if (!continueOpening) { + return Optional.empty(); + } + } + return Optional.of(() -> new ParserResult( + new GrobidService(this.preferencesService.getGrobidPreferences()).processReferences(fileList, preferencesService.getImportFormatPreferences()) + )); + } } diff --git a/src/main/java/org/jabref/gui/maintable/RightClickMenu.java b/src/main/java/org/jabref/gui/maintable/RightClickMenu.java index aa8a5477902..f19ad726454 100644 --- a/src/main/java/org/jabref/gui/maintable/RightClickMenu.java +++ b/src/main/java/org/jabref/gui/maintable/RightClickMenu.java @@ -4,6 +4,7 @@ import javafx.scene.control.ContextMenu; import javafx.scene.control.Menu; +import javafx.scene.control.MenuItem; import javafx.scene.control.SeparatorMenuItem; import org.jabref.gui.ClipBoardManager; @@ -34,6 +35,8 @@ import org.jabref.preferences.PreferencesService; import org.jabref.preferences.PreviewPreferences; +import com.tobiasdiez.easybind.EasyBind; + public class RightClickMenu { public static ContextMenu 
create(BibEntryTableViewModel entry, @@ -50,6 +53,9 @@ public static ContextMenu create(BibEntryTableViewModel entry, ActionFactory factory = new ActionFactory(keyBindingRepository); ContextMenu contextMenu = new ContextMenu(); + MenuItem extractFileReferencesOnline = factory.createMenuItem(StandardActions.EXTRACT_FILE_REFERENCES_ONLINE, new ExtractReferencesAction(true, dialogService, stateManager, preferencesService, taskExecutor)); + MenuItem extractFileReferencesOffline = factory.createMenuItem(StandardActions.EXTRACT_FILE_REFERENCES_OFFLINE, new ExtractReferencesAction(false, dialogService, stateManager, preferencesService, taskExecutor)); + contextMenu.getItems().addAll( factory.createMenuItem(StandardActions.COPY, new EditAction(StandardActions.COPY, () -> libraryTab, stateManager, undoManager)), createCopySubMenu(factory, dialogService, stateManager, preferencesService, clipBoardManager, abbreviationRepository, taskExecutor), @@ -75,7 +81,8 @@ public static ContextMenu create(BibEntryTableViewModel entry, factory.createMenuItem(StandardActions.ATTACH_FILE_FROM_URL, new AttachFileFromURLAction(dialogService, stateManager, taskExecutor, preferencesService)), factory.createMenuItem(StandardActions.OPEN_FOLDER, new OpenFolderAction(dialogService, stateManager, preferencesService, taskExecutor)), factory.createMenuItem(StandardActions.OPEN_EXTERNAL_FILE, new OpenExternalFileAction(dialogService, stateManager, preferencesService, taskExecutor)), - factory.createMenuItem(StandardActions.EXTRACT_FILE_REFERENCES, new ExtractReferencesAction(dialogService, stateManager, preferencesService, taskExecutor)), + extractFileReferencesOnline, + extractFileReferencesOffline, factory.createMenuItem(StandardActions.OPEN_URL, new OpenUrlAction(dialogService, stateManager, preferencesService)), factory.createMenuItem(StandardActions.SEARCH_SHORTSCIENCE, new SearchShortScienceAction(dialogService, stateManager, preferencesService)), @@ -86,6 +93,11 @@ public static ContextMenu 
create(BibEntryTableViewModel entry, factory.createMenuItem(StandardActions.MERGE_WITH_FETCHED_ENTRY, new MergeWithFetchedEntryAction(dialogService, stateManager, taskExecutor, preferencesService, undoManager)) ); + EasyBind.subscribe(preferencesService.getGrobidPreferences().grobidEnabledProperty(), enabled -> { + extractFileReferencesOnline.setVisible(enabled); + extractFileReferencesOffline.setVisible(!enabled); + }); + return contextMenu; } diff --git a/src/main/java/org/jabref/gui/menus/ChangeEntryTypeAction.java b/src/main/java/org/jabref/gui/menus/ChangeEntryTypeAction.java index 97037881c5d..2b08e7b9ed3 100644 --- a/src/main/java/org/jabref/gui/menus/ChangeEntryTypeAction.java +++ b/src/main/java/org/jabref/gui/menus/ChangeEntryTypeAction.java @@ -4,7 +4,6 @@ import javax.swing.undo.UndoManager; -import javafx.beans.property.ReadOnlyStringProperty; import javafx.beans.property.ReadOnlyStringWrapper; import org.jabref.gui.EntryTypeView; @@ -36,14 +35,4 @@ public void execute() { .ifPresent(change -> compound.addEdit(new UndoableChangeType(change)))); undoManager.addEdit(compound); } - - @Override - public String getStatusMessage() { - return statusMessage.get(); - } - - @Override - public ReadOnlyStringProperty statusMessageProperty() { - return statusMessageProperty.getReadOnlyProperty(); - } } diff --git a/src/main/java/org/jabref/logic/importer/AuthorListParser.java b/src/main/java/org/jabref/logic/importer/AuthorListParser.java index 6ef699d3db1..9e2b0649f2b 100644 --- a/src/main/java/org/jabref/logic/importer/AuthorListParser.java +++ b/src/main/java/org/jabref/logic/importer/AuthorListParser.java @@ -8,6 +8,9 @@ import java.util.Locale; import java.util.Optional; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.jabref.model.entry.Author; import org.jabref.model.entry.AuthorList; @@ -33,6 +36,9 @@ public class AuthorListParser { // Constant HashSet containing names of 
TeX special characters private static final Set TEX_NAMES = Set.of( "aa", "ae", "l", "o", "oe", "i", "AA", "AE", "L", "O", "OE", "j"); + + private static final Pattern STARTS_WITH_CAPITAL_LETTER_DOT = Pattern.compile("^[A-Z]\\. "); + /** * the raw bibtex author/editor field */ @@ -108,6 +114,8 @@ public AuthorList parse(@NonNull String listOfNames) { andOthersPresent = false; } + listOfNames = checkNamesCommaSeparated(listOfNames); + // Handle case names in order lastname, firstname and separated by "," // E.g., Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H. final boolean authorsContainAND = listOfNames.toUpperCase(Locale.ENGLISH).contains(" AND "); @@ -170,6 +178,29 @@ public AuthorList parse(@NonNull String listOfNames) { return AuthorList.of(authors); } + /** + * Handle cases names in order Firstname Lastname, separated by "," and a final ", and " + * E.g, "I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla" + * + * @return the original or patched version of listOfNames + */ + private static String checkNamesCommaSeparated(String listOfNames) { + int commandAndPos = listOfNames.lastIndexOf(", and "); + if (commandAndPos >= 0) { + String lastContainedName = listOfNames.substring(commandAndPos + ", and ".length()); + Matcher matcher = STARTS_WITH_CAPITAL_LETTER_DOT.matcher(lastContainedName); + if (matcher.find()) { + String namesBeforeAndString = listOfNames.substring(0, commandAndPos); + String[] namesBeforeAnd = namesBeforeAndString.split(", "); + if (Arrays.stream(namesBeforeAnd).allMatch(name -> STARTS_WITH_CAPITAL_LETTER_DOT.matcher(name).find())) { + // Format found + listOfNames = Arrays.stream(namesBeforeAnd).collect(Collectors.joining(" and ", "", " and " + lastContainedName)); + } + } + } + return listOfNames; + } + /** * Parses one author name and returns preformatted information. 
* diff --git a/src/main/java/org/jabref/logic/importer/Importer.java b/src/main/java/org/jabref/logic/importer/Importer.java index fa4159dc404..d991b087d3b 100644 --- a/src/main/java/org/jabref/logic/importer/Importer.java +++ b/src/main/java/org/jabref/logic/importer/Importer.java @@ -168,22 +168,6 @@ public static BufferedReader getReader(InputStream stream) { return new BufferedReader(reader); } - /** - * Returns the name of this import format. - * - *

The name must be unique.

- * - * @return format name, must be unique and not null - */ - public abstract String getName(); - - /** - * Returns the type of files that this importer can read - * - * @return {@link FileType} corresponding to the importer - */ - public abstract FileType getFileType(); - /** * Returns a one-word ID which identifies this importer. Used for example, to identify the importer when used from * the command line. @@ -202,6 +186,15 @@ public String getId() { return result.toString(); } + /** + * Returns the name of this import format. + * + *

The name must be unique.

+ * + * @return format name, must be unique and not null + */ + public abstract String getName(); + /** * Returns the description of the import format. *

@@ -216,6 +209,13 @@ public String getId() { */ public abstract String getDescription(); + /** + * Returns the type of files that this importer can read + * + * @return {@link FileType} corresponding to the importer + */ + public abstract FileType getFileType(); + @Override public int hashCode() { return getName().hashCode(); diff --git a/src/main/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.java new file mode 100644 index 00000000000..f2605594e13 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.java @@ -0,0 +1,304 @@ +package org.jabref.logic.importer.fileformat; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringWriter; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jabref.logic.citationkeypattern.CitationKeyGenerator; +import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences; +import org.jabref.logic.importer.Importer; +import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.l10n.Localization; +import org.jabref.logic.util.FileType; +import org.jabref.logic.util.StandardFileType; +import org.jabref.logic.xmp.EncryptedPdfsNotSupportedException; +import org.jabref.logic.xmp.XmpUtilReader; +import org.jabref.model.entry.AuthorList; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.Date; +import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +/** + * Parses the references from the "References" section from a PDF + *

+ * Currently, IEEE two column format is supported. + *

+ */ +public class BibliographyFromPdfImporter extends Importer { + + private static final Logger LOGGER = LoggerFactory.getLogger(BibliographyFromPdfImporter.class); + + private static final Pattern REFERENCE_PATTERN = Pattern.compile("\\[(\\d+)\\](.*?)(?=\\[|$)", Pattern.DOTALL); + private static final Pattern YEAR_AT_END = Pattern.compile(", (\\d{4})\\.$"); + private static final Pattern PAGES = Pattern.compile(", pp\\. (\\d+--?\\d+)\\.?(.*)"); + private static final Pattern PAGE = Pattern.compile(", p\\. (\\d+)(.*)"); + private static final Pattern MONTH_RANGE_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.?)-[A-Z][a-z]{2,7}\\.? (\\d+)(.*)"); + private static final Pattern MONTH_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.? \\d+),? ?(.*)"); + private static final Pattern VOLUME = Pattern.compile(", vol\\. (\\d+)(.*)"); + private static final Pattern NO = Pattern.compile(", no\\. (\\d+)(.*)"); + private static final Pattern AUTHORS_AND_TITLE_AT_BEGINNING = Pattern.compile("^([^“]+), “(.*?)”, "); + private static final Pattern TITLE = Pattern.compile("“(.*?)”, (.*)"); + + private final CitationKeyPatternPreferences citationKeyPatternPreferences; + + public BibliographyFromPdfImporter(CitationKeyPatternPreferences citationKeyPatternPreferences) { + this.citationKeyPatternPreferences = citationKeyPatternPreferences; + } + + @Override + public boolean isRecognizedFormat(BufferedReader input) throws IOException { + return Objects.requireNonNullElse(input.readLine(), "").startsWith("%PDF"); // readLine() yields null for empty input; treat that as "not a PDF" instead of throwing + } + + @Override + public ParserResult importDatabase(BufferedReader reader) throws IOException { + Objects.requireNonNull(reader); + throw new UnsupportedOperationException("BibliographyFromPdfImporter does not support importDatabase(BufferedReader reader). " 
+ + "Instead use importDatabase(Path filePath)."); + } + + @Override + public String getName() { + return "Bibliography from PDF"; + } + + @Override + public String getDescription() { + return "Reads the references from the 'References' section of a PDF file."; + } + + @Override + public FileType getFileType() { + return StandardFileType.PDF; + } + + @Override + public ParserResult importDatabase(Path filePath) { + List result; + + try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(filePath)) { + String contents = getLastPageContents(document); + result = getEntriesFromPDFContent(contents); + } catch (EncryptedPdfsNotSupportedException e) { + return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported.")); + } catch (IOException exception) { + return ParserResult.fromError(exception); + } + + ParserResult parserResult = new ParserResult(result); + + // Generate citation keys for result + CitationKeyGenerator citationKeyGenerator = new CitationKeyGenerator(parserResult.getDatabaseContext(), citationKeyPatternPreferences); + parserResult.getDatabase().getEntries().forEach(citationKeyGenerator::generateAndSetKey); + + return parserResult; + } + + private record IntermediateData(String number, String reference) { + } + + /** + * In: "[1] ...\n...\n...[2]...\n...\n...\n[3]..."
+ * Out: List<String> = ["[1] ...", "[2]...", "[3]..."] + */ + private List getEntriesFromPDFContent(String contents) { + List referencesStrings = new ArrayList<>(); + Matcher matcher = REFERENCE_PATTERN.matcher(contents); + while (matcher.find()) { + String reference = matcher.group(2).replaceAll("\\r?\\n", " ").trim(); + referencesStrings.add(new IntermediateData(matcher.group(1), reference)); + } + + return referencesStrings.stream() + .map(data -> parseReference(data.number(), data.reference())) + .toList(); + } + + private String getLastPageContents(PDDocument document) throws IOException { + PDFTextStripper stripper = new PDFTextStripper(); + + int lastPage = document.getNumberOfPages(); + stripper.setStartPage(lastPage); + stripper.setEndPage(lastPage); + StringWriter writer = new StringWriter(); + stripper.writeText(document, writer); + + return writer.toString(); + } + + /** + * Example: J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a + * + * @param number The number of the reference - used for logging only + */ + @VisibleForTesting + BibEntry parseReference(String number, String reference) { + String originalReference = "[" + number + "] " + reference; + BibEntry result = new BibEntry(StandardEntryType.Article); + + reference = reference.replace(".-", "-"); + + // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a + // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979. doi:10.18429/ JACoW-IPAC2019-MOPTS051 + int pos = reference.indexOf("doi:"); + if (pos >= 0) { + String doi = reference.substring(pos + "doi:".length()).trim(); + doi = doi.replace(" ", ""); + result.setField(StandardField.DOI, doi); + reference = reference.substring(0, pos).trim(); + } + + // J. 
Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. + // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979 + Matcher matcher = YEAR_AT_END.matcher(reference); + if (matcher.find()) { + result.setField(StandardField.YEAR, matcher.group(1)); + reference = reference.substring(0, matcher.start()).trim(); + } + + reference = updateEntryAndReferenceIfMatches(reference, PAGES, result, StandardField.PAGES); + + // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016 + // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019 + reference = updateEntryAndReferenceIfMatches(reference, PAGE, result, StandardField.PAGES); + + matcher = MONTH_RANGE_AND_YEAR.matcher(reference); + if (matcher.find()) { + // strip out second month + reference = reference.substring(0, matcher.start()) + ", " + matcher.group(1) + " " + matcher.group(2) + matcher.group(3); + } + + // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57 + // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019 + matcher = MONTH_AND_YEAR.matcher(reference); + if (matcher.find()) { + Optional parsedDate = Date.parse(matcher.group(1)); + if (parsedDate.isPresent()) { + Date date = parsedDate.get(); + date.getYear().ifPresent(year -> result.setField(StandardField.YEAR, year.toString())); + date.getMonth().ifPresent(month -> result.setField(StandardField.MONTH, month.getJabRefFormat())); + + String prefix = reference.substring(0, matcher.start()).trim(); + String suffix = matcher.group(2); + if (!suffix.isEmpty() && !".".equals(suffix)) { + suffix = ", " + suffix.replaceAll("^\\. 
", ""); + } else { + suffix = ""; + } + reference = prefix + suffix; + } + } + + // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57 + // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia + reference = updateEntryAndReferenceIfMatches(reference, VOLUME, result, StandardField.VOLUME); + + reference = updateEntryAndReferenceIfMatches(reference, NO, result, StandardField.NUMBER); + + // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion + // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia + matcher = AUTHORS_AND_TITLE_AT_BEGINNING.matcher(reference); + if (matcher.find()) { + String authors = matcher.group(1) + .replace("- ", "") + .replaceAll("et al\\.?", "and others"); + result.setField(StandardField.AUTHOR, AuthorList.fixAuthorFirstNameFirst(authors)); + result.setField(StandardField.TITLE, matcher.group(2) + .replace("- ", "") + .replaceAll("et al\\.?", "and others")); + reference = reference.substring(matcher.end()).trim(); + } else { + // No authors present + // Example: “AF4.1.1 SRF Linac Engineering Design Report”, Internal note. + reference = updateEntryAndReferenceIfMatches(reference, TITLE, result, StandardField.TITLE); + } + + // Nucl. Fusion + // in Proc. 
IPAC’19, Mel- bourne, Australia + // presented at the 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzerland + List stringsToRemove = List.of("presented at", "to be presented at"); + // need to use "iterator()" instead of "stream().foreach", because "reference" is modified inside the loop + Iterator iterator = stringsToRemove.iterator(); + while (iterator.hasNext()) { + String check = iterator.next(); + if (reference.startsWith(check)) { + reference = reference.substring(check.length()).trim(); + result.setType(StandardEntryType.InProceedings); + } + } + + boolean startsWithInProc = reference.startsWith("in Proc."); + boolean containsWorkshop = reference.contains("Workshop"); + if (startsWithInProc || containsWorkshop) { + int beginIndex = startsWithInProc ? 3 : 0; + result.setField(StandardField.BOOKTITLE, reference.substring(beginIndex).replace("- ", "").trim()); + result.setType(StandardEntryType.InProceedings); + reference = ""; + } + + // Nucl. Fusion + reference = reference.trim() + .replace("- ", "") + .replaceAll("\\.$", ""); + if (!reference.contains(",") && !reference.isEmpty()) { + if (reference.endsWith(" Note") || reference.endsWith(" note")) { + result.setField(StandardField.NOTE, reference); + result.setType(StandardEntryType.TechReport); + } else { + result.setField(StandardField.JOURNAL, reference.replace("- ", "")); + } + reference = ""; + } else { + String toAdd = reference; + result.setType(StandardEntryType.InProceedings); + if (result.hasField(StandardField.BOOKTITLE)) { + String oldTitle = result.getField(StandardField.BOOKTITLE).get(); + result.setField(StandardField.BOOKTITLE, oldTitle + toAdd); + } else { + result.setField(StandardField.BOOKTITLE, toAdd); + } + reference = ""; + LOGGER.debug("InProceedings fallback used for current state of handled string {}", toAdd); + } + + if (reference.isEmpty()) { + result.setField(StandardField.COMMENT, originalReference); + } else { + 
result.setField(StandardField.COMMENT, "Unprocessed: " + reference + "\n\n" + originalReference); + } + return result; + } + + /** + * @param pattern A pattern matching two groups: The first one to take, the second one to leave at the end of the string + */ + private static String updateEntryAndReferenceIfMatches(String reference, Pattern pattern, BibEntry result, Field field) { + Matcher matcher; + matcher = pattern.matcher(reference); + if (matcher.find()) { + result.setField(field, matcher.group(1).replace("- ", "")); + String suffix = matcher.group(2); + if (!suffix.isEmpty()) { + suffix = " " + suffix; + } + reference = reference.substring(0, matcher.start()).trim() + suffix; + } + return reference; + } +} diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java index 3d2e4f7aa86..40233fc288d 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java @@ -5,6 +5,7 @@ import java.io.StringWriter; import java.nio.file.Path; import java.util.ArrayList; +import java.util.List; import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -182,16 +183,9 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException { + "Instead use importDatabase(Path filePath, Charset defaultEncoding)."); } - @Override - public ParserResult importDatabase(String data) throws IOException { - Objects.requireNonNull(data); - throw new UnsupportedOperationException("PdfContentImporter does not support importDatabase(String data)." 
- + "Instead use importDatabase(Path filePath, Charset defaultEncoding)."); - } - @Override public ParserResult importDatabase(Path filePath) { - final ArrayList result = new ArrayList<>(1); + List result = new ArrayList<>(1); try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(filePath)) { String firstPageContents = getFirstPageContents(document); Optional entry = getEntryFromPDFContent(firstPageContents, OS.NEWLINE); diff --git a/src/main/java/org/jabref/model/entry/Date.java b/src/main/java/org/jabref/model/entry/Date.java index 8c127f6bf2f..3509b8cc304 100644 --- a/src/main/java/org/jabref/model/entry/Date.java +++ b/src/main/java/org/jabref/model/entry/Date.java @@ -41,10 +41,13 @@ public class Date { "M/uu", // covers 9/15 "MMMM d, uuuu", // covers September 1, 2015 "MMMM, uuuu", // covers September, 2015 + "MMMM uuuu", // covers September 2015 "d.M.uuuu", // covers 15.1.2015 "uuuu.M.d", // covers 2015.1.15 "uuuu", // covers 2015 "MMM, uuuu", // covers Jan, 2020 + "MMM. uuuu", // covers Oct. 
2020 + "MMM uuuu", // covers Jan 2020 "uuuu.MM.d", // covers 2015.10.15 "d MMMM u/d MMMM u", // covers 20 January 2015/20 February 2015 "d MMMM u", // covers 20 January 2015 @@ -141,7 +144,7 @@ public static Optional parse(String dateString) { TemporalAccessor parsedEndDate = SIMPLE_DATE_FORMATS.parse(strDates[1].strip()); return Optional.of(new Date(parsedDate, parsedEndDate)); } catch (DateTimeParseException e) { - LOGGER.debug("Invalid Date format for range", e); + LOGGER.warn("Invalid Date format for range", e); return Optional.empty(); } } else if (dateString.matches( @@ -162,7 +165,7 @@ public static Optional parse(String dateString) { TemporalAccessor parsedEndDate = SIMPLE_DATE_FORMATS.parse(strDates[1].strip()); return Optional.of(new Date(parsedDate, parsedEndDate)); } catch (DateTimeParseException e) { - LOGGER.debug("Invalid Date format range", e); + LOGGER.warn("Invalid Date format range", e); return Optional.empty(); } } else if (dateString.matches( @@ -179,7 +182,7 @@ public static Optional parse(String dateString) { TemporalAccessor parsedEndDate = parseDateWithEraIndicator(strDates[1]); return Optional.of(new Date(parsedDate, parsedEndDate)); } catch (DateTimeParseException e) { - LOGGER.debug("Invalid Date format range", e); + LOGGER.warn("Invalid Date format range", e); return Optional.empty(); } } else if (dateString.matches( @@ -196,13 +199,13 @@ public static Optional parse(String dateString) { TemporalAccessor parsedEndDate = parseDateWithEraIndicator(strDates[1]); return Optional.of(new Date(parsedDate, parsedEndDate)); } catch (DateTimeParseException e) { - LOGGER.debug("Invalid Date format range", e); + LOGGER.warn("Invalid Date format range", e); return Optional.empty(); } } // if dateString is single year - if (dateString.matches("\\d{4}-|" + "\\d{4}\\?")) { + if (dateString.matches("\\d{4}-|\\d{4}\\?")) { try { String year = dateString.substring(0, dateString.length() - 1); TemporalAccessor parsedDate = SIMPLE_DATE_FORMATS.parse(year); 
@@ -225,7 +228,7 @@ public static Optional parse(String dateString) { TemporalAccessor date = parseDateWithEraIndicator(dateString); return Optional.of(new Date(date)); } catch (DateTimeParseException e) { - LOGGER.debug("Invalid Date format with era indicator", e); + LOGGER.warn("Invalid Date format with era indicator", e); return Optional.empty(); } } diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index ce06acfe80b..d11e5c11275 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -329,8 +329,10 @@ Export\ preferences\ to\ file=Export preferences to file Export\ to\ clipboard=Export to clipboard Export\ to\ text\ file.=Export to text file. -Extract\ references\ from\ file=Extract references from file -Extract\ References=Extract References +Extract\ references\ from\ file\ (online)=Extract references from file (online) +Extract\ references\ from\ file\ (offline)=Extract references from file (offline) +Extract\ References\ (online)=Extract References (online) +Extract\ References\ (offline)=Extract References (offline) Processing\ PDF(s)=Processing PDF(s) Processing\ a\ large\ number\ of\ files=Processing a large number of files You\ are\ about\ to\ process\ %0\ files.\ Continue?=You are about to process %0 files. Continue? 
diff --git a/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java b/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java index 2943fc5fec6..aa983ab6ab3 100644 --- a/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java +++ b/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java @@ -11,6 +11,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +/** + * Similar tests are available in {@link org.jabref.model.entry.AuthorListTest} + */ class AuthorListParserTest { AuthorListParser parser = new AuthorListParser(); @@ -50,7 +53,15 @@ private static Stream parseMultipleCorrectly() { new Author("Alexander", "A.", null, "Artemenko", null), Author.OTHERS ), - "Alexander Artemenko and others") + "Alexander Artemenko and others"), + Arguments.of( + AuthorList.of( + new Author("I.", "I.", null, "Podadera", null), + new Author("J. M.", "J. M.", null, "Carmona", null), + new Author("A.", "A.", null, "Ibarra", null), + new Author("J.", "J.", null, "Molla", null) + ), + "I. Podadera, J. M. Carmona, A. Ibarra, and J. 
Molla") ); } diff --git a/src/test/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporterTest.java b/src/test/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporterTest.java new file mode 100644 index 00000000000..9318d99e977 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporterTest.java @@ -0,0 +1,274 @@ +package org.jabref.logic.importer.fileformat; + +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Stream; + +import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences; +import org.jabref.logic.citationkeypattern.GlobalCitationKeyPattern; +import org.jabref.logic.importer.ParserResult; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.jabref.logic.citationkeypattern.CitationKeyGenerator.DEFAULT_UNWANTED_CHARACTERS; +import static org.junit.jupiter.api.Assertions.assertEquals; + +class BibliographyFromPdfImporterTest { + + private static final BibEntry KNASTER_2017 = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "J. Knaster and others") + .withField(StandardField.TITLE, "Overview of the IFMIF/EVEDA project") + .withField(StandardField.JOURNAL, "Nucl. Fusion") + .withField(StandardField.VOLUME, "57") + .withField(StandardField.PAGES, "102016") + .withField(StandardField.YEAR, "2017") + .withField(StandardField.DOI, "10.1088/1741-4326/aa6a6a") + .withField(StandardField.COMMENT, "[1] J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. 
doi:10.1088/ 1741-4326/aa6a6a"); + private static final BibEntry SHIMOSAKI_2019 = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "Y. Shimosaki and others") + .withField(StandardField.TITLE, "Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc") + .withField(StandardField.BOOKTITLE, "Proc. IPAC’19, Melbourne, Australia") + .withField(StandardField.MONTH, "#may#") + .withField(StandardField.YEAR, "2019") + .withField(StandardField.PAGES, "977-979") + .withField(StandardField.DOI, "10.18429/JACoW-IPAC2019-MOPTS051") + .withField(StandardField.COMMENT, "[3] Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979. doi:10.18429/ JACoW-IPAC2019-MOPTS051"); + private static final BibEntry BELLAN_2021 = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "L. Bellan and others") + .withField(StandardField.TITLE, "Acceleration of the high current deuteron beam through the IFMIF-EVEDA beam dynamics performances") + .withField(StandardField.BOOKTITLE, "Proc. HB’21, Batavia, IL, USA") + .withField(StandardField.MONTH, "#oct#") + .withField(StandardField.YEAR, "2021") + .withField(StandardField.PAGES, "197-202") + .withField(StandardField.DOI, "10.18429/JACoW-HB2021-WEDC2") + .withField(StandardField.COMMENT, "[6] L. Bellan et al., “Acceleration of the high current deuteron beam through the IFMIF-EVEDA beam dynamics perfor- mances”, in Proc. HB’21, Batavia, IL, USA, Oct. 2021, pp. 197-202. doi:10.18429/JACoW-HB2021-WEDC2"); + private static final BibEntry MASUDA_2022 = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "K. Masuda and others") + .withField(StandardField.TITLE, "Commissioning of IFMIF Prototype Accelerator towards CW operation") + .withField(StandardField.BOOKTITLE, "Proc. 
LINAC’22, Liverpool, UK") + .withField(StandardField.MONTH, "#aug#") + .withField(StandardField.YEAR, "2022") + .withField(StandardField.PAGES, "319-323") + .withField(StandardField.DOI, "10.18429/JACoW-LINAC2022-TU2AA04") + .withField(StandardField.COMMENT, "[7] K. Masuda et al., “Commissioning of IFMIF Prototype Ac- celerator towards CW operation”, in Proc. LINAC’22, Liv- erpool, UK, Aug.-Sep. 2022, pp. 319-323. doi:10.18429/ JACoW-LINAC2022-TU2AA04"); + private static final BibEntry PODADERA_2012 = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "I. Podadera and J. M. Carmona and A. Ibarra and J. Molla") + .withField(StandardField.TITLE, "Beam position monitor development for LIPAc") + .withField(StandardField.BOOKTITLE, "th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland") + .withField(StandardField.MONTH, "#jan#") + .withField(StandardField.YEAR, "2012") + .withField(StandardField.COMMENT, "[11] I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla, “Beam position monitor development for LIPAc”, presented at th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland, Jan. 2012."); + private static final BibEntry AKAGI_2023 = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "T. Akagi and others") + .withField(StandardField.TITLE, "Achievement of high-current continuouswave deuteron injector for Linear IFMIF Prototype Accelerator (LIPAc)") + .withField(StandardField.BOOKTITLE, "IAEA FEC’23, London, UK, https://www.iaea.org/events/fec2023") + .withField(StandardField.MONTH, "#oct#") + .withField(StandardField.YEAR, "2023") + .withField(StandardField.COMMENT, "[15] T. Akagi et al., “Achievement of high-current continuous- wave deuteron injector for Linear IFMIF Prototype Accelera- tor (LIPAc)”, to be presented at IAEA FEC’23, London, UK, Oct. 2023. 
https://www.iaea.org/events/fec2023"); + private static final BibEntry INTERNAL_NOTE = new BibEntry(StandardEntryType.TechReport) + .withField(StandardField.TITLE, "AF4.1.1 SRF Linac Engineering Design Report") + .withField(StandardField.NOTE, "Internal note") + .withField(StandardField.COMMENT, "[16] “AF4.1.1 SRF Linac Engineering Design Report”, Internal note."); + private static final BibEntry KWON_2023 = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "S. Kwon and others") + .withField(StandardField.TITLE, "High beam current operation with beam di-agnostics at LIPAc") + .withField(StandardField.BOOKTITLE, "HB’23, Geneva, Switzerland, paper FRC1I2, this conference") + .withField(StandardField.MONTH, "#oct#") + .withField(StandardField.YEAR, "2023") + .withField(StandardField.COMMENT, "[14] S. Kwon et al., “High beam current operation with beam di-agnostics at LIPAc”, presented at HB’23, Geneva, Switzer- land, Oct. 2023, paper FRC1I2, this conference."); + private BibliographyFromPdfImporter bibliographyFromPdfImporter; + + @BeforeEach + void setup() { + GlobalCitationKeyPattern globalCitationKeyPattern = GlobalCitationKeyPattern.fromPattern("[auth][year]"); + CitationKeyPatternPreferences citationKeyPatternPreferences = new CitationKeyPatternPreferences( + false, + false, + false, + CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A, + "", + "", + DEFAULT_UNWANTED_CHARACTERS, + globalCitationKeyPattern, + "", + ','); + bibliographyFromPdfImporter = new BibliographyFromPdfImporter(citationKeyPatternPreferences); + } + + @Test + void tua3i2refpage() throws Exception { + Path file = Path.of(BibliographyFromPdfImporterTest.class.getResource("tua3i2refpage.pdf").toURI()); + ParserResult parserResult = bibliographyFromPdfImporter.importDatabase(file); + BibEntry entry02 = new BibEntry(StandardEntryType.Article) + .withCitationKey("Kondo2020") + .withField(StandardField.AUTHOR, "K. 
Kondo and others") + .withField(StandardField.TITLE, "Validation of the Linear IFMIF Prototype Accelerator (LIPAc) in Rokkasho") + .withField(StandardField.JOURNAL, "Fusion Eng. Des") // TODO: Final dot should be kept + .withField(StandardField.VOLUME, "153") + .withField(StandardField.YEAR, "2020") + .withField(StandardField.PAGES, "111503") + .withField(StandardField.DOI, "10.1016/j.fusengdes.2020.111503") + .withField(StandardField.COMMENT, "[2] K. Kondo et al., “Validation of the Linear IFMIF Prototype Accelerator (LIPAc) in Rokkasho”, Fusion Eng. Des., vol. 153, p. 111503, 2020. doi:10.1016/j.fusengdes.2020. 111503"); + + BibEntry entry04 = new BibEntry(StandardEntryType.InProceedings) + .withCitationKey("Devanz2017") + .withField(StandardField.AUTHOR, "G. Devanz and others") + .withField(StandardField.TITLE, "Manufacturing and validation tests of IFMIF low-beta HWRs") + .withField(StandardField.BOOKTITLE, "Proc. IPAC’17, Copenhagen, Denmark") + .withField(StandardField.MONTH, "#may#") + .withField(StandardField.YEAR, "2017") + .withField(StandardField.PAGES, "942-944") + .withField(StandardField.DOI, "10.18429/JACoW-IPAC2017-MOPVA039") + .withField(StandardField.COMMENT, "[4] G. Devanz et al., “Manufacturing and validation tests of IFMIF low-beta HWRs”, in Proc. IPAC’17, Copen- hagen, Denmark, May 2017, pp. 942-944. doi:10.18429/ JACoW-IPAC2017-MOPVA039"); + + BibEntry entry05 = new BibEntry(StandardEntryType.Article) + .withCitationKey("Branas2018") + .withField(StandardField.AUTHOR, "B. Brañas and others") + .withField(StandardField.TITLE, "The LIPAc Beam Dump") + .withField(StandardField.JOURNAL, "Fusion Eng. Des") + .withField(StandardField.VOLUME, "127") + .withField(StandardField.PAGES, "127-138") + .withField(StandardField.YEAR, "2018") + .withField(StandardField.DOI, "10.1016/j.fusengdes.2017.12.018") + .withField(StandardField.COMMENT, "[5] B. Brañas et al., “The LIPAc Beam Dump”, Fusion Eng. Des., vol. 127, pp. 127-138, 2018. 
doi:10.1016/j.fusengdes. 2017.12.018"); + + BibEntry entry08 = new BibEntry(StandardEntryType.InProceedings) + .withCitationKey("Scantamburlo2023") + .withField(StandardField.AUTHOR, "F. Scantamburlo and others") + .withField(StandardField.TITLE, "Linear IFMIF Prototype Accelera-tor (LIPAc) Radio Frequency Quadrupole’s (RFQ) RF couplers enhancement towards CW operation at nominal voltage") + .withField(StandardField.BOOKTITLE, "Proc. ISFNT’23, Las Palmas de Gran Canaria, Spain.") + .withField(StandardField.MONTH, "#sep#") + .withField(StandardField.YEAR, "2023") + .withField(StandardField.COMMENT, "[8] F. Scantamburlo et al., “Linear IFMIF Prototype Accelera-tor (LIPAc) Radio Frequency Quadrupole’s (RFQ) RF couplers enhancement towards CW operation at nominal voltage”, in Proc. ISFNT’23, Sep. 2023, Las Palmas de Gran Canaria, Spain."); + + BibEntry entry09 = new BibEntry(StandardEntryType.InProceedings) + .withCitationKey("Franco2023") + .withField(StandardField.AUTHOR, "A. De Franco and others") + .withField(StandardField.BOOKTITLE, "Proc. IPAC’23, Venice, Italy") + .withField(StandardField.TITLE, "RF conditioning towards continuous wave of the FRQ of the Linear IFMIF Prototype Accelerator") + .withField(StandardField.PAGES, "2345-2348") + .withField(StandardField.MONTH, "#may#") + .withField(StandardField.YEAR, "2023") + .withField(StandardField.DOI, "10.18429/JACoW-IPAC2023-TUPM065") + .withField(StandardField.COMMENT, "[9] A. De Franco et al., “RF conditioning towards continuous wave of the FRQ of the Linear IFMIF Prototype Accelerator”, in Proc. IPAC’23, Venice, Italy, May 2023, pp. 2345-2348. doi:10.18429/JACoW-IPAC2023-TUPM065"); + + BibEntry entry10 = new BibEntry(StandardEntryType.InProceedings) + .withCitationKey("Hirosawa") + .withField(StandardField.AUTHOR, "K. Hirosawa and others") + .withField(StandardField.BOOKTITLE, "Proc. 
PASJ’23, 2023, Japan.") + .withField(StandardField.TITLE, "High-Power RF tests of repaired circulator for LIPAc RFQ") + .withField(StandardField.COMMENT, "[10] K. Hirosawa et al., “High-Power RF tests of repaired circu- lator for LIPAc RFQ”, in Proc. PASJ’23, 2023, Japan."); + + BibEntry entry12 = new BibEntry(StandardEntryType.InProceedings) + .withCitationKey("Podadera2019") + .withField(StandardField.AUTHOR, "I. Podadera and others") + .withField(StandardField.TITLE, "Beam commissioning of beam position and phase monitors for LIPAc") + .withField(StandardField.BOOKTITLE, "Proc. IBIC’19, Malmö, Sweden") + .withField(StandardField.PAGES, "534-538") + .withField(StandardField.MONTH, "#sep#") + .withField(StandardField.YEAR, "2019") + .withField(StandardField.DOI, "10.18429/JACoW-IBIC2019-WEPP013") + .withField(StandardField.COMMENT, "[12] I. Podadera et al., “Beam commissioning of beam posi- tion and phase monitors for LIPAc”, in Proc. IBIC’19, Malmö, Sweden, Sep. 2019, pp. 534-538. doi:10.18429/ JACoW-IBIC2019-WEPP013"); + + BibEntry entry13 = new BibEntry(StandardEntryType.Article) + .withCitationKey("Kondo2021") + .withField(StandardField.AUTHOR, "K. Kondo and others") + .withField(StandardField.TITLE, "Neutron production measurement in the 125 mA 5 MeV Deuteron beam commissioning of Linear IFMIF Prototype Accelerator (LIPAc) RFQ") + .withField(StandardField.JOURNAL, "Nucl. Fusion") + .withField(StandardField.VOLUME, "61") + .withField(StandardField.NUMBER, "1") + .withField(StandardField.PAGES, "116002") + .withField(StandardField.YEAR, "2021") + .withField(StandardField.DOI, "82310.1088/1741-4326/ac233c") + .withField(StandardField.COMMENT, "[13] K. Kondo et al., “Neutron production measurement in the 125 mA 5 MeV Deuteron beam commissioning of Linear IFMIF Prototype Accelerator (LIPAc) RFQ”, Nucl. Fusion, vol. 61, no. 1, p. 116002, 2021. 
doi:82310.1088/1741-4326/ ac233c"); + + BibEntry entry17 = new BibEntry(StandardEntryType.InProceedings) + .withCitationKey("Bellan2021a") + .withField(StandardField.AUTHOR, "L. Bellan and others") + .withField(StandardField.BOOKTITLE, "Proc. ICIS’21, TRIUMF, Vancouver, BC, Canada, https://indico.cern.ch/event/1027296/") + .withField(StandardField.COMMENT, "[17] L. Bellan et al., “Extraction and low energy beam transport models used for the IFMIF/EVEDA RFQ commissioning”, in Proc. ICIS’21, TRIUMF, Vancouver, BC, Canada, Sep. 2021. https://indico.cern.ch/event/1027296/") + .withField(StandardField.MONTH, "#sep#") + .withField(StandardField.TITLE, "Extraction and low energy beam transport models used for the IFMIF/EVEDA RFQ commissioning") + .withField(StandardField.YEAR, "2021"); + + // We use the existing test entries, but add a citation key (which is added by the importer) + // We need to clone to keep the static entries unmodified + assertEquals(List.of( + ((BibEntry) KNASTER_2017.clone()).withCitationKey("Knaster2017"), + entry02, + ((BibEntry) SHIMOSAKI_2019.clone()).withCitationKey("Shimosaki2019"), + entry04, + entry05, + ((BibEntry) BELLAN_2021.clone()).withCitationKey("Bellan2021"), + ((BibEntry) MASUDA_2022.clone()).withCitationKey("Masuda2022"), + entry08, + entry09, + entry10, + ((BibEntry) PODADERA_2012.clone()).withCitationKey("Podadera2012"), + entry12, + entry13, + ((BibEntry) KWON_2023.clone()).withCitationKey("Kwon2023"), + ((BibEntry) AKAGI_2023.clone()).withCitationKey("Akagi2023"), + ((BibEntry) INTERNAL_NOTE.clone()), + entry17), + parserResult.getDatabase().getEntries()); + } + + static Stream references() { + return Stream.of( + Arguments.of( + KNASTER_2017, + "1", + "J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a" + ), + Arguments.of( + SHIMOSAKI_2019, + "3", + "Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. 
IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979. doi:10.18429/ JACoW-IPAC2019-MOPTS051" + ), + Arguments.of( + BELLAN_2021, + "6", + "L. Bellan et al., “Acceleration of the high current deuteron beam through the IFMIF-EVEDA beam dynamics perfor- mances”, in Proc. HB’21, Batavia, IL, USA, Oct. 2021, pp. 197-202. doi:10.18429/JACoW-HB2021-WEDC2" + ), + Arguments.of( + MASUDA_2022, + "7", + "K. Masuda et al., “Commissioning of IFMIF Prototype Ac- celerator towards CW operation”, in Proc. LINAC’22, Liv- erpool, UK, Aug.-Sep. 2022, pp. 319-323. doi:10.18429/ JACoW-LINAC2022-TU2AA04" + ), + Arguments.of( + PODADERA_2012, + "11", + "I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla, “Beam position monitor development for LIPAc”, presented at th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland, Jan. 2012." + ), + Arguments.of( + KWON_2023, + "14", + "S. Kwon et al., “High beam current operation with beam di-agnostics at LIPAc”, presented at HB’23, Geneva, Switzer- land, Oct. 2023, paper FRC1I2, this conference." + ), + Arguments.of( + AKAGI_2023, + "15", + "T. Akagi et al., “Achievement of high-current continuous- wave deuteron injector for Linear IFMIF Prototype Accelera- tor (LIPAc)”, to be presented at IAEA FEC’23, London, UK, Oct. 2023. https://www.iaea.org/events/fec2023" + ), + Arguments.of( + INTERNAL_NOTE, + "16", + "“AF4.1.1 SRF Linac Engineering Design Report”, Internal note." 
+ ) + ); + } + + @ParameterizedTest + @MethodSource + void references(BibEntry expectedEntry, String number, String reference) { + assertEquals(expectedEntry, bibliographyFromPdfImporter.parseReference(number, reference)); + } +} diff --git a/src/test/java/org/jabref/model/entry/AuthorListTest.java b/src/test/java/org/jabref/model/entry/AuthorListTest.java index 6f49c711bae..57617913ddd 100644 --- a/src/test/java/org/jabref/model/entry/AuthorListTest.java +++ b/src/test/java/org/jabref/model/entry/AuthorListTest.java @@ -14,6 +14,9 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +/** + * Other parsing tests are available in {@link org.jabref.logic.importer.AuthorListParserTest} + */ public class AuthorListTest { /* diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/tua3i2refpage.pdf b/src/test/resources/org/jabref/logic/importer/fileformat/tua3i2refpage.pdf new file mode 100644 index 00000000000..5f98c97a533 Binary files /dev/null and b/src/test/resources/org/jabref/logic/importer/fileformat/tua3i2refpage.pdf differ