Skip to content

Commit

Permalink
Fix PDF export (#10361)
Browse files Browse the repository at this point in the history
* Create PDF WIP

* Create PDF WIP

* Create PDF WIP

* Add @Test to XmpPdfExporterTest WIP

* Add Importer to XmpPdfExporterTest

* WIP

* Fix testRoundtripExportImport

* Finish testRoundtripExportImport

* Add @AfterEach to XmpPdfExporterTest.java

* Change @Test to @ParameterizedTest

* Delete IllegalArgumentException in XmpPdfExporter.java

* add changelog and change message

---------

Co-authored-by: Siedlerchr <siedlerkiller@gmail.com>
  • Loading branch information
Luggas4you and Siedlerchr committed Oct 3, 2023
1 parent 303281e commit a73bb07
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- The export formats `listrefs`, `tablerefs`, `tablerefsabsbib`, now use the ISO date format in the footer [#10383](https://github.com/JabRef/jabref/pull/10383).
- When searching for an identifier in the "Web search", the title of the search window is now "Identifier-based Web Search". [#10391](https://github.com/JabRef/jabref/pull/10391)
- The ampersand checker now skips verbatim fields (`file`, `url`, ...). [#10419](https://github.com/JabRef/jabref/pull/10419)
- If no existing document is selected when exporting "XMP annotated pdf", JabRef will now create a new PDF file with a sample text and the metadata. [#10102](https://github.com/JabRef/jabref/issues/10102)

### Fixed

Expand All @@ -38,6 +39,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We fixed an issue where it was possible to create a group with no name or with a group separator inside the name [#9776](https://github.com/JabRef/jabref/issues/9776)
- Biblatex's `journaltitle` is now also respected for showing the journal information. [#10397](https://github.com/JabRef/jabref/issues/10397)
- JabRef does not hang anymore when exporting via CLI. [#10380](https://github.com/JabRef/jabref/issues/10380)
- We fixed an issue where exporting "XMP annotated pdf" without selecting an existing document would produce an exception. [#10102](https://github.com/JabRef/jabref/issues/10102)

### Removed

Expand Down
27 changes: 26 additions & 1 deletion src/main/java/org/jabref/logic/exporter/XmpPdfExporter.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jabref.logic.exporter;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Objects;
Expand All @@ -11,6 +13,12 @@
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;

public class XmpPdfExporter extends Exporter {

private final XmpPreferences xmpPreferences;
Expand All @@ -26,7 +34,24 @@ public void export(BibDatabaseContext databaseContext, Path pdfFile, List<BibEnt
Objects.requireNonNull(pdfFile);
Objects.requireNonNull(entries);

if (pdfFile.toString().endsWith(".pdf")) {
Path filePath = pdfFile.toAbsolutePath();

if (!Files.exists(filePath)) {
try (PDDocument document = new PDDocument()) {
PDPage page = new PDPage();
document.addPage(page);

try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
contentStream.beginText();
contentStream.newLineAtOffset(25, 500);
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
contentStream.showText("This PDF was created by JabRef. It demonstrates the embedding of XMP data in PDF files. Please open the file metadata view of your PDF viewer to see the attached files. Note that the normal usage is to embed the BibTeX data in an existing PDF.");
contentStream.endText();
}
document.save(filePath.toString());
} catch (IOException e) {
throw new Exception("Error creating PDF", e);
}
new XmpUtilWriter(xmpPreferences).writeXmp(pdfFile, entries, databaseContext.getDatabase());
}
}
Expand Down
72 changes: 69 additions & 3 deletions src/test/java/org/jabref/logic/exporter/XmpPdfExporterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@
import java.util.stream.Stream;

import javafx.beans.property.SimpleObjectProperty;
import javafx.collections.FXCollections;

import org.jabref.logic.cleanup.FieldFormatterCleanup;
import org.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.fileformat.PdfXmpImporter;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.logic.xmp.XmpPreferences;
import org.jabref.logic.xmp.XmpUtilWriter;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
Expand All @@ -21,12 +27,18 @@

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
Expand All @@ -41,6 +53,8 @@ class XmpPdfExporterTest {
private static BibEntry vapnik2000 = new BibEntry(StandardEntryType.Article);

private XmpPdfExporter exporter;
private PdfXmpImporter importer;
private XmpPreferences xmpPreferences;

private BibDatabaseContext databaseContext;
private JournalAbbreviationRepository abbreviationRepository;
Expand Down Expand Up @@ -84,7 +98,7 @@ private static void initBibEntries() throws IOException {
vapnik2000.setCitationKey("vapnik2000");
vapnik2000.setField(StandardField.TITLE, "The Nature of Statistical Learning Theory");
vapnik2000.setField(StandardField.PUBLISHER, "Springer Science + Business Media");
vapnik2000.setField(StandardField.AUTHOR, "Vladimir N. Vapnik");
vapnik2000.setField(StandardField.AUTHOR, "Vapnik, Vladimir N.");
vapnik2000.setField(StandardField.DOI, "10.1007/978-1-4757-3264-1");
vapnik2000.setField(StandardField.OWNER, "Ich");
}
Expand All @@ -99,9 +113,13 @@ void setUp() throws IOException {
when(filePreferences.getUserAndHost()).thenReturn(tempDir.toAbsolutePath().toString());
when(filePreferences.shouldStoreFilesRelativeToBibFile()).thenReturn(false);

XmpPreferences xmpPreferences = new XmpPreferences(false, Collections.emptySet(), new SimpleObjectProperty<>(','));
xmpPreferences = new XmpPreferences(false, Collections.emptySet(), new SimpleObjectProperty<>(','));
exporter = new XmpPdfExporter(xmpPreferences);

ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS);
when(importFormatPreferences.fieldPreferences().getNonWrappableFields()).thenReturn(FXCollections.emptyObservableList());
importer = new PdfXmpImporter(xmpPreferences);

databaseContext = new BibDatabaseContext();
BibDatabase dataBase = databaseContext.getDatabase();

Expand All @@ -111,6 +129,17 @@ void setUp() throws IOException {
dataBase.insertEntry(vapnik2000);
}

/**
 * Restores the file links of the shared entries after every test.
 * <p>
 * The {@code file} field of each entry in the database context is cleared first; afterwards
 * {@code olly2018} is linked to a freshly written {@code existing.pdf} in the temp directory and
 * {@code toral2006} is linked to a path that is given as {@code path/to/nowhere.pdf}.
 *
 * @throws IOException if {@code existing.pdf} cannot be written
 */
@AfterEach
void reset() throws IOException {
    databaseContext.getEntries().forEach(entry -> entry.clearField(StandardField.FILE));

    olly2018.setFiles(List.of(createDefaultLinkedFile("existing.pdf", tempDir)));
    toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithValidPdfFileLinks")
void successfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
Expand Down Expand Up @@ -143,6 +172,39 @@ void unsuccessfulExportToFileByPath(Path path) throws Exception {
assertFalse(exporter.exportToFileByPath(databaseContext, filePreferences, path, abbreviationRepository));
}

/**
 * Writes the entries of the database context as XMP metadata into a newly created PDF, imports
 * them again and expects the imported entries to equal the original ones.
 * <p>
 * The PDF is built directly in this test (one page with a short explanatory text) and the metadata
 * is written with {@link XmpUtilWriter}.
 *
 * @param path location at which the PDF is created
 */
@ParameterizedTest
@MethodSource("providePathToNewPDFs")
public void testRoundtripExportImport(Path path) throws Exception {
    // Build a one-page PDF carrying a short explanatory text and store it at the given path.
    try (PDDocument pdf = new PDDocument()) {
        PDPage firstPage = new PDPage();
        pdf.addPage(firstPage);

        try (PDPageContentStream text = new PDPageContentStream(pdf, firstPage)) {
            text.beginText();
            text.newLineAtOffset(25, 500);
            text.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
            text.showText("This PDF was created by JabRef. It demonstrates the embedding of XMP data in PDF files. Please open the file metadata view of your PDF viewer to see the attached files. Note that the normal usage is to embed the BibTeX data in an existing PDF.");
            text.endText();
        }
        pdf.save(path.toString());
    }

    XmpUtilWriter xmpWriter = new XmpUtilWriter(xmpPreferences);
    xmpWriter.writeXmp(path, databaseContext.getEntries(), databaseContext.getDatabase());

    List<BibEntry> importedEntries = importer.importDatabase(path).getDatabase().getEntries();
    // Normalize the author field of what was read back
    // (presumably so it matches the "Last, First" form used by the fixtures - TODO confirm).
    for (BibEntry imported : importedEntries) {
        new FieldFormatterCleanup(StandardField.AUTHOR, new NormalizeNamesFormatter()).cleanup(imported);
    }

    // The expected entries are compared with a single file link to "original.pdf" each.
    List<BibEntry> expectedEntries = databaseContext.getEntries();
    for (BibEntry expected : expectedEntries) {
        expected.clearField(StandardField.FILE);
        expected.addFile(createDefaultLinkedFile("original.pdf", tempDir));
    }
    assertEquals(expectedEntries, importedEntries);
}

/**
 * Argument source for {@code testRoundtripExportImport}.
 *
 * @return a single argument set holding the absolute path of {@code original.pdf} in the temp directory
 */
public static Stream<Arguments> providePathToNewPDFs() {
    Path newPdf = tempDir.resolve("original.pdf").toAbsolutePath();
    return Stream.of(Arguments.of(newPdf));
}

/**
 * Argument source yielding one path.
 *
 * @return a single argument set holding the absolute path of {@code existing.pdf} in the temp directory
 */
public static Stream<Arguments> providePathsToValidPDFs() {
    Path existingPdf = tempDir.resolve("existing.pdf").toAbsolutePath();
    return Stream.of(Arguments.of(existingPdf));
}
Expand All @@ -156,12 +218,16 @@ public static Stream<Arguments> providePathsToInvalidPDFs() throws IOException {
}

/**
 * Convenience overload that passes an empty description to the three-argument variant.
 *
 * @param fileName name of the PDF file, resolved against {@code tempDir}
 * @param tempDir  directory in which the file is placed
 * @return the link produced by the three-argument variant
 * @throws IOException if the three-argument variant fails
 */
private static LinkedFile createDefaultLinkedFile(String fileName, Path tempDir) throws IOException {
    final String noDescription = "";
    return createDefaultLinkedFile(noDescription, fileName, tempDir);
}

/**
 * Writes a PDF consisting of one empty page to {@code tempDir/fileName} and returns a link to it.
 *
 * @param description description to store in the returned link
 * @param fileName    name of the PDF file, resolved against {@code tempDir}
 * @param tempDir     directory in which the file is created
 * @return a {@code LinkedFile} of type {@code "PDF"} pointing at the written file
 * @throws IOException if the PDF cannot be saved
 */
private static LinkedFile createDefaultLinkedFile(String description, String fileName, Path tempDir) throws IOException {
    Path pdfFile = tempDir.resolve(fileName);
    try (PDDocument pdf = new PDDocument()) {
        pdf.addPage(new PDPage());
        pdf.save(pdfFile.toAbsolutePath().toString());
    }

    // Honor the caller-supplied description instead of a hard-coded one; the two-argument
    // overload passes "", so its result is unchanged.
    return new LinkedFile(description, pdfFile, "PDF");
}
}

0 comments on commit a73bb07

Please sign in to comment.