Skip to content

Commit

Permalink
JabRef#8491 : Add Missing Setters/Fillers. Support XMP(DC) -> BibTex …
Browse files Browse the repository at this point in the history
…For Day Field.
  • Loading branch information
addak committed Mar 1, 2022
1 parent f17eb40 commit 6d35ff5
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 53 deletions.
168 changes: 131 additions & 37 deletions src/main/java/org/jabref/logic/xmp/DublinCoreExtractor.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.logic.xmp;

import java.io.IOException;
import java.util.Calendar;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map.Entry;
Expand All @@ -16,6 +15,7 @@
import org.jabref.model.entry.Author;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.Date;
import org.jabref.model.entry.Month;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.FieldFactory;
Expand All @@ -24,14 +24,18 @@
import org.jabref.model.entry.types.EntryTypeFactory;
import org.jabref.model.strings.StringUtil;

import org.apache.xmpbox.DateConverter;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DublinCoreExtractor {

public static final String DC_COVERAGE = "coverage";
public static final String DC_RIGHTS = "rights";
public static final String DC_SOURCE = "source";

private static final Logger LOGGER = LoggerFactory.getLogger(DublinCoreExtractor.class);

private final DublinCoreSchema dcSchema;
Expand Down Expand Up @@ -78,24 +82,20 @@ private void extractAuthor() {
* property filled with jan will override this behavior and no data is lost. In the cases, where xmp is written by
* another service, the assumption is, that the 1st January is not a publication date at all.
*/
private void extractYearAndMonth() {
private void extractDate() {
List<String> dates = dcSchema.getUnqualifiedSequenceValueList("date");
if ((dates != null) && !dates.isEmpty()) {

String date = dates.get(0).trim();
Calendar calender = null;
try {
calender = DateConverter.toCalendar(date);
} catch (IOException ignored) {
// Ignored
}
if (calender != null) {
bibEntry.setField(StandardField.YEAR, String.valueOf(calender.get(Calendar.YEAR)));
int monthNumber = calender.get(Calendar.MONTH) + 1;
// not the 1st of January
if (!((monthNumber == 1) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
Month.getMonthByNumber(monthNumber)
.ifPresent(month -> bibEntry.setMonth(month));
}
Date.parse(date)
.ifPresent(dateValue -> {
dateValue.getDay().ifPresent(day -> bibEntry.setField(StandardField.DAY, Integer.toString(day)));
dateValue.getMonth().ifPresent(bibEntry::setMonth);
dateValue.getYear().ifPresent(year -> bibEntry.setField(StandardField.YEAR, Integer.toString(year)));
});
} catch (RuntimeException e) {
LOGGER.error("Failed To Parse Date\n {}", ExceptionUtils.getStackTrace(e));
}
}
}
Expand Down Expand Up @@ -182,7 +182,7 @@ private void extractRights() {
LOGGER.warn("Could not extract rights", e);
}
if (!StringUtil.isNullOrEmpty(rights)) {
bibEntry.setField(new UnknownField("rights"), rights);
bibEntry.setField(new UnknownField(DC_RIGHTS), rights);
}
}

Expand All @@ -192,7 +192,7 @@ private void extractRights() {
private void extractSource() {
String source = dcSchema.getSource();
if (!StringUtil.isNullOrEmpty(source)) {
bibEntry.setField(new UnknownField("source"), source);
bibEntry.setField(new UnknownField(DC_SOURCE), source);
}
}

Expand Down Expand Up @@ -234,6 +234,29 @@ private void extractType() {
}
}

/**
 * Copies the Dublin Core coverage value into the entry.
 * <p>
 * BibTeX has no equivalent field, so the value is stored under the field that
 * {@code FieldFactory.parseField} returns for {@link #DC_COVERAGE}. Nothing is written when the
 * schema holds no coverage value (null or empty).
 */
private void extractCoverage() {
    String dcCoverage = dcSchema.getCoverage();
    if (StringUtil.isNullOrEmpty(dcCoverage)) {
        return;
    }
    bibEntry.setField(FieldFactory.parseField(DC_COVERAGE), dcCoverage);
}

/**
 * Copies the Dublin Core languages into the BibTeX {@code language} field.
 * <p>
 * All language values of the schema are concatenated with a single comma (no surrounding
 * whitespace) and stored as one field value. The field is left untouched when the schema
 * reports no languages (null or empty list).
 */
private void extractLanguages() {
    List<String> dcLanguages = dcSchema.getLanguages();
    if (dcLanguages == null || dcLanguages.isEmpty()) {
        return;
    }
    bibEntry.setField(StandardField.LANGUAGE, String.join(",", dcLanguages));
}

/**
* Helper function for retrieving a BibEntry from the DublinCore metadata in a PDF file.
* <p>
Expand All @@ -252,7 +275,7 @@ public Optional<BibEntry> extractBibtexEntry() {
// then extract all "standard" dublin core entries
this.extractEditor();
this.extractAuthor();
this.extractYearAndMonth();
this.extractDate();
this.extractAbstract();
this.extractDOI();
this.extractPublisher();
Expand All @@ -261,6 +284,8 @@ public Optional<BibEntry> extractBibtexEntry() {
this.extractSubject();
this.extractTitle();
this.extractType();
this.extractCoverage();
this.extractLanguages();

// we pass a new BibEntry in the constructor which is never empty as it already consists of "@misc"
if (bibEntry.getFieldMap().isEmpty()) {
Expand Down Expand Up @@ -350,6 +375,43 @@ private void fillTitle(String title) {
dcSchema.setTitle(title);
}

/**
 * Writes the custom BibTeX "coverage" field to the Dublin Core coverage property.
 *
 * @param coverage the coverage value taken from the entry; passed to the schema unchanged
 */
private void fillCoverage(String coverage) {
    dcSchema.setCoverage(coverage);
}

/**
 * Writes the BibTeX {@code language} field to the Dublin Core {@code dc:language} property.
 * <p>
 * The field value is treated as a comma-separated list. Each token is trimmed, and tokens that
 * are empty after trimming are skipped, so that values such as {@code "en, de"} or
 * {@code "en,,de"} do not produce language entries with stray whitespace or empty content.
 *
 * @param languages comma-separated language values taken from the entry
 */
private void fillLanguages(String languages) {
    Arrays.stream(languages.split(","))
          .map(String::trim)
          // an empty token carries no language information; do not write it
          .filter(language -> !language.isEmpty())
          .forEach(dcSchema::addLanguage);
}

/**
 * Writes the custom BibTeX "rights" field to the Dublin Core {@code dc:rights} property.
 * <p>
 * Only the part of the value before the first comma is written; this keeps the behavior of the
 * previous implementation. NOTE(review): the truncation at the first comma looks intentional but
 * its reason is not visible here; confirm against the reading side.
 * <p>
 * A value that consists solely of commas yields no segment at all, because
 * {@link String#split(String)} drops trailing empty strings. In that case nothing is written
 * instead of failing with an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param rights the rights statement taken from the entry
 */
private void fillRights(String rights) {
    String[] segments = rights.split(",");
    if (segments.length == 0) {
        // e.g. "," or ",,": there is no first segment to write
        return;
    }
    // null language qualifier: presumably lets the schema use its default language; verify against XMPBox
    dcSchema.addRights(null, segments[0]);
}

/**
 * Writes the custom BibTeX "source" field to the Dublin Core source property.
 *
 * @param source the source value taken from the entry; passed to the schema unchanged
 */
private void fillSource(String source) {
    dcSchema.setSource(source);
}

/**
* All others (+ citation key) get packaged in the relation attribute
*
Expand All @@ -366,29 +428,61 @@ public void fillDublinCoreSchema() {

Set<Entry<Field, String>> fieldValues = new TreeSet<>(Comparator.comparing(fieldStringEntry -> fieldStringEntry.getKey().getName()));
fieldValues.addAll(bibEntry.getFieldMap().entrySet());
boolean hasStandardYearField = fieldValues.stream().anyMatch(field -> StandardField.YEAR.equals(field.getKey()));
for (Entry<Field, String> field : fieldValues) {
if (useXmpPrivacyFilter && xmpPreferences.getXmpPrivacyFilter().contains(field.getKey())) {
continue;
}

if (StandardField.EDITOR.equals(field.getKey())) {
this.fillContributor(field.getValue());
} else if (StandardField.AUTHOR.equals(field.getKey())) {
this.fillCreator(field.getValue());
} else if (StandardField.YEAR.equals(field.getKey())) {
this.fillDate();
} else if (StandardField.ABSTRACT.equals(field.getKey())) {
this.fillDescription(field.getValue());
} else if (StandardField.DOI.equals(field.getKey())) {
this.fillIdentifier(field.getValue());
} else if (StandardField.PUBLISHER.equals(field.getKey())) {
this.fillPublisher(field.getValue());
} else if (StandardField.KEYWORDS.equals(field.getKey())) {
this.fillKeywords(field.getValue());
} else if (StandardField.TITLE.equals(field.getKey())) {
this.fillTitle(field.getValue());
Field fieldEntry = field.getKey();
if (fieldEntry instanceof StandardField) {
switch ((StandardField) fieldEntry) {
case EDITOR:
this.fillContributor(field.getValue());
break;
case AUTHOR:
this.fillCreator(field.getValue());
break;
case YEAR:
this.fillDate();
break;
case ABSTRACT:
this.fillDescription(field.getValue());
break;
case DOI:
this.fillIdentifier(field.getValue());
break;
case PUBLISHER:
this.fillPublisher(field.getValue());
break;
case KEYWORDS:
this.fillKeywords(field.getValue());
break;
case TITLE:
this.fillTitle(field.getValue());
break;
case LANGUAGE:
this.fillLanguages(field.getValue());
break;
case DAY:
case MONTH:
if (hasStandardYearField) {
break;
}
default:
this.fillCustomField(field.getKey(), field.getValue());
}
} else {
this.fillCustomField(field.getKey(), field.getValue());

if (DC_COVERAGE.equals(fieldEntry.getName())) {
this.fillCoverage(field.getValue());
} else if (DC_RIGHTS.equals(fieldEntry.getName())) {
this.fillRights(field.getValue());
} else if (DC_SOURCE.equals(fieldEntry.getName())) {
this.fillSource(field.getValue());
} else {
this.fillCustomField(field.getKey(), field.getValue());
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/xmp/XmpUtilReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.schema.DublinCoreSchemaCustom;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -77,8 +77,8 @@ public static List<BibEntry> readXmp(Path path, XmpPreferences xmpPreferences)
if (!xmpMetaList.isEmpty()) {
// Only support Dublin Core since JabRef 4.2
for (XMPMetadata xmpMeta : xmpMetaList) {
DublinCoreSchema dcSchema = xmpMeta.getDublinCoreSchema();

DublinCoreSchemaCustom dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(xmpMeta.getDublinCoreSchema());
if (dcSchema != null) {
DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.schema.DublinCoreSchemaCustom;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand Down Expand Up @@ -160,7 +161,7 @@ private static void writeDublinCore(PDDocument document,
meta.removeSchema(meta.getDublinCoreSchema());

for (BibEntry entry : resolvedEntries) {
DublinCoreSchema dcSchema = meta.createAndAddDublinCoreSchema();
DublinCoreSchemaCustom dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(meta.createAndAddDublinCoreSchema());
XmpUtilWriter.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
}

Expand Down
25 changes: 13 additions & 12 deletions src/main/java/org/jabref/model/entry/Date.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,19 @@ public class Date {
private static final DateTimeFormatter SIMPLE_DATE_FORMATS;
static {
List<String> formatStrings = Arrays.asList(
"uuuu-M-d", // covers 2009-1-15
"uuuu-M", // covers 2009-11
"d-M-uuuu", // covers 15-1-2012
"M-uuuu", // covers 1-2012
"M/uuuu", // covers 9/2015 and 09/2015
"M/uu", // covers 9/15
"MMMM d, uuuu", // covers September 1, 2015
"MMMM, uuuu", // covers September, 2015
"d.M.uuuu", // covers 15.1.2015
"uuuu.M.d", // covers 2015.1.15
"uuuu", // covers 2015
"MMM, uuuu"); // covers Jan, 2020
"uuuu-MM-dd'T'HH:mm:ss[xxx][xx][X]", // covers 2018-10-03T07:24:14+03:00
"uuuu-M-d", // covers 2009-1-15
"uuuu-M", // covers 2009-11
"d-M-uuuu", // covers 15-1-2012
"M-uuuu", // covers 1-2012
"M/uuuu", // covers 9/2015 and 09/2015
"M/uu", // covers 9/15
"MMMM d, uuuu", // covers September 1, 2015
"MMMM, uuuu", // covers September, 2015
"d.M.uuuu", // covers 15.1.2015
"uuuu.M.d", // covers 2015.1.15
"uuuu", // covers 2015
"MMM, uuuu"); // covers Jan, 2020

SIMPLE_DATE_FORMATS = formatStrings.stream()
.map(DateTimeFormatter::ofPattern)
Expand Down
Loading

0 comments on commit 6d35ff5

Please sign in to comment.