Skip to content

Commit

Permalink
Add Missing Fillers/Extractors for Supported Fields and Support Day C…
Browse files Browse the repository at this point in the history
…onversion (#8531)

* #8491 : Add Missing Setters/Fillers. Support XMP(DC) -> BibTex For Day Field.

* #8491 : Implement Suggestions. Refactor. Add Test for XMP->BibTex for Date Field

* #8491 : Change changelog message and update method description

* #8491 : Address Requested Changes
  • Loading branch information
addak committed Mar 9, 2022
1 parent 3383f9b commit 7bc6766
Show file tree
Hide file tree
Showing 13 changed files with 268 additions and 64 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We moved the search box in preview preferences closer to the available citation styles list. [#8370](https://github.com/JabRef/jabref/pull/8370)
- Changing the preference to show the preview panel as a separate tab now has effect without restarting JabRef. [#8370](https://github.com/JabRef/jabref/pull/8370)
- We enabled switching themes in JabRef without the need to restart JabRef. [#7335](https://github.com/JabRef/jabref/pull/7335)
- We added support for the fields `day`, `rights`, `coverage` and `language` when reading XMP data in Dublin Core format. [#8491](https://github.com/JabRef/jabref/issues/8491)

### Fixed

Expand Down
167 changes: 122 additions & 45 deletions src/main/java/org/jabref/logic/xmp/DublinCoreExtractor.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.logic.xmp;

import java.io.IOException;
import java.util.Calendar;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map.Entry;
Expand All @@ -16,6 +15,7 @@
import org.jabref.model.entry.Author;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.Date;
import org.jabref.model.entry.Month;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.FieldFactory;
Expand All @@ -24,14 +24,17 @@
import org.jabref.model.entry.types.EntryTypeFactory;
import org.jabref.model.strings.StringUtil;

import org.apache.xmpbox.DateConverter;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DublinCoreExtractor {

public static final String DC_COVERAGE = "coverage";
public static final String DC_RIGHTS = "rights";
public static final String DC_SOURCE = "source";

private static final Logger LOGGER = LoggerFactory.getLogger(DublinCoreExtractor.class);

private final DublinCoreSchema dcSchema;
Expand Down Expand Up @@ -71,32 +74,19 @@ private void extractAuthor() {
}

/**
* Year in BibTex - Date in DublinCore is only the year information, because dc interprets empty months as January.
* Tries to extract the month as well. In JabRef the bibtex/month/value is prioritized. <br/> The problem is the
* default value of the calendar, which is always January, even if there is no month information in the xmp metadata.
* The idea is, to reject all information with YYYY-01-01. In cases, where xmp is written with JabRef the month
* property filled with jan will override this behavior and no data is lost. In the cases, where xmp is written by
* another service, the assumption is, that the 1st January is not a publication date at all.
* Bibtex-Fields : year, [month], [day] - 'dc:date' in DublinCore
*/
private void extractYearAndMonth() {
private void extractDate() {
List<String> dates = dcSchema.getUnqualifiedSequenceValueList("date");
if ((dates != null) && !dates.isEmpty()) {

String date = dates.get(0).trim();
Calendar calender = null;
try {
calender = DateConverter.toCalendar(date);
} catch (IOException ignored) {
// Ignored
}
if (calender != null) {
bibEntry.setField(StandardField.YEAR, String.valueOf(calender.get(Calendar.YEAR)));
int monthNumber = calender.get(Calendar.MONTH) + 1;
// not the 1st of January
if (!((monthNumber == 1) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
Month.getMonthByNumber(monthNumber)
.ifPresent(month -> bibEntry.setMonth(month));
}
}
Date.parse(date)
.ifPresent(dateValue -> {
dateValue.getDay().ifPresent(day -> bibEntry.setField(StandardField.DAY, Integer.toString(day)));
dateValue.getMonth().ifPresent(bibEntry::setMonth);
dateValue.getYear().ifPresent(year -> bibEntry.setField(StandardField.YEAR, Integer.toString(year)));
});
}
}

Expand Down Expand Up @@ -182,7 +172,7 @@ private void extractRights() {
LOGGER.warn("Could not extract rights", e);
}
if (!StringUtil.isNullOrEmpty(rights)) {
bibEntry.setField(new UnknownField("rights"), rights);
bibEntry.setField(new UnknownField(DC_RIGHTS), rights);
}
}

Expand All @@ -192,7 +182,7 @@ private void extractRights() {
private void extractSource() {
String source = dcSchema.getSource();
if (!StringUtil.isNullOrEmpty(source)) {
bibEntry.setField(new UnknownField("source"), source);
bibEntry.setField(new UnknownField(DC_SOURCE), source);
}
}

Expand Down Expand Up @@ -234,6 +224,29 @@ private void extractType() {
}
}

/**
 * Copies the Dublin Core coverage value into the entry.
 * <p>
 * BibTex has no equivalent field, so the value is stored under the field parsed from
 * {@link #DC_COVERAGE}. Nothing is written when {@code StringUtil.isNullOrEmpty} rejects the value.
 */
private void extractCoverage() {
    String dcCoverage = dcSchema.getCoverage();
    if (StringUtil.isNullOrEmpty(dcCoverage)) {
        return;
    }

    Field coverageField = FieldFactory.parseField(DC_COVERAGE);
    bibEntry.setField(coverageField, dcCoverage);
}

/**
 * Bibtex-Field : language - 'dc:language' in DublinCore.
 * <p>
 * All language values returned by the schema are joined with "," (no surrounding whitespace)
 * and stored in {@code StandardField.LANGUAGE}. The field is left untouched when the schema
 * returns {@code null} or an empty list.
 */
private void extractLanguages() {
    List<String> languages = dcSchema.getLanguages();
    if ((languages != null) && !languages.isEmpty()) {
        // String.join yields the same text as appending ",<language>" per element and dropping the leading comma
        bibEntry.setField(StandardField.LANGUAGE, String.join(",", languages));
    }
}

/**
* Helper function for retrieving a BibEntry from the DublinCore metadata in a PDF file.
* <p>
Expand All @@ -252,7 +265,7 @@ public Optional<BibEntry> extractBibtexEntry() {
// then extract all "standard" dublin core entries
this.extractEditor();
this.extractAuthor();
this.extractYearAndMonth();
this.extractDate();
this.extractAbstract();
this.extractDOI();
this.extractPublisher();
Expand All @@ -261,6 +274,8 @@ public Optional<BibEntry> extractBibtexEntry() {
this.extractSubject();
this.extractTitle();
this.extractType();
this.extractCoverage();
this.extractLanguages();

// we pass a new BibEntry in the constructor which is never empty as it already consists of "@misc"
if (bibEntry.getFieldMap().isEmpty()) {
Expand Down Expand Up @@ -350,6 +365,37 @@ private void fillTitle(String title) {
dcSchema.setTitle(title);
}

/**
* BibTex : Coverage (Custom Field); DC Field : Coverage
* <p>
* Passes the value unchanged to the coverage setter of the Dublin Core schema.
*
* @param coverage value of the entry's coverage field to store in the schema
*/
private void fillCoverage(String coverage) {
dcSchema.setCoverage(coverage);
}

/**
 * BibTex Field : language ; DC Field : dc:language
 * <p>
 * The field value is treated as a comma-separated list. Each element is trimmed and added to
 * the schema as its own language entry; elements that are empty after trimming are skipped,
 * so a value such as {@code "en, de"} does not produce a language with a leading blank.
 *
 * @param languages comma-separated language values of the entry
 */
private void fillLanguages(String languages) {
    Arrays.stream(languages.split(","))
          .map(String::trim)
          .filter(language -> !language.isEmpty())
          .forEach(dcSchema::addLanguage);
}

/**
 * BibTex : Rights (Custom Field); DC Field : dc:rights
 * <p>
 * Only the text before the first comma is written to the schema. {@code null} is passed as the
 * first argument of {@code addRights} (NOTE(review): presumably the language qualifier, with
 * null meaning the default language - confirm against the XMPBox API).
 *
 * @param rights value of the entry's rights field
 */
private void fillRights(String rights) {
    // A limit of 2 keeps trailing empty strings, so index 0 exists even for values that
    // consist only of commas (plain split(",") returns an empty array for ",").
    String firstRights = rights.split(",", 2)[0];
    dcSchema.addRights(null, firstRights);
}

/**
* BibTex : Source (Custom Field); DC Field : Source
* <p>
* Passes the value unchanged to the source setter of the Dublin Core schema.
*
* @param source value of the entry's source field to store in the schema
*/
private void fillSource(String source) {
dcSchema.setSource(source);
}

/**
* All others (+ citation key) get packaged in the relation attribute
*
Expand All @@ -366,29 +412,60 @@ public void fillDublinCoreSchema() {

Set<Entry<Field, String>> fieldValues = new TreeSet<>(Comparator.comparing(fieldStringEntry -> fieldStringEntry.getKey().getName()));
fieldValues.addAll(bibEntry.getFieldMap().entrySet());
boolean hasStandardYearField = fieldValues.stream().anyMatch(field -> StandardField.YEAR.equals(field.getKey()));
for (Entry<Field, String> field : fieldValues) {
if (useXmpPrivacyFilter && xmpPreferences.getXmpPrivacyFilter().contains(field.getKey())) {
continue;
}

if (StandardField.EDITOR.equals(field.getKey())) {
this.fillContributor(field.getValue());
} else if (StandardField.AUTHOR.equals(field.getKey())) {
this.fillCreator(field.getValue());
} else if (StandardField.YEAR.equals(field.getKey())) {
this.fillDate();
} else if (StandardField.ABSTRACT.equals(field.getKey())) {
this.fillDescription(field.getValue());
} else if (StandardField.DOI.equals(field.getKey())) {
this.fillIdentifier(field.getValue());
} else if (StandardField.PUBLISHER.equals(field.getKey())) {
this.fillPublisher(field.getValue());
} else if (StandardField.KEYWORDS.equals(field.getKey())) {
this.fillKeywords(field.getValue());
} else if (StandardField.TITLE.equals(field.getKey())) {
this.fillTitle(field.getValue());
Field fieldEntry = field.getKey();
if (fieldEntry instanceof StandardField) {
switch ((StandardField) fieldEntry) {
case EDITOR:
this.fillContributor(field.getValue());
break;
case AUTHOR:
this.fillCreator(field.getValue());
break;
case YEAR:
this.fillDate();
break;
case ABSTRACT:
this.fillDescription(field.getValue());
break;
case DOI:
this.fillIdentifier(field.getValue());
break;
case PUBLISHER:
this.fillPublisher(field.getValue());
break;
case KEYWORDS:
this.fillKeywords(field.getValue());
break;
case TITLE:
this.fillTitle(field.getValue());
break;
case LANGUAGE:
this.fillLanguages(field.getValue());
break;
case DAY:
case MONTH:
if (hasStandardYearField) {
break;
}
default:
this.fillCustomField(field.getKey(), field.getValue());
}
} else {
this.fillCustomField(field.getKey(), field.getValue());
if (DC_COVERAGE.equals(fieldEntry.getName())) {
this.fillCoverage(field.getValue());
} else if (DC_RIGHTS.equals(fieldEntry.getName())) {
this.fillRights(field.getValue());
} else if (DC_SOURCE.equals(fieldEntry.getName())) {
this.fillSource(field.getValue());
} else {
this.fillCustomField(field.getKey(), field.getValue());
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/jabref/logic/xmp/XmpUtilReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.schema.DublinCoreSchemaCustom;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand Down Expand Up @@ -77,8 +78,8 @@ public static List<BibEntry> readXmp(Path path, XmpPreferences xmpPreferences)
if (!xmpMetaList.isEmpty()) {
// Only support Dublin Core since JabRef 4.2
for (XMPMetadata xmpMeta : xmpMetaList) {
DublinCoreSchema dcSchema = xmpMeta.getDublinCoreSchema();

DublinCoreSchema dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(xmpMeta.getDublinCoreSchema());
if (dcSchema != null) {
DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.schema.DublinCoreSchemaCustom;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand Down Expand Up @@ -160,7 +161,7 @@ private static void writeDublinCore(PDDocument document,
meta.removeSchema(meta.getDublinCoreSchema());

for (BibEntry entry : resolvedEntries) {
DublinCoreSchema dcSchema = meta.createAndAddDublinCoreSchema();
DublinCoreSchema dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(meta.createAndAddDublinCoreSchema());
XmpUtilWriter.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
}

Expand Down
25 changes: 13 additions & 12 deletions src/main/java/org/jabref/model/entry/Date.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,19 @@ public class Date {
private static final DateTimeFormatter SIMPLE_DATE_FORMATS;
static {
List<String> formatStrings = Arrays.asList(
"uuuu-M-d", // covers 2009-1-15
"uuuu-M", // covers 2009-11
"d-M-uuuu", // covers 15-1-2012
"M-uuuu", // covers 1-2012
"M/uuuu", // covers 9/2015 and 09/2015
"M/uu", // covers 9/15
"MMMM d, uuuu", // covers September 1, 2015
"MMMM, uuuu", // covers September, 2015
"d.M.uuuu", // covers 15.1.2015
"uuuu.M.d", // covers 2015.1.15
"uuuu", // covers 2015
"MMM, uuuu"); // covers Jan, 2020
"uuuu-MM-dd'T'HH:mm:ss[xxx][xx][X]", // covers 2018-10-03T07:24:14+03:00
"uuuu-M-d", // covers 2009-1-15
"uuuu-M", // covers 2009-11
"d-M-uuuu", // covers 15-1-2012
"M-uuuu", // covers 1-2012
"M/uuuu", // covers 9/2015 and 09/2015
"M/uu", // covers 9/15
"MMMM d, uuuu", // covers September 1, 2015
"MMMM, uuuu", // covers September, 2015
"d.M.uuuu", // covers 15.1.2015
"uuuu.M.d", // covers 2015.1.15
"uuuu", // covers 2015
"MMM, uuuu"); // covers Jan, 2020

SIMPLE_DATE_FORMATS = formatStrings.stream()
.map(DateTimeFormatter::ofPattern)
Expand Down
Loading

0 comments on commit 7bc6766

Please sign in to comment.