Add Missing Fillers/Extractors for Supported Fields and Support Day Conversion (#8531)

* #8491 : Add Missing Setters/Fillers. Support XMP(DC) -> BibTex For Day Field.
* #8491 : Implement Suggestions. Refactor. Add Test for XMP->BibTex for Date Field
* #8491 : Change changelog message and update method description
* #8491 : Address Requested Changes
JabRef · Mar 9, 2022 · 7bc6766 · 7bc6766
1 parent 3383f9b
commit 7bc6766
Show file tree

Hide file tree

Showing 13 changed files with 268 additions and 64 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -54,6 +54,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 - We moved the search box in preview preferences closer to the available citation styles list. [#8370](https://github.com/JabRef/jabref/pull/8370)
 - Changing the preference to show the preview panel as a separate tab now has effect without restarting JabRef. [#8370](https://github.com/JabRef/jabref/pull/8370)
 - We enabled switching themes in JabRef without the need to restart JabRef. [#7335](https://github.com/JabRef/jabref/pull/7335)
+- We added support for the fields `day`, `rights`, `coverage` and `language` when reading XMP data in Dublin Core format. [#8491](https://github.com/JabRef/jabref/issues/8491)
 
 ### Fixed
 

diff --git a/src/main/java/org/jabref/logic/xmp/DublinCoreExtractor.java b/src/main/java/org/jabref/logic/xmp/DublinCoreExtractor.java
@@ -1,7 +1,6 @@
 package org.jabref.logic.xmp;
 
-import java.io.IOException;
-import java.util.Calendar;
+import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map.Entry;
@@ -16,6 +15,7 @@
 import org.jabref.model.entry.Author;
 import org.jabref.model.entry.AuthorList;
 import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.Date;
 import org.jabref.model.entry.Month;
 import org.jabref.model.entry.field.Field;
 import org.jabref.model.entry.field.FieldFactory;
@@ -24,14 +24,17 @@
 import org.jabref.model.entry.types.EntryTypeFactory;
 import org.jabref.model.strings.StringUtil;
 
-import org.apache.xmpbox.DateConverter;
 import org.apache.xmpbox.schema.DublinCoreSchema;
 import org.apache.xmpbox.type.BadFieldValueException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class DublinCoreExtractor {
 
+    public static final String DC_COVERAGE = "coverage";
+    public static final String DC_RIGHTS = "rights";
+    public static final String DC_SOURCE = "source";
+
     private static final Logger LOGGER = LoggerFactory.getLogger(DublinCoreExtractor.class);
 
     private final DublinCoreSchema dcSchema;
@@ -71,32 +74,19 @@ private void extractAuthor() {
     }
 
     /**
-     * Year in BibTex - Date in DublinCore is only the year information, because dc interprets empty months as January.
-     * Tries to extract the month as well. In JabRef the bibtex/month/value is prioritized. <br/> The problem is the
-     * default value of the calendar, which is always January, also if there is no month information in the xmp metdata.
-     * The idea is, to reject all information with YYYY-01-01. In cases, where xmp is written with JabRef the month
-     * property filled with jan will override this behavior and no data is lost. In the cases, where xmp is written by
-     * another service, the assumption is, that the 1st January is not a publication date at all.
+     * Bibtex-Fields : year, [month], [day] - 'dc:date' in DublinCore
+     * <p>
+     * Only the first value of the 'dc:date' sequence is read. It is trimmed and handed to {@link Date#parse};
+     * each of day, month and year that the parsed date provides is copied into the entry.
      */
-    private void extractYearAndMonth() {
+    private void extractDate() {
         List<String> dates = dcSchema.getUnqualifiedSequenceValueList("date");
         if ((dates != null) && !dates.isEmpty()) {
+
             String date = dates.get(0).trim();
-            Calendar calender = null;
-            try {
-                calender = DateConverter.toCalendar(date);
-            } catch (IOException ignored) {
-                // Ignored
-            }
-            if (calender != null) {
-                bibEntry.setField(StandardField.YEAR, String.valueOf(calender.get(Calendar.YEAR)));
-                int monthNumber = calender.get(Calendar.MONTH) + 1;
-                // not the 1st of January
-                if (!((monthNumber == 1) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
-                    Month.getMonthByNumber(monthNumber)
-                         .ifPresent(month -> bibEntry.setMonth(month));
-                }
-            }
+            // If parsing yields no date, the entry is left untouched; otherwise every component is optional
+            // and is only written when present.
+            Date.parse(date)
+                    .ifPresent(dateValue -> {
+                        dateValue.getDay().ifPresent(day -> bibEntry.setField(StandardField.DAY, Integer.toString(day)));
+                        dateValue.getMonth().ifPresent(bibEntry::setMonth);
+                        dateValue.getYear().ifPresent(year -> bibEntry.setField(StandardField.YEAR, Integer.toString(year)));
+                    });
         }
     }
 
@@ -182,7 +172,7 @@ private void extractRights() {
            LOGGER.warn("Could not extract rights", e);
         }
         if (!StringUtil.isNullOrEmpty(rights)) {
-            bibEntry.setField(new UnknownField("rights"), rights);
+            bibEntry.setField(new UnknownField(DC_RIGHTS), rights);
         }
     }
 
@@ -192,7 +182,7 @@ private void extractRights() {
     private void extractSource() {
         String source = dcSchema.getSource();
         if (!StringUtil.isNullOrEmpty(source)) {
-            bibEntry.setField(new UnknownField("source"), source);
+            bibEntry.setField(new UnknownField(DC_SOURCE), source);
         }
     }
 
@@ -234,6 +224,29 @@ private void extractType() {
         }
     }
 
+    /**
+     * Copies the DublinCore 'coverage' value into the entry. BibTex has no equivalent, so the value is stored
+     * under the field that {@link FieldFactory#parseField} returns for the name "coverage".
+     * Nothing is written when the schema holds no (or an empty) coverage value.
+     */
+    private void extractCoverage() {
+        String dcCoverage = dcSchema.getCoverage();
+        if (StringUtil.isNullOrEmpty(dcCoverage)) {
+            return;
+        }
+        bibEntry.setField(FieldFactory.parseField(DC_COVERAGE), dcCoverage);
+    }
+
+    /**
+     * Language is equivalent in both formats (BibTex and DublinCore): all 'dc:language' values are joined
+     * with "," and stored in the BibTex 'language' field. Nothing is written when the list is null or empty.
+     */
+    private void extractLanguages() {
+        List<String> dcLanguages = dcSchema.getLanguages();
+        if ((dcLanguages == null) || dcLanguages.isEmpty()) {
+            return;
+        }
+        bibEntry.setField(StandardField.LANGUAGE, String.join(",", dcLanguages));
+    }
+
     /**
      * Helper function for retrieving a BibEntry from the DublinCore metadata in a PDF file.
      * <p>
@@ -252,7 +265,7 @@ public Optional<BibEntry> extractBibtexEntry() {
         // then extract all "standard" dublin core entries
         this.extractEditor();
         this.extractAuthor();
-        this.extractYearAndMonth();
+        this.extractDate();
         this.extractAbstract();
         this.extractDOI();
         this.extractPublisher();
@@ -261,6 +274,8 @@ public Optional<BibEntry> extractBibtexEntry() {
         this.extractSubject();
         this.extractTitle();
         this.extractType();
+        this.extractCoverage();
+        this.extractLanguages();
 
         // we pass a new BibEntry in the constructor which is never empty as it already consists of "@misc"
         if (bibEntry.getFieldMap().isEmpty()) {
@@ -350,6 +365,37 @@ private void fillTitle(String title) {
         dcSchema.setTitle(title);
     }
 
+    /**
+     * BibTex : Coverage (Custom Field); DC Field : Coverage
+     *
+     * @param coverage value passed on to the coverage setter of the DublinCore schema
+     */
+    private void fillCoverage(String coverage) {
+        this.dcSchema.setCoverage(coverage);
+    }
+
+    /**
+     * BibTex Field : language ; DC Field : dc:language
+     * <p>
+     * The BibTex value is split at every "," and each part is added to the schema as its own language,
+     * in the order it appears.
+     *
+     * @param languages comma-separated content of the BibTex 'language' field
+     */
+    private void fillLanguages(String languages) {
+        String[] languageParts = languages.split(",");
+        Arrays.asList(languageParts).forEach(dcSchema::addLanguage);
+    }
+
+    /**
+     * BibTex : Rights (Custom Field); DC Field : dc:rights
+     * <p>
+     * Only the text in front of the first "," is written to the schema; the rest of the value is dropped here.
+     *
+     * @param rights content of the BibTex 'rights' field
+     */
+    private void fillRights(String rights) {
+        // split(",") without a limit discards trailing empty strings, so a value made up only of commas
+        // (e.g. ",") produced an empty array and [0] threw ArrayIndexOutOfBoundsException. With limit 2 the
+        // array always has a first element, and that element is unchanged for every other input.
+        String firstRights = rights.split(",", 2)[0];
+        dcSchema.addRights(null, firstRights);
+    }
+
+    /**
+     * BibTex : Source (Custom Field); DC Field : Source
+     *
+     * @param source value passed on to the source setter of the DublinCore schema
+     */
+    private void fillSource(String source) {
+        this.dcSchema.setSource(source);
+    }
+
     /**
      * All others (+ citation key) get packaged in the relation attribute
      *
@@ -366,29 +412,60 @@ public void fillDublinCoreSchema() {
 
         Set<Entry<Field, String>> fieldValues = new TreeSet<>(Comparator.comparing(fieldStringEntry -> fieldStringEntry.getKey().getName()));
         fieldValues.addAll(bibEntry.getFieldMap().entrySet());
+        boolean hasStandardYearField = fieldValues.stream().anyMatch(field -> StandardField.YEAR.equals(field.getKey()));
         for (Entry<Field, String> field : fieldValues) {
             if (useXmpPrivacyFilter && xmpPreferences.getXmpPrivacyFilter().contains(field.getKey())) {
                 continue;
             }
 
-            if (StandardField.EDITOR.equals(field.getKey())) {
-                this.fillContributor(field.getValue());
-            } else if (StandardField.AUTHOR.equals(field.getKey())) {
-                this.fillCreator(field.getValue());
-            } else if (StandardField.YEAR.equals(field.getKey())) {
-                this.fillDate();
-            } else if (StandardField.ABSTRACT.equals(field.getKey())) {
-                this.fillDescription(field.getValue());
-            } else if (StandardField.DOI.equals(field.getKey())) {
-                this.fillIdentifier(field.getValue());
-            } else if (StandardField.PUBLISHER.equals(field.getKey())) {
-                this.fillPublisher(field.getValue());
-            } else if (StandardField.KEYWORDS.equals(field.getKey())) {
-                this.fillKeywords(field.getValue());
-            } else if (StandardField.TITLE.equals(field.getKey())) {
-                this.fillTitle(field.getValue());
+            Field fieldEntry = field.getKey();
+            if (fieldEntry instanceof StandardField) {
+                switch ((StandardField) fieldEntry) {
+                    case EDITOR:
+                        this.fillContributor(field.getValue());
+                        break;
+                    case AUTHOR:
+                        this.fillCreator(field.getValue());
+                        break;
+                    case YEAR:
+                        this.fillDate();
+                        break;
+                    case ABSTRACT:
+                        this.fillDescription(field.getValue());
+                        break;
+                    case DOI:
+                        this.fillIdentifier(field.getValue());
+                        break;
+                    case PUBLISHER:
+                        this.fillPublisher(field.getValue());
+                        break;
+                    case KEYWORDS:
+                        this.fillKeywords(field.getValue());
+                        break;
+                    case TITLE:
+                        this.fillTitle(field.getValue());
+                        break;
+                    case LANGUAGE:
+                        this.fillLanguages(field.getValue());
+                        break;
+                    case DAY:
+                    case MONTH:
+                        if (hasStandardYearField) {
+                            break;
+                        }
+                    default:
+                        this.fillCustomField(field.getKey(), field.getValue());
+                }
             } else {
-                this.fillCustomField(field.getKey(), field.getValue());
+                if (DC_COVERAGE.equals(fieldEntry.getName())) {
+                    this.fillCoverage(field.getValue());
+                } else if (DC_RIGHTS.equals(fieldEntry.getName())) {
+                    this.fillRights(field.getValue());
+                } else if (DC_SOURCE.equals(fieldEntry.getName())) {
+                    this.fillSource(field.getValue());
+                } else {
+                    this.fillCustomField(field.getKey(), field.getValue());
+                }
             }
         }
 

diff --git a/src/main/java/org/jabref/logic/xmp/XmpUtilReader.java b/src/main/java/org/jabref/logic/xmp/XmpUtilReader.java
@@ -10,6 +10,7 @@
 
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.LinkedFile;
+import org.jabref.model.schema.DublinCoreSchemaCustom;
 
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -77,8 +78,8 @@ public static List<BibEntry> readXmp(Path path, XmpPreferences xmpPreferences)
             if (!xmpMetaList.isEmpty()) {
                 // Only support Dublin Core since JabRef 4.2
                 for (XMPMetadata xmpMeta : xmpMetaList) {
-                    DublinCoreSchema dcSchema = xmpMeta.getDublinCoreSchema();
 
+                    DublinCoreSchema dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(xmpMeta.getDublinCoreSchema());
                     if (dcSchema != null) {
                         DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
                         Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();

diff --git a/src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java b/src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java
@@ -19,6 +19,7 @@
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.field.Field;
 import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.schema.DublinCoreSchemaCustom;
 
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -160,7 +161,7 @@ private static void writeDublinCore(PDDocument document,
         meta.removeSchema(meta.getDublinCoreSchema());
 
         for (BibEntry entry : resolvedEntries) {
-            DublinCoreSchema dcSchema = meta.createAndAddDublinCoreSchema();
+            DublinCoreSchema dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(meta.createAndAddDublinCoreSchema());
             XmpUtilWriter.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
         }
 

diff --git a/src/main/java/org/jabref/model/entry/Date.java b/src/main/java/org/jabref/model/entry/Date.java
@@ -20,18 +20,19 @@ public class Date {
     private static final DateTimeFormatter SIMPLE_DATE_FORMATS;
     static {
         List<String> formatStrings = Arrays.asList(
-                "uuuu-M-d",     // covers 2009-1-15
-                "uuuu-M",       // covers 2009-11
-                "d-M-uuuu",     // covers 15-1-2012
-                "M-uuuu",       // covers 1-2012
-                "M/uuuu",       // covers 9/2015 and 09/2015
-                "M/uu",         // covers 9/15
-                "MMMM d, uuuu", // covers September 1, 2015
-                "MMMM, uuuu",   // covers September, 2015
-                "d.M.uuuu",     // covers 15.1.2015
-                "uuuu.M.d",     // covers 2015.1.15
-                "uuuu",         // covers 2015
-                "MMM, uuuu");   // covers Jan, 2020
+                "uuuu-MM-dd'T'HH:mm:ss[xxx][xx][X]",    // covers 2018-10-03T07:24:14+03:00
+                "uuuu-M-d",                             // covers 2009-1-15
+                "uuuu-M",                               // covers 2009-11
+                "d-M-uuuu",                             // covers 15-1-2012
+                "M-uuuu",                               // covers 1-2012
+                "M/uuuu",                               // covers 9/2015 and 09/2015
+                "M/uu",                                 // covers 9/15
+                "MMMM d, uuuu",                         // covers September 1, 2015
+                "MMMM, uuuu",                           // covers September, 2015
+                "d.M.uuuu",                             // covers 15.1.2015
+                "uuuu.M.d",                             // covers 2015.1.15
+                "uuuu",                                 // covers 2015
+                "MMM, uuuu");                           // covers Jan, 2020
 
         SIMPLE_DATE_FORMATS = formatStrings.stream()
                                            .map(DateTimeFormatter::ofPattern)