From c94fa4882bb11ab1e6bc0f02f473e4d4aa92c25a Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 14 Nov 2021 20:28:25 +0100 Subject: [PATCH] Fix "Library has changed externally" with CRLF markers (#8239) * Add Javadoc to Entrycomparator#compare (and merge - in code to one place) * Refine duplicate checker - Compare remaining fields if requried and optional do not differ - Ignore CRLF at field comparison * When loading a .bib file, it is opened read only * Fix checkstyle --- CHANGELOG.md | 2 + .../bibtex/comparator/EntryComparator.java | 19 +++-- .../jabref/logic/database/DuplicateCheck.java | 69 ++++++++++++------- .../org/jabref/logic/importer/Importer.java | 5 +- .../java/org/jabref/model/entry/BibEntry.java | 2 +- .../org/jabref/model/strings/StringUtil.java | 1 + .../comparator/BibDatabaseDiffTest.java | 39 +++++++++++ .../comparator/EntryComparatorTest.java | 27 ++++++++ .../logic/database/DuplicateCheckTest.java | 21 ++++++ 9 files changed, 153 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6876f448adb..07393e4f9ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We Included all standard fields with citation key when exporting to Old OpenOffice/LibreOffice Calc Format [#8176](https://github.com/JabRef/jabref/pull/8176) - We present options to manually enter an article or return to the New Entry menu when the fetcher DOI fails to find an entry for an ID [#7870](https://github.com/JabRef/jabref/issues/7870) - We trim white space and non-ASCII characters from DOI [#8127](https://github.com/JabRef/jabref/issues/8127) +- The duplicate checker now inspects other fields in case no difference in the required and optional fields are found. ### Fixed @@ -57,6 +58,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where typing an invalid UNC path into the "Main file directory" text field caused an error. [#8107](https://github.com/JabRef/jabref/issues/8107) - We fixed an issue where "Open Folder" didn't select the file on macOS in Finder [#8130](https://github.com/JabRef/jabref/issues/8130) - We fixed an issue where importing PDFs resulted in an uncaught exception [#8143](https://github.com/JabRef/jabref/issues/8143) +- We fixed "The library has been modified by another program" showing up when line breaks change [#4877](https://github.com/JabRef/jabref/issues/4877) - The default directory of the "LaTeX Citations" tab is now the directory of the currently opened database (and not the directory chosen at the last open file dialog or the last database save) [koppor#538](https://github.com/koppor/jabref/issues/538) - We fixed an issue where right-clicking on a tab and selecting close will close the focused tab even if it is not the tab we right-clicked [#8193](https://github.com/JabRef/jabref/pull/8193) - We fixed an issue where selecting a citation style in the preferences would sometimes produce an exception [#7860](https://github.com/JabRef/jabref/issues/7860) diff --git a/src/main/java/org/jabref/logic/bibtex/comparator/EntryComparator.java b/src/main/java/org/jabref/logic/bibtex/comparator/EntryComparator.java index 871a1516269..3892d220251 100644 --- a/src/main/java/org/jabref/logic/bibtex/comparator/EntryComparator.java +++ b/src/main/java/org/jabref/logic/bibtex/comparator/EntryComparator.java @@ -24,6 +24,13 @@ public class EntryComparator implements Comparator { private final boolean binary; private final Comparator next; + /** + * + * @param binary true: the presence of fields is checked; false: the content of the fields is compared + * @param descending true: if the most different entry should get the highest score + * @param field the field to sort on + * @param next the next comparator to use (if the current comparator results in equality) + */ public EntryComparator(boolean binary, boolean descending, Field field, Comparator next) { this.binary = binary; this.sortField = field; @@ -42,7 +49,7 @@ public EntryComparator(boolean binary, boolean descending, Field field) { public int compare(BibEntry e1, BibEntry e2) { // default equals // TODO: with the new default equals this does not only return 0 for identical objects, - // but for all objects that have the same id and same fields + // but for all objects that have the same id and same fields if (Objects.equals(e1, e2)) { return 0; } @@ -103,21 +110,21 @@ public int compare(BibEntry e1, BibEntry e2) { int result; if ((f1 instanceof Integer) && (f2 instanceof Integer)) { - result = -((Integer) f1).compareTo((Integer) f2); + result = ((Integer) f1).compareTo((Integer) f2); } else if (f2 instanceof Integer) { Integer f1AsInteger = Integer.valueOf(f1.toString()); - result = -f1AsInteger.compareTo((Integer) f2); + result = f1AsInteger.compareTo((Integer) f2); } else if (f1 instanceof Integer) { Integer f2AsInteger = Integer.valueOf(f2.toString()); - result = -((Integer) f1).compareTo(f2AsInteger); + result = ((Integer) f1).compareTo(f2AsInteger); } else { String ours = ((String) f1).toLowerCase(Locale.ROOT); String theirs = ((String) f2).toLowerCase(Locale.ROOT); int comp = ours.compareTo(theirs); - result = -comp; + result = comp; } if (result != 0) { - return descending ? result : -result; // Primary sort. + return descending ? -result : result; // Primary sort. } if (next == null) { return idCompare(e1, e2); // If still equal, we use the unique IDs. diff --git a/src/main/java/org/jabref/logic/database/DuplicateCheck.java b/src/main/java/org/jabref/logic/database/DuplicateCheck.java index f7de07e79cf..fbcd0dd4c3f 100644 --- a/src/main/java/org/jabref/logic/database/DuplicateCheck.java +++ b/src/main/java/org/jabref/logic/database/DuplicateCheck.java @@ -10,6 +10,7 @@ import java.util.Set; import java.util.stream.Collectors; +import org.jabref.logic.util.OS; import org.jabref.logic.util.strings.StringSimilarity; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseMode; @@ -25,6 +26,7 @@ import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.identifier.DOI; import org.jabref.model.entry.identifier.ISBN; +import org.jabref.model.strings.StringUtil; import com.google.common.collect.Sets; import org.slf4j.Logger; @@ -101,13 +103,16 @@ private static boolean haveDifferentChaptersOrPagesOfTheSameBook(final BibEntry private static double[] compareRequiredFields(final BibEntryType type, final BibEntry one, final BibEntry two) { final Set requiredFields = type.getRequiredFields(); - return requiredFields == null + return requiredFields.isEmpty() ? new double[] {0., 0.} : DuplicateCheck.compareFieldSet(requiredFields.stream().map(OrFields::getPrimary).collect(Collectors.toSet()), one, two); } private static boolean isFarFromThreshold(double value) { - return Math.abs(value - DuplicateCheck.DUPLICATE_THRESHOLD) > DuplicateCheck.DOUBT_RANGE; + if (value < 0.0) { + LOGGER.debug("Value {} is below zero. Should not happen", value); + } + return value - DuplicateCheck.DUPLICATE_THRESHOLD > DuplicateCheck.DOUBT_RANGE; } private static boolean compareOptionalFields(final BibEntryType type, @@ -115,7 +120,7 @@ private static boolean compareOptionalFields(final BibEntryType type, final BibEntry two, final double[] req) { final Set optionalFields = type.getOptionalFields(); - if (optionalFields == null) { + if (optionalFields.isEmpty()) { return req[0] >= DuplicateCheck.DUPLICATE_THRESHOLD; } final double[] opt = DuplicateCheck.compareFieldSet(optionalFields.stream().map(BibField::getField).collect(Collectors.toSet()), one, two); @@ -126,22 +131,26 @@ private static boolean compareOptionalFields(final BibEntryType type, } private static double[] compareFieldSet(final Collection fields, final BibEntry one, final BibEntry two) { - double res = 0; - double totWeights = 0.; + if (fields.isEmpty()) { + return new double[] {0.0, 0.0}; + } + double equalWeights = 0; + double totalWeights = 0.; for (final Field field : fields) { - final double weight = DuplicateCheck.FIELD_WEIGHTS.getOrDefault(field, 1.0); - totWeights += weight; + final double currentWeight = DuplicateCheck.FIELD_WEIGHTS.getOrDefault(field, 1.0); + totalWeights += currentWeight; int result = DuplicateCheck.compareSingleField(field, one, two); if (result == EQUAL) { - res += weight; + equalWeights += currentWeight; } else if (result == EMPTY_IN_BOTH) { - totWeights -= weight; + totalWeights -= currentWeight; } } - if (totWeights > 0) { - return new double[] {res / totWeights, totWeights}; + if (totalWeights > 0) { + return new double[] {equalWeights / totalWeights, totalWeights}; } - return new double[] {0.5, 0.0}; + // all fields are empty in both --> have no difference at all + return new double[] {0.0, 0.0}; } private static int compareSingleField(final Field field, final BibEntry one, final BibEntry two) { @@ -220,8 +229,8 @@ private static int compareChapterField(final String stringOne, final String stri } private static int compareField(final String stringOne, final String stringTwo) { - final String processedStringOne = stringOne.toLowerCase(Locale.ROOT).trim(); - final String processedStringTwo = stringTwo.toLowerCase(Locale.ROOT).trim(); + final String processedStringOne = StringUtil.unifyLineBreaks(stringOne.toLowerCase(Locale.ROOT).trim(), OS.NEWLINE); + final String processedStringTwo = StringUtil.unifyLineBreaks(stringTwo.toLowerCase(Locale.ROOT).trim(), OS.NEWLINE); final double similarity = DuplicateCheck.correlateByWords(processedStringOne, processedStringTwo); if (similarity > 0.8) { return EQUAL; @@ -236,9 +245,7 @@ public static double compareEntriesStrictly(BibEntry one, BibEntry two) { int score = 0; for (final Field field : allFields) { - final Optional stringOne = one.getField(field); - final Optional stringTwo = two.getField(field); - if (stringOne.equals(stringTwo)) { + if (isSingleFieldEqual(one, two, field)) { score++; } } @@ -248,6 +255,19 @@ public static double compareEntriesStrictly(BibEntry one, BibEntry two) { return (double) score / allFields.size(); } + private static boolean isSingleFieldEqual(BibEntry one, BibEntry two, Field field) { + final Optional stringOne = one.getField(field); + final Optional stringTwo = two.getField(field); + if (stringOne.isEmpty() && stringTwo.isEmpty()) { + return true; + } + if (stringOne.isEmpty() || stringTwo.isEmpty()) { + return false; + } + return (StringUtil.unifyLineBreaks(stringOne.get(), OS.NEWLINE).equals( + StringUtil.unifyLineBreaks(stringTwo.get(), OS.NEWLINE))); + } + /** * Compare two strings on the basis of word-by-word correlation analysis. * @@ -293,7 +313,7 @@ private static double similarity(final String first, final String second) { } final double distanceIgnoredCase = new StringSimilarity().editDistanceIgnoreCase(longer, shorter); final double similarity = (longerLength - distanceIgnoredCase) / longerLength; - LOGGER.debug("Longer string: " + longer + " Shorter string: " + shorter + " Similarity: " + similarity); + LOGGER.debug("Longer string: {} Shorter string: {} Similarity: {}", longer, shorter, similarity); return similarity; } @@ -330,7 +350,8 @@ public boolean isDuplicate(final BibEntry one, final BibEntry two, final BibData final Optional type = entryTypesManager.enrich(one.getType(), bibDatabaseMode); if (type.isPresent()) { - final double[] reqCmpResult = compareRequiredFields(type.get(), one, two); + BibEntryType entryType = type.get(); + final double[] reqCmpResult = compareRequiredFields(entryType, one, two); if (isFarFromThreshold(reqCmpResult[0])) { // Far from the threshold value, so we base our decision on the required fields only @@ -338,11 +359,13 @@ public boolean isDuplicate(final BibEntry one, final BibEntry two, final BibData } // Close to the threshold value, so we take a look at the optional fields, if any: - return compareOptionalFields(type.get(), one, two, reqCmpResult); - } else { - // We don't know about the type, so simply compare fields without any distinction between optional/required - return compareFieldSet(Sets.union(one.getFields(), two.getFields()), one, two)[0] >= DuplicateCheck.DUPLICATE_THRESHOLD; + if (compareOptionalFields(type.get(), one, two, reqCmpResult)) { + return true; + } } + // if type is not present, so simply compare fields without any distinction between optional/required + // In case both required and optional fields are equal, we also use this fallback + return compareFieldSet(Sets.union(one.getFields(), two.getFields()), one, two)[0] >= DuplicateCheck.DUPLICATE_THRESHOLD; } /** diff --git a/src/main/java/org/jabref/logic/importer/Importer.java b/src/main/java/org/jabref/logic/importer/Importer.java index 86df77999f3..1c790537677 100644 --- a/src/main/java/org/jabref/logic/importer/Importer.java +++ b/src/main/java/org/jabref/logic/importer/Importer.java @@ -1,14 +1,15 @@ package org.jabref.logic.importer; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Objects; import org.jabref.logic.util.FileType; @@ -123,7 +124,7 @@ protected static BufferedReader getUTF16Reader(Path filePath) throws IOException public static BufferedReader getReader(Path filePath, Charset encoding) throws IOException { - InputStream stream = new FileInputStream(filePath.toFile()); + InputStream stream = Files.newInputStream(filePath, StandardOpenOption.READ); return new BufferedReader(new InputStreamReader(stream, encoding)); } diff --git a/src/main/java/org/jabref/model/entry/BibEntry.java b/src/main/java/org/jabref/model/entry/BibEntry.java index e0311d8b615..d0e1786a8c7 100644 --- a/src/main/java/org/jabref/model/entry/BibEntry.java +++ b/src/main/java/org/jabref/model/entry/BibEntry.java @@ -397,7 +397,7 @@ public Optional setType(EntryType newType, EntriesEventSource event } /** - * Returns an set containing the names of all fields that are set for this particular entry. + * Returns a set containing the names of all fields that are set for this particular entry. * * @return a set of existing field names */ diff --git a/src/main/java/org/jabref/model/strings/StringUtil.java b/src/main/java/org/jabref/model/strings/StringUtil.java index cb31437d442..ab4266733c7 100644 --- a/src/main/java/org/jabref/model/strings/StringUtil.java +++ b/src/main/java/org/jabref/model/strings/StringUtil.java @@ -394,6 +394,7 @@ private static String removeSingleBracesAroundCapitals(String s) { /** * Replaces all platform-dependent line breaks by OS.NEWLINE line breaks. + * AKA normalize newlines *

* We do NOT use UNIX line breaks as the user explicitly configures its linebreaks and this method is used in bibtex field writing * diff --git a/src/test/java/org/jabref/logic/bibtex/comparator/BibDatabaseDiffTest.java b/src/test/java/org/jabref/logic/bibtex/comparator/BibDatabaseDiffTest.java index eb10dc02721..58a75f5470f 100644 --- a/src/test/java/org/jabref/logic/bibtex/comparator/BibDatabaseDiffTest.java +++ b/src/test/java/org/jabref/logic/bibtex/comparator/BibDatabaseDiffTest.java @@ -49,6 +49,45 @@ void compareOfDifferentEntriesWithSameDataReportsNoDifferences() throws Exceptio assertEquals(Collections.emptyList(), diff.getEntryDifferences()); } + @Test + void compareOfTwoEntriesWithSameContentAndLfEndingsReportsNoDifferences() throws Exception { + BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibDatabaseContext databaseOne = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryOne))); + + BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibDatabaseContext databaseTwo = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryTwo))); + + BibDatabaseDiff diff = BibDatabaseDiff.compare(databaseOne, databaseTwo); + + assertEquals(Collections.emptyList(), diff.getEntryDifferences()); + } + + @Test + void compareOfTwoEntriesWithSameContentAndCrLfEndingsReportsNoDifferences() throws Exception { + BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + BibDatabaseContext databaseOne = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryOne))); + + BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + BibDatabaseContext databaseTwo = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryTwo))); + + BibDatabaseDiff diff = BibDatabaseDiff.compare(databaseOne, databaseTwo); + + assertEquals(Collections.emptyList(), diff.getEntryDifferences()); + } + + @Test + void compareOfTwoEntriesWithSameContentAndMixedLineEndingsReportsNoDifferences() throws Exception { + BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibDatabaseContext databaseOne = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryOne))); + + BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + BibDatabaseContext databaseTwo = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryTwo))); + + BibDatabaseDiff diff = BibDatabaseDiff.compare(databaseOne, databaseTwo); + + assertEquals(Collections.emptyList(), diff.getEntryDifferences()); + } + @Test void compareOfTwoDifferentEntriesWithDifferentDataReportsDifferences() throws Exception { BibEntry entryOne = new BibEntry(BibEntry.DEFAULT_TYPE).withField(StandardField.TITLE, "test"); diff --git a/src/test/java/org/jabref/logic/bibtex/comparator/EntryComparatorTest.java b/src/test/java/org/jabref/logic/bibtex/comparator/EntryComparatorTest.java index ae4a437c9c3..4dd225aa275 100644 --- a/src/test/java/org/jabref/logic/bibtex/comparator/EntryComparatorTest.java +++ b/src/test/java/org/jabref/logic/bibtex/comparator/EntryComparatorTest.java @@ -99,6 +99,33 @@ void compareObjectsByKeyWithBlank() { assertEquals(-1, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2)); assertEquals(1, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e2, e1)); } + + @Test + void compareWithCrLfFields() { + BibEntry e1 = new BibEntry() + .withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + BibEntry e2 = new BibEntry() + .withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + assertEquals(0, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2)); + } + + @Test + void compareWithLfFields() { + BibEntry e1 = new BibEntry() + .withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibEntry e2 = new BibEntry() + .withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + assertEquals(0, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2)); + } + + @Test + void compareWithMixedLineEndings() { + BibEntry e1 = new BibEntry() + .withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibEntry e2 = new BibEntry() + .withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + assertEquals(-1, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2)); + } } diff --git a/src/test/java/org/jabref/logic/database/DuplicateCheckTest.java b/src/test/java/org/jabref/logic/database/DuplicateCheckTest.java index af7de7e11ee..e043aab7150 100644 --- a/src/test/java/org/jabref/logic/database/DuplicateCheckTest.java +++ b/src/test/java/org/jabref/logic/database/DuplicateCheckTest.java @@ -306,4 +306,25 @@ public void sameBooksWithDifferentEditionsAreNotDuplicates() { assertFalse(duplicateChecker.isDuplicate(editionOne, editionTwo, BibDatabaseMode.BIBTEX)); } + + @Test + void compareOfTwoEntriesWithSameContentAndLfEndingsReportsNoDifferences() throws Exception { + BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + assertTrue(duplicateChecker.isDuplicate(entryOne, entryTwo, BibDatabaseMode.BIBTEX)); + } + + @Test + void compareOfTwoEntriesWithSameContentAndCrLfEndingsReportsNoDifferences() throws Exception { + BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + assertTrue(duplicateChecker.isDuplicate(entryOne, entryTwo, BibDatabaseMode.BIBTEX)); + } + + @Test + void compareOfTwoEntriesWithSameContentAndMixedLineEndingsReportsNoDifferences() throws Exception { + BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5"); + BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5"); + assertTrue(duplicateChecker.isDuplicate(entryOne, entryTwo, BibDatabaseMode.BIBTEX)); + } }