Skip to content

Commit

Permalink
Fix "Library has changed externally" with CRLF markers (#8239)
Browse files Browse the repository at this point in the history
* Add Javadoc to Entrycomparator#compare (and merge - in code to one place)

* Refine duplicate checker

- Compare remaining fields if requried and optional do not differ
- Ignore CRLF at field comparison

* When loading a .bib file, it is opened read only

* Fix checkstyle
  • Loading branch information
koppor committed Nov 14, 2021
1 parent 10a2760 commit c94fa48
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 32 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We Included all standard fields with citation key when exporting to Old OpenOffice/LibreOffice Calc Format [#8176](https://github.com/JabRef/jabref/pull/8176)
- We present options to manually enter an article or return to the New Entry menu when the fetcher DOI fails to find an entry for an ID [#7870](https://github.com/JabRef/jabref/issues/7870)
- We trim white space and non-ASCII characters from DOI [#8127](https://github.com/JabRef/jabref/issues/8127)
- The duplicate checker now inspects other fields in case no difference in the required and optional fields are found.

### Fixed

Expand All @@ -57,6 +58,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where typing an invalid UNC path into the "Main file directory" text field caused an error. [#8107](https://github.com/JabRef/jabref/issues/8107)
- We fixed an issue where "Open Folder" didn't select the file on macOS in Finder [#8130](https://github.com/JabRef/jabref/issues/8130)
- We fixed an issue where importing PDFs resulted in an uncaught exception [#8143](https://github.com/JabRef/jabref/issues/8143)
- We fixed "The library has been modified by another program" showing up when line breaks change [#4877](https://github.com/JabRef/jabref/issues/4877)
- The default directory of the "LaTeX Citations" tab is now the directory of the currently opened database (and not the directory chosen at the last open file dialog or the last database save) [koppor#538](https://github.com/koppor/jabref/issues/538)
- We fixed an issue where right-clicking on a tab and selecting close will close the focused tab even if it is not the tab we right-clicked [#8193](https://github.com/JabRef/jabref/pull/8193)
- We fixed an issue where selecting a citation style in the preferences would sometimes produce an exception [#7860](https://github.com/JabRef/jabref/issues/7860)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ public class EntryComparator implements Comparator<BibEntry> {
private final boolean binary;
private final Comparator<BibEntry> next;

/**
*
* @param binary true: the presence of fields is checked; false: the content of the fields is compared
* @param descending true: if the most different entry should get the highest score
* @param field the field to sort on
* @param next the next comparator to use (if the current comparator results in equality)
*/
public EntryComparator(boolean binary, boolean descending, Field field, Comparator<BibEntry> next) {
this.binary = binary;
this.sortField = field;
Expand All @@ -42,7 +49,7 @@ public EntryComparator(boolean binary, boolean descending, Field field) {
public int compare(BibEntry e1, BibEntry e2) {
// default equals
// TODO: with the new default equals this does not only return 0 for identical objects,
// but for all objects that have the same id and same fields
// but for all objects that have the same id and same fields
if (Objects.equals(e1, e2)) {
return 0;
}
Expand Down Expand Up @@ -103,21 +110,21 @@ public int compare(BibEntry e1, BibEntry e2) {
int result;

if ((f1 instanceof Integer) && (f2 instanceof Integer)) {
result = -((Integer) f1).compareTo((Integer) f2);
result = ((Integer) f1).compareTo((Integer) f2);
} else if (f2 instanceof Integer) {
Integer f1AsInteger = Integer.valueOf(f1.toString());
result = -f1AsInteger.compareTo((Integer) f2);
result = f1AsInteger.compareTo((Integer) f2);
} else if (f1 instanceof Integer) {
Integer f2AsInteger = Integer.valueOf(f2.toString());
result = -((Integer) f1).compareTo(f2AsInteger);
result = ((Integer) f1).compareTo(f2AsInteger);
} else {
String ours = ((String) f1).toLowerCase(Locale.ROOT);
String theirs = ((String) f2).toLowerCase(Locale.ROOT);
int comp = ours.compareTo(theirs);
result = -comp;
result = comp;
}
if (result != 0) {
return descending ? result : -result; // Primary sort.
return descending ? -result : result; // Primary sort.
}
if (next == null) {
return idCompare(e1, e2); // If still equal, we use the unique IDs.
Expand Down
69 changes: 46 additions & 23 deletions src/main/java/org/jabref/logic/database/DuplicateCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.Set;
import java.util.stream.Collectors;

import org.jabref.logic.util.OS;
import org.jabref.logic.util.strings.StringSimilarity;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseMode;
Expand All @@ -25,6 +26,7 @@
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.identifier.ISBN;
import org.jabref.model.strings.StringUtil;

import com.google.common.collect.Sets;
import org.slf4j.Logger;
Expand Down Expand Up @@ -101,21 +103,24 @@ private static boolean haveDifferentChaptersOrPagesOfTheSameBook(final BibEntry

private static double[] compareRequiredFields(final BibEntryType type, final BibEntry one, final BibEntry two) {
final Set<OrFields> requiredFields = type.getRequiredFields();
return requiredFields == null
return requiredFields.isEmpty()
? new double[] {0., 0.}
: DuplicateCheck.compareFieldSet(requiredFields.stream().map(OrFields::getPrimary).collect(Collectors.toSet()), one, two);
}

private static boolean isFarFromThreshold(double value) {
return Math.abs(value - DuplicateCheck.DUPLICATE_THRESHOLD) > DuplicateCheck.DOUBT_RANGE;
if (value < 0.0) {
LOGGER.debug("Value {} is below zero. Should not happen", value);
}
return value - DuplicateCheck.DUPLICATE_THRESHOLD > DuplicateCheck.DOUBT_RANGE;
}

private static boolean compareOptionalFields(final BibEntryType type,
final BibEntry one,
final BibEntry two,
final double[] req) {
final Set<BibField> optionalFields = type.getOptionalFields();
if (optionalFields == null) {
if (optionalFields.isEmpty()) {
return req[0] >= DuplicateCheck.DUPLICATE_THRESHOLD;
}
final double[] opt = DuplicateCheck.compareFieldSet(optionalFields.stream().map(BibField::getField).collect(Collectors.toSet()), one, two);
Expand All @@ -126,22 +131,26 @@ private static boolean compareOptionalFields(final BibEntryType type,
}

private static double[] compareFieldSet(final Collection<Field> fields, final BibEntry one, final BibEntry two) {
double res = 0;
double totWeights = 0.;
if (fields.isEmpty()) {
return new double[] {0.0, 0.0};
}
double equalWeights = 0;
double totalWeights = 0.;
for (final Field field : fields) {
final double weight = DuplicateCheck.FIELD_WEIGHTS.getOrDefault(field, 1.0);
totWeights += weight;
final double currentWeight = DuplicateCheck.FIELD_WEIGHTS.getOrDefault(field, 1.0);
totalWeights += currentWeight;
int result = DuplicateCheck.compareSingleField(field, one, two);
if (result == EQUAL) {
res += weight;
equalWeights += currentWeight;
} else if (result == EMPTY_IN_BOTH) {
totWeights -= weight;
totalWeights -= currentWeight;
}
}
if (totWeights > 0) {
return new double[] {res / totWeights, totWeights};
if (totalWeights > 0) {
return new double[] {equalWeights / totalWeights, totalWeights};
}
return new double[] {0.5, 0.0};
// all fields are empty in both --> have no difference at all
return new double[] {0.0, 0.0};
}

private static int compareSingleField(final Field field, final BibEntry one, final BibEntry two) {
Expand Down Expand Up @@ -220,8 +229,8 @@ private static int compareChapterField(final String stringOne, final String stri
}

private static int compareField(final String stringOne, final String stringTwo) {
final String processedStringOne = stringOne.toLowerCase(Locale.ROOT).trim();
final String processedStringTwo = stringTwo.toLowerCase(Locale.ROOT).trim();
final String processedStringOne = StringUtil.unifyLineBreaks(stringOne.toLowerCase(Locale.ROOT).trim(), OS.NEWLINE);
final String processedStringTwo = StringUtil.unifyLineBreaks(stringTwo.toLowerCase(Locale.ROOT).trim(), OS.NEWLINE);
final double similarity = DuplicateCheck.correlateByWords(processedStringOne, processedStringTwo);
if (similarity > 0.8) {
return EQUAL;
Expand All @@ -236,9 +245,7 @@ public static double compareEntriesStrictly(BibEntry one, BibEntry two) {

int score = 0;
for (final Field field : allFields) {
final Optional<String> stringOne = one.getField(field);
final Optional<String> stringTwo = two.getField(field);
if (stringOne.equals(stringTwo)) {
if (isSingleFieldEqual(one, two, field)) {
score++;
}
}
Expand All @@ -248,6 +255,19 @@ public static double compareEntriesStrictly(BibEntry one, BibEntry two) {
return (double) score / allFields.size();
}

private static boolean isSingleFieldEqual(BibEntry one, BibEntry two, Field field) {
final Optional<String> stringOne = one.getField(field);
final Optional<String> stringTwo = two.getField(field);
if (stringOne.isEmpty() && stringTwo.isEmpty()) {
return true;
}
if (stringOne.isEmpty() || stringTwo.isEmpty()) {
return false;
}
return (StringUtil.unifyLineBreaks(stringOne.get(), OS.NEWLINE).equals(
StringUtil.unifyLineBreaks(stringTwo.get(), OS.NEWLINE)));
}

/**
* Compare two strings on the basis of word-by-word correlation analysis.
*
Expand Down Expand Up @@ -293,7 +313,7 @@ private static double similarity(final String first, final String second) {
}
final double distanceIgnoredCase = new StringSimilarity().editDistanceIgnoreCase(longer, shorter);
final double similarity = (longerLength - distanceIgnoredCase) / longerLength;
LOGGER.debug("Longer string: " + longer + " Shorter string: " + shorter + " Similarity: " + similarity);
LOGGER.debug("Longer string: {} Shorter string: {} Similarity: {}", longer, shorter, similarity);
return similarity;
}

Expand Down Expand Up @@ -330,19 +350,22 @@ public boolean isDuplicate(final BibEntry one, final BibEntry two, final BibData

final Optional<BibEntryType> type = entryTypesManager.enrich(one.getType(), bibDatabaseMode);
if (type.isPresent()) {
final double[] reqCmpResult = compareRequiredFields(type.get(), one, two);
BibEntryType entryType = type.get();
final double[] reqCmpResult = compareRequiredFields(entryType, one, two);

if (isFarFromThreshold(reqCmpResult[0])) {
// Far from the threshold value, so we base our decision on the required fields only
return reqCmpResult[0] >= DuplicateCheck.DUPLICATE_THRESHOLD;
}

// Close to the threshold value, so we take a look at the optional fields, if any:
return compareOptionalFields(type.get(), one, two, reqCmpResult);
} else {
// We don't know about the type, so simply compare fields without any distinction between optional/required
return compareFieldSet(Sets.union(one.getFields(), two.getFields()), one, two)[0] >= DuplicateCheck.DUPLICATE_THRESHOLD;
if (compareOptionalFields(type.get(), one, two, reqCmpResult)) {
return true;
}
}
// if type is not present, so simply compare fields without any distinction between optional/required
// In case both required and optional fields are equal, we also use this fallback
return compareFieldSet(Sets.union(one.getFields(), two.getFields()), one, two)[0] >= DuplicateCheck.DUPLICATE_THRESHOLD;
}

/**
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/logic/importer/Importer.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
package org.jabref.logic.importer;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Objects;

import org.jabref.logic.util.FileType;
Expand Down Expand Up @@ -123,7 +124,7 @@ protected static BufferedReader getUTF16Reader(Path filePath) throws IOException

public static BufferedReader getReader(Path filePath, Charset encoding)
throws IOException {
InputStream stream = new FileInputStream(filePath.toFile());
InputStream stream = Files.newInputStream(filePath, StandardOpenOption.READ);
return new BufferedReader(new InputStreamReader(stream, encoding));
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/model/entry/BibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ public Optional<FieldChange> setType(EntryType newType, EntriesEventSource event
}

/**
* Returns an set containing the names of all fields that are set for this particular entry.
* Returns a set containing the names of all fields that are set for this particular entry.
*
* @return a set of existing field names
*/
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/jabref/model/strings/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ private static String removeSingleBracesAroundCapitals(String s) {

/**
* Replaces all platform-dependent line breaks by OS.NEWLINE line breaks.
* AKA normalize newlines
* <p>
* We do NOT use UNIX line breaks as the user explicitly configures its linebreaks and this method is used in bibtex field writing
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,45 @@ void compareOfDifferentEntriesWithSameDataReportsNoDifferences() throws Exceptio
assertEquals(Collections.emptyList(), diff.getEntryDifferences());
}

@Test
void compareOfTwoEntriesWithSameContentAndLfEndingsReportsNoDifferences() throws Exception {
BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibDatabaseContext databaseOne = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryOne)));

BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibDatabaseContext databaseTwo = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryTwo)));

BibDatabaseDiff diff = BibDatabaseDiff.compare(databaseOne, databaseTwo);

assertEquals(Collections.emptyList(), diff.getEntryDifferences());
}

@Test
void compareOfTwoEntriesWithSameContentAndCrLfEndingsReportsNoDifferences() throws Exception {
BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
BibDatabaseContext databaseOne = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryOne)));

BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
BibDatabaseContext databaseTwo = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryTwo)));

BibDatabaseDiff diff = BibDatabaseDiff.compare(databaseOne, databaseTwo);

assertEquals(Collections.emptyList(), diff.getEntryDifferences());
}

@Test
void compareOfTwoEntriesWithSameContentAndMixedLineEndingsReportsNoDifferences() throws Exception {
BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibDatabaseContext databaseOne = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryOne)));

BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
BibDatabaseContext databaseTwo = new BibDatabaseContext(new BibDatabase(Collections.singletonList(entryTwo)));

BibDatabaseDiff diff = BibDatabaseDiff.compare(databaseOne, databaseTwo);

assertEquals(Collections.emptyList(), diff.getEntryDifferences());
}

@Test
void compareOfTwoDifferentEntriesWithDifferentDataReportsDifferences() throws Exception {
BibEntry entryOne = new BibEntry(BibEntry.DEFAULT_TYPE).withField(StandardField.TITLE, "test");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,33 @@ void compareObjectsByKeyWithBlank() {
assertEquals(-1, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2));
assertEquals(1, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e2, e1));
}

@Test
void compareWithCrLfFields() {
BibEntry e1 = new BibEntry()
.withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
BibEntry e2 = new BibEntry()
.withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
assertEquals(0, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2));
}

@Test
void compareWithLfFields() {
BibEntry e1 = new BibEntry()
.withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibEntry e2 = new BibEntry()
.withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
assertEquals(0, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2));
}

@Test
void compareWithMixedLineEndings() {
BibEntry e1 = new BibEntry()
.withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibEntry e2 = new BibEntry()
.withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
assertEquals(-1, new EntryComparator(false, false, InternalField.KEY_FIELD).compare(e1, e2));
}
}


21 changes: 21 additions & 0 deletions src/test/java/org/jabref/logic/database/DuplicateCheckTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -306,4 +306,25 @@ public void sameBooksWithDifferentEditionsAreNotDuplicates() {

assertFalse(duplicateChecker.isDuplicate(editionOne, editionTwo, BibDatabaseMode.BIBTEX));
}

@Test
void compareOfTwoEntriesWithSameContentAndLfEndingsReportsNoDifferences() throws Exception {
BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
assertTrue(duplicateChecker.isDuplicate(entryOne, entryTwo, BibDatabaseMode.BIBTEX));
}

@Test
void compareOfTwoEntriesWithSameContentAndCrLfEndingsReportsNoDifferences() throws Exception {
BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
assertTrue(duplicateChecker.isDuplicate(entryOne, entryTwo, BibDatabaseMode.BIBTEX));
}

@Test
void compareOfTwoEntriesWithSameContentAndMixedLineEndingsReportsNoDifferences() throws Exception {
BibEntry entryOne = new BibEntry().withField(StandardField.COMMENT, "line1\n\nline3\n\nline5");
BibEntry entryTwo = new BibEntry().withField(StandardField.COMMENT, "line1\r\n\r\nline3\r\n\r\nline5");
assertTrue(duplicateChecker.isDuplicate(entryOne, entryTwo, BibDatabaseMode.BIBTEX));
}
}

0 comments on commit c94fa48

Please sign in to comment.