Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore Initial Unprotected Uppercase In Translated Titles #10461

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We modified the DOI cleanup to infer the DOI from an ArXiV ID if it's present. [#10426](https://github.com/JabRef/jabref/issues/10426)
- The ISI importer uses the field `comment` for notes (instead of `review). [#10478](https://github.com/JabRef/jabref/pull/10478)
- If no existing document is selected for exporting "Embedded BibTeX pdf" JabRef will now create a new PDF file with a sample text and the metadata. [#10101](https://github.com/JabRef/jabref/issues/10101)
- Translated titles format no longer raise a warning. [#10459](https://github.com/JabRef/jabref/issues/10459)

### Fixed

Expand Down
20 changes: 6 additions & 14 deletions src/main/java/org/jabref/logic/integrity/TitleChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.l10n.Localization;
Expand All @@ -12,7 +11,7 @@
public class TitleChecker implements ValueChecker {

private static final Pattern INSIDE_CURLY_BRAKETS = Pattern.compile("\\{[^}\\{]*\\}");
private static final Pattern DELIMITERS = Pattern.compile("\\.|\\!|\\?|\\;|\\:");
private static final Pattern DELIMITERS = Pattern.compile("\\.|\\!|\\?|\\;|\\:|\\[");
private static final Predicate<String> HAS_CAPITAL_LETTERS = Pattern.compile("[\\p{Lu}\\p{Lt}]").asPredicate();

private final BibDatabaseContext databaseContext;
Expand All @@ -23,13 +22,13 @@ public TitleChecker(BibDatabaseContext databaseContext) {

/**
* Algorithm:
* - remove everything that is in brackets
* - split the title into sub titles based on the delimiters
* (defined in the local variable DELIMITERS, currently . ! ? ; :)
* - remove everything that is in curly brackets
* - split the title into subtitles based on the delimiters
* (defined in the local variable DELIMITERS, currently . ! ? ; : [)
* - for each sub title:
* - remove trailing whitespaces
* - ignore first letter as this can always be written in caps
* - check if at least one capital letter is in the sub title
* - check if at least one capital letter is in the subtitle
*/
@Override
public Optional<String> checkValue(String value) {
Expand All @@ -41,14 +40,7 @@ public Optional<String> checkValue(String value) {
return Optional.empty();
}

String valueOnlySpacesWithinCurlyBraces = value;
while (true) {
Matcher matcher = INSIDE_CURLY_BRAKETS.matcher(valueOnlySpacesWithinCurlyBraces);
if (!matcher.find()) {
break;
}
valueOnlySpacesWithinCurlyBraces = matcher.replaceAll("");
}
String valueOnlySpacesWithinCurlyBraces = INSIDE_CURLY_BRAKETS.matcher(value).replaceAll("");

String[] splitTitle = DELIMITERS.split(valueOnlySpacesWithinCurlyBraces);
for (String subTitle : splitTitle) {
Expand Down
308 changes: 100 additions & 208 deletions src/test/java/org/jabref/logic/integrity/TitleCheckerTest.java
Original file line number Diff line number Diff line change
@@ -1,223 +1,115 @@
package org.jabref.logic.integrity;

import java.util.Optional;
import java.util.stream.Stream;

import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.database.BibDatabaseMode;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

public class TitleCheckerTest {

private TitleChecker checker;
private TitleChecker checkerBiblatex;

@BeforeEach
public void setUp() {
BibDatabaseContext databaseContext = new BibDatabaseContext();
BibDatabaseContext databaseBiblatex = new BibDatabaseContext();
databaseContext.setMode(BibDatabaseMode.BIBTEX);
checker = new TitleChecker(databaseContext);
databaseBiblatex.setMode(BibDatabaseMode.BIBLATEX);
checkerBiblatex = new TitleChecker(databaseBiblatex);
}

@Test
public void firstLetterAsOnlyCapitalLetterInSubTitle2() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: This is a sub title 2"));
}

@Test
public void noCapitalLetterInSubTitle2() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: this is a sub title 2"));
}

@Test
public void twoCapitalLettersInSubTitle2() {
assertNotEquals(Optional.empty(), checker.checkValue("This is a sub title 1: This is A sub title 2"));
}

@Test
public void middleLetterAsOnlyCapitalLetterInSubTitle2() {
assertNotEquals(Optional.empty(), checker.checkValue("This is a sub title 1: this is A sub title 2"));
}

@Test
public void twoCapitalLettersInSubTitle2WithCurlyBrackets() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: This is {A} sub title 2"));
}

@Test
public void middleLetterAsOnlyCapitalLetterInSubTitle2WithCurlyBrackets() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: this is {A} sub title 2"));
}

@Test
public void firstLetterAsOnlyCapitalLetterInSubTitle2AfterContinuousDelimiters() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1...This is a sub title 2"));
}

@Test
public void middleLetterAsOnlyCapitalLetterInSubTitle2AfterContinuousDelimiters() {
assertNotEquals(Optional.empty(), checker.checkValue("This is a sub title 1... this is a sub Title 2"));
}

@Test
public void firstLetterAsOnlyCapitalLetterInEverySubTitleWithContinuousDelimiters() {
assertEquals(Optional.empty(), checker.checkValue("This is; A sub title 1.... This is a sub title 2"));
}

@Test
public void firstLetterAsOnlyCapitalLetterInEverySubTitleWithRandomDelimiters() {
assertEquals(Optional.empty(), checker.checkValue("This!is!!A!Title??"));
}

@Test
public void moreThanOneCapitalLetterInSubTitleWithoutCurlyBrackets() {
assertNotEquals(Optional.empty(), checker.checkValue("This!is!!A!TitlE??"));
}

@Test
void bibTexAcceptsTitleWithOnlyFirstCapitalLetter() {
assertEquals(Optional.empty(), checker.checkValue("This is a title"));
}

@Test
void bibTexDoesNotAcceptCapitalLettersInsideTitle() {
assertNotEquals(Optional.empty(), checker.checkValue("This is a Title"));
}

@Test
void bibTexRemovesCapitalLetterInsideTitle() {
assertEquals(Optional.empty(), checker.checkValue("This is a {T}itle"));
}

@Test
void bibTexRemovesEverythingInBracketsAndAcceptsNoTitleInput() {
assertEquals(Optional.empty(), checker.checkValue("{This is a Title}"));
}

@Test
void bibTexRemovesEverythingInBrackets() {
assertEquals(Optional.empty(), checker.checkValue("This is a {Title}"));
}

@Test
void bibTexAcceptsTitleWithLowercaseFirstLetter() {
assertEquals(Optional.empty(), checker.checkValue("{C}urrent {C}hronicle"));
}

@Test
void bibTexAcceptsSubTitlesWithOnlyFirstCapitalLetter() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: This is a sub title 2"));
}

@Test
void bibTexAcceptsSubTitleWithLowercaseFirstLetter() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: this is a sub title 2"));
}

@Test
void bibTexDoesNotAcceptCapitalLettersInsideSubTitle() {
assertNotEquals(Optional.empty(), checker.checkValue("This is a sub title 1: This is A sub title 2"));
}

@Test
void bibTexRemovesCapitalLetterInsideSubTitle() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1: this is {A} sub title 2"));
}

@Test
void bibTexSplitsSubTitlesBasedOnDots() {
assertEquals(Optional.empty(), checker.checkValue("This is a sub title 1...This is a sub title 2"));
}

@Test
void bibTexSplitsSubTitleBasedOnSpecialCharacters() {
assertEquals(Optional.empty(), checker.checkValue("This is; A sub title 1.... This is a sub title 2"));
}

@Test
void bibTexAcceptsCapitalLetterAfterSpecialCharacter() {
assertEquals(Optional.empty(), checker.checkValue("This!is!!A!Title??"));
}

@Test
void bibTexAcceptsCapitalLetterOnlyAfterSpecialCharacter() {
assertNotEquals(Optional.empty(), checker.checkValue("This!is!!A!TitlE??"));
}

@Test
void bibLaTexAcceptsTitleWithOnlyFirstCapitalLetter() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a title"));
}

@Test
void bibLaTexAcceptsCapitalLettersInsideTitle() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a Title"));
}

@Test
void bibLaTexRemovesCapitalLetterInsideTitle() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a {T}itle"));
}

@Test
void bibLaTexRemovesEverythingInBracketsAndAcceptsNoTitleInput() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("{This is a Title}"));
}

@Test
void bibLaTexRemovesEverythingInBrackets() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a {Title}"));
}

@Test
void bibLaTexAcceptsTitleWithLowercaseFirstLetter() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("{C}urrent {C}hronicle"));
}

@Test
void bibLaTexAcceptsSubTitlesWithOnlyFirstCapitalLetter() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a sub title 1: This is a sub title 2"));
}

@Test
void bibLaTexAcceptsSubTitleWithLowercaseFirstLetter() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a sub title 1: this is a sub title 2"));
}

@Test
void bibLaTexAcceptsCapitalLettersInsideSubTitle() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a sub title 1: This is A sub title 2"));
}

@Test
void bibLaTexRemovesCapitalLetterInsideSubTitle() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a sub title 1: this is {A} sub title 2"));
}

@Test
void bibLaTexSplitsSubTitlesBasedOnDots() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is a sub title 1...This is a sub title 2"));
}

@Test
void bibLaTexSplitsSubTitleBasedOnSpecialCharacters() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This is; A sub title 1.... This is a sub title 2"));
}

@Test
void bibLaTexAcceptsCapitalLetterAfterSpecialCharacter() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This!is!!A!Title??"));
}

@Test
void bibLaTexAcceptsCapitalLetterNotOnlyAfterSpecialCharacter() {
assertEquals(Optional.empty(), checkerBiblatex.checkValue("This!is!!A!TitlE??"));
@Nested
class BibTexChecker {
private TitleChecker checker;

@BeforeEach
public void setUp() {
BibDatabaseContext databaseContext = new BibDatabaseContext();
databaseContext.setMode(BibDatabaseMode.BIBTEX);
checker = new TitleChecker(databaseContext);
}

@ParameterizedTest(name = "{index}. Title: \"{1}\" {0}")
@MethodSource("validTitle")
void titleShouldNotRaiseWarning(String message, String title) {
assertEquals(Optional.empty(), checker.checkValue(title));
assertEquals(Optional.empty(), checker.checkValue(translatedTitle(title)));
}

@ParameterizedTest(name = "{index}. Title: \"{1}\" {0}")
@MethodSource("invalidTitle")
void titleShouldRaiseWarning(String message, String title) {
assertNotEquals(Optional.empty(), checker.checkValue(title));
assertNotEquals(Optional.empty(), checker.checkValue(translatedTitle(title)));
}

static Stream<Arguments> validTitle() {
return Stream.of(
Arguments.of("firstLetterAsOnlyCapitalLetterInSubTitle2 ", "This is a sub title 1: This is a sub title 2"),
Arguments.of("noCapitalLetterInSubTitle2 ", "This is a sub title 1: this is a sub title 2"),
Arguments.of("twoCapitalLettersInSubTitle2WithCurlyBrackets ", "This is a sub title 1: This is {A} sub title 2"),
Arguments.of("middleLetterAsOnlyCapitalLetterInSubTitle2WithCurlyBrackets ", "This is a sub title 1: this is {A} sub title 2"),
Arguments.of("firstLetterAsOnlyCapitalLetterInSubTitle2AfterContinuousDelimiters ", "This is a sub title 1...This is a sub title 2"),
Arguments.of("firstLetterAsOnlyCapitalLetterInEverySubTitleWithContinuousDelimiters ", "This is; A sub title 1.... This is a sub title 2"),
Arguments.of("firstLetterAsOnlyCapitalLetterInEverySubTitleWithRandomDelimiters ", "This!is!!A!Title??"),
Arguments.of("bibTexAcceptsTitleWithOnlyFirstCapitalLetter ", "This is a title"),
Arguments.of("bibTexRemovesCapitalLetterInsideTitle ", "This is a {T}itle"),
Arguments.of("bibTexRemovesEverythingInBracketsAndAcceptsNoTitleInput ", "{This is a Title}"),
Arguments.of("bibTexRemovesEverythingInBrackets ", "This is a {Title}"),
Arguments.of("bibTexAcceptsTitleWithLowercaseFirstLetter ", "{C}urrent {C}hronicle"),
Arguments.of("bibTexAcceptsOriginalAndTranslatedTitle ", "This is a title [This is translated title]")
);
}

static Stream<Arguments> invalidTitle() {
return Stream.of(
Arguments.of("twoCapitalLettersInSubTitle2 ", "This is a sub title 1: This is A sub title 2"),
Arguments.of("middleLetterAsOnlyCapitalLetterInSubTitle2 ", "This is a sub title 1: this is A sub title 2"),
Arguments.of("middleLetterAsOnlyCapitalLetterInSubTitle2AfterContinuousDelimiters ", "This is a sub title 1... this is a sub Title 2"),
Arguments.of("moreThanOneCapitalLetterInSubTitleWithoutCurlyBrackets ", "This!is!!A!TitlE??"),
Arguments.of("bibTexDoesNotAcceptCapitalLettersInsideTitle ", "This is a Title"),
Arguments.of("bibTexDoesNotAcceptsLeadingTranslatedTitleWithOriginal ", "[This is translated title] This is a title")
);
}
}

@Nested
class BibLaTexChecker {
private TitleChecker checkerBiblatex;

@BeforeEach
public void setUp() {
BibDatabaseContext databaseBiblatex = new BibDatabaseContext();
databaseBiblatex.setMode(BibDatabaseMode.BIBLATEX);
checkerBiblatex = new TitleChecker(databaseBiblatex);
}

@ParameterizedTest(name = "{index}. Title: \"{1}\" {0}")
@MethodSource("validTitle")
void titleShouldNotRaiseWarning(String message, String title) {
assertEquals(Optional.empty(), checkerBiblatex.checkValue(title));
assertEquals(Optional.empty(), checkerBiblatex.checkValue(translatedTitle(title)));
}

static Stream<Arguments> validTitle() {
return Stream.of(
Arguments.of("bibLaTexAcceptsTitleWithOnlyFirstCapitalLetter", "This is a title"),
Arguments.of("bibLaTexAcceptsCapitalLettersInsideTitle", "This is a Title"),
Arguments.of("bibLaTexRemovesCapitalLetterInsideTitle", "This is a {T}itle"),
Arguments.of("bibLaTexRemovesEverythingInBracketsAndAcceptsNoTitleInput", "{This is a Title}"),
Arguments.of("bibLaTexRemovesEverythingInBrackets", "This is a {Title}"),
Arguments.of("bibLaTexAcceptsTitleWithLowercaseFirstLetter", "{C}urrent {C}hronicle"),
Arguments.of("bibLaTexAcceptsSubTitlesWithOnlyFirstCapitalLetter", "This is a sub title 1: This is a sub title 2"),
Arguments.of("bibLaTexAcceptsSubTitleWithLowercaseFirstLetter", "This is a sub title 1: this is a sub title 2"),
Arguments.of("bibLaTexAcceptsCapitalLettersInsideSubTitle", "This is a sub title 1: This is A sub title 2"),
Arguments.of("bibLaTexRemovesCapitalLetterInsideSubTitle", "This is a sub title 1: this is {A} sub title 2"),
Arguments.of("bibLaTexSplitsSubTitlesBasedOnDots", "This is a sub title 1...This is a sub title 2"),
Arguments.of("bibLaTexSplitsSubTitleBasedOnSpecialCharacters", "This is; A sub title 1.... This is a sub title 2"),
Arguments.of("bibLaTexAcceptsCapitalLetterAfterSpecialCharacter", "This!is!!A!Title??"),
Arguments.of("bibLaTexAcceptsCapitalLetterNotOnlyAfterSpecialCharacter", "This!is!!A!TitlE??")
);
}
}

private String translatedTitle(String title) {
return "[%s]".formatted(title);
}
}