From 9e2a53da15f6a2f194f08be2ccb5076d30e7f8fa Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Mon, 24 Jun 2024 14:19:18 +0200 Subject: [PATCH] Fixed OTHER_POS regex not matching strings longer than 1 char --- .../org/ivdnt/galahad/evaluation/comparison/TermFilter.kt | 2 +- .../org/ivdnt/galahad/evaluation/confusion/Confusion.kt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt index 4a7ceda..4bf7265 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt @@ -14,7 +14,7 @@ class PosLemmaTermFilter ( val lemma: String? = null, ) : TermFilter { private val multiplePosFilter: (Term) -> Boolean = { t: Term -> t.isMultiPos } - private val otherPosFilter: (Term) -> Boolean = { t: Term -> t.pos?.matches(OTHER_POS_REGEX.toRegex()) ?: false} + private val otherPosFilter: (Term) -> Boolean = { t: Term -> t.pos?.contains(Regex(OTHER_POS_REGEX)) ?: false} private val singlePosFilter: (Term) -> Boolean = { t: Term -> t.posHeadGroupOrDefault == posHeadGroup } val posFilter: (Term) -> Boolean val lemmaFilter: (Term) -> Boolean diff --git a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt index 256c353..fab0380 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt @@ -9,7 +9,7 @@ import org.ivdnt.galahad.port.csv.CSVFile const val MULTIPLE_POS = "MULTIPLE" const val OTHER_POS = "OTHER" -const val OTHER_POS_REGEX = """^[^A-Z]""" +const val OTHER_POS_REGEX = """^[^a-zA-Z]""" /** * Generic class for the part of speech confusion of a corpus or document. @@ -111,8 +111,8 @@ open class Confusion(private val truncate: Boolean = true): CsvSampleExporter { pos1.contains('+') -> add(MULTIPLE_POS, pos2, evaluationEntry) pos2.contains('+') -> add(pos1, MULTIPLE_POS, evaluationEntry) // Non-alphabetical pos are mapped to a single category "other" - pos1.matches(OTHER_POS_REGEX.toRegex()) -> add(OTHER_POS, pos2, evaluationEntry) - pos2.matches(OTHER_POS_REGEX.toRegex()) -> add(pos1, OTHER_POS, evaluationEntry) + pos1.contains(Regex(OTHER_POS_REGEX)) -> add(OTHER_POS, pos2, evaluationEntry) + pos2.contains(Regex(OTHER_POS_REGEX)) -> add(pos1, OTHER_POS, evaluationEntry) // Otherwise a simple merge else -> matrix.merge(Pair(pos1, pos2), evaluationEntry) { a, b -> EvaluationEntry.add(a, b, truncate) } }