From 9e2a53da15f6a2f194f08be2ccb5076d30e7f8fa Mon Sep 17 00:00:00 2001
From: Vincent Prins <vincent.prins@ivdnt.org>
Date: Mon, 24 Jun 2024 14:19:18 +0200
Subject: [PATCH] Fixed OTHER_POS regex not matching strings longer than 1 char

---
 .../org/ivdnt/galahad/evaluation/comparison/TermFilter.kt   | 2 +-
 .../org/ivdnt/galahad/evaluation/confusion/Confusion.kt     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt
index 4a7ceda..4bf7265 100644
--- a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt
+++ b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt
@@ -14,7 +14,7 @@ class PosLemmaTermFilter (
     val lemma: String? = null,
 ) : TermFilter {
     private val multiplePosFilter: (Term) -> Boolean = { t: Term -> t.isMultiPos }
-    private val otherPosFilter: (Term) -> Boolean = { t: Term -> t.pos?.matches(OTHER_POS_REGEX.toRegex()) ?: false}
+    private val otherPosFilter: (Term) -> Boolean = Regex(OTHER_POS_REGEX).let { otherPos -> { t: Term -> t.pos?.contains(otherPos) ?: false } } // pattern compiled once per filter instance, not on every call; a null pos never counts as "other"
     private val singlePosFilter: (Term) -> Boolean = { t: Term -> t.posHeadGroupOrDefault == posHeadGroup }
     val posFilter: (Term) -> Boolean
     val lemmaFilter: (Term) -> Boolean
diff --git a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt
index 256c353..fab0380 100644
--- a/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt
+++ b/server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt
@@ -9,7 +9,7 @@ import org.ivdnt.galahad.port.csv.CSVFile
 
 const val MULTIPLE_POS = "MULTIPLE"
 const val OTHER_POS = "OTHER"
-const val OTHER_POS_REGEX = """^[^A-Z]"""
+const val OTHER_POS_REGEX = """^[^a-zA-Z]""" // First character is not an ASCII letter. Anchored at the start only: use with contains()/find, since matches() would accept 1-char strings only.
 
 /**
  * Generic class for the part of speech confusion of a corpus or document.
@@ -111,8 +111,8 @@ open class Confusion(private val truncate: Boolean = true): CsvSampleExporter {
             pos1.contains('+') -> add(MULTIPLE_POS, pos2, evaluationEntry)
             pos2.contains('+') -> add(pos1, MULTIPLE_POS, evaluationEntry)
             // Non-alphabetical pos are mapped to a single category "other"
-            pos1.matches(OTHER_POS_REGEX.toRegex()) -> add(OTHER_POS, pos2, evaluationEntry)
-            pos2.matches(OTHER_POS_REGEX.toRegex()) -> add(pos1, OTHER_POS, evaluationEntry)
+            pos1.contains(Regex(OTHER_POS_REGEX)) -> add(OTHER_POS, pos2, evaluationEntry)
+            pos2.contains(Regex(OTHER_POS_REGEX)) -> add(pos1, OTHER_POS, evaluationEntry)
             // Otherwise a simple merge
             else -> matrix.merge(Pair(pos1, pos2), evaluationEntry) { a, b -> EvaluationEntry.add(a, b, truncate) }
         }