Skip to content

Commit

Permalink
Fixed OTHER_POS regex not matching strings longer than 1 char
Browse files Browse the repository at this point in the history
  • Loading branch information
PrinsINT committed Jun 24, 2024
1 parent d776e7c commit 9e2a53d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class PosLemmaTermFilter (
val lemma: String? = null,
) : TermFilter {
private val multiplePosFilter: (Term) -> Boolean = { t: Term -> t.isMultiPos }
private val otherPosFilter: (Term) -> Boolean = { t: Term -> t.pos?.matches(OTHER_POS_REGEX.toRegex()) ?: false}
private val otherPosFilter: (Term) -> Boolean = { t: Term -> t.pos?.contains(Regex(OTHER_POS_REGEX)) ?: false}
private val singlePosFilter: (Term) -> Boolean = { t: Term -> t.posHeadGroupOrDefault == posHeadGroup }
val posFilter: (Term) -> Boolean
val lemmaFilter: (Term) -> Boolean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.ivdnt.galahad.port.csv.CSVFile

const val MULTIPLE_POS = "MULTIPLE"
const val OTHER_POS = "OTHER"
const val OTHER_POS_REGEX = """^[^A-Z]"""
const val OTHER_POS_REGEX = """^[^a-zA-Z]"""

/**
* Generic class for the part of speech confusion of a corpus or document.
Expand Down Expand Up @@ -111,8 +111,8 @@ open class Confusion(private val truncate: Boolean = true): CsvSampleExporter {
pos1.contains('+') -> add(MULTIPLE_POS, pos2, evaluationEntry)
pos2.contains('+') -> add(pos1, MULTIPLE_POS, evaluationEntry)
// Pos tags that start with a non-alphabetical character (anything other than a-z or A-Z) are mapped to a single category "other"
pos1.matches(OTHER_POS_REGEX.toRegex()) -> add(OTHER_POS, pos2, evaluationEntry)
pos2.matches(OTHER_POS_REGEX.toRegex()) -> add(pos1, OTHER_POS, evaluationEntry)
pos1.contains(Regex(OTHER_POS_REGEX)) -> add(OTHER_POS, pos2, evaluationEntry)
pos2.contains(Regex(OTHER_POS_REGEX)) -> add(pos1, OTHER_POS, evaluationEntry)
// Otherwise a simple merge
else -> matrix.merge(Pair(pos1, pos2), evaluationEntry) { a, b -> EvaluationEntry.add(a, b, truncate) }
}
Expand Down

0 comments on commit 9e2a53d

Please sign in to comment.