Skip to content

Commit

Permalink
Merge pull request #56 from chakki-works/enhancement/BILOU
Browse files Browse the repository at this point in the history
Add BILOU as a scheme
  • Loading branch information
Hironsan committed Oct 12, 2020
2 parents 18f8688 + 601a8f8 commit 7e62fbe
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 10 deletions.
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ which can be used for measuring the performance of a system that has processed t
## Support features

seqeval supports the following formats:
* IOB1
* IOB2
* IOE1
* IOE2
* IOBES

- IOB1
- IOB2
- IOE1
- IOE2
- IOBES
- BILOU

and supports the following metrics:

Expand Down Expand Up @@ -55,7 +57,7 @@ weighted avg 0.50 0.50 0.50 2
If you want to explicitly specify the evaluation scheme, use `mode='strict'`:

```python
>>> from seqeval.scheme import IOB2
>>> from seqeval.scheme import IOB2, IOBES, BILOU
>>> classification_report(y_true, y_pred, mode='strict', scheme=IOB2)
precision recall f1-score support

Expand Down
35 changes: 34 additions & 1 deletion seqeval/scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ class Prefix(enum.Flag):
B = enum.auto()
E = enum.auto()
S = enum.auto()
ANY = I | O | B | E | S
U = enum.auto()
L = enum.auto()
ANY = I | O | B | E | S | U | L


class Tag(enum.Flag):
Expand Down Expand Up @@ -205,6 +207,24 @@ class IOBES(Token):
}


class BILOU(Token):
    """Token subclass describing the BILOU tagging scheme.

    Every pattern below is a ``(prefix, prefix, tag)`` triple. Judging by the
    scheme tests, the first prefix belongs to the previous token and the
    second to the current one.
    """

    # Prefixes a BILOU token may carry.
    allowed_prefix = Prefix.O | Prefix.B | Prefix.I | Prefix.L | Prefix.U

    # A current B or U prefix marks a start, whatever precedes it.
    start_patterns = {
        (Prefix.ANY, Prefix.U, Tag.ANY),
        (Prefix.ANY, Prefix.B, Tag.ANY),
    }

    # I or L continues a chunk only after B or I carrying the same tag.
    inside_patterns = {
        (Prefix.I, Prefix.L, Tag.SAME),
        (Prefix.I, Prefix.I, Tag.SAME),
        (Prefix.B, Prefix.L, Tag.SAME),
        (Prefix.B, Prefix.I, Tag.SAME),
    }

    # A previous L or U prefix marks an end, whatever follows it.
    end_patterns = {
        (Prefix.L, Prefix.ANY, Tag.ANY),
        (Prefix.U, Prefix.ANY, Tag.ANY),
    }


class Tokens:

def __init__(self, tokens: List[str], scheme: Type[Token],
Expand Down Expand Up @@ -324,11 +344,24 @@ def auto_detect(sequences: List[List[str]], suffix: bool = False, delimiter: str
{Prefix.B, Prefix.E},
{Prefix.S}
]
allowed_bilou_prefixes = [
{Prefix.I, Prefix.O, Prefix.B, Prefix.L, Prefix.U},
{Prefix.I, Prefix.B, Prefix.L, Prefix.U},
{Prefix.I, Prefix.O, Prefix.B, Prefix.L},
{Prefix.O, Prefix.B, Prefix.L, Prefix.U},
{Prefix.I, Prefix.B, Prefix.L},
{Prefix.B, Prefix.L, Prefix.U},
{Prefix.O, Prefix.B, Prefix.L},
{Prefix.B, Prefix.L},
{Prefix.U}
]
if prefixes in allowed_iob2_prefixes:
return IOB2
elif prefixes in allowed_ioe2_prefixes:
return IOE2
elif prefixes in allowed_iobes_prefixes:
return IOBES
elif prefixes in allowed_bilou_prefixes:
return BILOU
else:
raise ValueError(error_message.format(prefixes))
138 changes: 135 additions & 3 deletions tests/test_scheme.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from seqeval.scheme import (IOB1, IOB2, IOBES, IOE1, IOE2, Entities, Entity,
Prefix, Token, Tokens, auto_detect)
from seqeval.scheme import (BILOU, IOB1, IOB2, IOBES, IOE1, IOE2, Entities,
Entity, Prefix, Token, Tokens, auto_detect)


def test_entity_repr():
Expand Down Expand Up @@ -211,6 +211,43 @@ def test_iobes_start_inside_end(prev, token, expected):
expects_start_inside_end_to_be_correct(prev, token, expected, IOBES)


@pytest.mark.parametrize(
    'prev, token, expected',
    [
        # Previous token: O
        ('O', 'O', [False, False, False]),
        ('O', 'I-PER', [False, False, False]),
        ('O', 'B-PER', [True, False, False]),
        ('O', 'L-PER', [False, False, False]),
        ('O', 'U-PER', [True, False, False]),
        # Previous token: I-PER
        ('I-PER', 'O', [False, False, False]),
        ('I-PER', 'I-PER', [False, True, False]),
        ('I-PER', 'I-ORG', [False, False, False]),
        ('I-PER', 'B-PER', [True, False, False]),
        ('I-PER', 'L-PER', [False, True, False]),
        ('I-PER', 'L-ORG', [False, False, False]),
        ('I-PER', 'U-PER', [True, False, False]),
        # Previous token: B-PER
        ('B-PER', 'O', [False, False, False]),
        ('B-PER', 'I-PER', [False, True, False]),
        ('B-PER', 'I-ORG', [False, False, False]),
        ('B-PER', 'L-PER', [False, True, False]),
        ('B-PER', 'L-ORG', [False, False, False]),
        ('B-PER', 'U-PER', [True, False, False]),
        # Previous token: L-PER
        ('L-PER', 'O', [False, False, True]),
        ('L-PER', 'I-PER', [False, False, True]),
        ('L-PER', 'B-PER', [True, False, True]),
        ('L-PER', 'L-PER', [False, False, True]),
        ('L-PER', 'U-PER', [True, False, True]),
        # Previous token: U-PER
        ('U-PER', 'O', [False, False, True]),
        ('U-PER', 'I-PER', [False, False, True]),
        ('U-PER', 'B-PER', [True, False, True]),
        ('U-PER', 'L-PER', [False, False, True]),
        ('U-PER', 'U-PER', [True, False, True]),
    ]
)
def test_bilou_start_inside_end(prev, token, expected):
    """Check the BILOU flags for ``token`` when it follows ``prev``.

    ``expected`` holds three booleans, presumably ordered
    [start, inside, end] as the helper's name suggests.
    """
    expects_start_inside_end_to_be_correct(prev, token, expected, BILOU)


@pytest.mark.parametrize(
'tokens, expected',
[
Expand Down Expand Up @@ -501,6 +538,92 @@ def test_iobes_tokens_without_tag(tokens, expected):
assert entities == expected


@pytest.mark.parametrize(
    'tokens, expected',
    [
        # Single token
        (['O'], []),
        (['I-PER'], []),
        (['B-PER'], []),
        (['L-PER'], []),
        (['U-PER'], [('PER', 0, 1)]),
        # First token: O
        (['O', 'O'], []),
        (['O', 'I-PER'], []),
        (['O', 'B-PER'], []),
        (['O', 'L-PER'], []),
        (['O', 'U-PER'], [('PER', 1, 2)]),
        # First token: I-PER
        (['I-PER', 'O'], []),
        (['I-PER', 'I-PER'], []),
        (['I-PER', 'I-ORG'], []),
        (['I-PER', 'B-PER'], []),
        (['I-PER', 'L-PER'], []),
        (['I-PER', 'L-ORG'], []),
        (['I-PER', 'U-PER'], [('PER', 1, 2)]),
        # First token: B-PER
        (['B-PER', 'O'], []),
        (['B-PER', 'I-PER'], []),
        (['B-PER', 'I-ORG'], []),
        (['B-PER', 'B-PER'], []),
        (['B-PER', 'L-PER'], [('PER', 0, 2)]),
        (['B-PER', 'L-ORG'], []),
        (['B-PER', 'U-PER'], [('PER', 1, 2)]),
        # First token: L-PER
        (['L-PER', 'O'], []),
        (['L-PER', 'I-PER'], []),
        (['L-PER', 'B-PER'], []),
        (['L-PER', 'L-PER'], []),
        (['L-PER', 'U-PER'], [('PER', 1, 2)]),
        # First token: U-PER
        (['U-PER', 'O'], [('PER', 0, 1)]),
        (['U-PER', 'I-PER'], [('PER', 0, 1)]),
        (['U-PER', 'B-PER'], [('PER', 0, 1)]),
        (['U-PER', 'L-PER'], [('PER', 0, 1)]),
        (['U-PER', 'U-PER'], [('PER', 0, 1), ('PER', 1, 2)])
    ]
)
def test_bilou_tokens(tokens, expected):
    """Check the entities extracted from tagged BILOU sequences.

    Each expected entry is ``entity.to_tuple()[1:]``; in the cases above it
    reads as (tag, start index, exclusive end index).
    """
    # NOTE: the name must be unique within the module. A second function with
    # the same name as the IOBES test would rebind it, and pytest would
    # collect only the last definition.
    parsed = Tokens(tokens, BILOU)
    entities = [entity.to_tuple()[1:] for entity in parsed.entities]
    assert entities == expected


@pytest.mark.parametrize(
    'tokens, expected',
    [
        # Single token
        (['O'], []),
        (['I'], []),
        (['B'], []),
        (['L'], []),
        (['U'], [('_', 0, 1)]),
        # First token: O
        (['O', 'O'], []),
        (['O', 'I'], []),
        (['O', 'B'], []),
        (['O', 'L'], []),
        (['O', 'U'], [('_', 1, 2)]),
        # First token: I
        (['I', 'O'], []),
        (['I', 'I'], []),
        (['I', 'B'], []),
        (['I', 'L'], []),
        (['I', 'U'], [('_', 1, 2)]),
        # First token: B
        (['B', 'O'], []),
        (['B', 'I'], []),
        (['B', 'B'], []),
        (['B', 'L'], [('_', 0, 2)]),
        (['B', 'U'], [('_', 1, 2)]),
        # First token: L
        (['L', 'O'], []),
        (['L', 'I'], []),
        (['L', 'B'], []),
        (['L', 'L'], []),
        (['L', 'U'], [('_', 1, 2)]),
        # First token: U
        (['U', 'O'], [('_', 0, 1)]),
        (['U', 'I'], [('_', 0, 1)]),
        (['U', 'B'], [('_', 0, 1)]),
        (['U', 'L'], [('_', 0, 1)]),
        (['U', 'U'], [('_', 0, 1), ('_', 1, 2)])
    ]
)
def test_bilou_tokens_without_tag(tokens, expected):
    """Check the entities extracted from BILOU sequences of bare prefixes.

    Tokens here carry no tag; the expected entities report ``'_'`` in the tag
    position, followed by the start index and exclusive end index.
    """
    # NOTE: the name must be unique within the module. Reusing the IOBES
    # test's name would rebind it, and pytest would collect only the last
    # definition.
    parsed = Tokens(tokens, BILOU)
    entities = [entity.to_tuple()[1:] for entity in parsed.entities]
    assert entities == expected


class TestToken:

def test_raises_type_error_if_input_is_binary_string(self):
Expand Down Expand Up @@ -582,7 +705,16 @@ class TestAutoDetect:
([['B', 'E', 'S']], IOBES),
([['O', 'B', 'E']], IOBES),
([['B', 'E']], IOBES),
([['S']], IOBES)
([['S']], IOBES),
([['I', 'O', 'B', 'L', 'U']], BILOU),
([['I', 'B', 'L', 'U']], BILOU),
([['I', 'O', 'B', 'L']], BILOU),
([['O', 'B', 'L', 'U']], BILOU),
([['I', 'B', 'L']], BILOU),
([['B', 'L', 'U']], BILOU),
([['O', 'B', 'L']], BILOU),
([['B', 'L']], BILOU),
([['U']], BILOU)
]
)
def test_valid_scheme(self, sequences, expected):
Expand Down

0 comments on commit 7e62fbe

Please sign in to comment.