Commit 91215f5
Merge pull request #54 from chakki-works/enhancement/strictMode
Enhancement/strict mode
Hironsan committed Oct 11, 2020
2 parents a0c562a + 4921147 commit 91215f5
Showing 12 changed files with 848 additions and 228 deletions.
3 changes: 2 additions & 1 deletion Pipfile
@@ -12,11 +12,12 @@ isort = "*"

 [packages]
 numpy = "*"
+scikit-learn = "*"
 
 [requires]
 python_version = "3.8"
 
 [scripts]
 isort = "isort . -c"
-test = "pytest tests/test_scheme.py --cov=seqeval --cov-report=term-missing -vv"
+test = "pytest tests/test_scheme.py tests/test_reporters.py tests/test_v1.py --cov=seqeval --cov-report=term-missing -vv"
 flake8 = "flake8 seqeval --ignore=F401,E741"
68 changes: 64 additions & 4 deletions Pipfile.lock

Some generated files are not rendered by default.

46 changes: 22 additions & 24 deletions README.md
@@ -1,11 +1,13 @@
 # seqeval
+
 seqeval is a Python framework for sequence labeling evaluation.
 seqeval can evaluate the performance of chunking tasks such as named-entity recognition, part-of-speech tagging, semantic role labeling and so on.
 
 This is well-tested by using the Perl script [conlleval](https://www.clips.uantwerpen.be/conll2002/ner/bin/conlleval.txt),
 which can be used for measuring the performance of a system that has processed the CoNLL-2000 shared task data.
 
 ## Support features
+
 seqeval supports following formats:
 * IOB1
 * IOB2
@@ -24,6 +26,7 @@ and supports following metrics:
 | classification_report(y\_true, y\_pred, digits=2) | Build a text report showing the main classification metrics. `digits` is number of digits for formatting output floating point values. Default value is `2`. |
 
 ## Usage
+
 Behold, the power of seqeval:
 
 ```python
@@ -39,41 +42,36 @@ Behold, the power of seqeval:
 >>> accuracy_score(y_true, y_pred)
 0.80
 >>> classification_report(y_true, y_pred)
-           precision    recall  f1-score   support
+              precision    recall  f1-score   support
 
-      MISC       0.00      0.00      0.00         1
-       PER       1.00      1.00      1.00         1
+        MISC       0.00      0.00      0.00         1
+         PER       1.00      1.00      1.00         1
 
- micro avg       0.50      0.50      0.50         2
- macro avg       0.50      0.50      0.50         2
+   micro avg       0.50      0.50      0.50         2
+   macro avg       0.50      0.50      0.50         2
+weighted avg       0.50      0.50      0.50         2
 ```
 
-### Keras Callback
-
-Seqeval provides a callback for Keras:
+If you want to explicitly specify the evaluation scheme, use `mode='strict'`:
 
 ```python
-from seqeval.callbacks import F1Metrics
-
-id2label = {0: '<PAD>', 1: 'B-LOC', 2: 'I-LOC'}
-callbacks = [F1Metrics(id2label)]
-model.fit(x, y, validation_data=(x_val, y_val), callbacks=callbacks)
+>>> from seqeval.scheme import IOB2
+>>> classification_report(y_true, y_pred, mode='strict', scheme=IOB2)
+              precision    recall  f1-score   support
+
+        MISC       0.00      0.00      0.00         1
+         PER       1.00      1.00      1.00         1
+
+   micro avg       0.50      0.50      0.50         2
+   macro avg       0.50      0.50      0.50         2
+weighted avg       0.50      0.50      0.50         2
 ```
 
+Note: The behavior of the strict mode is different from the default one which is designed to simulate conlleval.

 ## Installation
 To install seqeval, simply run:
 
 ```
-$ pip install seqeval[cpu]
+$ pip install seqeval
 ```
 
-If you want to install seqeval on GPU environment, please run:
-
-```bash
-$ pip install seqeval[gpu]
-```
-
 ## Requirement
 
 * numpy >= 1.14.0
-* tensorflow(optional)
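
To see why that Note matters, consider a tagging mistake the two modes score differently. The sketch below is editorial (not part of the diff) and assumes seqeval with this commit applied: the default mode, which simulates conlleval, accepts a chunk that begins with `I-PER`, while strict IOB2 validation rejects it.

```python
from seqeval.metrics import classification_report
from seqeval.scheme import IOB2

y_true = [['B-PER', 'I-PER', 'O']]
y_pred = [['I-PER', 'I-PER', 'O']]

# Default mode simulates conlleval: the leading I-PER still opens a PER
# chunk, so the prediction matches the gold entity exactly.
default = classification_report(y_true, y_pred, output_dict=True)
print(default['PER']['f1-score'])  # 1.0

# Strict mode requires IOB2-valid chunks, which must start with B-;
# the I-PER chunk is discarded, so PER drops to zero (a zero-division
# warning may be emitted unless zero_division is set).
strict = classification_report(y_true, y_pred, mode='strict', scheme=IOB2,
                               output_dict=True)
print(strict['PER']['f1-score'])  # 0.0
```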
99 changes: 0 additions & 99 deletions seqeval/callbacks.py

This file was deleted.
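
With this file gone, the `F1Metrics` callback shown in the old README no longer ships with seqeval. A rough stand-in is easy to keep in user code; the sketch below is hypothetical (`F1Callback`, `x_val`, and `y_val` are illustrative names, not seqeval API), assumes TensorFlow 2.x Keras with integer-encoded tags, and omits the `<PAD>` filtering the deleted callback performed, so sequences are expected to be trimmed to their true length.

```python
import numpy as np
import tensorflow as tf
from seqeval.metrics import f1_score

class F1Callback(tf.keras.callbacks.Callback):
    """Print entity-level F1 on validation data after each epoch."""

    def __init__(self, id2label, x_val, y_val):
        super().__init__()
        self.id2label = id2label  # e.g. {0: 'O', 1: 'B-LOC', 2: 'I-LOC'}
        self.x_val = x_val        # model inputs
        self.y_val = y_val        # integer tag ids, shape (n_samples, seq_len)

    def on_epoch_end(self, epoch, logs=None):
        # Predict tag ids, then map them back to tag strings for seqeval.
        y_pred = np.argmax(self.model.predict(self.x_val, verbose=0), axis=-1)
        pred_tags = [[self.id2label[i] for i in seq] for seq in y_pred]
        true_tags = [[self.id2label[i] for i in seq] for seq in self.y_val]
        print(f' - val_f1: {f1_score(true_tags, pred_tags):.4f}')
```

It is wired up the same way as before: `model.fit(x, y, callbacks=[F1Callback(id2label, x_val, y_val)])`.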

40 changes: 39 additions & 1 deletion seqeval/metrics/sequence_labeling.py
@@ -13,6 +13,7 @@
 import numpy as np
 
 from seqeval.reporters import DictReporter, StringReporter
+from seqeval.metrics.v1 import classification_report as cr
 
 
 def get_entities(seq, suffix=False):
@@ -303,15 +304,42 @@ def performance_measure(y_true, y_pred):
     return performance_dict
 
 
-def classification_report(y_true, y_pred, digits=2, suffix=False, output_dict=False):
+def classification_report(y_true, y_pred,
+                          digits=2,
+                          suffix=False,
+                          output_dict=False,
+                          mode=None,
+                          sample_weight=None,
+                          zero_division='warn',
+                          scheme=None):
"""Build a text report showing the main classification metrics.
Args:
y_true : 2d array. Ground truth (correct) target values.
y_pred : 2d array. Estimated targets as returned by a classifier.
digits : int. Number of digits for formatting output floating point values.
output_dict : bool(default=False). If True, return output as dict else str.
mode : str. If mode="strict", use new classification_report.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
zero_division : "warn", 0 or 1, default="warn"
Sets the value to return when there is a zero division:
- recall: when there are no positive labels
- precision: when there are no positive predictions
- f-score: both
If set to "warn", this acts as 0, but warnings are also raised.
scheme : Token, [IOB2, IOE2, IOBES]
suffix : bool, False by default.
Returns:
report : string/dict. Summary of the precision, recall, F1 score for each class.
@@ -330,6 +358,16 @@ def classification_report(y_true, y_pred, digits=2, suffix=False, output_dict=Fa
         weighted avg       0.50      0.50      0.50         2
         <BLANKLINE>
     """
+    if mode == 'strict':
+        return cr(y_true, y_pred,
+                  digits=digits,
+                  output_dict=output_dict,
+                  sample_weight=sample_weight,
+                  zero_division=zero_division,
+                  scheme=scheme,
+                  suffix=suffix
+                  )
+
     true_entities = set(get_entities(y_true, suffix))
     pred_entities = set(get_entities(y_pred, suffix))
 
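Taken together, the new keyword arguments can be exercised as in this minimal sketch (editorial, not part of the diff; it assumes this commit is installed and uses IOBES, one of the schemes the docstring lists):

```python
from seqeval.metrics import classification_report
from seqeval.scheme import IOBES

y_true = [['B-PER', 'E-PER', 'O', 'S-LOC']]
y_pred = [['B-PER', 'E-PER', 'O', 'S-ORG']]

# mode='strict' routes the call through seqeval.metrics.v1, which
# validates every chunk against the IOBES scheme. zero_division=0
# returns 0.0 instead of warning for LOC, which is never predicted.
report = classification_report(
    y_true, y_pred,
    mode='strict',
    scheme=IOBES,
    zero_division=0,
    output_dict=True,
)
print(report['LOC'])  # precision 0.0, recall 0.0, f1-score 0.0, support 1
```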
