0.1.4

straussmaximilian · Dec 8, 2023 · f875246 · f875246
1 parent 1be9c23
commit f875246
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 54 deletions.
diff --git a/ExampleNotebook.ipynb b/ExampleNotebook.ipynb
diff --git a/README.md b/README.md
@@ -44,18 +44,20 @@ Output (Text, Confidence, BoundingBox):
 - You can use it as a class (`ocrmac.OCR`) or as a function (`ocrmac.text_from_image`)
 - You can pass several arguments:
     - `recognition_level`: `fast` or `accurate`
-    - `language_preference`: A list with languages for post-processing, e.g. `['en', 'zh', 'de']`. 
+    - `language_preference`: A list with languages for post-processing, e.g. `['en-US', 'zh-Hans', 'de-DE']`. 
 - You can get an annotated output either as PIL image (`annotate_PIL`) or matplotlib figure (`annotate_matplotlib`)
 
 #### Example: Select Language Preference
 
 You can set a language preference like so:
 
 ```python
-    ocrmac.OCR('test.png',language_preference=['en'])
+    ocrmac.OCR('test.png',language_preference=['en-US'])
 ```
 
-What abbreviation should you use for your language of choice? [Here](https://www.alchemysoftware.com/livedocs/ezscript/Topics/Catalyst/Language.htm) is an overview of language codes, e.g.: `Chinese (Simplified)` -> `zh`, `English` -> `en` ..
+What abbreviation should you use for your language of choice? [Here](https://www.alchemysoftware.com/livedocs/ezscript/Topics/Catalyst/Language.htm) is an overview of language codes, e.g.: `Chinese (Simplified)` -> `zh-Hans`, `English` -> `en-US` ..
+
+If you set an unsupported language, you will see an error message listing the available languages. Note that the `recognition_level` affects which languages are available (`fast` supports fewer).
 
 See also this [Example Notebook](https://github.com/straussmaximilian/ocrmac/blob/main/ExampleNotebook.ipynb) for implementation details.
 

diff --git a/ocrmac/__init__.py b/ocrmac/__init__.py
@@ -2,4 +2,4 @@
 
 __author__ = """Maximilian Strauss"""
 __email__ = "straussmaximilian@gmail.com"
-__version__ = "0.1.3"
+__version__ = "0.1.4"
diff --git a/ocrmac/ocrmac.py b/ocrmac/ocrmac.py
@@ -83,7 +83,16 @@ def text_from_image(image, recognition_level="accurate", language_preference=Non
         else:
             req.setRecognitionLevel_(0)
 
+
+
         if language_preference is not None:
+
+            available_languages = req.supportedRecognitionLanguagesAndReturnError_(None)[0]
+
+            if not set(language_preference).issubset(set(available_languages)):
+                raise ValueError(
+                    f"Invalid language preference. Language preference must be a subset of {available_languages}."
+                )
             req.setRecognitionLanguages_(language_preference)
 
         handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(
@@ -129,13 +138,17 @@ def __init__(self, image, recognition_level="accurate", language_preference=None
         self.language_preference = language_preference
         self.res = None
 
-    def recognize(self):
+    def recognize(self, px=False):
         res = text_from_image(
             self.image, self.recognition_level, self.language_preference
         )
         self.res = res
+
+        if px:
+            return [(text, conf, convert_coordinates_pil(bbox, self.image.width, self.image.height)) for text, conf, bbox in res]
 
-        return res
+        else:
+            return res
 
     def annotate_matplotlib(
         self, figsize=(20, 20), color="red", alpha=0.5, fontsize=12