Skip to content

Commit

Permalink
[formrecognizer] Add samples for converting to and from dictionary (A…
Browse files Browse the repository at this point in the history
…zure#21770)

Fixes in part Azure#21592 

Pending: update the samples readme and mention these samples in the changelog.
  • Loading branch information
catalinaperalta committed Nov 24, 2021
1 parent 0db03b1 commit 9e1d561
Show file tree
Hide file tree
Showing 4 changed files with 334 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict_async.py
DESCRIPTION:
This sample demonstrates how to convert models returned from a recognize operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.
USAGE:
python sample_convert_to_and_from_dict_async.py
Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json
import asyncio

async def convert_to_and_from_dict_async():
    """Round-trip recognized ID documents through dictionaries.

    Submits the sample license image for identity-document recognition,
    converts every returned form to a dictionary, writes those dictionaries
    to ``data.json``, rebuilds ``RecognizedForm`` models from the same
    dictionaries, and prints selected fields of each rebuilt model.

    Reads ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``
    from the environment.
    """
    # The sample image is resolved relative to this file's location.
    this_file = os.path.abspath(__file__)
    license_image = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/id_documents/license.jpg")
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import FormRecognizerClient
    from azure.ai.formrecognizer import RecognizedForm

    client = FormRecognizerClient(
        endpoint=os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"],
        credential=AzureKeyCredential(os.environ["AZURE_FORM_RECOGNIZER_KEY"]),
    )
    async with client:
        with open(license_image, "rb") as image_stream:
            poller = await client.begin_recognize_identity_documents(identity_document=image_stream)

        recognized_forms = await poller.result()

    # Turn every recognized form into a plain dictionary.
    form_dicts = []
    for form in recognized_forms:
        form_dicts.append(form.to_dict())

    # Persist the dictionaries as JSON. AzureJSONEncoder helps make types such
    # as dates JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(form_dicts, json_file, cls=AzureJSONEncoder)

    # Rebuild the original model type from each dictionary.
    rebuilt_forms = list(map(RecognizedForm.from_dict, form_dicts))

    # The rebuilt models are used exactly like the ones the service returned.
    reported_fields = (
        ("FirstName", "First Name: {} has confidence: {}"),
        ("LastName", "Last Name: {} has confidence: {}"),
        ("DocumentNumber", "Document Number: {} has confidence: {}"),
    )
    for position, rebuilt_form in enumerate(rebuilt_forms, start=1):
        print("--------Recognizing converted ID document #{}--------".format(position))
        for field_name, template in reported_fields:
            field = rebuilt_form.fields.get(field_name)
            if field:
                print(template.format(field.value, field.confidence))

    print("----------------------------------------")


async def main():
    """Entry-point coroutine: run the dictionary round-trip sample."""
    sample_coroutine = convert_to_and_from_dict_async()
    await sample_coroutine


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() when no
    # loop is running is deprecated in newer Python versions and left the
    # loop unclosed. Requires Python 3.7+.
    asyncio.run(main())
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict.py
DESCRIPTION:
This sample demonstrates how to convert models returned from a recognize operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.
USAGE:
python sample_convert_to_and_from_dict.py
Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json

def convert_to_and_from_dict():
    """Round-trip recognized ID documents through dictionaries.

    Submits the sample license image for identity-document recognition,
    converts every returned form to a dictionary, writes those dictionaries
    to ``data.json``, rebuilds ``RecognizedForm`` models from the same
    dictionaries, and prints selected fields of each rebuilt model.

    Reads ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``
    from the environment.
    """
    # The sample image is resolved relative to this file's location.
    this_file = os.path.abspath(__file__)
    license_image = os.path.abspath(
        os.path.join(this_file, "..", "..", "./sample_forms/id_documents/license.jpg")
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient, RecognizedForm

    client = FormRecognizerClient(
        endpoint=os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"],
        credential=AzureKeyCredential(os.environ["AZURE_FORM_RECOGNIZER_KEY"]),
    )
    with open(license_image, "rb") as image_stream:
        poller = client.begin_recognize_identity_documents(identity_document=image_stream)

    recognized_forms = poller.result()

    # Turn every recognized form into a plain dictionary.
    form_dicts = []
    for form in recognized_forms:
        form_dicts.append(form.to_dict())

    # Persist the dictionaries as JSON. AzureJSONEncoder helps make types such
    # as dates JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(form_dicts, json_file, cls=AzureJSONEncoder)

    # Rebuild the original model type from each dictionary.
    rebuilt_forms = list(map(RecognizedForm.from_dict, form_dicts))

    # The rebuilt models are used exactly like the ones the service returned.
    reported_fields = (
        ("FirstName", "First Name: {} has confidence: {}"),
        ("LastName", "Last Name: {} has confidence: {}"),
        ("DocumentNumber", "Document Number: {} has confidence: {}"),
    )
    for position, rebuilt_form in enumerate(rebuilt_forms, start=1):
        print("--------Recognizing converted ID document #{}--------".format(position))
        for field_name, template in reported_fields:
            field = rebuilt_form.fields.get(field_name)
            if field:
                print(template.format(field.value, field.confidence))

    print("----------------------------------------")


if __name__ == "__main__":
    # Run the sample only when this file is executed as a script.
    convert_to_and_from_dict()
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict_async.py
DESCRIPTION:
This sample demonstrates how to convert models returned from an analyze operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.
USAGE:
python sample_convert_to_and_from_dict_async.py
Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json
import asyncio

async def convert_to_and_from_dict_async():
    """Round-trip an ``AnalyzeResult`` through a dictionary.

    Analyzes the sample form image with the "prebuilt-document" model,
    converts the result to a dictionary, writes that dictionary to
    ``data.json``, rebuilds an ``AnalyzeResult`` from the same dictionary,
    and prints a short summary of the rebuilt model.

    Reads ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``
    from the environment.
    """
    # The sample image is resolved relative to this file's location.
    this_file = os.path.abspath(__file__)
    form_image = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    client = DocumentAnalysisClient(
        endpoint=os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"],
        credential=AzureKeyCredential(os.environ["AZURE_FORM_RECOGNIZER_KEY"]),
    )
    async with client:
        with open(form_image, "rb") as document_stream:
            poller = await client.begin_analyze_document("prebuilt-document", document=document_stream)
        analysis = await poller.result()

    # Turn the analysis result into a plain dictionary.
    analysis_dict = analysis.to_dict()

    # Persist the dictionary as JSON. AzureJSONEncoder helps make types such
    # as dates JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(analysis_dict, json_file, cls=AzureJSONEncoder)

    # Rebuild the original model type from the dictionary.
    restored = AnalyzeResult.from_dict(analysis_dict)

    # The rebuilt model is used exactly like the one the service returned.
    print("----Converted from dictionary AnalyzeResult----")
    model_id_line = "Model ID: '{}'".format(restored.model_id)
    print(model_id_line)
    page_count = len(restored.pages)
    print("Number of pages analyzed {}".format(page_count))
    api_version_line = "API version used: {}".format(restored.api_version)
    print(api_version_line)

    print("----------------------------------------")


async def main():
    """Entry-point coroutine: run the dictionary round-trip sample."""
    sample_coroutine = convert_to_and_from_dict_async()
    await sample_coroutine


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() when no
    # loop is running is deprecated in newer Python versions and left the
    # loop unclosed. Requires Python 3.7+.
    asyncio.run(main())
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict.py
DESCRIPTION:
This sample demonstrates how to convert models returned from an analyze operation
to and from a dictionary. The dictionary in this sample is then converted to a
JSON file, then the same dictionary is converted back to its original model.
USAGE:
python sample_convert_to_and_from_dict.py
Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json

def convert_to_and_from_dict():
    """Round-trip an ``AnalyzeResult`` through a dictionary.

    Analyzes the sample form image with the "prebuilt-document" model,
    converts the result to a dictionary, writes that dictionary to
    ``data.json``, rebuilds an ``AnalyzeResult`` from the same dictionary,
    and prints a short summary of the rebuilt model.

    Reads ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``
    from the environment.
    """
    # The sample image is resolved relative to this file's location.
    this_file = os.path.abspath(__file__)
    form_image = os.path.abspath(
        os.path.join(this_file, "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult

    client = DocumentAnalysisClient(
        endpoint=os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"],
        credential=AzureKeyCredential(os.environ["AZURE_FORM_RECOGNIZER_KEY"]),
    )
    with open(form_image, "rb") as document_stream:
        poller = client.begin_analyze_document("prebuilt-document", document=document_stream)
    analysis = poller.result()

    # Turn the analysis result into a plain dictionary.
    analysis_dict = analysis.to_dict()

    # Persist the dictionary as JSON. AzureJSONEncoder helps make types such
    # as dates JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(analysis_dict, json_file, cls=AzureJSONEncoder)

    # Rebuild the original model type from the dictionary.
    restored = AnalyzeResult.from_dict(analysis_dict)

    # The rebuilt model is used exactly like the one the service returned.
    print("----Converted from dictionary AnalyzeResult----")
    model_id_line = "Model ID: '{}'".format(restored.model_id)
    print(model_id_line)
    page_count = len(restored.pages)
    print("Number of pages analyzed {}".format(page_count))
    api_version_line = "API version used: {}".format(restored.api_version)
    print(api_version_line)

    print("----------------------------------------")


if __name__ == "__main__":
    # Run the sample only when this file is executed as a script.
    convert_to_and_from_dict()

0 comments on commit 9e1d561

Please sign in to comment.