From 4e43eb00839db48c73621538aad0ed311d0fe249 Mon Sep 17 00:00:00 2001 From: catalinaperalta Date: Fri, 25 Mar 2022 17:33:29 -0700 Subject: [PATCH] [formrecognizer] Fix dictionary methods on DocumentField model (#23673) * add fix for dict and list methods * improve testing * add invoice tests * update changelog * remove extra invoice test * update tests to use AzureJSONEncoder * move currency value in if * pylint fix --- .../azure-ai-formrecognizer/CHANGELOG.md | 1 + .../azure/ai/formrecognizer/_models.py | 26 ++++++++++++++++--- .../tests/test_dac_analyze_prebuilts.py | 7 +++++ .../tests/test_dac_analyze_prebuilts_async.py | 7 +++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md b/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md index 0bcceb24c4d4..40efe68a0f16 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md +++ b/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md @@ -10,6 +10,7 @@ - Renamed `model_count` and `model_limit` on `AccountInfo` to `document_model_count` and `document_model_limit`. ### Bugs Fixed +- Fixed `to_dict()` and `from_dict()` methods on `DocumentField` to support converting lists, dictionaries, and CurrencyValue field types to and from a dictionary. ### Other Changes - Renamed `sample_copy_model.py` and `sample_copy_model_async.py` to `sample_copy_model_to.py` and `sample_copy_model_to_async.py` under the `3.2-beta` samples folder. Updated the samples to use renamed copy model method. 
diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index 06a7409cf9ca..5f4d791e6044 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -2598,9 +2598,18 @@ def to_dict(self): :return: dict :rtype: dict """ + value = self.value + # CurrencyValue objects are interpreted as dict, therefore need to be processed first + # to call the proper to_dict() method. + if self.value_type == "currency": + value = self.value.to_dict() + elif isinstance(self.value, dict): + value = {k: v.to_dict() for k, v in self.value.items()} + elif isinstance(self.value, list): + value = [v.to_dict() for v in self.value] return { "value_type": self.value_type, - "value": self.value, + "value": value, "content": self.content, "bounding_regions": [f.to_dict() for f in self.bounding_regions] if self.bounding_regions @@ -2620,9 +2629,20 @@ def from_dict(cls, data): :return: DocumentField :rtype: DocumentField """ + + value = data.get("value", None) + # CurrencyValue objects are interpreted as dict, therefore need to be processed first + # to call the proper from_dict() method. 
+ if data.get("value_type", None) == "currency": + value = CurrencyValue.from_dict(data.get("value")) #type: ignore + elif isinstance(data.get("value"), dict): + value = {k: DocumentField.from_dict(v) for k, v in data.get("value").items()} # type: ignore + elif isinstance(data.get("value"), list): + value = [DocumentField.from_dict(v) for v in data.get("value")] # type: ignore + return cls( value_type=data.get("value_type", None), - value=data.get("value", None), + value=value, content=data.get("content", None), bounding_regions=[BoundingRegion.from_dict(v) for v in data.get("bounding_regions")] # type: ignore if len(data.get("bounding_regions", [])) > 0 @@ -2880,7 +2900,7 @@ class DocumentPage(object): :vartype width: float :ivar height: The height of the image/PDF in pixels/inches, respectively. :vartype height: float - :ivar unit: The unit used by the width, height, and boundingBox properties. For + :ivar unit: The unit used by the width, height, and bounding box properties. For images, the unit is "pixel". For PDF, the unit is "inch". Possible values include: "pixel", "inch". 
:vartype unit: str diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts.py index 114f413bea27..c9dfddae771e 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts.py @@ -5,12 +5,14 @@ # ------------------------------------ import pytest +import json import functools from datetime import date, time from devtools_testutils import recorded_by_proxy from io import BytesIO from azure.core.exceptions import ClientAuthenticationError, ServiceRequestError, HttpResponseError from azure.core.credentials import AzureKeyCredential +from azure.core.serialization import AzureJSONEncoder from azure.ai.formrecognizer._generated.v2022_01_30_preview.models import AnalyzeResultOperation from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult, FormContentType from testcase import FormRecognizerTest @@ -182,6 +184,9 @@ def test_invoice_jpg(self, client, **kwargs): poller = client.begin_analyze_document("prebuilt-invoice", invoice) result = poller.result() + d = result.to_dict() + json.dumps(d, cls=AzureJSONEncoder) + result = AnalyzeResult.from_dict(d) assert len(result.documents) == 1 invoice = result.documents[0] @@ -419,6 +424,8 @@ def test_receipt_multipage(self, client): result = poller.result() d = result.to_dict() + # this is simply checking that the dict is JSON serializable + json.dumps(d, cls=AzureJSONEncoder) result = AnalyzeResult.from_dict(d) assert len(result.documents) == 2 diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts_async.py index 48ffd1b7c0a8..2f876d467f24 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts_async.py +++ 
b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts_async.py @@ -5,11 +5,13 @@ # ------------------------------------ import pytest +import json import functools from io import BytesIO from devtools_testutils.aio import recorded_by_proxy_async from datetime import date, time from azure.core.exceptions import HttpResponseError +from azure.core.serialization import AzureJSONEncoder from azure.ai.formrecognizer.aio import DocumentAnalysisClient from azure.ai.formrecognizer import AnalyzeResult from azure.ai.formrecognizer._generated.v2022_01_30_preview.models import AnalyzeResultOperation @@ -210,6 +212,8 @@ async def test_receipt_multipage(self, client): result = await poller.result() d = result.to_dict() + # this is simply checking that the dict is JSON serializable + json.dumps(d, cls=AzureJSONEncoder) result = AnalyzeResult.from_dict(d) assert len(result.documents) == 2 @@ -491,6 +495,9 @@ async def test_invoice_jpg(self, client, **kwargs): poller = await client.begin_analyze_document("prebuilt-invoice", invoice) result = await poller.result() + d = result.to_dict() + json.dumps(d, cls=AzureJSONEncoder) + result = AnalyzeResult.from_dict(d) assert len(result.documents) == 1 invoice = result.documents[0]