[formrecognizer] doc updates (#15346)

* docs fixes, wip * more docs updates * more doc updates
Azure · Nov 17, 2020 · fc58de3 · fc58de3
1 parent ec6443e
commit fc58de3
Show file tree

Hide file tree

Showing 8 changed files with 85 additions and 52 deletions.
diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/README.md b/sdk/formrecognizer/azure-ai-formrecognizer/README.md
@@ -160,7 +160,7 @@ Long-running operations are operations which consist of an initial request sent
 followed by polling the service at intervals to determine whether the operation has completed or failed, and if it has
 succeeded, to get the result.
 
-Methods that train models, recognize values from forms, or copy models are modeled as long-running operations.
+Methods that train models, recognize values from forms, or copy/compose models are modeled as long-running operations.
 The client exposes a `begin_<method-name>` method that returns an `LROPoller` or `AsyncLROPoller`. Callers should wait
 for the operation to complete by calling `result()` on the poller object returned from the `begin_<method-name>` method.
 Sample code snippets are provided to illustrate using long-running operations [below](#examples "Examples").
@@ -221,7 +221,7 @@ result = poller.result()
 ```
 
 ### Recognize Content
-Recognize text and table structures, along with their bounding box coordinates, from documents.
+Recognize text, selection marks, and table structures, along with their bounding box coordinates, from documents.
 
 ```python
 from azure.ai.formrecognizer import FormRecognizerClient

diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_api_versions.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_api_versions.py
@@ -9,7 +9,7 @@
 class FormRecognizerApiVersion(str, Enum):
     """Form Recognizer API versions supported by this package"""
 
-    #: this is the default version
+    #: This is the default version
     V2_1_PREVIEW = "2.1-preview.2"
     V2_0 = "2.0"
 

diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py
@@ -32,9 +32,9 @@
 
 class FormRecognizerClient(FormRecognizerClientBase):
     """FormRecognizerClient extracts information from forms and images into structured data.
-    It is the interface to use for analyzing receipts, recognizing content/layout from
-    forms, and analyzing custom forms from trained models. It provides different methods
-    based on inputs from a URL and inputs from a stream.
+    It is the interface to use for analyzing receipts, business cards, and invoices, recognizing
+    content/layout from forms, and analyzing custom forms from trained models. It provides
+    different methods based on inputs from a URL and inputs from a stream.
 
     :param str endpoint: Supported Cognitive Services endpoints (protocol and hostname,
         for example: https://westus2.api.cognitive.microsoft.com).
@@ -82,7 +82,8 @@ def begin_recognize_receipts(self, receipt, **kwargs):
         :param receipt: JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.
         :type receipt: bytes or IO[bytes]
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword content_type: Content-type of the body sent to the API. Content-type is
             auto-detected, but can be overridden by passing this keyword argument. For options,
             see :class:`~azure.ai.formrecognizer.FormContentType`.
@@ -98,7 +99,7 @@ def begin_recognize_receipts(self, receipt, **kwargs):
         :raises ~azure.core.exceptions.HttpResponseError:
 
         .. versionadded:: v2.1-preview
-            The *locale* keyword argument
+            The *locale* keyword argument and support for image/bmp content
 
         .. admonition:: Example:
 
@@ -147,7 +148,8 @@ def begin_recognize_receipts_from_url(self, receipt_url, **kwargs):
         :param str receipt_url: The URL of the receipt to analyze. The input must be a valid, encoded URL
             of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword int polling_interval: Waiting time between two polls for LRO operations
             if no Retry-After header is present. Defaults to 5 seconds.
         :keyword str continuation_token: A continuation token to restart a poller from a saved state.
@@ -159,7 +161,7 @@ def begin_recognize_receipts_from_url(self, receipt_url, **kwargs):
         :raises ~azure.core.exceptions.HttpResponseError:
 
         .. versionadded:: v2.1-preview
-            The *locale* keyword argument
+            The *locale* keyword argument and support for image/bmp content
 
         .. admonition:: Example:
 
@@ -208,7 +210,8 @@ def begin_recognize_business_cards(
         :keyword str locale: Locale of the business card. Supported locales include: en-US, en-AU, en-CA, en-GB,
             and en-IN.
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword content_type: Content-type of the body sent to the API. Content-type is
             auto-detected, but can be overridden by passing this keyword argument. For options,
             see :class:`~azure.ai.formrecognizer.FormContentType`.
@@ -276,7 +279,8 @@ def begin_recognize_business_cards_from_url(
         :keyword str locale: Locale of the business card. Supported locales include: en-US, en-AU, en-CA, en-GB,
             and en-IN.
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword int polling_interval: Waiting time between two polls for LRO operations
             if no Retry-After header is present. Defaults to 5 seconds.
         :keyword str continuation_token: A continuation token to restart a poller from a saved state.
@@ -326,7 +330,8 @@ def begin_recognize_invoices(
         :type invoice: bytes or IO[bytes]
         :keyword str locale: Locale of the invoice. Supported locales include: en-US
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword content_type: Content-type of the body sent to the API. Content-type is
             auto-detected, but can be overridden by passing this keyword argument. For options,
             see :class:`~azure.ai.formrecognizer.FormContentType`.
@@ -393,7 +398,8 @@ def begin_recognize_invoices_from_url(
             of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.
         :keyword str locale: Locale of the invoice. Supported locales include: en-US
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword int polling_interval: Waiting time between two polls for LRO operations
             if no Retry-After header is present. Defaults to 5 seconds.
         :keyword str continuation_token: A continuation token to restart a poller from a saved state.
@@ -459,7 +465,7 @@ def begin_recognize_content(self, form, **kwargs):
         :raises ~azure.core.exceptions.HttpResponseError:
 
         .. versionadded:: v2.1-preview
-            The *pages* and *language* keyword arguments
+            The *pages* and *language* keyword arguments and support for image/bmp content
 
         .. admonition:: Example:
 
@@ -527,7 +533,7 @@ def begin_recognize_content_from_url(self, form_url, **kwargs):
         :raises ~azure.core.exceptions.HttpResponseError:
 
         .. versionadded:: v2.1-preview
-            The *pages* and *language* keyword arguments
+            The *pages* and *language* keyword arguments and support for image/bmp content
         """
         pages = kwargs.pop("pages", None)
         language = kwargs.pop("language", None)
@@ -565,7 +571,8 @@ def begin_recognize_custom_forms(self, model_id, form, **kwargs):
         :param form: JPEG, PNG, PDF, or TIFF type file stream or bytes.
         :type form: bytes or IO[bytes]
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword content_type: Content-type of the body sent to the API. Content-type is
             auto-detected, but can be overridden by passing this keyword argument. For options,
             see :class:`~azure.ai.formrecognizer.FormContentType`.
@@ -627,7 +634,8 @@ def begin_recognize_custom_forms_from_url(self, model_id, form_url, **kwargs):
         :param str form_url: The URL of the form to analyze. The input must be a valid, encoded URL
             of one of the supported formats: JPEG, PNG, PDF, or TIFF.
         :keyword bool include_field_elements:
-            Whether or not to include field elements such as lines and words in addition to form fields.
+            Whether or not to include all lines per page and field elements such as lines, words,
+            and selection marks for each form field.
         :keyword int polling_interval: Waiting time between two polls for LRO operations
             if no Retry-After header is present. Defaults to 5 seconds.
         :keyword str continuation_token: A continuation token to restart a poller from a saved state.

diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_training_client.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_training_client.py
@@ -42,10 +42,11 @@
 
 
 class FormTrainingClient(FormRecognizerClientBase):
-    """FormTrainingClient is the Form Recognizer interface to use for creating,
+    """FormTrainingClient is the Form Recognizer interface to use for creating
     and managing custom models. It provides methods for training models on the forms
     you provide, as well as methods for viewing and deleting models, accessing
-    account properties, and copying a model to another Form Recognizer resource.
+    account properties, copying models to another Form Recognizer resource, and
+    composing models from a collection of existing models trained with labels.
 
     :param str endpoint: Supported Cognitive Services endpoints (protocol and hostname,
         for example: https://westus2.api.cognitive.microsoft.com).
@@ -83,7 +84,7 @@ def begin_training(self, training_files_url, use_training_labels, **kwargs):
         externally accessible Azure storage blob container URI (preferably a Shared Access Signature URI). Note that
         a container URI (without SAS) is accepted only when the container is public.
         Models are trained using documents that are of the following content type - 'application/pdf',
-        'image/jpeg', 'image/png', 'image/tiff'. Other type of content in the container is ignored.
+        'image/jpeg', 'image/png', 'image/tiff'. Other types of content in the container are ignored.
 
         :param str training_files_url: An Azure Storage blob container's SAS URI. A container URI (without SAS)
             can be used if the container is public. For more information on setting up a training data set, see:
@@ -399,7 +400,11 @@ def begin_create_composed_model(
         **kwargs
     ):
         # type: (List[str], Any) -> LROPoller[CustomFormModel]
-        """Creates a composed model from a collection of existing trained models with labels.
+        """Creates a composed model from a collection of existing models that were trained with labels.
+
+        A composed model allows multiple models to be called with a single model ID. When a document is
+        submitted to be analyzed with a composed model ID, a classification step is first performed to
+        route it to the correct custom model.
 
         :param list[str] model_ids: List of model IDs to use in the composed model.
         :keyword str model_name: An optional, user-defined name to associate with your model.

diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py
@@ -61,7 +61,7 @@ def get_field_value(field, value, read_result):  # pylint: disable=too-many-retu
             for key, value in value.value_object.items()
         }
     if value.type == "selectionMark":
-        return value.text
+        return value.text  # FIXME https://github.com/Azure/azure-sdk-for-python/issues/15276
 
     return None
 
@@ -151,7 +151,7 @@ def __new__(cls, first_page_number, last_page_number):
 class FormElement(object):
     """Base type which includes properties for a form element.
 
-    :ivar str text: The text content of the line.
+    :ivar str text: The text content of the element.
     :ivar list[~azure.ai.formrecognizer.Point] bounding_box:
         A list of 4 points representing the quadrilateral bounding box
         that outlines the text. The points are listed in clockwise
@@ -191,7 +191,7 @@ class RecognizedForm(object):
         The first and last page number of the input form.
     :ivar list[~azure.ai.formrecognizer.FormPage] pages:
         A list of pages recognized from the input document. Contains lines,
-        words, tables and page metadata.
+        words, selection marks, tables and page metadata.
 
     .. versionadded:: v2.1-preview
         The *form_type_confidence* and *model_id* properties
@@ -296,9 +296,12 @@ class FieldData(object):
     :ivar field_elements:
         When `include_field_elements` is set to true, a list of
         elements constituting this field or value is returned. The list
-        constitutes of elements such as lines and words.
+        consists of elements such as lines, words, and selection marks.
     :vartype field_elements: list[Union[~azure.ai.formrecognizer.FormElement, ~azure.ai.formrecognizer.FormWord,
         ~azure.ai.formrecognizer.FormLine,  ~azure.ai.formrecognizer.FormSelectionMark]]
+
+    .. versionadded:: v2.1-preview
+        *FormSelectionMark* is added to the types returned in the list of field_elements
     """
 
     def __init__(self, **kwargs):
@@ -341,7 +344,7 @@ def __repr__(self):
 
 class FormPage(object):
     """Represents a page recognized from the input document. Contains lines,
-    words, tables and page metadata.
+    words, selection marks, tables and page metadata.
 
     :ivar int page_number:
         The 1-based number of the page in which this content is present.
@@ -496,8 +499,9 @@ class FormSelectionMark(FormElement):
         that outlines the text. The points are listed in clockwise
         order: top-left, top-right, bottom-right, bottom-left.
         Units are in pixels for images and inches for PDF.
-    :ivar float confidence: Confidence value.
-    :ivar str state: Required. State of the selection mark. Possible values include: "selected",
+    :ivar float confidence:
+        Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0].
+    :ivar str state: State of the selection mark. Possible values include: "selected",
      "unselected".
     :ivar int page_number:
         The 1-based number of the page in which this content is present.
@@ -593,10 +597,13 @@ class FormTableCell(object):  # pylint:disable=too-many-instance-attributes
     :ivar field_elements:
         When `include_field_elements` is set to true, a list of
         elements constituting this cell is returned. The list
-        constitutes of elements such as lines and words.
+        consists of elements such as lines, words, and selection marks.
         For calls to begin_recognize_content(), this list is always populated.
     :vartype field_elements: list[Union[~azure.ai.formrecognizer.FormElement, ~azure.ai.formrecognizer.FormWord,
         ~azure.ai.formrecognizer.FormLine, ~azure.ai.formrecognizer.FormSelectionMark]]
+
+    .. versionadded:: v2.1-preview
+        *FormSelectionMark* is added to the types returned in the list of field_elements
     """
 
     def __init__(self, **kwargs):