From c7454ea568c5cb8ad0e644300e61ff3e6885aa5c Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Wed, 29 Sep 2021 19:34:13 -0700 Subject: [PATCH 01/13] WIP --- .../Sample_AnalyzeDocumentFromFileAsync.cs | 115 +++++++++++++++ .../Sample_AnalyzeLayoutFromFileAsync.cs | 98 +++++++++++++ ...e_AnalyzeWithPrebuiltModelFromFileAsync.cs | 134 ++++++++++++++++++ 3 files changed, 347 insertions(+) create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromFileAsync.cs diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs new file mode 100644 index 0000000000000..4369e82990b29 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System;
using System.IO;
using System.Threading.Tasks;
using Azure.AI.FormRecognizer.DocumentAnalysis.Tests;
using Azure.Core.TestFramework;
using NUnit.Framework;

namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples
{
    public partial class DocumentAnalysisSamples : SamplesBase<DocumentAnalysisTestEnvironment>
    {
        /// <summary>
        /// Sample: analyzes a local file with the "prebuilt-document" model and writes the
        /// returned pages (lines and selection marks), handwritten style spans, tables,
        /// entities and key-value pairs to the console.
        /// </summary>
        [Test]
        public async Task AnalyzeDocumentFromFileAsync()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey = TestEnvironment.ApiKey;

            DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerAnalyzeDocumentFromFileAsync
#if SNIPPET
            string filePath = "filePath";
#else
            string filePath = DocumentAnalysisTestEnvironment.CreatePath("Form_1.jpg");
#endif
            // The document is only read, so request read access explicitly; without it
            // FileStream asks for read/write access and fails on read-only files.
            using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read);

            AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync("prebuilt-document", stream);

            await operation.WaitForCompletionAsync();

            AnalyzeResult result = operation.Value;

            foreach (DocumentPage page in result.Pages)
            {
                Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s).");

                for (int i = 0; i < page.Lines.Count; i++)
                {
                    DocumentLine line = page.Lines[i];
                    Console.WriteLine($" Line {i} has content: '{line.Content}'.");

                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
                }

                for (int i = 0; i < page.SelectionMarks.Count; i++)
                {
                    DocumentSelectionMark selectionMark = page.SelectionMarks[i];

                    Console.WriteLine($" Selection Mark {i} is {selectionMark.State}.");
                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
                }
            }

            foreach (DocumentStyle style in result.Styles)
            {
                // Check the style and style confidence to see if text is handwritten.
                // Note that value '0.8' is used as an example.

                // Comparing the nullable flag with 'true' already yields false when it has no value.
                bool isHandwritten = style.IsHandwritten == true;

                if (isHandwritten && style.Confidence > 0.8)
                {
                    Console.WriteLine($"Handwritten content found in spans:");

                    foreach (DocumentSpan span in style.Spans)
                    {
                        Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}.");
                    }
                }
            }

            for (int i = 0; i < result.Tables.Count; i++)
            {
                DocumentTable table = result.Tables[i];
                Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");

                foreach (DocumentTableCell cell in table.Cells)
                {
                    Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'.");
                }
            }

            foreach (DocumentEntity entity in result.Entities)
            {
                if (entity.SubCategory == null)
                {
                    Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'");
                }
                else
                {
                    Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'");
                }
            }

            foreach (DocumentKeyValuePair kvp in result.KeyValuePairs)
            {
                // Guard against a key that was detected without an associated value.
                if (kvp.Value == null)
                {
                    Console.WriteLine($"Found key with no value: '{kvp.Key.Content}'");
                }
                else
                {
                    Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'");
                }
            }

            #endregion
        }
    }
}
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.IO;
using System.Threading.Tasks;
using Azure.AI.FormRecognizer.DocumentAnalysis.Tests;
using Azure.Core.TestFramework;
using NUnit.Framework;

namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples
{
    public partial class DocumentAnalysisSamples : SamplesBase<DocumentAnalysisTestEnvironment>
    {
        /// <summary>
        /// Sample: analyzes a local file with the "prebuilt-layout" model and writes the
        /// returned pages (lines and selection marks), handwritten style spans and tables
        /// to the console.
        /// </summary>
        [Test]
        public async Task AnalyzeLayoutFromFileAsync()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey = TestEnvironment.ApiKey;

            DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerAnalyzeLayoutFromFileAsync
#if SNIPPET
            string filePath = "filePath";
#else
            string filePath = DocumentAnalysisTestEnvironment.CreatePath("Form_1.jpg");
#endif
            // The document is only read, so request read access explicitly; without it
            // FileStream asks for read/write access and fails on read-only files.
            using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read);

            AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync("prebuilt-layout", stream);

            await operation.WaitForCompletionAsync();

            AnalyzeResult result = operation.Value;

            foreach (DocumentPage page in result.Pages)
            {
                Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s).");

                for (int i = 0; i < page.Lines.Count; i++)
                {
                    DocumentLine line = page.Lines[i];
                    Console.WriteLine($" Line {i} has content: '{line.Content}'.");

                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
                }

                for (int i = 0; i < page.SelectionMarks.Count; i++)
                {
                    DocumentSelectionMark selectionMark = page.SelectionMarks[i];

                    Console.WriteLine($" Selection Mark {i} is {selectionMark.State}.");
                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
                }
            }

            foreach (DocumentStyle style in result.Styles)
            {
                // Check the style and style confidence to see if text is handwritten.
                // Note that value '0.8' is used as an example.

                // Comparing the nullable flag with 'true' already yields false when it has no value.
                bool isHandwritten = style.IsHandwritten == true;

                if (isHandwritten && style.Confidence > 0.8)
                {
                    Console.WriteLine($"Handwritten content found in spans:");

                    foreach (DocumentSpan span in style.Spans)
                    {
                        Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}.");
                    }
                }
            }

            for (int i = 0; i < result.Tables.Count; i++)
            {
                DocumentTable table = result.Tables[i];
                Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");

                foreach (DocumentTableCell cell in table.Cells)
                {
                    Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'.");
                }
            }

            #endregion
        }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Azure.AI.FormRecognizer.DocumentAnalysis.Tests;
using Azure.Core.TestFramework;
using NUnit.Framework;

namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples
{
    public partial class DocumentAnalysisSamples : SamplesBase<DocumentAnalysisTestEnvironment>
    {
        /// <summary>
        /// Sample: analyzes a local invoice file with the "prebuilt-invoice" model and writes
        /// a selection of the extracted fields (vendor, customer, line items and totals),
        /// together with their confidence, to the console.
        /// </summary>
        [Test]
        public async Task AnalyzeWithPrebuiltModelFromFileAsync()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey = TestEnvironment.ApiKey;

            DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerAnalyzeWithPrebuiltModelFromFileAsync
#if SNIPPET
            string filePath = "<filePath>";
#else
            string filePath = DocumentAnalysisTestEnvironment.CreatePath("recommended_invoice.jpg");
#endif

            // The document is only read, so request read access explicitly; without it
            // FileStream asks for read/write access and fails on read-only files.
            using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
            var options = new AnalyzeDocumentOptions() { Locale = "en-US" };

            AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync("prebuilt-invoice", stream, options);

            await operation.WaitForCompletionAsync();

            AnalyzeResult result = operation.Value;

            // To see the list of all the supported fields returned by service and its corresponding types for the
            // prebuilt-invoice model, consult:
            // https://aka.ms/formrecognizer/invoicefields

            for (int i = 0; i < result.Documents.Count; i++)
            {
                Console.WriteLine($"Document {i}:");

                AnalyzedDocument document = result.Documents[i];

                if (document.Fields.TryGetValue("VendorName", out DocumentField vendorNameField))
                {
                    if (vendorNameField.ValueType == DocumentFieldType.String)
                    {
                        string vendorName = vendorNameField.AsString();
                        Console.WriteLine($"Vendor Name: '{vendorName}', with confidence {vendorNameField.Confidence}");
                    }
                }

                if (document.Fields.TryGetValue("CustomerName", out DocumentField customerNameField))
                {
                    if (customerNameField.ValueType == DocumentFieldType.String)
                    {
                        string customerName = customerNameField.AsString();
                        Console.WriteLine($"Customer Name: '{customerName}', with confidence {customerNameField.Confidence}");
                    }
                }

                if (document.Fields.TryGetValue("Items", out DocumentField itemsField))
                {
                    if (itemsField.ValueType == DocumentFieldType.List)
                    {
                        foreach (DocumentField itemField in itemsField.AsList())
                        {
                            Console.WriteLine("Item:");

                            if (itemField.ValueType == DocumentFieldType.Dictionary)
                            {
                                IReadOnlyDictionary<string, DocumentField> itemFields = itemField.AsDictionary();

                                if (itemFields.TryGetValue("Description", out DocumentField itemDescriptionField))
                                {
                                    if (itemDescriptionField.ValueType == DocumentFieldType.String)
                                    {
                                        string itemDescription = itemDescriptionField.AsString();

                                        Console.WriteLine($" Description: '{itemDescription}', with confidence {itemDescriptionField.Confidence}");
                                    }
                                }

                                if (itemFields.TryGetValue("Amount", out DocumentField itemAmountField))
                                {
                                    if (itemAmountField.ValueType == DocumentFieldType.Double)
                                    {
                                        double itemAmount = itemAmountField.AsDouble();

                                        Console.WriteLine($" Amount: '{itemAmount}', with confidence {itemAmountField.Confidence}");
                                    }
                                }
                            }
                        }
                    }
                }

                if (document.Fields.TryGetValue("SubTotal", out DocumentField subTotalField))
                {
                    if (subTotalField.ValueType == DocumentFieldType.Double)
                    {
                        double subTotal = subTotalField.AsDouble();
                        Console.WriteLine($"Sub Total: '{subTotal}', with confidence {subTotalField.Confidence}");
                    }
                }

                if (document.Fields.TryGetValue("TotalTax", out DocumentField totalTaxField))
                {
                    if (totalTaxField.ValueType == DocumentFieldType.Double)
                    {
                        double totalTax = totalTaxField.AsDouble();
                        Console.WriteLine($"Total Tax: '{totalTax}', with confidence {totalTaxField.Confidence}");
                    }
                }

                if (document.Fields.TryGetValue("InvoiceTotal", out DocumentField invoiceTotalField))
                {
                    if (invoiceTotalField.ValueType == DocumentFieldType.Double)
                    {
                        double invoiceTotal = invoiceTotalField.AsDouble();
                        Console.WriteLine($"Invoice Total: '{invoiceTotal}', with confidence {invoiceTotalField.Confidence}");
                    }
                }
            }
            #endregion
        }

        /// <summary>
        /// Sample: builds a custom model from the training container, analyzes a local file
        /// with it, writes the extracted fields, page contents and tables to the console,
        /// and finally deletes the model again.
        /// </summary>
        [Test]
        public async Task AnalyzeWithCustomModelFromFileAsync()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey = TestEnvironment.ApiKey;
            Uri trainingFileUri = new Uri(TestEnvironment.BlobContainerSasUrl);

            // Firstly, create a custom built model we can use to recognize the custom document. Please note
            // that models can also be built using a graphical user interface such as the Form Recognizer
            // Labeling Tool found here:
            // https://docs.microsoft.com/azure/cognitive-services/form-recognizer/label-tool?tabs=v2-1

            var adminClient = new DocumentModelAdministrationClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
            BuildModelOperation buildOperation = await adminClient.StartBuildModelAsync(trainingFileUri);

            await buildOperation.WaitForCompletionAsync();

            DocumentModel customModel = buildOperation.Value;

            // From here on the model exists in the resource, so everything runs inside
            // try/finally to make sure it is deleted even when the analysis fails.
            try
            {
                // Proceed with the custom document recognition.

                DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

                #region Snippet:FormRecognizerAnalyzeWithCustomModelFromFileAsync
#if SNIPPET
                string modelId = "<modelId>";
                string filePath = "<filePath>";
#else
                string filePath = DocumentAnalysisTestEnvironment.CreatePath("Form_1.jpg");
                string modelId = customModel.ModelId;
#endif

                // The document is only read, so request read access explicitly; without it
                // FileStream asks for read/write access and fails on read-only files.
                using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read);

                AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync(modelId, stream);

                await operation.WaitForCompletionAsync();

                AnalyzeResult result = operation.Value;

                Console.WriteLine($"Document was analyzed with model with ID: {result.ModelId}");

                foreach (AnalyzedDocument document in result.Documents)
                {
                    Console.WriteLine($"Document of type: {document.DocType}");

                    foreach (KeyValuePair<string, DocumentField> fieldKvp in document.Fields)
                    {
                        string fieldName = fieldKvp.Key;
                        DocumentField field = fieldKvp.Value;

                        Console.WriteLine($"Field '{fieldName}': ");

                        Console.WriteLine($" Content: '{field.Content}'");
                        Console.WriteLine($" Confidence: '{field.Confidence}'");
                    }
                }
                #endregion

                // Iterate over lines and selection marks on each page
                foreach (DocumentPage page in result.Pages)
                {
                    Console.WriteLine($"Lines found on page {page.PageNumber}");
                    foreach (var line in page.Lines)
                    {
                        Console.WriteLine($" {line.Content}");
                    }

                    Console.WriteLine($"Selection marks found on page {page.PageNumber}");
                    foreach (var selectionMark in page.SelectionMarks)
                    {
                        Console.WriteLine($" Selection mark is '{selectionMark.State}' with confidence {selectionMark.Confidence}");
                    }
                }

                // Iterate over the document tables
                for (int i = 0; i < result.Tables.Count; i++)
                {
                    Console.WriteLine($"Table {i + 1}");
                    foreach (var cell in result.Tables[i].Cells)
                    {
                        Console.WriteLine($" Cell[{cell.RowIndex}][{cell.ColumnIndex}] has content '{cell.Content}' with kind '{cell.Kind}'");
                    }
                }
            }
            finally
            {
                // Delete the model on completion to clean environment.
                await adminClient.DeleteModelAsync(customModel.ModelId);
            }
        }
    }
}
...le_AnalyzeWithPrebuiltModelFromUriAsync.cs | 132 ++++++++++++++++++ 5 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs rename sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/{AnalyzeDocumentWithCustomModelFromFileAsync.cs => Sample_AnalyzeWithCustomModelFromFileAsync.cs} (97%) create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromUriAsync.cs diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs new file mode 100644 index 0000000000000..d938f0cfc1e38 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System;
using System.Threading.Tasks;
using Azure.AI.FormRecognizer.DocumentAnalysis.Tests;
using Azure.Core.TestFramework;
using NUnit.Framework;

namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples
{
    public partial class DocumentAnalysisSamples : SamplesBase<DocumentAnalysisTestEnvironment>
    {
        /// <summary>
        /// Sample: analyzes a document located at a URI with the "prebuilt-document" model and
        /// writes the returned pages (lines and selection marks), handwritten style spans,
        /// tables, entities and key-value pairs to the console.
        /// </summary>
        [Test]
        public async Task AnalyzeDocumentFromUriAsync()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey = TestEnvironment.ApiKey;

            DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerAnalyzeDocumentFromUriAsync
#if SNIPPET
            Uri fileUri = new Uri("<fileUri>");
#else
            Uri fileUri = DocumentAnalysisTestEnvironment.CreateUri("Form_1.jpg");
#endif

            AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-document", fileUri);

            await operation.WaitForCompletionAsync();

            AnalyzeResult result = operation.Value;

            foreach (DocumentPage page in result.Pages)
            {
                Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s).");

                for (int i = 0; i < page.Lines.Count; i++)
                {
                    DocumentLine line = page.Lines[i];
                    Console.WriteLine($" Line {i} has content: '{line.Content}'.");

                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
                }

                for (int i = 0; i < page.SelectionMarks.Count; i++)
                {
                    DocumentSelectionMark selectionMark = page.SelectionMarks[i];

                    Console.WriteLine($" Selection Mark {i} is {selectionMark.State}.");
                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
                }
            }

            foreach (DocumentStyle style in result.Styles)
            {
                // Check the style and style confidence to see if text is handwritten.
                // Note that value '0.8' is used as an example.

                // Comparing the nullable flag with 'true' already yields false when it has no value.
                bool isHandwritten = style.IsHandwritten == true;

                if (isHandwritten && style.Confidence > 0.8)
                {
                    Console.WriteLine($"Handwritten content found in spans:");

                    foreach (DocumentSpan span in style.Spans)
                    {
                        Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}.");
                    }
                }
            }

            for (int i = 0; i < result.Tables.Count; i++)
            {
                DocumentTable table = result.Tables[i];
                Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");

                foreach (DocumentTableCell cell in table.Cells)
                {
                    Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'.");
                }
            }

            foreach (DocumentEntity entity in result.Entities)
            {
                if (entity.SubCategory == null)
                {
                    Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'");
                }
                else
                {
                    Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'");
                }
            }

            foreach (DocumentKeyValuePair kvp in result.KeyValuePairs)
            {
                // Guard against a key that was detected without an associated value.
                if (kvp.Value == null)
                {
                    Console.WriteLine($"Found key with no value: '{kvp.Key.Content}'");
                }
                else
                {
                    Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'");
                }
            }

            #endregion
        }
    }
}
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Threading.Tasks;
using Azure.AI.FormRecognizer.DocumentAnalysis.Tests;
using Azure.Core.TestFramework;
using NUnit.Framework;

namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples
{
    public partial class DocumentAnalysisSamples : SamplesBase<DocumentAnalysisTestEnvironment>
    {
        /// <summary>
        /// Sample: analyzes a document located at a URI with the "prebuilt-layout" model and
        /// writes the returned pages (lines and selection marks), handwritten style spans and
        /// tables to the console.
        /// </summary>
        [Test]
        public async Task AnalyzeLayoutFromUriAsync()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey = TestEnvironment.ApiKey;

            DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerAnalyzeLayoutFromUriAsync
#if SNIPPET
            Uri fileUri = new Uri("<fileUri>");
#else
            Uri fileUri = DocumentAnalysisTestEnvironment.CreateUri("Form_1.jpg");
#endif

            AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-layout", fileUri);

            await operation.WaitForCompletionAsync();

            AnalyzeResult result = operation.Value;

            foreach (DocumentPage page in result.Pages)
            {
                Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s).");

                for (int i = 0; i < page.Lines.Count; i++)
                {
                    DocumentLine line = page.Lines[i];
                    Console.WriteLine($" Line {i} has content: '{line.Content}'.");

                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
                }

                for (int i = 0; i < page.SelectionMarks.Count; i++)
                {
                    DocumentSelectionMark selectionMark = page.SelectionMarks[i];

                    Console.WriteLine($" Selection Mark {i} is {selectionMark.State}.");
                    Console.WriteLine($" Its bounding box is:");
                    Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
                    Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
                    Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
                    Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
                }
            }

            foreach (DocumentStyle style in result.Styles)
            {
                // Check the style and style confidence to see if text is handwritten.
                // Note that value '0.8' is used as an example.

                // Comparing the nullable flag with 'true' already yields false when it has no value.
                bool isHandwritten = style.IsHandwritten == true;

                if (isHandwritten && style.Confidence > 0.8)
                {
                    Console.WriteLine($"Handwritten content found in spans:");

                    foreach (DocumentSpan span in style.Spans)
                    {
                        Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}.");
                    }
                }
            }

            for (int i = 0; i < result.Tables.Count; i++)
            {
                DocumentTable table = result.Tables[i];
                Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");

                foreach (DocumentTableCell cell in table.Cells)
                {
                    Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'.");
                }
            }

            #endregion
        }
    }
}
sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/AnalyzeDocumentWithCustomModelFromFileAsync.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs index 513b988bf73d1..409e8096efd5a 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/AnalyzeDocumentWithCustomModelFromFileAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs @@ -21,7 +21,7 @@ public async Task AnalyzeWithCustomModelFromFileAsync() Uri trainingFileUri = new Uri(TestEnvironment.BlobContainerSasUrl); // Firstly, create a custom built model we can use to recognize the custom document. Please note - // that models can also be trained using a graphical user interface such as the Form Recognizer + // that models can also be built using a graphical user interface such as the Form Recognizer // Labeling Tool found here: // https://docs.microsoft.com/azure/cognitive-services/form-recognizer/label-tool?tabs=v2-1 diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs new file mode 100644 index 0000000000000..22510ba29ac6f --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Azure.AI.FormRecognizer.DocumentAnalysis.Tests; +using Azure.Core.TestFramework; +using NUnit.Framework; + +namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples +{ + public partial class DocumentAnalysisSamples : SamplesBase + { + [Test] + public async Task AnalyzeWithCustomModelFromUriAsync() + { + string endpoint = TestEnvironment.Endpoint; + string apiKey = TestEnvironment.ApiKey; + Uri trainingFileUri = new Uri(TestEnvironment.BlobContainerSasUrl); + + // Firstly, create a custom built model we can use to recognize the custom document. Please note + // that models can also be built using a graphical user interface such as the Form Recognizer + // Labeling Tool found here: + // https://docs.microsoft.com/azure/cognitive-services/form-recognizer/label-tool?tabs=v2-1 + + var adminClient = new DocumentModelAdministrationClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); + BuildModelOperation buildOperation = await adminClient.StartBuildModelAsync(trainingFileUri); + + await buildOperation.WaitForCompletionAsync(); + + DocumentModel customModel = buildOperation.Value; + + // Proceed with the custom document recognition. 
+ + DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); + + #region Snippet:FormRecognizerAnalyzeWithCustomModelFromUriAsync +#if SNIPPET + string modelId = ""; + string fileUri = ""; +#else + Uri fileUri = DocumentAnalysisTestEnvironment.CreateUri("Form_1.jpg"); + string modelId = customModel.ModelId; +#endif + + AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync(modelId, fileUri); + + await operation.WaitForCompletionAsync(); + + AnalyzeResult result = operation.Value; + + Console.WriteLine($"Document was analyzed with model with ID: {result.ModelId}"); + + foreach (AnalyzedDocument document in result.Documents) + { + Console.WriteLine($"Document of type: {document.DocType}"); + + foreach (KeyValuePair fieldKvp in document.Fields) + { + string fieldName = fieldKvp.Key; + DocumentField field = fieldKvp.Value; + + Console.WriteLine($"Field '{fieldName}': "); + + Console.WriteLine($" Content: '{field.Content}'"); + Console.WriteLine($" Confidence: '{field.Confidence}'"); + } + } + #endregion + + // Iterate over lines and selection marks on each page + foreach (DocumentPage page in result.Pages) + { + Console.WriteLine($"Lines found on page {page.PageNumber}"); + foreach (var line in page.Lines) + { + Console.WriteLine($" {line.Content}"); + } + + Console.WriteLine($"Selection marks found on page {page.PageNumber}"); + foreach (var selectionMark in page.SelectionMarks) + { + Console.WriteLine($" Selection mark is '{selectionMark.State}' with confidence {selectionMark.Confidence}"); + } + } + + // Iterate over the document tables + for (int i = 0; i < result.Tables.Count; i++) + { + Console.WriteLine($"Table {i + 1}"); + foreach (var cell in result.Tables[i].Cells) + { + Console.WriteLine($" Cell[{cell.RowIndex}][{cell.ColumnIndex}] has content '{cell.Content}' with kind '{cell.Kind}'"); + } + } + + // Delete the model on completion to clean environment. 
+ await adminClient.DeleteModelAsync(customModel.ModelId); + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromUriAsync.cs new file mode 100644 index 0000000000000..700df113ca8cb --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromUriAsync.cs @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Azure.AI.FormRecognizer.DocumentAnalysis.Tests; +using Azure.Core.TestFramework; +using NUnit.Framework; + +namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples +{ + public partial class DocumentAnalysisSamples : SamplesBase + { + [Test] + public async Task AnalyzeWithPrebuiltModelFromUriAsync() + { + string endpoint = TestEnvironment.Endpoint; + string apiKey = TestEnvironment.ApiKey; + + DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); + + #region Snippet:FormRecognizerAnalyzeWithPrebuiltModelFromUriAsync +#if SNIPPET + string fileUri = ""; +#else + Uri fileUri = DocumentAnalysisTestEnvironment.CreateUri("Form_1.jpg"); +#endif + + var options = new AnalyzeDocumentOptions() { Locale = "en-US" }; + + AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-invoice", fileUri, options); + + await operation.WaitForCompletionAsync(); + + AnalyzeResult result = operation.Value; + + // To see the list of all the supported fields returned by service and its corresponding types for the + // prebuilt-invoice model, consult: + // https://aka.ms/formrecognizer/invoicefields + + for (int i = 0; i < result.Documents.Count; i++) + { + Console.WriteLine($"Document {i}:"); + + AnalyzedDocument document = 
result.Documents[i]; + + if (document.Fields.TryGetValue("VendorName", out DocumentField vendorNameField)) + { + if (vendorNameField.ValueType == DocumentFieldType.String) + { + string vendorName = vendorNameField.AsString(); + Console.WriteLine($"Vendor Name: '{vendorName}', with confidence {vendorNameField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("CustomerName", out DocumentField customerNameField)) + { + if (customerNameField.ValueType == DocumentFieldType.String) + { + string customerName = customerNameField.AsString(); + Console.WriteLine($"Customer Name: '{customerName}', with confidence {customerNameField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("Items", out DocumentField itemsField)) + { + if (itemsField.ValueType == DocumentFieldType.List) + { + foreach (DocumentField itemField in itemsField.AsList()) + { + Console.WriteLine("Item:"); + + if (itemField.ValueType == DocumentFieldType.Dictionary) + { + IReadOnlyDictionary itemFields = itemField.AsDictionary(); + + if (itemFields.TryGetValue("Description", out DocumentField itemDescriptionField)) + { + if (itemDescriptionField.ValueType == DocumentFieldType.String) + { + string itemDescription = itemDescriptionField.AsString(); + + Console.WriteLine($" Description: '{itemDescription}', with confidence {itemDescriptionField.Confidence}"); + } + } + + if (itemFields.TryGetValue("Amount", out DocumentField itemAmountField)) + { + if (itemAmountField.ValueType == DocumentFieldType.Double) + { + double itemAmount = itemAmountField.AsDouble(); + + Console.WriteLine($" Amount: '{itemAmount}', with confidence {itemAmountField.Confidence}"); + } + } + } + } + } + } + + if (document.Fields.TryGetValue("SubTotal", out DocumentField subTotalField)) + { + if (subTotalField.ValueType == DocumentFieldType.Double) + { + double subTotal = subTotalField.AsDouble(); + Console.WriteLine($"Sub Total: '{subTotal}', with confidence {subTotalField.Confidence}"); + } + } + + if 
(document.Fields.TryGetValue("TotalTax", out DocumentField totalTaxField)) + { + if (totalTaxField.ValueType == DocumentFieldType.Double) + { + double totalTax = totalTaxField.AsDouble(); + Console.WriteLine($"Total Tax: '{totalTax}', with confidence {totalTaxField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("InvoiceTotal", out DocumentField invoiceTotalField)) + { + if (invoiceTotalField.ValueType == DocumentFieldType.Double) + { + double invoiceTotal = invoiceTotalField.AsDouble(); + Console.WriteLine($"Invoice Total: '{invoiceTotal}', with confidence {invoiceTotalField.Confidence}"); + } + } + } + #endregion + } + } +} From 0a02f4ec740e1e4f85f662c25139b7784f98d9a1 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Wed, 29 Sep 2021 22:03:33 -0700 Subject: [PATCH 04/13] WIP: md files --- .../samples/Sample_AnalyzeDocument.md | 35 ++++++++++++++++++ .../samples/Sample_AnalyzeLayout.md | 36 +++++++++++++++++++ .../samples/Sample_AnalyzeWithCustomModel.md | 0 .../Sample_AnalyzeWithPrebuiltModel.md | 0 .../src/DocumentAnalysisClient.cs | 2 -- .../tests/samples/SampleSnippets.cs | 2 +- 6 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md create mode 100644 sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md new file mode 100644 index 0000000000000..9718142aa6123 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md @@ -0,0 +1,35 @@ +# Analyze a document + +This sample demonstrates how to extract text, tables, styles, selection 
marks like radio buttons, entities, key-value pairs, and layout information from documents, without the need to train a model. + +To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. + +## Creating a `DocumentAnalysisClient` + +To create a new `DocumentAnalysisClient` you need the endpoint and credentials from your resource. In the sample below you'll use a Form Recognizer API key credential by creating an `AzureKeyCredential` object, that if needed, will allow you to update the API key without creating a new client. + +You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. + +```C# Snippet:CreateDocumentAnalysisClient +``` + +## Analyze a document from a URI + +To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. + +```C# Snippet:FormRecognizerAnalyzeDocumentFromUriAsync +``` + +## Analyze a document from a file stream + +To analyze a given file at a file stream, use the `StartAnalyzeDocument` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
+ +```C# Snippet:FormRecognizerAnalyzeDocumentFromFileAsync +``` + +To see the full example source files, see: + +* [Analyze document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) +* [Analyze document from document](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) + +[README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md new file mode 100644 index 0000000000000..52f124139c63f --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md @@ -0,0 +1,36 @@ +# Analyze the layout of a document + +This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, and layout information from documents, without the need to train a model. If you want to extract entities and key-value pairs in addition to this data, please see the [Analyze a document][document_sample] sample. + +To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. + +## Creating a `DocumentAnalysisClient` + +To create a new `DocumentAnalysisClient` you need the endpoint and credentials from your resource. In the sample below you'll use a Form Recognizer API key credential by creating an `AzureKeyCredential` object, that if needed, will allow you to update the API key without creating a new client. + +You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. 
+ +```C# Snippet:CreateDocumentAnalysisClient +``` + +## Analyze the layout of a document from a URI + +To analyze the layout from a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. + +```C# Snippet:FormRecognizerAnalyzeLayoutFromUriAsync +``` + +## Analyze the layout of a document from a file stream + +To analyze the layout from a given file at a file stream, use the `StartAnalyzeDocument` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. + +```C# Snippet:FormRecognizerAnalyzeLayoutFromFileAsync +``` + +To see the full example source files, see: + +* [Analyze layout from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs) +* [Analyze layout from document](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs) + +[README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started +[document_sample]: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.cs diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs 
index 2cc4727d55e77..8a97bc10d854f 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs @@ -8,8 +8,6 @@ using Azure.Core; using Azure.Core.Pipeline; -using Constants = Azure.AI.FormRecognizer.Constants; - namespace Azure.AI.FormRecognizer.DocumentAnalysis { /// diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/SampleSnippets.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/SampleSnippets.cs index 152423d6f9076..fce3381d8fbe7 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/SampleSnippets.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/SampleSnippets.cs @@ -17,7 +17,7 @@ public partial class Snippets : SamplesBase [Test] public void CreateDocumentAnalysisClient() { - #region Snippet:DocumentAnalysisClient + #region Snippet:CreateDocumentAnalysisClient #if SNIPPET string endpoint = ""; string apiKey = ""; From 9dbf4c6f2fbd1ce3e96817024b3ff71055359075 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Wed, 29 Sep 2021 22:54:47 -0700 Subject: [PATCH 05/13] Added more md files --- .../samples/Sample_AnalyzeDocument.md | 4 +- .../samples/Sample_AnalyzeLayout.md | 4 +- .../Sample_AnalyzeWithPrebuiltModel.md | 53 +++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md index 9718142aa6123..b686152bf6ad8 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md @@ -15,14 +15,14 @@ You can set `endpoint` and `apiKey` based on an environment variable, a configur ## Analyze a document from a URI -To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. 
The returned value is an `AnalyzeResult` object containing data about the submitted document. +To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. ```C# Snippet:FormRecognizerAnalyzeDocumentFromUriAsync ``` ## Analyze a document from a file stream -To analyze a given file at a file stream, use the `StartAnalyzeDocument` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. +To analyze a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. ```C# Snippet:FormRecognizerAnalyzeDocumentFromFileAsync ``` diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md index 52f124139c63f..6a3281f21a481 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md @@ -15,14 +15,14 @@ You can set `endpoint` and `apiKey` based on an environment variable, a configur ## Analyze the layout of a document from a URI -To analyze the layout from a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. +To analyze the layout from a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
```C# Snippet:FormRecognizerAnalyzeLayoutFromUriAsync ``` ## Analyze the layout of a document from a file stream -To analyze the layout from a given file at a file stream, use the `StartAnalyzeDocument` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. +To analyze the layout from a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. ```C# Snippet:FormRecognizerAnalyzeLayoutFromFileAsync ``` diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md index e69de29bb2d1d..3dec636c95675 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md @@ -0,0 +1,53 @@ +# Analyze a document with a prebuilt model + +This sample demonstrates how to extract text and key information from documents with one of the service's prebuilt models, using an invoice as an example. For a list of the types of documents supported by the Form Recognizer service's prebuilt models, please check the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. + +To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. + +## Creating a `DocumentAnalysisClient` + +To create a new `DocumentAnalysisClient` you need the endpoint and credentials from your resource. In the sample below you'll use a Form Recognizer API key credential by creating an `AzureKeyCredential` object, that if needed, will allow you to update the API key without creating a new client.
+ +You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. + +```C# Snippet:CreateDocumentAnalysisClient +``` + +## Choosing the prebuilt model ID + +The model to use for the analyze operation depends on the type of document to be analyzed. These are the IDs of the prebuilt models currently supported by the Form Recognizer service: + +- prebuilt-businessCard: extracts text and key information from English business cards. [Supported fields][businessCard_fields]. +- prebuilt-idDocument: extracts text and key information from US driver licenses and international passports. [Supported fields][idDocument_fields]. +- prebuilt-invoice: extracts text, selection marks, tables, key-value pairs, and key information from English invoices. [Supported fields][invoice_fields]. +- prebuilt-receipt: extracts text and key information from English receipts. [Supported fields][receipt_fields]. + +## Use a prebuilt model to analyze a document from a URI + +To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. Since we're analyzing an English invoice, we'll pass the model ID `prebuilt-invoice` to the method. + +For simplicity, we are not showing all the fields that the service returns. To see the list of all the supported fields returned by service and its corresponding types, consult the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. + +```C# Snippet:FormRecognizerAnalyzeWithPrebuiltModelFromUriAsync +``` + +## Use a prebuilt model to analyze a document from a file stream + +To analyze a given file at a file stream, use the `StartAnalyzeDocument` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. Since we're analyzing an English invoice, we'll pass the model ID `prebuilt-invoice` to the method. 
+ +For simplicity, we are not showing all the fields that the service returns. To see the list of all the supported fields returned by service and its corresponding types, consult the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. + +```C# Snippet:FormRecognizerAnalyzeWithPrebuiltModelFromFileAsync +``` + +To see the full example source files, see: + +* [Analyze document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) +* [Analyze document from document](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) + +[businessCard_fields]: https://aka.ms/formrecognizer/businesscardfields +[idDocument_fields]: https://aka.ms/formrecognizer/iddocumentfields +[invoice_fields]: https://aka.ms/formrecognizer/invoicefields +[receipt_fields]: https://aka.ms/formrecognizer/receiptfields + +[README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started From 0bf1dd01ef145db7797e05acd945c2e1e0475ce5 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Wed, 29 Sep 2021 23:04:39 -0700 Subject: [PATCH 06/13] More md --- .../samples/Sample_AnalyzeDocument.md | 2 +- .../samples/Sample_AnalyzeLayout.md | 2 +- .../samples/Sample_AnalyzeWithCustomModel.md | 36 +++++++++++++++++++ .../Sample_AnalyzeWithPrebuiltModel.md | 4 +-- 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md index b686152bf6ad8..6c1dea2b625a7 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md @@ -30,6 +30,6 @@ To analyze a given file at a file stream, use
the `StartAnalyzeDocument` method To see the full example source files, see: * [Analyze document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) -* [Analyze document from document](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) +* [Analyze document from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md index 6a3281f21a481..db8ddb6081638 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md @@ -30,7 +30,7 @@ To analyze the layout from a given file at a file stream, use the `StartAnalyzeD To see the full example source files, see: * [Analyze layout from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs) -* [Analyze layout from document](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs) +* [Analyze layout from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs) [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started [document_sample]: 
https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.cs diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md index e69de29bb2d1d..2412fd46df883 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md @@ -0,0 +1,36 @@ +# Analyze a document with a custom model + +This sample demonstrates how to extract text and key information from your custom documents, using models you built with your own document types. For more information on how to do the training, see [build a model][build_a_model]. + +To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. + +## Creating a `DocumentAnalysisClient` + +To create a new `DocumentAnalysisClient` you need the endpoint and credentials from your resource. In the sample below you'll use a Form Recognizer API key credential by creating an `AzureKeyCredential` object, that if needed, will allow you to update the API key without creating a new client. + +You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. + +```C# Snippet:CreateDocumentAnalysisClient +``` + +## Use a custom model to analyze a document from a URI + +To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. + +```C# Snippet:FormRecognizerAnalyzeWithCustomModelFromUriAsync +``` + +## Use a custom model to analyze a document from a file stream + +To analyze a given file at a file stream, use the `StartAnalyzeDocument` method. 
The returned value is an `AnalyzeResult` object containing data about the submitted document. + +```C# Snippet:FormRecognizerAnalyzeWithCustomModelFromFileAsync +``` + +To see the full example source files, see: + +* [Analyze with custom model from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs) +* [Analyze with custom model from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs) + +[README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started +[build_a_model]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_BuildModel.md diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md index 3dec636c95675..c48378f5e9ba0 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md @@ -42,8 +42,8 @@ For simplicity, we are not showing all the fields that the service returns. 
To s To see the full example source files, see: -* [Analyze document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) -* [Analyze document from document](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) +* [Analyze with prebuilt model from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromUriAsync.cs) +* [Analyze with prebuilt model from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromFileAsync.cs) [businessCard_fields]: https://aka.ms/formrecognizer/businesscardfields [idDocument_fields]: https://aka.ms/formrecognizer/iddocumentfields From 72b17c32d713bfbc756e6728bca1a01a06e7752d Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Wed, 29 Sep 2021 23:06:09 -0700 Subject: [PATCH 07/13] Snippets update --- .../samples/Sample_AnalyzeDocument.md | 169 ++++++++++++++ .../samples/Sample_AnalyzeLayout.md | 135 ++++++++++++ .../samples/Sample_AnalyzeWithCustomModel.md | 58 +++++ .../Sample_AnalyzeWithPrebuiltModel.md | 207 ++++++++++++++++++ 4 files changed, 569 insertions(+) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md index 6c1dea2b625a7..e886124cfe9b9 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md @@ -11,6 +11,10 @@ To create a new `DocumentAnalysisClient` you need the endpoint and credentials f You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your 
application. ```C# Snippet:CreateDocumentAnalysisClient +string endpoint = ""; +string apiKey = ""; +var credential = new AzureKeyCredential(apiKey); +var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` ## Analyze a document from a URI @@ -18,6 +22,88 @@ You can set `endpoint` and `apiKey` based on an environment variable, a configur To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. ```C# Snippet:FormRecognizerAnalyzeDocumentFromUriAsync +string fileUri = ""; + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-document", fileUri); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +foreach (DocumentPage page in result.Pages) +{ + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + + for (int i = 0; i < page.Lines.Count; i++) + { + DocumentLine line = page.Lines[i]; + Console.WriteLine($" Line {i} has content: '{line.Content}'."); + + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}"); + Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}"); + } + + for (int i = 0; i < page.SelectionMarks.Count; i++) + { + DocumentSelectionMark selectionMark = page.SelectionMarks[i]; + + Console.WriteLine($" Selection Mark {i} is {selectionMark.State}."); + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}"); + 
Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}"); + } +} + +foreach (DocumentStyle style in result.Styles) +{ + // Check the style and style confidence to see if text is handwritten. + // Note that value '0.8' is used as an example. + + bool isHandwritten = style.IsHandwritten.HasValue && style.IsHandwritten == true; + + if (isHandwritten && style.Confidence > 0.8) + { + Console.WriteLine($"Handwritten content found in spans:"); + + foreach (DocumentSpan span in style.Spans) + { + Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + } + } +} + +for (int i = 0; i < result.Tables.Count; i++) +{ + DocumentTable table = result.Tables[i]; + Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + + foreach (DocumentTableCell cell in table.Cells) + { + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + } +} + +foreach (DocumentEntity entity in result.Entities) +{ + if (entity.SubCategory == null) + { + Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'"); + } + else + { + Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'"); + } +} + +foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) +{ + Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); +} ``` ## Analyze a document from a file stream @@ -25,6 +111,89 @@ To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method a To analyze a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-document` as 
the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. ```C# Snippet:FormRecognizerAnalyzeDocumentFromFileAsync +string filePath = "filePath"; +using var stream = new FileStream(filePath, FileMode.Open); + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync("prebuilt-document", stream); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +foreach (DocumentPage page in result.Pages) +{ + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + + for (int i = 0; i < page.Lines.Count; i++) + { + DocumentLine line = page.Lines[i]; + Console.WriteLine($" Line {i} has content: '{line.Content}'."); + + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}"); + Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}"); + } + + for (int i = 0; i < page.SelectionMarks.Count; i++) + { + DocumentSelectionMark selectionMark = page.SelectionMarks[i]; + + Console.WriteLine($" Selection Mark {i} is {selectionMark.State}."); + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}"); + Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}"); + } +} + +foreach (DocumentStyle style in result.Styles) +{ + // Check the style and 
style confidence to see if text is handwritten. + // Note that value '0.8' is used as an example. + + bool isHandwritten = style.IsHandwritten.HasValue && style.IsHandwritten == true; + + if (isHandwritten && style.Confidence > 0.8) + { + Console.WriteLine($"Handwritten content found in spans:"); + + foreach (DocumentSpan span in style.Spans) + { + Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + } + } +} + +for (int i = 0; i < result.Tables.Count; i++) +{ + DocumentTable table = result.Tables[i]; + Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + + foreach (DocumentTableCell cell in table.Cells) + { + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + } +} + +foreach (DocumentEntity entity in result.Entities) +{ + if (entity.SubCategory == null) + { + Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'"); + } + else + { + Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'"); + } +} + +foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) +{ + Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); +} ``` To see the full example source files, see: diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md index db8ddb6081638..181a54906813a 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md @@ -11,6 +11,10 @@ To create a new `DocumentAnalysisClient` you need the endpoint and credentials f You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. 
```C# Snippet:CreateDocumentAnalysisClient +string endpoint = ""; +string apiKey = ""; +var credential = new AzureKeyCredential(apiKey); +var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` ## Analyze the layout of a document from a URI @@ -18,6 +22,71 @@ You can set `endpoint` and `apiKey` based on an environment variable, a configur To analyze the layout from a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. ```C# Snippet:FormRecognizerAnalyzeLayoutFromUriAsync +string fileUri = ""; + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-layout", fileUri); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +foreach (DocumentPage page in result.Pages) +{ + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + + for (int i = 0; i < page.Lines.Count; i++) + { + DocumentLine line = page.Lines[i]; + Console.WriteLine($" Line {i} has content: '{line.Content}'."); + + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}"); + Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}"); + } + + for (int i = 0; i < page.SelectionMarks.Count; i++) + { + DocumentSelectionMark selectionMark = page.SelectionMarks[i]; + + Console.WriteLine($" Selection Mark {i} is {selectionMark.State}."); + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}"); + 
Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}"); + } +} + +foreach (DocumentStyle style in result.Styles) +{ + // Check the style and style confidence to see if text is handwritten. + // Note that value '0.8' is used as an example. + + bool isHandwritten = style.IsHandwritten.HasValue && style.IsHandwritten == true; + + if (isHandwritten && style.Confidence > 0.8) + { + Console.WriteLine($"Handwritten content found in spans:"); + + foreach (DocumentSpan span in style.Spans) + { + Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + } + } +} + +for (int i = 0; i < result.Tables.Count; i++) +{ + DocumentTable table = result.Tables[i]; + Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + + foreach (DocumentTableCell cell in table.Cells) + { + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + } +} ``` ## Analyze the layout of a document from a file stream @@ -25,6 +94,72 @@ To analyze the layout from a given file at a URI, use the `StartAnalyzeDocumentF To analyze the layout from a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
```C# Snippet:FormRecognizerAnalyzeLayoutFromFileAsync +string filePath = "filePath"; +using var stream = new FileStream(filePath, FileMode.Open); + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync("prebuilt-layout", stream); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +foreach (DocumentPage page in result.Pages) +{ + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + + for (int i = 0; i < page.Lines.Count; i++) + { + DocumentLine line = page.Lines[i]; + Console.WriteLine($" Line {i} has content: '{line.Content}'."); + + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}"); + Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}"); + } + + for (int i = 0; i < page.SelectionMarks.Count; i++) + { + DocumentSelectionMark selectionMark = page.SelectionMarks[i]; + + Console.WriteLine($" Selection Mark {i} is {selectionMark.State}."); + Console.WriteLine($" Its bounding box is:"); + Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}"); + Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}"); + Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}"); + Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}"); + } +} + +foreach (DocumentStyle style in result.Styles) +{ + // Check the style and style confidence to see if text is handwritten. + // Note that value '0.8' is used as an example. 
+ + bool isHandwritten = style.IsHandwritten.HasValue && style.IsHandwritten == true; + + if (isHandwritten && style.Confidence > 0.8) + { + Console.WriteLine($"Handwritten content found in spans:"); + + foreach (DocumentSpan span in style.Spans) + { + Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + } + } +} + +for (int i = 0; i < result.Tables.Count; i++) +{ + DocumentTable table = result.Tables[i]; + Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + + foreach (DocumentTableCell cell in table.Cells) + { + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + } +} ``` To see the full example source files, see: diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md index 2412fd46df883..384efabfbdcfa 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md @@ -11,6 +11,10 @@ To create a new `DocumentAnalysisClient` you need the endpoint and credentials f You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. ```C# Snippet:CreateDocumentAnalysisClient +string endpoint = ""; +string apiKey = ""; +var credential = new AzureKeyCredential(apiKey); +var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` ## Use a custom model to analyze a document from a URI @@ -18,6 +22,32 @@ You can set `endpoint` and `apiKey` based on an environment variable, a configur To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
```C# Snippet:FormRecognizerAnalyzeWithCustomModelFromUriAsync +string modelId = ""; +string fileUri = ""; + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync(modelId, fileUri); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +Console.WriteLine($"Document was analyzed with model with ID: {result.ModelId}"); + +foreach (AnalyzedDocument document in result.Documents) +{ + Console.WriteLine($"Document of type: {document.DocType}"); + + foreach (KeyValuePair<string, DocumentField> fieldKvp in document.Fields) + { + string fieldName = fieldKvp.Key; + DocumentField field = fieldKvp.Value; + + Console.WriteLine($"Field '{fieldName}': "); + + Console.WriteLine($" Content: '{field.Content}'"); + Console.WriteLine($" Confidence: '{field.Confidence}'"); + } +} ``` ## Use a custom model to analyze a document from a file stream @@ -25,6 +55,34 @@ To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. To analyze a given file at a file stream, use the `StartAnalyzeDocument` method. The returned value is an `AnalyzeResult` object containing data about the submitted document.
```C# Snippet:FormRecognizerAnalyzeWithCustomModelFromFileAsync +string modelId = ""; +string filePath = ""; + +using var stream = new FileStream(filePath, FileMode.Open); + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync(modelId, stream); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +Console.WriteLine($"Document was analyzed with model with ID: {result.ModelId}"); + +foreach (AnalyzedDocument document in result.Documents) +{ + Console.WriteLine($"Document of type: {document.DocType}"); + + foreach (KeyValuePair<string, DocumentField> fieldKvp in document.Fields) + { + string fieldName = fieldKvp.Key; + DocumentField field = fieldKvp.Value; + + Console.WriteLine($"Field '{fieldName}': "); + + Console.WriteLine($" Content: '{field.Content}'"); + Console.WriteLine($" Confidence: '{field.Confidence}'"); + } +} ``` To see the full example source files, see: diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md index c48378f5e9ba0..bf4ae3f593204 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md @@ -11,6 +11,10 @@ To create a new `DocumentAnalysisClient` you need the endpoint and credentials f You can set `endpoint` and `apiKey` based on an environment variable, a configuration setting, or any way that works for your application. ```C# Snippet:CreateDocumentAnalysisClient +string endpoint = ""; +string apiKey = ""; +var credential = new AzureKeyCredential(apiKey); +var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` ## Choosing the prebuilt model ID @@ -29,6 +33,107 @@ To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method. For simplicity, we are not showing all the fields that the service returns.
To see the list of all the supported fields returned by service and its corresponding types, consult the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. ```C# Snippet:FormRecognizerAnalyzeWithPrebuiltModelFromUriAsync +string fileUri = ""; + +var options = new AnalyzeDocumentOptions() { Locale = "en-US" }; + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-invoice", fileUri, options); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +// To see the list of all the supported fields returned by service and its corresponding types for the +// prebuilt-invoice model, consult: +// https://aka.ms/formrecognizer/invoicefields + +for (int i = 0; i < result.Documents.Count; i++) +{ + Console.WriteLine($"Document {i}:"); + + AnalyzedDocument document = result.Documents[i]; + + if (document.Fields.TryGetValue("VendorName", out DocumentField vendorNameField)) + { + if (vendorNameField.ValueType == DocumentFieldType.String) + { + string vendorName = vendorNameField.AsString(); + Console.WriteLine($"Vendor Name: '{vendorName}', with confidence {vendorNameField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("CustomerName", out DocumentField customerNameField)) + { + if (customerNameField.ValueType == DocumentFieldType.String) + { + string customerName = customerNameField.AsString(); + Console.WriteLine($"Customer Name: '{customerName}', with confidence {customerNameField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("Items", out DocumentField itemsField)) + { + if (itemsField.ValueType == DocumentFieldType.List) + { + foreach (DocumentField itemField in itemsField.AsList()) + { + Console.WriteLine("Item:"); + + if (itemField.ValueType == DocumentFieldType.Dictionary) + { + IReadOnlyDictionary<string, DocumentField> itemFields = itemField.AsDictionary(); + + if (itemFields.TryGetValue("Description", out DocumentField itemDescriptionField)) + { + if
(itemDescriptionField.ValueType == DocumentFieldType.String) + { + string itemDescription = itemDescriptionField.AsString(); + + Console.WriteLine($" Description: '{itemDescription}', with confidence {itemDescriptionField.Confidence}"); + } + } + + if (itemFields.TryGetValue("Amount", out DocumentField itemAmountField)) + { + if (itemAmountField.ValueType == DocumentFieldType.Double) + { + double itemAmount = itemAmountField.AsDouble(); + + Console.WriteLine($" Amount: '{itemAmount}', with confidence {itemAmountField.Confidence}"); + } + } + } + } + } + } + + if (document.Fields.TryGetValue("SubTotal", out DocumentField subTotalField)) + { + if (subTotalField.ValueType == DocumentFieldType.Double) + { + double subTotal = subTotalField.AsDouble(); + Console.WriteLine($"Sub Total: '{subTotal}', with confidence {subTotalField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("TotalTax", out DocumentField totalTaxField)) + { + if (totalTaxField.ValueType == DocumentFieldType.Double) + { + double totalTax = totalTaxField.AsDouble(); + Console.WriteLine($"Total Tax: '{totalTax}', with confidence {totalTaxField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("InvoiceTotal", out DocumentField invoiceTotalField)) + { + if (invoiceTotalField.ValueType == DocumentFieldType.Double) + { + double invoiceTotal = invoiceTotalField.AsDouble(); + Console.WriteLine($"Invoice Total: '{invoiceTotal}', with confidence {invoiceTotalField.Confidence}"); + } + } +} ``` ## Use a prebuilt model to analyze a document from a file stream @@ -38,6 +143,108 @@ To analyze a given file at a file stream, use the `StartAnalyzeDocument` method. For simplicity, we are not showing all the fields that the service returns. To see the list of all the supported fields returned by service and its corresponding types, consult the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. 
```C# Snippet:FormRecognizerAnalyzeWithPrebuiltModelFromFileAsync +string receiptPath = ""; + +using var stream = new FileStream(receiptPath, FileMode.Open); +var options = new AnalyzeDocumentOptions() { Locale = "en-US" }; + +AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentAsync("prebuilt-invoice", stream, options); + +await operation.WaitForCompletionAsync(); + +AnalyzeResult result = operation.Value; + +// To see the list of all the supported fields returned by service and its corresponding types for the +// prebuilt-invoice model, consult: +// https://aka.ms/formrecognizer/invoicefields + +for (int i = 0; i < result.Documents.Count; i++) +{ + Console.WriteLine($"Document {i}:"); + + AnalyzedDocument document = result.Documents[i]; + + if (document.Fields.TryGetValue("VendorName", out DocumentField vendorNameField)) + { + if (vendorNameField.ValueType == DocumentFieldType.String) + { + string vendorName = vendorNameField.AsString(); + Console.WriteLine($"Vendor Name: '{vendorName}', with confidence {vendorNameField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("CustomerName", out DocumentField customerNameField)) + { + if (customerNameField.ValueType == DocumentFieldType.String) + { + string customerName = customerNameField.AsString(); + Console.WriteLine($"Customer Name: '{customerName}', with confidence {customerNameField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("Items", out DocumentField itemsField)) + { + if (itemsField.ValueType == DocumentFieldType.List) + { + foreach (DocumentField itemField in itemsField.AsList()) + { + Console.WriteLine("Item:"); + + if (itemField.ValueType == DocumentFieldType.Dictionary) + { + IReadOnlyDictionary<string, DocumentField> itemFields = itemField.AsDictionary(); + + if (itemFields.TryGetValue("Description", out DocumentField itemDescriptionField)) + { + if (itemDescriptionField.ValueType == DocumentFieldType.String) + { + string itemDescription = itemDescriptionField.AsString(); + +
Console.WriteLine($" Description: '{itemDescription}', with confidence {itemDescriptionField.Confidence}"); + } + } + + if (itemFields.TryGetValue("Amount", out DocumentField itemAmountField)) + { + if (itemAmountField.ValueType == DocumentFieldType.Double) + { + double itemAmount = itemAmountField.AsDouble(); + + Console.WriteLine($" Amount: '{itemAmount}', with confidence {itemAmountField.Confidence}"); + } + } + } + } + } + } + + if (document.Fields.TryGetValue("SubTotal", out DocumentField subTotalField)) + { + if (subTotalField.ValueType == DocumentFieldType.Double) + { + double subTotal = subTotalField.AsDouble(); + Console.WriteLine($"Sub Total: '{subTotal}', with confidence {subTotalField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("TotalTax", out DocumentField totalTaxField)) + { + if (totalTaxField.ValueType == DocumentFieldType.Double) + { + double totalTax = totalTaxField.AsDouble(); + Console.WriteLine($"Total Tax: '{totalTax}', with confidence {totalTaxField.Confidence}"); + } + } + + if (document.Fields.TryGetValue("InvoiceTotal", out DocumentField invoiceTotalField)) + { + if (invoiceTotalField.ValueType == DocumentFieldType.Double) + { + double invoiceTotal = invoiceTotalField.AsDouble(); + Console.WriteLine($"Invoice Total: '{invoiceTotal}', with confidence {invoiceTotalField.Confidence}"); + } + } +} ``` To see the full example source files, see: From 6e83c7a0b14b39d17ce23a8cf4c2321349b00693 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Wed, 29 Sep 2021 23:13:46 -0700 Subject: [PATCH 08/13] Finished --- .../Azure.AI.FormRecognizer/samples/README.md | 4 ++++ .../samples/Sample_AnalyzeDocument.md | 12 ++++++------ .../samples/Sample_AnalyzeLayout.md | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md index eddd109d561a1..d568c3733ea37 100644 --- 
a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md @@ -19,6 +19,10 @@ Azure Cognitive Services Form Recognizer is a cloud service that uses machine le - Custom - Build custom models to extract text, field values, selection marks, and table data from documents. Custom models are trained with your own data, so they're tailored to your documents. ## Common scenarios samples for SDK +- [Analyze the layout of a document](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md) +- [Analyze a general document](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md) +- [Analyze a document with a custom model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md) +- [Analyze a document with a prebuilt model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md) - [Build a model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_BuildModel.md) - [Manage models](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ManageModels.md) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md index e886124cfe9b9..354c2a3916a39 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md @@ -1,6 +1,6 @@ -# Analyze a document +# Analyze a general document -This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, entities, key-value
pairs, and layout information from documents, without the need to train a model. +This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, entities, key-value pairs, and layout information from general documents, without the need to train a model. To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. @@ -17,7 +17,7 @@ var credential = new AzureKeyCredential(apiKey); var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` -## Analyze a document from a URI +## Analyze a general document from a URI To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. @@ -106,7 +106,7 @@ foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) } ``` -## Analyze a document from a file stream +## Analyze a general document from a file stream To analyze a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
@@ -198,7 +198,7 @@ foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) To see the full example source files, see: -* [Analyze document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) -* [Analyze document from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) +* [Analyze general document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) +* [Analyze general document from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md index 181a54906813a..bdb5294723321 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md @@ -1,6 +1,6 @@ # Analyze the layout of a document -This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, and layout information from documents, without the need to train a model. If you want to extract entities and key-value pairs in addition to this data, please see the [Analyze a document][document_sample] sample. +This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, and layout information from documents, without the need to train a model. 
If you want to extract entities and key-value pairs in addition to this data, please see the [Analyze a general document][document_sample] sample. To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. From d15ca8aae6c87258fede36e9a58c96606c3cde8a Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Thu, 30 Sep 2021 12:42:51 -0700 Subject: [PATCH 09/13] Update sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md Co-authored-by: Mariana Rios Flores --- .../samples/Sample_AnalyzeWithCustomModel.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md index 384efabfbdcfa..60231dbaf4909 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md @@ -31,7 +31,7 @@ await operation.WaitForCompletionAsync(); AnalyzeResult result = operation.Value; -Console.WriteLine($"Document was analyzed with model with ID: {result.ModelId}"); +Console.WriteLine($"Document was analyzed with model ID: {result.ModelId}"); foreach (AnalyzedDocument document in result.Documents) { From a575a5c3bd362a60412dff0e3dc78b2c949a9ce5 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Thu, 30 Sep 2021 14:42:49 -0700 Subject: [PATCH 10/13] Addressed comments --- .../Azure.AI.FormRecognizer/samples/README.md | 6 +- ...t.md => Sample_AnalyzePrebuiltDocument.md} | 62 ++++++++++++------- .../samples/Sample_AnalyzeWithCustomModel.md | 4 +- .../Sample_AnalyzeWithPrebuiltModel.md | 12 ++-- ...alyzeLayout.md => Sample_ExtractLayout.md} | 46 ++++++++------ .../src/DocumentAnalysisClient.cs | 8 +-- ...e_AnalyzePrebuiltDocumentFromFileAsync.cs} | 27 +++++--- 
...le_AnalyzePrebuiltDocumentFromUriAsync.cs} | 27 +++++--- ...ple_AnalyzeWithCustomModelFromFileAsync.cs | 2 +- ...mple_AnalyzeWithCustomModelFromUriAsync.cs | 2 +- ...s => Sample_ExtractLayoutFromFileAsync.cs} | 17 ++--- ...cs => Sample_ExtractLayoutFromUriAsync.cs} | 17 ++--- 12 files changed, 135 insertions(+), 95 deletions(-) rename sdk/formrecognizer/Azure.AI.FormRecognizer/samples/{Sample_AnalyzeDocument.md => Sample_AnalyzePrebuiltDocument.md} (69%) rename sdk/formrecognizer/Azure.AI.FormRecognizer/samples/{Sample_AnalyzeLayout.md => Sample_ExtractLayout.md} (75%) rename sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/{Sample_AnalyzeDocumentFromFileAsync.cs => Sample_AnalyzePrebuiltDocumentFromFileAsync.cs} (77%) rename sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/{Sample_AnalyzeDocumentFromUriAsync.cs => Sample_AnalyzePrebuiltDocumentFromUriAsync.cs} (76%) rename sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/{Sample_AnalyzeLayoutFromFileAsync.cs => Sample_ExtractLayoutFromFileAsync.cs} (84%) rename sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/{Sample_AnalyzeLayoutFromUriAsync.cs => Sample_ExtractLayoutFromUriAsync.cs} (84%) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md index d568c3733ea37..13f939609b9c9 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md @@ -16,11 +16,11 @@ Azure Cognitive Services Form Recognizer is a cloud service that uses machine le - Layout - Extract text, table structures, and selection marks, along with their bounding region coordinates, from documents. - Document - Analyze entities, key-value pairs, tables, and selection marks from documents using the general prebuilt document model. 
- Prebuilt - Analyze data from certain types of common documents (such as receipts, invoices, business cards, or identity documents) using pre-trained models. -- Custom - Build custom models to extract text, field values, selection marks, and table data from documents. Custom models are trained with your own data, so they're tailored to your documents. +- Custom - Build custom models to analyze text, field values, selection marks, and table data from documents. Custom models are trained with your own data, so they're tailored to your documents. ## Common scenarios samples for SDK -- [Analyze the layout of a document](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md) -- [Analyze a general document](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md) +- [Extract the layout of a document](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md) +- [Analyze with the prebuilt document model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md) - [Analyze a document with a custom model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md) - [Analyze a document with a prebuilt model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md) - [Build a model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_BuildModel.md) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md similarity index 69% rename from
sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md rename to sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md index 354c2a3916a39..3af0321f3da3f 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md @@ -1,6 +1,6 @@ -# Analyze a general document +# Analyze with the prebuilt document model -This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, entities, key-value pairs, and layout information from general documents, without the need to train a model. +This sample demonstrates how to analyze entities, key-value pairs, tables, and selection marks from documents using the general prebuilt document model. To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. @@ -17,11 +17,11 @@ var credential = new AzureKeyCredential(apiKey); var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` -## Analyze a general document from a URI +## Use the prebuilt document model to analyze a document from a URI To analyze a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
-```C# Snippet:FormRecognizerAnalyzeDocumentFromUriAsync +```C# Snippet:FormRecognizerAnalyzePrebuiltDocumentFromUriAsync string fileUri = ""; AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-document", fileUri); @@ -32,7 +32,8 @@ AnalyzeResult result = operation.Value; foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -68,49 +69,55 @@ foreach (DocumentStyle style in result.Styles) if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } +Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } +Console.WriteLine("Detected entities:"); + foreach (DocumentEntity entity in result.Entities) { if (entity.SubCategory == null) { - Console.WriteLine($"Found entity with category 
{entity.Category}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}'."); } else { - Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}' and sub-category '{entity.SubCategory}'."); } } +Console.WriteLine("Detected key-value pairs:"); + foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); } ``` -## Analyze a general document from a file stream +## Use the prebuilt document model to analyze a document from a file stream To analyze a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-document` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
-```C# Snippet:FormRecognizerAnalyzeDocumentFromFileAsync +```C# Snippet:FormRecognizerAnalyzePrebuiltDocumentFromFileAsync string filePath = "filePath"; using var stream = new FileStream(filePath, FileMode.Open); @@ -122,7 +129,8 @@ AnalyzeResult result = operation.Value; foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -158,47 +166,53 @@ foreach (DocumentStyle style in result.Styles) if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } +Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } +Console.WriteLine("Detected entities:"); + foreach (DocumentEntity entity in result.Entities) { if (entity.SubCategory == null) { - Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'"); + 
Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}'."); } else { - Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}' and sub-category '{entity.SubCategory}'."); } } +Console.WriteLine("Detected key-value pairs:"); + foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); } ``` To see the full example source files, see: -* [Analyze general document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs) -* [Analyze general document from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs) +* [Analyze with prebuilt document from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs) +* [Analyze with prebuilt document from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs) [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md index 60231dbaf4909..ad22900975d74 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md +++ 
b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md @@ -1,6 +1,6 @@ # Analyze a document with a custom model -This sample demonstrates how to extract text and key information from your custom documents, using models you built with your own document types. For more information on how to do the training, see [build a model][build_a_model]. +This sample demonstrates how to analyze text, field values, selection marks, and table data from custom documents. Custom models are trained with your own data, so they're tailored to your documents. For more information on how to do the training, see [build a model][build_a_model]. To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. @@ -31,7 +31,7 @@ await operation.WaitForCompletionAsync(); AnalyzeResult result = operation.Value; -Console.WriteLine($"Document was analyzed with model ID: {result.ModelId}"); +Console.WriteLine($"Document was analyzed with model with ID: {result.ModelId}"); foreach (AnalyzedDocument document in result.Documents) { diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md index bf4ae3f593204..96bc965defbd3 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md @@ -1,6 +1,6 @@ # Analyze a document with a prebuilt model -This sample demonstrates how to extract text and key information from documents with one of the service's prebuilt models, using an invoice as an example. For a list of the types of documents supported by the Form Recognize service's prebuilt models, please check the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. 
+This sample demonstrates how to analyze data from certain types of common documents with pre-trained models, using an invoice as an example. For a list of the types of documents supported by the Form Recognizer service's prebuilt models, please check the [Choosing the prebuilt model ID][choosing-the-prebuilt-model-id] section. To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. @@ -23,7 +23,7 @@ The model to use for the analyze operation depends on the type of document to be - prebuilt-businessCard: extracts text and key information from English business cards. [Supported fields][businessCard_fields]. - prebuilt-idDocument: extracts text and key information from US driver licenses and international passports. [Supported fields][idDocument_fields]. -- prebuilt-invoice: extracts text, selection marks, tables, key-value pairs, and key information from English invoices. [Supported fields][invoice_fields]. +- prebuilt-invoice: extracts text, selection marks, tables, key-value pairs, and key information from invoices. [Supported fields][invoice_fields]. - prebuilt-receipt: extracts text and key information from English receipts. [Supported fields][receipt_fields]. 
## Use a prebuilt model to analyze a document from a URI @@ -252,9 +252,9 @@ To see the full example source files, see: * [Analyze with prebuilt model from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromUriAsync.cs) * [Analyze with prebuilt model from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithPrebuiltModelFromFileAsync.cs) -[businessCard_fields]: https://aka.ms/formrecognizer/businesscardfields -[idDocument_fields]: https://aka.ms/formrecognizer/iddocumentfields -[invoice_fieds]: https://aka.ms/formrecognizer/invoicefields -[receipt_fields]: https://aka.ms/formrecognizer/receiptfields +[businessCard_fields]: https://aka.ms/azsdk/formrecognizer/businesscardfieldschema +[idDocument_fields]: https://aka.ms/azsdk/formrecognizer/iddocumentfieldschema +[invoice_fields]: https://aka.ms/azsdk/formrecognizer/invoicefieldschema +[receipt_fields]: https://aka.ms/azsdk/formrecognizer/receiptfieldschema [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md similarity index 75% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md rename to sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md index bdb5294723321..5c727a3b52475 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeLayout.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md @@ -1,6 +1,6 @@ -# Analyze the layout of a document +# Extract the layout of a document -This sample demonstrates how to extract text, tables, styles, selection marks like radio buttons, and layout information from
documents, without the need to train a model. If you want to extract entities and key-value pairs in addition to this data, please see the [Analyze a general document][document_sample] sample. +This sample demonstrates how to extract text, table structures, and selection marks, along with their bounding region coordinates, from documents. If you want to analyze entities and key-value pairs in addition to this data, please see the [Analyze with the prebuilt document model][document_sample] sample. To get started you'll need a Cognitive Services resource or a Form Recognizer resource. See [README][README] for prerequisites and instructions. @@ -17,11 +17,11 @@ var credential = new AzureKeyCredential(apiKey); var client = new DocumentAnalysisClient(new Uri(endpoint), credential); ``` -## Analyze the layout of a document from a URI +## Extract the layout of a document from a URI -To analyze the layout from a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. +To extract the layout from a given file at a URI, use the `StartAnalyzeDocumentFromUri` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. 
-```C# Snippet:FormRecognizerAnalyzeLayoutFromUriAsync +```C# Snippet:FormRecognizerExtractLayoutFromUriAsync string fileUri = ""; AnalyzeDocumentOperation operation = await client.StartAnalyzeDocumentFromUriAsync("prebuilt-layout", fileUri); @@ -32,7 +32,8 @@ AnalyzeResult result = operation.Value; foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -68,32 +69,34 @@ foreach (DocumentStyle style in result.Styles) if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } +Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } ``` -## Analyze the layout of a document from a file stream +## Extract the layout of a document from a file stream -To analyze the layout from a given file at a file stream, use the 
`StartAnalyzeDocument` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. +To extract the layout from a given file at a file stream, use the `StartAnalyzeDocument` method and pass `prebuilt-layout` as the model ID. The returned value is an `AnalyzeResult` object containing data about the submitted document. -```C# Snippet:FormRecognizerAnalyzeLayoutFromFileAsync +```C# Snippet:FormRecognizerExtractLayoutFromFileAsync string filePath = "filePath"; using var stream = new FileStream(filePath, FileMode.Open); @@ -105,7 +108,8 @@ AnalyzeResult result = operation.Value; foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -141,31 +145,33 @@ foreach (DocumentStyle style in result.Styles) if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } +Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' 
and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } ``` To see the full example source files, see: -* [Analyze layout from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs) -* [Analyze layout from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs) +* [Extract layout from URI](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromUriAsync.cs) +* [Extract layout from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromFileAsync.cs) [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started [document_sample]: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.cs diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs index 8a97bc10d854f..9fb03680b595b 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/DocumentAnalysisClient.cs @@ -124,7 +124,7 @@ protected DocumentAnalysisClient() /// "prebuilt-document": extracts text, selection marks, tables, layout information, entities, and key-value pairs from documents. /// "prebuilt-businessCard": extracts text and key information from English business cards. /// "prebuilt-idDocument": extracts text and key information from US driver licenses and international passports. 
- /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from English invoices. + /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from invoices. /// "prebuilt-receipt": extracts text and key information from English receipts. /// /// @@ -181,7 +181,7 @@ public virtual async Task StartAnalyzeDocumentAsync(st /// "prebuilt-document": extracts text, selection marks, tables, layout information, entities, and key-value pairs from documents. /// "prebuilt-businessCard": extracts text and key information from English business cards. /// "prebuilt-idDocument": extracts text and key information from US driver licenses and international passports. - /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from English invoices. + /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from invoices. /// "prebuilt-receipt": extracts text and key information from English receipts. /// /// @@ -238,7 +238,7 @@ public virtual AnalyzeDocumentOperation StartAnalyzeDocument(string modelId, Str /// "prebuilt-document": extracts text, selection marks, tables, layout information, entities, and key-value pairs from documents. /// "prebuilt-businessCard": extracts text and key information from English business cards. /// "prebuilt-idDocument": extracts text and key information from US driver licenses and international passports. - /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from English invoices. + /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from invoices. /// "prebuilt-receipt": extracts text and key information from English receipts. 
/// /// @@ -295,7 +295,7 @@ public virtual async Task StartAnalyzeDocumentFromUriA /// "prebuilt-document": extracts text, selection marks, tables, layout information, entities, and key-value pairs from documents. /// "prebuilt-businessCard": extracts text and key information from English business cards. /// "prebuilt-idDocument": extracts text and key information from US driver licenses and international passports. - /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from English invoices. + /// "prebuilt-invoice": extracts text, selection marks, tables, key-value pairs, and key information from invoices. /// "prebuilt-receipt": extracts text and key information from English receipts. /// /// diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs similarity index 77% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs index 4369e82990b29..020b1333a327d 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromFileAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs @@ -13,14 +13,14 @@ namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples public partial class DocumentAnalysisSamples : SamplesBase { [Test] - public async Task AnalyzeDocumentFromFileAsync() + public async Task AnalyzePrebuiltDocumentFromFileAsync() { string endpoint = TestEnvironment.Endpoint; string apiKey = TestEnvironment.ApiKey; DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); - #region Snippet:FormRecognizerAnalyzeDocumentFromFileAsync + #region 
Snippet:FormRecognizerAnalyzePrebuiltDocumentFromFileAsync #if SNIPPET string filePath = "filePath"; #else @@ -36,7 +36,8 @@ public async Task AnalyzeDocumentFromFileAsync() foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -72,41 +73,47 @@ public async Task AnalyzeDocumentFromFileAsync() if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } + Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } + Console.WriteLine("Detected entities:"); + foreach (DocumentEntity entity in result.Entities) { if (entity.SubCategory == null) { - Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}'."); } else { - 
Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}' and sub-category '{entity.SubCategory}'."); } } + Console.WriteLine("Detected key-value pairs:"); + foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); } #endregion diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs similarity index 76% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs index d938f0cfc1e38..cf23e107c96ec 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeDocumentFromUriAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs @@ -12,14 +12,14 @@ namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples public partial class DocumentAnalysisSamples : SamplesBase { [Test] - public async Task AnalyzeDocumentFromUriAsync() + public async Task AnalyzePrebuiltDocumentFromUriAsync() { string endpoint = TestEnvironment.Endpoint; string apiKey = TestEnvironment.ApiKey; DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); - #region Snippet:FormRecognizerAnalyzeDocumentFromUriAsync + #region Snippet:FormRecognizerAnalyzePrebuiltDocumentFromUriAsync #if SNIPPET string fileUri = ""; #else @@ -34,7 +34,8 @@ public async Task AnalyzeDocumentFromUriAsync() foreach (DocumentPage page in 
result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -70,41 +71,47 @@ public async Task AnalyzeDocumentFromUriAsync() if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } + Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } + Console.WriteLine("Detected entities:"); + foreach (DocumentEntity entity in result.Entities) { if (entity.SubCategory == null) { - Console.WriteLine($"Found entity with category {entity.Category}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category '{entity.Category}'."); } else { - Console.WriteLine($"Found entity with category {entity.Category} and sub-category {entity.SubCategory}: '{entity.Content}'"); + Console.WriteLine($" Found entity '{entity.Content}' with category 
'{entity.Category}' and sub-category '{entity.SubCategory}'."); } } + Console.WriteLine("Detected key-value pairs:"); + foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($"Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); } #endregion diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs index 409e8096efd5a..6ef68a9e12846 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromFileAsync.cs @@ -23,7 +23,7 @@ public async Task AnalyzeWithCustomModelFromFileAsync() // Firstly, create a custom built model we can use to recognize the custom document. 
Please note // that models can also be built using a graphical user interface such as the Form Recognizer // Labeling Tool found here: - // https://docs.microsoft.com/azure/cognitive-services/form-recognizer/label-tool?tabs=v2-1 + // https://aka.ms/azsdk/formrecognizer/labelingtool var adminClient = new DocumentModelAdministrationClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); BuildModelOperation buildOperation = await adminClient.StartBuildModelAsync(trainingFileUri); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs index 22510ba29ac6f..6121c56fff873 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeWithCustomModelFromUriAsync.cs @@ -22,7 +22,7 @@ public async Task AnalyzeWithCustomModelFromUriAsync() // Firstly, create a custom built model we can use to recognize the custom document. 
Please note // that models can also be built using a graphical user interface such as the Form Recognizer // Labeling Tool found here: - // https://docs.microsoft.com/azure/cognitive-services/form-recognizer/label-tool?tabs=v2-1 + // https://aka.ms/azsdk/formrecognizer/labelingtool var adminClient = new DocumentModelAdministrationClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); BuildModelOperation buildOperation = await adminClient.StartBuildModelAsync(trainingFileUri); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromFileAsync.cs similarity index 84% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromFileAsync.cs index 748b78e2dc249..b2cc73729761d 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromFileAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromFileAsync.cs @@ -13,14 +13,14 @@ namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples public partial class DocumentAnalysisSamples : SamplesBase { [Test] - public async Task AnalyzeLayoutFromFileAsync() + public async Task ExtractLayoutFromFileAsync() { string endpoint = TestEnvironment.Endpoint; string apiKey = TestEnvironment.ApiKey; DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); - #region Snippet:FormRecognizerAnalyzeLayoutFromFileAsync + #region Snippet:FormRecognizerExtractLayoutFromFileAsync #if SNIPPET string filePath = "filePath"; #else @@ -36,7 +36,8 @@ public async Task AnalyzeLayoutFromFileAsync() foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); 
+ Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -72,23 +73,25 @@ public async Task AnalyzeLayoutFromFileAsync() if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } + Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromUriAsync.cs similarity index 84% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromUriAsync.cs index 850afe04824be..ac4a888bd953c 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzeLayoutFromUriAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromUriAsync.cs 
@@ -12,14 +12,14 @@ namespace Azure.AI.FormRecognizer.DocumentAnalysis.Samples public partial class DocumentAnalysisSamples : SamplesBase { [Test] - public async Task AnalyzeLayoutFromUriAsync() + public async Task ExtractLayoutFromUriAsync() { string endpoint = TestEnvironment.Endpoint; string apiKey = TestEnvironment.ApiKey; DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); - #region Snippet:FormRecognizerAnalyzeLayoutFromUriAsync + #region Snippet:FormRecognizerExtractLayoutFromUriAsync #if SNIPPET string fileUri = ""; #else @@ -34,7 +34,8 @@ public async Task AnalyzeLayoutFromUriAsync() foreach (DocumentPage page in result.Pages) { - Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s) and {page.Words.Count} word(s)."); + Console.WriteLine($"Document Page {page.PageNumber} has {page.Lines.Count} line(s), {page.Words.Count} word(s),"); + Console.WriteLine($"and {page.SelectionMarks.Count} selection mark(s)."); for (int i = 0; i < page.Lines.Count; i++) { @@ -70,23 +71,25 @@ public async Task AnalyzeLayoutFromUriAsync() if (isHandwritten && style.Confidence > 0.8) { - Console.WriteLine($"Handwritten content found in spans:"); + Console.WriteLine($"Handwritten content found:"); foreach (DocumentSpan span in style.Spans) { - Console.WriteLine($" Content with length {span.Length} at offset {span.Offset}."); + Console.WriteLine($" Content: {result.Content.Substring(span.Offset, span.Length)}"); } } } + Console.WriteLine("The following tables were extracted:"); + for (int i = 0; i < result.Tables.Count; i++) { DocumentTable table = result.Tables[i]; - Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); + Console.WriteLine($" Table {i} has {table.RowCount} rows and {table.ColumnCount} columns."); foreach (DocumentTableCell cell in table.Cells) { - Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and 
content: '{cell.Content}'."); + Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) has kind '{cell.Kind}' and content: '{cell.Content}'."); } } From 3e6a28bdb19e083225659383c61dbbb609eb0ad2 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Thu, 30 Sep 2021 14:46:39 -0700 Subject: [PATCH 11/13] Addressing 2 --- .../Sample_AnalyzePrebuiltDocumentFromFileAsync.cs | 9 ++++++++- .../Sample_AnalyzePrebuiltDocumentFromUriAsync.cs | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs index 020b1333a327d..14dc02af1dc2e 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromFileAsync.cs @@ -113,7 +113,14 @@ public async Task AnalyzePrebuiltDocumentFromFileAsync() foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + if (kvp.Value.Content == null) + { + Console.WriteLine($" Found key with no value: '{kvp.Key.Content}'"); + } + else + { + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + } } #endregion diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs index cf23e107c96ec..941d0f561f34e 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_AnalyzePrebuiltDocumentFromUriAsync.cs @@ -111,7 +111,14 @@ public async Task 
AnalyzePrebuiltDocumentFromUriAsync() foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + if (kvp.Value.Content == null) + { + Console.WriteLine($" Found key with no value: '{kvp.Key.Content}'"); + } + else + { + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + } } #endregion From d4011226bd898945fb3f3d8008c16e24f56c1a67 Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Thu, 30 Sep 2021 15:12:56 -0700 Subject: [PATCH 12/13] Fixed links --- sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md | 4 ++-- .../Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md index 13f939609b9c9..44a92a33a9027 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/README.md @@ -21,8 +21,8 @@ Azure Cognitive Services Form Recognizer is a cloud service that uses machine le ## Common scenarios samples for SDK - [Extract the layout of a document](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md) - [Analyze with the prebuilt document model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md) -- [Analyze a document with a custom model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeCustom.md) -- [Analyze a document with a prebuilt model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuilt.md) +- [Analyze a document with a custom 
model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithCustomModel.md) +- [Analyze a document with a prebuilt model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeWithPrebuiltModel.md) - [Build a model](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_BuildModel.md) - [Manage models](https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ManageModels.md) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md index 5c727a3b52475..6db86f2b7dda8 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_ExtractLayout.md @@ -174,4 +174,4 @@ To see the full example source files, see: * [Extract layout from file](https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/samples/Sample_ExtractLayoutFromFileAsync.cs) [README]: https://github.com/Azure/azure-sdk-for-net/tree/main/sdk/formrecognizer/Azure.AI.FormRecognizer#getting-started -[document_sample]: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzeDocument.cs +[document_sample]: https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md From 03beaaea24646faa7dfc15de2a8f8861f328029b Mon Sep 17 00:00:00 2001 From: Caio Saldanha Date: Thu, 30 Sep 2021 15:24:01 -0700 Subject: [PATCH 13/13] Snippets --- .../samples/Sample_AnalyzePrebuiltDocument.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git 
a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md index 3af0321f3da3f..7025802322ba5 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample_AnalyzePrebuiltDocument.md @@ -109,7 +109,14 @@ Console.WriteLine("Detected key-value pairs:"); foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + if (kvp.Value.Content == null) + { + Console.WriteLine($" Found key with no value: '{kvp.Key.Content}'"); + } + else + { + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + } } ``` @@ -206,7 +213,14 @@ Console.WriteLine("Detected key-value pairs:"); foreach (DocumentKeyValuePair kvp in result.KeyValuePairs) { - Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + if (kvp.Value.Content == null) + { + Console.WriteLine($" Found key with no value: '{kvp.Key.Content}'"); + } + else + { + Console.WriteLine($" Found key-value pair: '{kvp.Key.Content}' and '{kvp.Value.Content}'"); + } } ```