Skip to content

Commit

Permalink
Add support for building model with AzureBlobContentSource (#30234)
Browse files Browse the repository at this point in the history
  • Loading branch information
samvaity authored Aug 5, 2022
1 parent 1e54ffc commit daec05b
Show file tree
Hide file tree
Showing 36 changed files with 161 additions and 162 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public void run() {
SyncPoller<DocumentOperationResult, DocumentModelDetails>
syncPoller = documentModelAdministrationAsyncClient
.beginBuildModel(FORM_RECOGNIZER_TRAINING_BLOB_CONTAINER_SAS_URL,
DocumentModelBuildMode.TEMPLATE,
DocumentModelBuildMode.TEMPLATE, null,
new BuildModelOptions().setDescription("perf-training-model"))
.getSyncPoller();
modelId = syncPoller.getFinalResult().getModelId();
Expand All @@ -45,7 +45,7 @@ public void run() {
public Mono<Void> runAsync() {
return documentModelAdministrationAsyncClient
.beginBuildModel(FORM_RECOGNIZER_TRAINING_BLOB_CONTAINER_SAS_URL,
DocumentModelBuildMode.TEMPLATE,
DocumentModelBuildMode.TEMPLATE, null,
new BuildModelOptions().setDescription("perf-training-model"))
.last()
.flatMap(pollResponse -> {
Expand Down
8 changes: 5 additions & 3 deletions sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@

### Features Added
- Add a constructor to create an instance of `CopyAuthorization` model.
- Added BinaryData support to document analysis client using `beginAnalyzeDocument(String modelId, BinaryData document, long length)`

### Breaking Changes
- Added BinaryData support to document analysis client using `beginAnalyzeDocument(String modelId, BinaryData document, long length)`
- Renamed `beginCreateComposedModel` method to `beginComposeModel` on DocumentModelAdministrationClient and DocumentModelAdministrationAsyncClient.
- Renamed `CreateComposedModelOptions` model to `ComposeModelOptions`
- Renamed `DocumentModelInfo` model to `DocumentModelSummary`
- Renamed `DocumentModelDetails` model to `DocumentModelInfo`
- Renamed `DocumentModelInfo` model to `DocumentModelDetails`
- Renamed `ModelOperation` model to `ModelOperationDetails`
- Renamed `ModelOperationInfo` model to `ModelOperationSummary`
- Renamed `getAccountProperties` method to `getResourceDetails`
Expand All @@ -20,6 +19,9 @@
- Renamed `DocTypeInfo` model to `DocumentTypeDetails`
- Renamed `docTypes` property on `DocumentModelDetails` model to `documentTypes`
- Removed models `DocumentModelOperationException`, `DocumentModelOperationError` and `DocumentModelOperationInnerError`.
- Renamed the `trainingFilesUrl` parameter to `blobContainerUrl` in the `beginBuildModel` methods
- Changed method `beginBuildModel(String, DocumentModelBuildMode, BuildModelOptions)` to `beginBuildModel(String, DocumentModelBuildMode, String, BuildModelOptions)`
- Removed `prefix` property on model `BuildModelOptions`

### Bugs Fixed

Expand Down
5 changes: 4 additions & 1 deletion sdk/formrecognizer/azure-ai-formrecognizer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,10 +376,13 @@ More details on setting up a container and required file structure can be found
// Build custom document analysis model
String trainingFilesUrl = "{SAS_URL_of_your_container_in_blob_storage}";
// The shared access signature (SAS) URL of your Azure Blob Storage container with your forms.
String prefix = "{blob_name_prefix}";
SyncPoller<DocumentOperationResult, DocumentModelDetails> buildOperationPoller =
documentModelAdminClient.beginBuildModel(trainingFilesUrl,
DocumentModelBuildMode.TEMPLATE,
new BuildModelOptions().setModelId("my-build-model").setDescription("model desc"), Context.NONE);
prefix,
new BuildModelOptions().setModelId("my-build-model").setDescription("model desc"),
Context.NONE);

DocumentModelDetails documentModelDetails = buildOperationPoller.getFinalResult();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -522,10 +522,13 @@ Build a custom document model using 4.x.x `beginBuildModel`:
// Build custom document analysis model
String trainingFilesUrl = "{SAS_URL_of_your_container_in_blob_storage}";
// The shared access signature (SAS) URL of your Azure Blob Storage container with your forms.
String prefix = "{blob_name_prefix}";
SyncPoller<DocumentOperationResult, DocumentModelDetails> buildOperationPoller =
documentModelAdminClient.beginBuildModel(trainingFilesUrl,
DocumentModelBuildMode.TEMPLATE,
new BuildModelOptions().setModelId("my-build-model").setDescription("model desc"), Context.NONE);
prefix,
new BuildModelOptions().setModelId("my-build-model").setDescription("model desc"),
Context.NONE);

DocumentModelDetails documentModelDetails = buildOperationPoller.getFinalResult();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ DocumentAnalysisAudience getAudience() {
* <p><strong>Code sample</strong></p>
* <!-- src_embed com.azure.ai.formrecognizer.administration.DocumentModelAdministrationAsyncClient.beginBuildModel#String-DocumentModelBuildMode -->
* <pre>
* String trainingFilesUrl = &quot;&#123;SAS-URL-of-your-container-in-blob-storage&#125;&quot;;
* documentModelAdministrationAsyncClient.beginBuildModel&#40;trainingFilesUrl,
* String blobContainerUrl = &quot;&#123;SAS-URL-of-your-container-in-blob-storage&#125;&quot;;
* documentModelAdministrationAsyncClient.beginBuildModel&#40;blobContainerUrl,
* DocumentModelBuildMode.TEMPLATE
* &#41;
* &#47;&#47; if polling operation completed, retrieve the final result.
Expand All @@ -171,7 +171,7 @@ DocumentAnalysisAudience getAudience() {
* </pre>
* <!-- end com.azure.ai.formrecognizer.administration.DocumentModelAdministrationAsyncClient.beginBuildModel#String-DocumentModelBuildMode -->
*
* @param trainingFilesUrl an Azure Storage blob container's SAS URI. A container URI (without SAS)
* @param blobContainerUrl an Azure Storage blob container's SAS URI. A container URI (without SAS)
* can be used if the container is public or has a managed identity configured. For more information on
* setting up a training data set, see: <a href="https://aka.ms/azsdk/formrecognizer/buildcustommodel">here</a>.
* @param buildMode the preferred technique for creating models. For faster training of models use
Expand All @@ -180,12 +180,12 @@ DocumentAnalysisAudience getAudience() {
* @return A {@link PollerFlux} that polls the building model operation until it has completed, has failed, or has
* been cancelled. The completed operation returns the trained {@link DocumentModelDetails custom document analysis model}.
* @throws HttpResponseException If building the model fails and the operation ends with status {@link OperationStatus#FAILED}.
* @throws NullPointerException If {@code trainingFilesUrl} is null.
* @throws NullPointerException If {@code blobContainerUrl} is null.
*/
@ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
public PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String trainingFilesUrl,
public PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String blobContainerUrl,
DocumentModelBuildMode buildMode) {
return beginBuildModel(trainingFilesUrl, buildMode, null);
return beginBuildModel(blobContainerUrl, buildMode, null, null);
}

/**
Expand All @@ -199,17 +199,18 @@ public PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel
* for information on building your own administration data set.
*
* <p><strong>Code sample</strong></p>
* <!-- src_embed com.azure.ai.formrecognizer.administration.DocumentModelAdministrationAsyncClient.beginBuildModel#String-DocumentModelBuildMode-BuildModelOptions -->
* <!-- src_embed com.azure.ai.formrecognizer.administration.DocumentModelAdministrationAsyncClient.beginBuildModel#String-DocumentModelBuildMode-String-BuildModelOptions -->
* <pre>
* String trainingFilesUrl = &quot;&#123;SAS-URL-of-your-container-in-blob-storage&#125;&quot;;
* String blobContainerUrl = &quot;&#123;SAS-URL-of-your-container-in-blob-storage&#125;&quot;;
* String modelId = &quot;model-id&quot;;
* Map&lt;String, String&gt; attrs = new HashMap&lt;String, String&gt;&#40;&#41;;
* attrs.put&#40;&quot;createdBy&quot;, &quot;sample&quot;&#41;;
* String prefix = &quot;Invoice&quot;;
*
* documentModelAdministrationAsyncClient.beginBuildModel&#40;trainingFilesUrl,
* documentModelAdministrationAsyncClient.beginBuildModel&#40;blobContainerUrl,
* DocumentModelBuildMode.TEMPLATE,
* prefix,
* new BuildModelOptions&#40;&#41;
* .setPrefix&#40;&quot;Invoice&quot;&#41;
* .setModelId&#40;modelId&#41;
* .setDescription&#40;&quot;model desc&quot;&#41;
* .setTags&#40;attrs&#41;&#41;
Expand All @@ -229,32 +230,35 @@ public PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel
* &#125;&#41;;
* &#125;&#41;;
* </pre>
* <!-- end com.azure.ai.formrecognizer.administration.DocumentModelAdministrationAsyncClient.beginBuildModel#String-DocumentModelBuildMode-BuildModelOptions -->
* <!-- end com.azure.ai.formrecognizer.administration.DocumentModelAdministrationAsyncClient.beginBuildModel#String-DocumentModelBuildMode-String-BuildModelOptions -->
*
* @param trainingFilesUrl an Azure Storage blob container's SAS URI. A container URI (without SAS)
* @param blobContainerUrl an Azure Storage blob container's SAS URI. A container URI (without SAS)
* can be used if the container is public or has a managed identity configured. For more information on
* setting up a training data set, see: <a href="https://aka.ms/azsdk/formrecognizer/buildcustommodel">here</a>.
* @param buildMode the preferred technique for creating models. For faster training of models use
* {@link DocumentModelBuildMode#TEMPLATE}. See <a href="https://aka.ms/azsdk/formrecognizer/buildmode">here</a>
* for more information on building mode for custom documents.
* @param prefix case-sensitive blob name prefix used to filter documents for training.
* @param buildModelOptions The configurable {@link BuildModelOptions options} to pass when
* building a custom document analysis model.
* @return A {@link PollerFlux} that polls the building model operation until it has completed, has failed, or has
* been cancelled. The completed operation returns the trained {@link DocumentModelDetails custom document analysis model}.
* @throws HttpResponseException If building the model fails and the operation ends with status {@link OperationStatus#FAILED}.
* @throws NullPointerException If {@code trainingFilesUrl} is null.
* @throws NullPointerException If {@code blobContainerUrl} is null.
*/
@ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
public PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String trainingFilesUrl,
DocumentModelBuildMode buildMode,
BuildModelOptions buildModelOptions) {
return beginBuildModel(trainingFilesUrl, buildMode, buildModelOptions, Context.NONE);
public PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String blobContainerUrl,
DocumentModelBuildMode buildMode,
String prefix,
BuildModelOptions buildModelOptions) {
return beginBuildModel(blobContainerUrl, buildMode, prefix, buildModelOptions, Context.NONE);
}

PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String trainingFilesUrl,
DocumentModelBuildMode buildMode,
BuildModelOptions buildModelOptions,
Context context) {
PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String blobContainerUrl,
DocumentModelBuildMode buildMode,
String prefix,
BuildModelOptions buildModelOptions,
Context context) {

buildModelOptions = buildModelOptions == null ? new BuildModelOptions() : buildModelOptions;
String modelId = buildModelOptions.getModelId();
Expand All @@ -263,7 +267,7 @@ PollerFlux<DocumentOperationResult, DocumentModelDetails> beginBuildModel(String
}
return new PollerFlux<DocumentOperationResult, DocumentModelDetails>(
DEFAULT_POLL_INTERVAL,
buildModelActivationOperation(trainingFilesUrl, buildMode, modelId, buildModelOptions, context),
buildModelActivationOperation(blobContainerUrl, buildMode, modelId, prefix, buildModelOptions, context),
createModelPollOperation(context),
(activationResponse, pollingContext) -> Mono.error(new RuntimeException("Cancellation is not supported")),
fetchModelResultOperation(context));
Expand Down Expand Up @@ -928,18 +932,18 @@ PagedFlux<ModelOperationSummary> listOperations(Context context) {

private Function<PollingContext<DocumentOperationResult>, Mono<DocumentOperationResult>>
buildModelActivationOperation(
String trainingFilesUrl, DocumentModelBuildMode buildMode, String modelId,
BuildModelOptions buildModelOptions, Context context) {
String blobContainerUrl, DocumentModelBuildMode buildMode, String modelId,
String prefix, BuildModelOptions buildModelOptions, Context context) {
return (pollingContext) -> {
try {
Objects.requireNonNull(trainingFilesUrl, "'trainingFilesUrl' cannot be null.");
Objects.requireNonNull(blobContainerUrl, "'blobContainerUrl' cannot be null.");
BuildDocumentModelRequest buildDocumentModelRequest = new BuildDocumentModelRequest()
.setModelId(modelId)
.setBuildMode(com.azure.ai.formrecognizer.implementation.models.DocumentBuildMode
.fromString(buildMode.toString()))
.setAzureBlobSource(new AzureBlobContentSource()
.setContainerUrl(trainingFilesUrl)
.setPrefix(buildModelOptions.getPrefix()))
.setContainerUrl(blobContainerUrl)
.setPrefix(prefix))
.setDescription(buildModelOptions.getDescription())
.setTags(buildModelOptions.getTags());

Expand Down
Loading

0 comments on commit daec05b

Please sign in to comment.