From 9dce0cb93b097daed29c29637264a41ea4454c67 Mon Sep 17 00:00:00 2001 From: Mike <45373284+munkhuushmgl@users.noreply.github.com> Date: Wed, 10 Feb 2021 13:17:28 -0800 Subject: [PATCH] chore: added conditional check to prevent IndexOutOfBoundsException (#343) * chore: added conditional check to prevent IndexOutOfBoundsException * nit * removed first lang part from batchTable sample --- .../v1beta2/BatchParseFormBeta.java | 34 +++++++-------- .../v1beta2/BatchParseTableBeta.java | 42 +++++++++++-------- .../documentai/v1beta2/ParseFormBeta.java | 14 ++++--- .../documentai/v1beta2/ParseTableBeta.java | 38 +++++++++-------- 4 files changed, 70 insertions(+), 58 deletions(-) diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java index ea46ede6a38..ed61ad37b0c 100644 --- a/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java +++ b/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java @@ -68,8 +68,7 @@ public static void batchParseFormGcs( // Initialize client that will be used to send requests. This client only needs to be created // once, and can be reused for multiple requests. After completing all of your requests, call // the "close" method on the client to safely clean up any remaining background resources. 
- try (DocumentUnderstandingServiceClient client = - DocumentUnderstandingServiceClient.create()) { + try (DocumentUnderstandingServiceClient client = DocumentUnderstandingServiceClient.create()) { // Configure the request for processing the PDF String parent = String.format("projects/%s/locations/%s", projectId, location); @@ -103,17 +102,16 @@ public static void batchParseFormGcs( // mime_type can be application/pdf, image/tiff, // and image/gif, or application/json InputConfig config = - InputConfig.newBuilder().setGcsSource(inputUri) - .setMimeType("application/pdf").build(); + InputConfig.newBuilder().setGcsSource(inputUri).setMimeType("application/pdf").build(); - GcsDestination gcsDestination = GcsDestination.newBuilder() - .setUri(String.format("gs://%s/%s", outputGcsBucketName, outputGcsPrefix)).build(); - - OutputConfig outputConfig = OutputConfig.newBuilder() - .setGcsDestination(gcsDestination) - .setPagesPerShard(1) + GcsDestination gcsDestination = + GcsDestination.newBuilder() + .setUri(String.format("gs://%s/%s", outputGcsBucketName, outputGcsPrefix)) .build(); + OutputConfig outputConfig = + OutputConfig.newBuilder().setGcsDestination(gcsDestination).setPagesPerShard(1).build(); + ProcessDocumentRequest request = ProcessDocumentRequest.newBuilder() .setFormExtractionParams(params) @@ -165,13 +163,15 @@ public static void batchParseFormGcs( String text = document.getText(); // Process the output. 
- Document.Page page1 = document.getPages(0); - for (Document.Page.FormField field : page1.getFormFieldsList()) { - String fieldName = getText(field.getFieldName(), text); - String fieldValue = getText(field.getFieldValue(), text); - - System.out.println("Extracted form fields pair:"); - System.out.printf("\t(%s, %s))", fieldName, fieldValue); + if (document.getPagesCount() > 0) { + Document.Page page1 = document.getPages(0); + for (Document.Page.FormField field : page1.getFormFieldsList()) { + String fieldName = getText(field.getFieldName(), text); + String fieldValue = getText(field.getFieldValue(), text); + + System.out.println("Extracted form fields pair:"); + System.out.printf("\t(%s, %s))", fieldName, fieldValue); + } } // Clean up temp file. diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java index a4b4efdda9a..dacaed9510b 100644 --- a/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java +++ b/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java @@ -165,24 +165,30 @@ public static void batchParseTableGcs( String text = document.getText(); // Process the output. - Document.Page page1 = document.getPages(0); - Document.Page.Table table = page1.getTables(0); - - System.out.println("Results from first table processed:"); - System.out.println("Header row:"); - - Document.Page.Table.TableRow headerRow = table.getHeaderRows(0); - - for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) { - if (!tableCell.getLayout().getTextAnchor().getTextSegmentsList().isEmpty()) { - // Extract shards from the text field - // First shard in document doesn't have startIndex property - List textSegments = - tableCell.getLayout().getTextAnchor().getTextSegmentsList(); - int startIdx = - textSegments.size() > 0 ? 
(int) textSegments.get(0).getStartIndex() : 0; - int endIdx = (int) textSegments.get(0).getEndIndex(); - System.out.printf("\t%s", text.substring(startIdx, endIdx)); + if (document.getPagesCount() > 0) { + Document.Page page1 = document.getPages(0); + if (page1.getTablesCount() > 0) { + Document.Page.Table table = page1.getTables(0); + + System.out.println("Results from first table processed:"); + System.out.println("Header row:"); + + if (table.getHeaderRowsCount() > 0) { + Document.Page.Table.TableRow headerRow = table.getHeaderRows(0); + + for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) { + if (!tableCell.getLayout().getTextAnchor().getTextSegmentsList().isEmpty()) { + // Extract shards from the text field + // First shard in document doesn't have startIndex property + List textSegments = + tableCell.getLayout().getTextAnchor().getTextSegmentsList(); + int startIdx = + textSegments.size() > 0 ? (int) textSegments.get(0).getStartIndex() : 0; + int endIdx = (int) textSegments.get(0).getEndIndex(); + System.out.printf("\t%s", text.substring(startIdx, endIdx)); + } + } + } } } diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java index 5cdf7c8e9db..a211a920cab 100644 --- a/document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java +++ b/document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java @@ -90,13 +90,15 @@ public static void parseForm(String projectId, String location, String inputGcsU String text = response.getText(); // Process the output - Document.Page page1 = response.getPages(0); - for (Document.Page.FormField field : page1.getFormFieldsList()) { - String fieldName = getText(field.getFieldName(), text); - String fieldValue = getText(field.getFieldValue(), text); + if (response.getPagesCount() > 0) { + Document.Page page1 = response.getPages(0); + for (Document.Page.FormField field : 
page1.getFormFieldsList()) { + String fieldName = getText(field.getFieldName(), text); + String fieldValue = getText(field.getFieldValue(), text); - System.out.println("Extracted form fields pair:"); - System.out.printf("\t(%s, %s))", fieldName, fieldValue); + System.out.println("Extracted form fields pair:"); + System.out.printf("\t(%s, %s))", fieldName, fieldValue); + } } } } diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java index 67b448ae1ec..b3bdeffd352 100644 --- a/document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java +++ b/document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java @@ -94,23 +94,27 @@ public static void parseTable(String projectId, String location, String inputGcs String text = response.getText(); // Get the first table in the document - Document.Page page1 = response.getPages(0); - Document.Page.Table table = page1.getTables(0); - - System.out.println("Results from first table processed:"); - List detectedLangs = page1.getDetectedLanguagesList(); - String langCode = - detectedLangs.size() > 0 ? 
detectedLangs.get(0).getLanguageCode() : "NOT_FOUND"; - System.out.printf("First detected language: : %s", langCode); - - Document.Page.Table.TableRow headerRow = table.getHeaderRows(0); - System.out.println("Header row:"); - - for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) { - if (tableCell.getLayout().getTextAnchor().getTextSegmentsList() != null) { - // Extract shards from the text field - // First shard in document doesn't have startIndex property - System.out.printf("\t%s", getText(tableCell.getLayout(), text)); + if (response.getPagesCount() > 0) { + Document.Page page1 = response.getPages(0); + if (page1.getTablesCount() > 0) { + Document.Page.Table table = page1.getTables(0); + + System.out.println("Results from first table processed:"); + List detectedLangs = page1.getDetectedLanguagesList(); + String langCode = + detectedLangs.size() > 0 ? detectedLangs.get(0).getLanguageCode() : "NOT_FOUND"; + System.out.printf("First detected language: : %s", langCode); + + Document.Page.Table.TableRow headerRow = table.getHeaderRows(0); + System.out.println("Header row:"); + + for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) { + if (tableCell.getLayout().getTextAnchor().getTextSegmentsList() != null) { + // Extract shards from the text field + // First shard in document doesn't have startIndex property + System.out.printf("\t%s", getText(tableCell.getLayout(), text)); + } + } } } }