diff --git a/vision/cloud-client/README.md b/vision/cloud-client/README.md index a182b9c4834..52de59f09a2 100644 --- a/vision/cloud-client/README.md +++ b/vision/cloud-client/README.md @@ -93,3 +93,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar ``` mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg" ``` + +#### OCR +``` +mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \ + gs:///" +``` diff --git a/vision/cloud-client/pom.xml b/vision/cloud-client/pom.xml index 7b1c0345c5c..3b3074edf7e 100644 --- a/vision/cloud-client/pom.xml +++ b/vision/cloud-client/pom.xml @@ -42,6 +42,11 @@ google-cloud-vision 1.32.0 + + com.google.cloud + google-cloud-storage + 1.32.0 + diff --git a/vision/cloud-client/src/main/java/com/example/vision/Detect.java b/vision/cloud-client/src/main/java/com/example/vision/Detect.java index 2fdc007419a..68f8c98bb5c 100644 --- a/vision/cloud-client/src/main/java/com/example/vision/Detect.java +++ b/vision/cloud-client/src/main/java/com/example/vision/Detect.java @@ -16,8 +16,19 @@ package com.example.vision; +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; +import com.google.cloud.vision.v1.AnnotateFileResponse; +import com.google.cloud.vision.v1.AnnotateFileResponse.Builder; import com.google.cloud.vision.v1.AnnotateImageRequest; import com.google.cloud.vision.v1.AnnotateImageResponse; +import com.google.cloud.vision.v1.AsyncAnnotateFileRequest; +import com.google.cloud.vision.v1.AsyncAnnotateFileResponse; +import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesResponse; import com.google.cloud.vision.v1.BatchAnnotateImagesResponse; import com.google.cloud.vision.v1.Block; import com.google.cloud.vision.v1.ColorInfo; @@ 
-28,11 +39,16 @@ import com.google.cloud.vision.v1.FaceAnnotation; import com.google.cloud.vision.v1.Feature; import com.google.cloud.vision.v1.Feature.Type; +import com.google.cloud.vision.v1.GcsDestination; +import com.google.cloud.vision.v1.GcsSource; import com.google.cloud.vision.v1.Image; import com.google.cloud.vision.v1.ImageAnnotatorClient; import com.google.cloud.vision.v1.ImageContext; import com.google.cloud.vision.v1.ImageSource; +import com.google.cloud.vision.v1.InputConfig; import com.google.cloud.vision.v1.LocationInfo; +import com.google.cloud.vision.v1.OperationMetadata; +import com.google.cloud.vision.v1.OutputConfig; import com.google.cloud.vision.v1.Page; import com.google.cloud.vision.v1.Paragraph; import com.google.cloud.vision.v1.SafeSearchAnnotation; @@ -48,12 +64,16 @@ import com.google.protobuf.ByteString; +import com.google.protobuf.util.JsonFormat; import java.io.FileInputStream; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class Detect { @@ -78,11 +98,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception, out.println("Usage:"); out.printf( "\tmvn exec:java -DDetect -Dexec.args=\" \"\n" + + "\tmvn exec:java -DDetect -Dexec.args=\"ocr \"" + + "\n" + "Commands:\n" + "\tfaces | labels | landmarks | logos | text | safe-search | properties" - + "| web | web-entities | web-entities-include-geo | crop \n" + + "| web | web-entities | web-entities-include-geo | crop | ocr \n" + "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage " - + "resource (gs://...)\n"); + + "resource (gs://...)\n" + + "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n" + + "Path to Destination\n\tA path to the remote destination on Cloud Storage for the" + + " file to be saved. 
(gs://BUCKET_NAME/PREFIX/)\n"); return; } String command = args[0];
@@ -162,6 +187,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
       } else {
         detectDocumentText(path, out);
       }
+    } else if (command.equals("ocr")) {
+      String destPath = args.length > 2 ? args[2] : "";
+      detectDocumentsGcs(path, destPath);
     }
   }
 
@@ -1277,4 +1305,147 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
     }
   }
   // [END vision_detect_document_uri]
+
+  // [START vision_async_detect_document_ocr]
+  /**
+   * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
+   *
+   * <p>Submits an asynchronous annotation request, waits up to 180 seconds for it to finish,
+   * lists the objects written under the destination prefix, and prints the full text of the
+   * first page contained in the first listed output file.
+   *
+   * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
+   *     text on.
+   * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the
+   *     results on (gs://BUCKET_NAME/PREFIX/).
+   * @throws Exception if the operation fails or times out, if no usable output is found, or on
+   *     errors while closing the client.
+   */
+  public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath)
+      throws Exception {
+    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
+      List<AsyncAnnotateFileRequest> requests = new ArrayList<>();
+
+      // Set the GCS source path for the remote file.
+      GcsSource gcsSource = GcsSource.newBuilder()
+          .setUri(gcsSourcePath)
+          .build();
+
+      // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
+      // types
+      InputConfig inputConfig = InputConfig.newBuilder()
+          .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff"
+          .setGcsSource(gcsSource)
+          .build();
+
+      // Set the GCS destination path for where to save the results.
+      GcsDestination gcsDestination = GcsDestination.newBuilder()
+          .setUri(gcsDestinationPath)
+          .build();
+
+      // Create the configuration for the output with the batch size.
+      // The batch size sets how many pages should be grouped into each json output file.
+      OutputConfig outputConfig = OutputConfig.newBuilder()
+          .setBatchSize(2)
+          .setGcsDestination(gcsDestination)
+          .build();
+
+      // Select the Feature required by the vision API
+      Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
+
+      // Build the OCR request
+      AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
+          .addFeatures(feature)
+          .setInputConfig(inputConfig)
+          .setOutputConfig(outputConfig)
+          .build();
+
+      requests.add(request);
+
+      // Perform the OCR request
+      OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
+          client.asyncBatchAnnotateFilesAsync(requests);
+
+      System.out.println("Waiting for the operation to finish.");
+
+      // Wait for the request to finish. (The result is not used, since the API saves the result to
+      // the specified location on GCS.)
+      List<AsyncAnnotateFileResponse> result = response.get(180, TimeUnit.SECONDS)
+          .getResponsesList();
+
+      // Once the request has completed and the output has been
+      // written to GCS, we can list all the output files.
+      Storage storage = StorageOptions.getDefaultInstance().getService();
+
+      // Get the destination location from the gcsDestinationPath
+      Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
+      Matcher matcher = pattern.matcher(gcsDestinationPath);
+
+      // The whole path must have the gs://BUCKET_NAME/PREFIX form, not merely contain it.
+      if (matcher.matches()) {
+        String bucketName = matcher.group(1);
+        String prefix = matcher.group(2);
+
+        // Get the list of objects with the given prefix from the GCS bucket
+        Bucket bucket = storage.get(bucketName);
+        if (bucket == null) {
+          // Storage.get returns null instead of throwing when the bucket does not exist.
+          throw new IllegalStateException("Bucket not found: " + bucketName);
+        }
+        com.google.api.gax.paging.Page<Blob> pageList =
+            bucket.list(BlobListOption.prefix(prefix));
+
+        Blob firstOutputFile = null;
+
+        // List objects with the given prefix.
+        System.out.println("Output files:");
+        for (Blob blob : pageList.iterateAll()) {
+          System.out.println(blob.getName());
+
+          // Process the first output file from GCS.
+          // Since we specified batch size = 2, the first response contains
+          // the first two pages of the input file.
+          if (firstOutputFile == null) {
+            firstOutputFile = blob;
+          }
+        }
+
+        // Fail with a clear message instead of a NullPointerException when nothing was listed.
+        if (firstOutputFile == null) {
+          throw new IllegalStateException("No output files found under " + gcsDestinationPath);
+        }
+
+        // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
+        // object. If the Blob is small read all its content in one request
+        // (Note: the file is a .json file)
+        // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
+        // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
+        String jsonContents =
+            new String(firstOutputFile.getContent(), java.nio.charset.StandardCharsets.UTF_8);
+        Builder builder = AnnotateFileResponse.newBuilder();
+        JsonFormat.parser().merge(jsonContents, builder);
+
+        // Build the AnnotateFileResponse object
+        AnnotateFileResponse annotateFileResponse = builder.build();
+
+        // Guard the index access below: an output file without page responses has no text.
+        if (annotateFileResponse.getResponsesCount() == 0) {
+          throw new IllegalStateException(
+              "Output file contains no page responses: " + firstOutputFile.getName());
+        }
+
+        // Parse through the object to get the actual response for the first page of the input file.
+        AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);
+
+        // Here we print the full text from the first page.
+        // The response contains more information:
+        // annotation/pages/blocks/paragraphs/words/symbols
+        // including confidence score and bounding boxes
+        System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText());
+      } else {
+        System.out.println("No MATCH");
+      }
+    }
+  }
+  // [END vision_async_detect_document_ocr]
 }
diff --git a/vision/cloud-client/src/test/java/com/example/vision/DetectIT.java b/vision/cloud-client/src/test/java/com/example/vision/DetectIT.java
index 87935c06ae1..d690ce18b64 100644
--- a/vision/cloud-client/src/test/java/com/example/vision/DetectIT.java
+++ b/vision/cloud-client/src/test/java/com/example/vision/DetectIT.java
@@ -18,6 +18,11 @@
 
 import static com.google.common.truth.Truth.assertThat;
 
+import com.google.api.gax.paging.Page;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.Storage.BlobListOption;
+import com.google.cloud.storage.StorageOptions;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -36,6 +41,7 @@ public class DetectIT { private
PrintStream out; private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); private static final String BUCKET = PROJECT_ID; + private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT"; @Before public void setUp() throws IOException {
@@ -346,4 +352,30 @@ public void testDocumentTextGcs() throws Exception {
     assertThat(got).contains("37%");
     assertThat(got).contains("Word text: class (confidence:");
   }
+
+  /** Runs the "ocr" command on a PDF stored on GCS and checks the captured output text. */
+  @Test
+  public void testDetectDocumentsGcs() throws Exception {
+    try {
+      // Act
+      String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf",
+          "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"};
+      Detect.argsHelper(args, out);
+
+      // Assert
+      String got = bout.toString();
+      assertThat(got).contains("HODGE'S GENERAL CONJECTURE");
+    } finally {
+      // Clean up the OCR output even when the request or the assertion fails, so that stale
+      // objects do not accumulate under the output prefix.
+      Storage storage = StorageOptions.getDefaultInstance().getService();
+
+      Page<Blob> blobs = storage.list(BUCKET, BlobListOption.currentDirectory(),
+          BlobListOption.prefix(OUTPUT_PREFIX + "/"));
+
+      for (Blob blob : blobs.iterateAll()) {
+        blob.delete();
+      }
+    }
+  }
 }