diff --git a/vision/beta/cloud-client/README.md b/vision/beta/cloud-client/README.md index 108cfbdff58..6b25a0cc0d6 100644 --- a/vision/beta/cloud-client/README.md +++ b/vision/beta/cloud-client/README.md @@ -88,3 +88,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar ``` mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg" ``` + +#### OCR +``` +mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \ + gs://BUCKET_NAME/PREFIX/" +``` diff --git a/vision/beta/cloud-client/pom.xml b/vision/beta/cloud-client/pom.xml index 432a81a4ac5..b9424c24968 100644 --- a/vision/beta/cloud-client/pom.xml +++ b/vision/beta/cloud-client/pom.xml @@ -40,7 +40,12 @@ com.google.cloud google-cloud-vision - 1.22.0 + 1.24.1 + + + com.google.cloud + google-cloud-storage + 1.24.1 diff --git a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java index f42323a6aa2..d506d25ba9d 100644 --- a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java +++ b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java @@ -16,37 +16,53 @@ package com.example.vision; -import com.google.cloud.vision.v1p1beta1.AnnotateImageRequest; -import com.google.cloud.vision.v1p1beta1.AnnotateImageResponse; -import com.google.cloud.vision.v1p1beta1.BatchAnnotateImagesResponse; -import com.google.cloud.vision.v1p1beta1.Block; -import com.google.cloud.vision.v1p1beta1.ColorInfo; -import com.google.cloud.vision.v1p1beta1.CropHint; -import com.google.cloud.vision.v1p1beta1.CropHintsAnnotation; -import com.google.cloud.vision.v1p1beta1.DominantColorsAnnotation; -import com.google.cloud.vision.v1p1beta1.EntityAnnotation; -import com.google.cloud.vision.v1p1beta1.FaceAnnotation; -import com.google.cloud.vision.v1p1beta1.Feature; -import com.google.cloud.vision.v1p1beta1.Feature.Type; -import com.google.cloud.vision.v1p1beta1.Image; -import 
com.google.cloud.vision.v1p1beta1.ImageAnnotatorClient; -import com.google.cloud.vision.v1p1beta1.ImageContext; -import com.google.cloud.vision.v1p1beta1.ImageSource; -import com.google.cloud.vision.v1p1beta1.LocationInfo; -import com.google.cloud.vision.v1p1beta1.Page; -import com.google.cloud.vision.v1p1beta1.Paragraph; -import com.google.cloud.vision.v1p1beta1.SafeSearchAnnotation; -import com.google.cloud.vision.v1p1beta1.Symbol; -import com.google.cloud.vision.v1p1beta1.TextAnnotation; -import com.google.cloud.vision.v1p1beta1.WebDetection; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebEntity; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebImage; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebLabel; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebPage; -import com.google.cloud.vision.v1p1beta1.WebDetectionParams; -import com.google.cloud.vision.v1p1beta1.Word; - +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; +import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder; +import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest; +import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse; +import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest; +import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse; +import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse; +import com.google.cloud.vision.v1p2beta1.Block; +import com.google.cloud.vision.v1p2beta1.ColorInfo; +import com.google.cloud.vision.v1p2beta1.CropHint; +import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation; +import 
com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation; +import com.google.cloud.vision.v1p2beta1.EntityAnnotation; +import com.google.cloud.vision.v1p2beta1.FaceAnnotation; +import com.google.cloud.vision.v1p2beta1.Feature; +import com.google.cloud.vision.v1p2beta1.Feature.Type; +import com.google.cloud.vision.v1p2beta1.GcsDestination; +import com.google.cloud.vision.v1p2beta1.GcsSource; +import com.google.cloud.vision.v1p2beta1.Image; +import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient; +import com.google.cloud.vision.v1p2beta1.ImageContext; +import com.google.cloud.vision.v1p2beta1.ImageSource; +import com.google.cloud.vision.v1p2beta1.InputConfig; +import com.google.cloud.vision.v1p2beta1.LocationInfo; +import com.google.cloud.vision.v1p2beta1.OperationMetadata; +import com.google.cloud.vision.v1p2beta1.OutputConfig; +import com.google.cloud.vision.v1p2beta1.Page; +import com.google.cloud.vision.v1p2beta1.Paragraph; +import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation; +import com.google.cloud.vision.v1p2beta1.Symbol; +import com.google.cloud.vision.v1p2beta1.TextAnnotation; +import com.google.cloud.vision.v1p2beta1.WebDetection; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage; +import com.google.cloud.vision.v1p2beta1.WebDetectionParams; +import com.google.cloud.vision.v1p2beta1.Word; import com.google.protobuf.ByteString; +import com.google.protobuf.util.JsonFormat; import java.io.FileInputStream; import java.io.IOException; @@ -54,6 +70,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class Detect { @@ -78,11 +97,16 @@ public static void argsHelper(String[] args, PrintStream out) 
throws Exception, out.println("Usage:"); out.printf( "\tmvn exec:java -DDetect -Dexec.args=\" \"\n" + + "\tmvn exec:java -DDetect -Dexec.args=\"ocr \"" + + "\n" + "Commands:\n" + "\tfaces | labels | landmarks | logos | text | safe-search | properties" - + "| web | web-entities | web-entities-include-geo | crop \n" + + "| web | web-entities | web-entities-include-geo | crop | ocr \n" + "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage " - + "resource (gs://...)\n"); + + "resource (gs://...)\n" + + "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n" + + "Path to Destination\n\tA path to the remote destination on Cloud Storage for the" + + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n"); return; } String command = args[0]; @@ -162,6 +186,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception, } else { detectDocumentText(path, out); } + } else if (command.equals("ocr")) { + String destPath = args.length > 2 ? args[2] : ""; + detectDocumentsGcs(path, destPath); } }
@@ -1277,4 +1304,148 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
     }
   }
   // [END vision_detect_document_uri]
+
+  // [START vision_async_detect_document_ocr]
+  /**
+   * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
+   *
+   * <p>Submits one asynchronous request, waits up to 180 seconds for it to finish, lists the
+   * objects found under the destination prefix and prints the full text of the first response
+   * stored in the first listed output file.
+   *
+   * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
+   *     text on.
+   * @param gcsDestinationPath The path to the remote destination on Google Cloud Storage to
+   *     store the results on (gs://BUCKET_NAME/PREFIX/).
+   * @throws Exception on errors while running the request, reading the output or closing the
+   *     client.
+   */
+  public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws
+      Exception {
+    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
+      List<AsyncAnnotateFileRequest> requests = new ArrayList<>();
+
+      // Set the GCS source path for the remote file.
+      GcsSource gcsSource = GcsSource.newBuilder()
+          .setUri(gcsSourcePath)
+          .build();
+
+      // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
+      // types
+      InputConfig inputConfig = InputConfig.newBuilder()
+          .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff"
+          .setGcsSource(gcsSource)
+          .build();
+
+      // Set the GCS destination path for where to save the results.
+      GcsDestination gcsDestination = GcsDestination.newBuilder()
+          .setUri(gcsDestinationPath)
+          .build();
+
+      // Create the configuration for the output with the batch size.
+      // The batch size sets how many pages should be grouped into each json output file.
+      OutputConfig outputConfig = OutputConfig.newBuilder()
+          .setBatchSize(2)
+          .setGcsDestination(gcsDestination)
+          .build();
+
+      // Select the Feature required by the vision API
+      Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
+
+      // Build the OCR request
+      AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
+          .addFeatures(feature)
+          .setInputConfig(inputConfig)
+          .setOutputConfig(outputConfig)
+          .build();
+
+      requests.add(request);
+
+      // Perform the OCR request
+      OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
+          client.asyncBatchAnnotateFilesAsync(requests);
+
+      System.out.println("Waiting for the operation to finish.");
+
+      // Block until the request finishes. The returned value is deliberately ignored: the API
+      // saves the results to the specified location on GCS.
+      response.get(180, TimeUnit.SECONDS);
+
+      // Once the request has completed and the output has been
+      // written to GCS, we can list all the output files.
+      Storage storage = StorageOptions.getDefaultInstance().getService();
+
+      // Get the destination location from the gcsDestinationPath. matches() anchors the pattern
+      // to the whole path, so text in front of "gs://" is rejected.
+      Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
+      Matcher matcher = pattern.matcher(gcsDestinationPath);
+
+      if (!matcher.matches()) {
+        System.out.println("No MATCH");
+        return;
+      }
+
+      String bucketName = matcher.group(1);
+      String prefix = matcher.group(2);
+
+      // Get the list of objects with the given prefix from the GCS bucket.
+      Bucket bucket = storage.get(bucketName);
+      if (bucket == null) {
+        // Storage.get returns null when the bucket cannot be found.
+        System.out.println("Bucket not found: " + bucketName);
+        return;
+      }
+      com.google.api.gax.paging.Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));
+
+      Blob firstOutputFile = null;
+
+      // List objects with the given prefix.
+      System.out.println("Output files:");
+      for (Blob blob : pageList.iterateAll()) {
+        System.out.println(blob.getName());
+
+        // Process the first output file from GCS.
+        // Since we specified batch size = 2, the first response contains
+        // the first two pages of the input file.
+        if (firstOutputFile == null) {
+          firstOutputFile = blob;
+        }
+      }
+
+      if (firstOutputFile == null) {
+        // Nothing was listed under the prefix, so there is no file to read.
+        System.out.println("No output files found.");
+        return;
+      }
+
+      // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
+      // object. If the Blob is small read all its content in one request
+      // (Note: the file is a .json file)
+      // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
+      // Decode explicitly as UTF-8 instead of relying on the platform default charset.
+      String jsonContents = new String(
+          firstOutputFile.getContent(), java.nio.charset.StandardCharsets.UTF_8);
+      Builder builder = AnnotateFileResponse.newBuilder();
+      JsonFormat.parser().merge(jsonContents, builder);
+
+      // Build the AnnotateFileResponse object
+      AnnotateFileResponse annotateFileResponse = builder.build();
+
+      if (annotateFileResponse.getResponsesCount() == 0) {
+        // getResponses(0) would throw on an empty response list.
+        System.out.println("No responses found in " + firstOutputFile.getName());
+        return;
+      }
+
+      // Parse through the object to get the actual response for the first page of the input file.
+      AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);
+
+      // Here we print the full text from the first page.
+      // The response contains more information:
+      // annotation/pages/blocks/paragraphs/words/symbols
+      // including confidence score and bounding boxes
+      System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText());
+    }
+  }
+  // [END vision_async_detect_document_ocr]
 }
diff --git a/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java b/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java index 72f52d848ae..cab7a9e3aef 100644 --- a/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java +++ b/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java @@ -18,6 +18,11 @@ import static com.google.common.truth.Truth.assertThat; +import com.google.api.gax.paging.Page; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; @@ -37,6 +42,7 @@ public class DetectIT { private Detect app; private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); private static final String BUCKET = PROJECT_ID; + private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT"; @Before public void setUp() throws IOException { @@ -348,4 +354,25 @@ public void testDocumentTextGcs() throws Exception { assertThat(got).contains("37%"); assertThat(got).contains("Word text: class (confidence:"); } + + @Test + public void testDetectDocumentsGcs() throws Exception { + // Act + String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf", + "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"}; + Detect.argsHelper(args, out); + + // Assert + String got = bout.toString(); + assertThat(got).contains("HODGE'S GENERAL CONJECTURE"); + + Storage storage = StorageOptions.getDefaultInstance().getService(); + + Page blobs = 
storage.list(BUCKET, BlobListOption.currentDirectory(), + BlobListOption.prefix(OUTPUT_PREFIX + "/")); + + for (Blob blob : blobs.iterateAll()) { + blob.delete(); + } + } }