Add beta snippets for object tracking / text detection (#1773)
* Add beta snippets for object tracking / text detection

* Update beta_snippets_test.py

* Update beta_snippets.py

* Revert to using explicit URIs

* linter
nnegrey authored Oct 24, 2018
1 parent 9e48317 commit 423a6e6
Showing 7 changed files with 330 additions and 7 deletions.
40 changes: 40 additions & 0 deletions video/cloud-client/analyze/README.rst
@@ -100,6 +100,46 @@ To run this sample:
beta samples
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. image:: https://gstatic.com/cloudssh/images/open-btn.png
   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/analyze/beta_snippets.py,video/cloud-client/analyze/README.rst




To run this sample:

.. code-block:: bash

    $ python beta_snippets.py

    usage: beta_snippets.py [-h]
                            {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
                            ...

    This application demonstrates speech transcription, text detection,
    and object tracking using the Google Cloud Video Intelligence API.

    Usage Examples:
        python beta_snippets.py transcription gs://python-docs-samples-tests/video/googlework_short.mp4
        python beta_snippets.py video-text-gcs gs://python-docs-samples-tests/video/googlework_short.mp4
        python beta_snippets.py track-objects /resources/cat.mp4

    positional arguments:
      {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
        transcription       Transcribe speech from a video stored on GCS.
        video-text-gcs      Detect text in a video stored on GCS.
        video-text          Detect text in a local video.
        track-objects-gcs   Track objects in a video stored on GCS.
        track-objects       Track objects in a local video.

    optional arguments:
      -h, --help            show this help message and exit

The client library
3 changes: 3 additions & 0 deletions video/cloud-client/analyze/README.rst.in
@@ -16,6 +16,9 @@ samples:
- name: analyze
file: analyze.py
show_help: True
- name: beta samples
file: beta_snippets.py
show_help: True

cloud_client_library: true

235 changes: 229 additions & 6 deletions video/cloud-client/analyze/beta_snippets.py
@@ -18,18 +18,22 @@
Google Cloud API.
Usage Examples:
python beta_snippets.py transcription \
gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py video-text-gcs \
gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py track-objects /resources/cat.mp4
"""

import argparse
import io


def speech_transcription(input_uri):
    # [START video_speech_transcription_gcs_beta]
"""Transcribe speech from a video stored on GCS."""
from google.cloud import videointelligence_v1p1beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()

features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]
@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9,
word))
    # [END video_speech_transcription_gcs_beta]
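

# A hypothetical helper, not part of this sample: the snippets below all
# convert protobuf Duration values to seconds with the same inline
# seconds + nanos arithmetic, which could be factored out like this.
def duration_to_seconds(duration):
    """Return a protobuf Duration as floating-point seconds."""
    return duration.seconds + duration.nanos * 1e-9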


def video_detect_text_gcs(input_uri):
# [START video_detect_text_gcs_beta]
"""Detect text in a video stored on GCS."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]

operation = video_client.annotate_video(
input_uri=input_uri,
features=features)

print('\nProcessing video for text detection.')
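    # annotate_video returns a long-running operation; result() blocks until
    # the operation finishes or the 300-second timeout elapses.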
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
end_time = text_segment.segment.end_time_offset
print('start_time: {}, end_time: {}'.format(
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))

print('Confidence: {}'.format(text_segment.confidence))

# Show the result for the first frame in this segment.
frame = text_segment.frames[0]
time_offset = frame.time_offset
print('Time offset for the first frame: {}'.format(
time_offset.seconds + time_offset.nanos * 1e-9))
print('Rotated Bounding Box Vertices:')
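    # Each vertex is normalized to [0, 1] relative to the frame size;
    # multiply by the frame width/height to recover pixel coordinates.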
for vertex in frame.rotated_bounding_box.vertices:
print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
# [END video_detect_text_gcs_beta]
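    # The return sits outside the region tag, so it is not rendered in the
    # published snippet; the tests use the returned annotations.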
return annotation_result.text_annotations


def video_detect_text(path):
# [START video_detect_text_beta]
"""Detect text in a local video."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]
video_context = videointelligence.types.VideoContext()

    with io.open(path, 'rb') as f:
        input_content = f.read()

operation = video_client.annotate_video(
input_content=input_content, # the bytes of the video file
features=features,
video_context=video_context)

print('\nProcessing video for text detection.')
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
end_time = text_segment.segment.end_time_offset
print('start_time: {}, end_time: {}'.format(
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))

print('Confidence: {}'.format(text_segment.confidence))

# Show the result for the first frame in this segment.
frame = text_segment.frames[0]
time_offset = frame.time_offset
print('Time offset for the first frame: {}'.format(
time_offset.seconds + time_offset.nanos * 1e-9))
print('Rotated Bounding Box Vertices:')
for vertex in frame.rotated_bounding_box.vertices:
print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
# [END video_detect_text_beta]
return annotation_result.text_annotations


def track_objects_gcs(gcs_uri):
# [START video_object_tracking_gcs_beta]
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

    # location_id 'us-east1' is recommended for the best latency, because
    # this feature runs on different processor types in different regions.
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.OBJECT_TRACKING]
operation = video_client.annotate_video(
input_uri=gcs_uri, features=features, location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print('Entity description: {}'.format(
object_annotation.entity.description))
if object_annotation.entity.entity_id:
print('Entity id: {}'.format(object_annotation.entity.entity_id))

print('Segment: {}s to {}s'.format(
object_annotation.segment.start_time_offset.seconds +
object_annotation.segment.start_time_offset.nanos / 1e9,
object_annotation.segment.end_time_offset.seconds +
object_annotation.segment.end_time_offset.nanos / 1e9))

print('Confidence: {}'.format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
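    # The normalized bounding box coordinates are in [0, 1] relative to the
    # frame; scale by the video width/height for pixel positions.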
print('Time offset of the first frame: {}s'.format(
frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
print('Bounding box position:')
print('\tleft : {}'.format(box.left))
print('\ttop : {}'.format(box.top))
print('\tright : {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')
# [END video_object_tracking_gcs_beta]
return object_annotations


def track_objects(path):
# [START video_object_tracking_beta]
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.OBJECT_TRACKING]

    with io.open(path, 'rb') as f:
        input_content = f.read()

    # location_id 'us-east1' is recommended for the best latency, because
    # this feature runs on different processor types in different regions.
operation = video_client.annotate_video(
input_content=input_content, features=features, location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print('Entity description: {}'.format(
object_annotation.entity.description))
if object_annotation.entity.entity_id:
print('Entity id: {}'.format(object_annotation.entity.entity_id))

print('Segment: {}s to {}s'.format(
object_annotation.segment.start_time_offset.seconds +
object_annotation.segment.start_time_offset.nanos / 1e9,
object_annotation.segment.end_time_offset.seconds +
object_annotation.segment.end_time_offset.nanos / 1e9))

print('Confidence: {}'.format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print('Time offset of the first frame: {}s'.format(
frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
print('Bounding box position:')
print('\tleft : {}'.format(box.left))
print('\ttop : {}'.format(box.top))
print('\tright : {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')
# [END video_object_tracking_beta]
return object_annotations


if __name__ == '__main__':
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
'transcription', help=speech_transcription.__doc__)
speech_transcription_parser.add_argument('gcs_uri')

video_text_gcs_parser = subparsers.add_parser(
'video-text-gcs', help=video_detect_text_gcs.__doc__)
video_text_gcs_parser.add_argument('gcs_uri')

video_text_parser = subparsers.add_parser(
'video-text', help=video_detect_text.__doc__)
video_text_parser.add_argument('path')

video_object_tracking_gcs_parser = subparsers.add_parser(
'track-objects-gcs', help=track_objects_gcs.__doc__)
video_object_tracking_gcs_parser.add_argument('gcs_uri')

video_object_tracking_parser = subparsers.add_parser(
'track-objects', help=track_objects.__doc__)
video_object_tracking_parser.add_argument('path')

args = parser.parse_args()

if args.command == 'transcription':
speech_transcription(args.gcs_uri)
elif args.command == 'video-text-gcs':
video_detect_text_gcs(args.gcs_uri)
elif args.command == 'video-text':
video_detect_text(args.path)
elif args.command == 'track-objects-gcs':
track_objects_gcs(args.gcs_uri)
elif args.command == 'track-objects':
track_objects(args.path)
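
For quick experiments, the new functions can also be called directly rather
than through the argparse front end. A minimal sketch, assuming
beta_snippets.py and this commit's resources/cat.mp4 are in the working
directory:

.. code-block:: python

    import beta_snippets

    # track_objects returns the ObjectTrackingAnnotation messages it prints,
    # so callers can inspect them further.
    annotations = beta_snippets.track_objects('./resources/cat.mp4')
    print('{} object(s) tracked'.format(len(annotations)))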
57 changes: 57 additions & 0 deletions video/cloud-client/analyze/beta_snippets_test.py
@@ -18,10 +18,67 @@

import beta_snippets

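# Strings that appear on screen in the test video googlework_short.mp4; the
# text detection tests below pass if any one of them is detected.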
POSSIBLE_TEXTS = ['Google', 'SUR', 'ROTO', 'Vice President', '58oo9',
                  'LONDRES', 'OMAR', 'PARIS', 'METRO', 'RUE', 'CARLO']


@pytest.mark.slow
def test_speech_transcription(capsys):
beta_snippets.speech_transcription(
'gs://python-docs-samples-tests/video/googlework_short.mp4')
out, _ = capsys.readouterr()
assert 'cultural' in out


@pytest.mark.slow
def test_detect_text():
in_file = './resources/googlework_short.mp4'
text_annotations = beta_snippets.video_detect_text(in_file)

text_exists = False
for text_annotation in text_annotations:
for possible_text in POSSIBLE_TEXTS:
if possible_text.upper() in text_annotation.text.upper():
text_exists = True
assert text_exists


@pytest.mark.slow
def test_detect_text_gcs():
in_file = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
text_annotations = beta_snippets.video_detect_text_gcs(in_file)

text_exists = False
for text_annotation in text_annotations:
for possible_text in POSSIBLE_TEXTS:
if possible_text.upper() in text_annotation.text.upper():
text_exists = True
assert text_exists


@pytest.mark.slow
def test_track_objects():
in_file = './resources/cat.mp4'
object_annotations = beta_snippets.track_objects(in_file)

    object_exists = False
    for object_annotation in object_annotations:
        if 'CAT' in object_annotation.entity.description.upper():
            object_exists = True
    assert object_exists
assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0


@pytest.mark.slow
def test_track_objects_gcs():
in_file = 'gs://demomaker/cat.mp4'
object_annotations = beta_snippets.track_objects_gcs(in_file)

    object_exists = False
    for object_annotation in object_annotations:
        if 'CAT' in object_annotation.entity.description.upper():
            object_exists = True
    assert object_exists
assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
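
All of the new tests are marked slow. A minimal sketch of running only them,
assuming pytest is installed and the slow marker is registered in this
repository's pytest configuration:

.. code-block:: python

    # Equivalent to running `pytest -m slow beta_snippets_test.py` from the
    # command line; pytest.main takes the same arguments as the CLI.
    import pytest

    pytest.main(['-m', 'slow', 'beta_snippets_test.py'])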
2 changes: 1 addition & 1 deletion video/cloud-client/analyze/requirements.txt
@@ -1 +1 @@
-google-cloud-videointelligence==1.3.0
+google-cloud-videointelligence==1.5.0
Binary file added video/cloud-client/analyze/resources/cat.mp4
Binary file added video/cloud-client/analyze/resources/googlework_short.mp4
