Add beta snippets for object tracking / text detection (#1773)
* Add beta snippets for object tracking / text detection

* Update beta_snippets_test.py

* Update beta_snippets.py

* Revert to using explicit URIs

* linter
nnegrey authored Oct 24, 2018
1 parent 9e48317 commit 423a6e6
Showing 7 changed files with 330 additions and 7 deletions.
40 changes: 40 additions & 0 deletions video/cloud-client/analyze/README.rst
@@ -100,6 +100,46 @@ To run this sample:
beta samples
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. image:: https://gstatic.com/cloudssh/images/open-btn.png
   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/analyze/beta_snippets.py,video/cloud-client/analyze/README.rst




To run this sample:

.. code-block:: bash

    $ python beta_snippets.py

    usage: beta_snippets.py [-h]
                            {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
                            ...

    This application demonstrates speech transcription, text detection,
    and object tracking using the Google Cloud Video Intelligence API.

    Usage Examples:
        python beta_snippets.py transcription gs://python-docs-samples-tests/video/googlework_short.mp4
        python beta_snippets.py video-text-gcs gs://python-docs-samples-tests/video/googlework_short.mp4
        python beta_snippets.py track-objects /resources/cat.mp4

    positional arguments:
      {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
        transcription       Transcribe speech from a video stored on GCS.
        video-text-gcs      Detect text in a video stored on GCS.
        video-text          Detect text in a local video.
        track-objects-gcs   Track objects in a video stored on GCS.
        track-objects       Track objects in a local video.

    optional arguments:
      -h, --help            show this help message and exit

The client library
3 changes: 3 additions & 0 deletions video/cloud-client/analyze/README.rst.in
@@ -16,6 +16,9 @@ samples:
- name: analyze
file: analyze.py
show_help: True
- name: beta samples
file: beta_snippets.py
show_help: True

cloud_client_library: true

235 changes: 229 additions & 6 deletions video/cloud-client/analyze/beta_snippets.py
@@ -18,18 +18,22 @@
Google Cloud API.
Usage Examples:
python beta_snippets.py transcription \
gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py video-text-gcs \
gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py track-objects /resources/cat.mp4
"""

import argparse
import io


def speech_transcription(input_uri):
    # [START video_speech_transcription_gcs_beta]
"""Transcribe speech from a video stored on GCS."""
from google.cloud import videointelligence_v1p1beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()

features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]
@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9,
word))
    # [END video_speech_transcription_gcs_beta]
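

# A hypothetical helper, not part of this sample: the snippets below all
# convert protobuf Duration values to seconds with the same inline
# seconds + nanos arithmetic, which could be factored out like this.
def duration_to_seconds(duration):
    """Return a protobuf Duration as floating-point seconds."""
    return duration.seconds + duration.nanos * 1e-9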


def video_detect_text_gcs(input_uri):
# [START video_detect_text_gcs_beta]
"""Detect text in a video stored on GCS."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]

operation = video_client.annotate_video(
input_uri=input_uri,
features=features)

print('\nProcessing video for text detection.')
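    # annotate_video returns a long-running operation; result() blocks until
    # the operation finishes or the 300-second timeout elapses.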
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
end_time = text_segment.segment.end_time_offset
print('start_time: {}, end_time: {}'.format(
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))

print('Confidence: {}'.format(text_segment.confidence))

# Show the result for the first frame in this segment.
frame = text_segment.frames[0]
time_offset = frame.time_offset
print('Time offset for the first frame: {}'.format(
time_offset.seconds + time_offset.nanos * 1e-9))
print('Rotated Bounding Box Vertices:')
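    # Each vertex is normalized to [0, 1] relative to the frame size;
    # multiply by the frame width/height to recover pixel coordinates.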
for vertex in frame.rotated_bounding_box.vertices:
print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
# [END video_detect_text_gcs_beta]
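    # The return sits outside the region tag, so it is not rendered in the
    # published snippet; the tests use the returned annotations.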
return annotation_result.text_annotations


def video_detect_text(path):
# [START video_detect_text_beta]
"""Detect text in a local video."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]
video_context = videointelligence.types.VideoContext()

    with io.open(path, 'rb') as f:
        input_content = f.read()

operation = video_client.annotate_video(
input_content=input_content, # the bytes of the video file
features=features,
video_context=video_context)

print('\nProcessing video for text detection.')
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
end_time = text_segment.segment.end_time_offset
print('start_time: {}, end_time: {}'.format(
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))

print('Confidence: {}'.format(text_segment.confidence))

# Show the result for the first frame in this segment.
frame = text_segment.frames[0]
time_offset = frame.time_offset
print('Time offset for the first frame: {}'.format(
time_offset.seconds + time_offset.nanos * 1e-9))
print('Rotated Bounding Box Vertices:')
for vertex in frame.rotated_bounding_box.vertices:
print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
# [END video_detect_text_beta]
return annotation_result.text_annotations


def track_objects_gcs(gcs_uri):
# [START video_object_tracking_gcs_beta]
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

    # location_id 'us-east1' is recommended for the best latency, because
    # this feature runs on different processor types in different regions.
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.OBJECT_TRACKING]
operation = video_client.annotate_video(
input_uri=gcs_uri, features=features, location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print('Entity description: {}'.format(
object_annotation.entity.description))
if object_annotation.entity.entity_id:
print('Entity id: {}'.format(object_annotation.entity.entity_id))

print('Segment: {}s to {}s'.format(
object_annotation.segment.start_time_offset.seconds +
object_annotation.segment.start_time_offset.nanos / 1e9,
object_annotation.segment.end_time_offset.seconds +
object_annotation.segment.end_time_offset.nanos / 1e9))

print('Confidence: {}'.format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
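    # The normalized bounding box coordinates are in [0, 1] relative to the
    # frame; scale by the video width/height for pixel positions.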
print('Time offset of the first frame: {}s'.format(
frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
print('Bounding box position:')
print('\tleft : {}'.format(box.left))
print('\ttop : {}'.format(box.top))
print('\tright : {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')
# [END video_object_tracking_gcs_beta]
return object_annotations


def track_objects(path):
# [START video_object_tracking_beta]
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.OBJECT_TRACKING]

    with io.open(path, 'rb') as f:
        input_content = f.read()

    # location_id 'us-east1' is recommended for the best latency, because
    # this feature runs on different processor types in different regions.
operation = video_client.annotate_video(
input_content=input_content, features=features, location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print('Entity description: {}'.format(
object_annotation.entity.description))
if object_annotation.entity.entity_id:
print('Entity id: {}'.format(object_annotation.entity.entity_id))

print('Segment: {}s to {}s'.format(
object_annotation.segment.start_time_offset.seconds +
object_annotation.segment.start_time_offset.nanos / 1e9,
object_annotation.segment.end_time_offset.seconds +
object_annotation.segment.end_time_offset.nanos / 1e9))

print('Confidence: {}'.format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print('Time offset of the first frame: {}s'.format(
frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
print('Bounding box position:')
print('\tleft : {}'.format(box.left))
print('\ttop : {}'.format(box.top))
print('\tright : {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')
# [END video_object_tracking_beta]
return object_annotations


if __name__ == '__main__':
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
'transcription', help=speech_transcription.__doc__)
speech_transcription_parser.add_argument('gcs_uri')

video_text_gcs_parser = subparsers.add_parser(
'video-text-gcs', help=video_detect_text_gcs.__doc__)
video_text_gcs_parser.add_argument('gcs_uri')

video_text_parser = subparsers.add_parser(
'video-text', help=video_detect_text.__doc__)
video_text_parser.add_argument('path')

video_object_tracking_gcs_parser = subparsers.add_parser(
'track-objects-gcs', help=track_objects_gcs.__doc__)
video_object_tracking_gcs_parser.add_argument('gcs_uri')

video_object_tracking_parser = subparsers.add_parser(
'track-objects', help=track_objects.__doc__)
video_object_tracking_parser.add_argument('path')

args = parser.parse_args()

if args.command == 'transcription':
speech_transcription(args.gcs_uri)
elif args.command == 'video-text-gcs':
video_detect_text_gcs(args.gcs_uri)
elif args.command == 'video-text':
video_detect_text(args.path)
elif args.command == 'track-objects-gcs':
track_objects_gcs(args.gcs_uri)
elif args.command == 'track-objects':
track_objects(args.path)
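
For quick experiments, the new functions can also be called directly rather
than through the argparse front end. A minimal sketch, assuming
beta_snippets.py and this commit's resources/cat.mp4 are in the working
directory:

.. code-block:: python

    import beta_snippets

    # track_objects returns the ObjectTrackingAnnotation messages it prints,
    # so callers can inspect them further.
    annotations = beta_snippets.track_objects('./resources/cat.mp4')
    print('{} object(s) tracked'.format(len(annotations)))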
57 changes: 57 additions & 0 deletions video/cloud-client/analyze/beta_snippets_test.py
@@ -18,10 +18,67 @@

import beta_snippets

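# Strings that appear on screen in the test video googlework_short.mp4; the
# text detection tests below pass if any one of them is detected.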
POSSIBLE_TEXTS = ['Google', 'SUR', 'ROTO', 'Vice President', '58oo9',
                  'LONDRES', 'OMAR', 'PARIS', 'METRO', 'RUE', 'CARLO']


@pytest.mark.slow
def test_speech_transcription(capsys):
beta_snippets.speech_transcription(
'gs://python-docs-samples-tests/video/googlework_short.mp4')
out, _ = capsys.readouterr()
assert 'cultural' in out


@pytest.mark.slow
def test_detect_text():
in_file = './resources/googlework_short.mp4'
text_annotations = beta_snippets.video_detect_text(in_file)

text_exists = False
for text_annotation in text_annotations:
for possible_text in POSSIBLE_TEXTS:
if possible_text.upper() in text_annotation.text.upper():
text_exists = True
assert text_exists


@pytest.mark.slow
def test_detect_text_gcs():
in_file = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
text_annotations = beta_snippets.video_detect_text_gcs(in_file)

text_exists = False
for text_annotation in text_annotations:
for possible_text in POSSIBLE_TEXTS:
if possible_text.upper() in text_annotation.text.upper():
text_exists = True
assert text_exists


@pytest.mark.slow
def test_track_objects():
in_file = './resources/cat.mp4'
object_annotations = beta_snippets.track_objects(in_file)

    object_exists = False
    for object_annotation in object_annotations:
        if 'CAT' in object_annotation.entity.description.upper():
            object_exists = True
    assert object_exists
assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0


@pytest.mark.slow
def test_track_objects_gcs():
in_file = 'gs://demomaker/cat.mp4'
object_annotations = beta_snippets.track_objects_gcs(in_file)

    object_exists = False
    for object_annotation in object_annotations:
        if 'CAT' in object_annotation.entity.description.upper():
            object_exists = True
    assert object_exists
assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
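
All of the new tests are marked slow. A minimal sketch of running only them,
assuming pytest is installed and the slow marker is registered in this
repository's pytest configuration:

.. code-block:: python

    # Equivalent to running `pytest -m slow beta_snippets_test.py` from the
    # command line; pytest.main takes the same arguments as the CLI.
    import pytest

    pytest.main(['-m', 'slow', 'beta_snippets_test.py'])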
2 changes: 1 addition & 1 deletion video/cloud-client/analyze/requirements.txt
@@ -1 +1 @@
-google-cloud-videointelligence==1.3.0
+google-cloud-videointelligence==1.5.0
Binary file added video/cloud-client/analyze/resources/cat.mp4
Binary file added video/cloud-client/analyze/resources/googlework_short.mp4
