Video v1beta2 [(#1088)](#1088)
* update analyze_safe_search

* update analyze_shots

* update explicit_content_detection and test

* update face detection

* update label detection (path)

* update label detection (file)

* flake

* safe search --> explicit content

* update faces tutorial

* update client library quickstart

* update shotchange tutorial

* update labels tutorial

* correct spelling

* correction start_time_offset

* import order

* rebased
dizcology authored and leahecole committed Sep 15, 2023
1 parent b561838 commit 65ae859
Showing 3 changed files with 186 additions and 92 deletions.
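
The heart of the migration, repeated in every sample function in the diff below, is replacing the v1beta1 GAPIC client with the v1beta2 client. A minimal before/after sketch, assuming the google-cloud-videointelligence package that exposes the v1beta2 surface is installed (the GCS URI is one of the sample paths from the README):

```python
# Before: v1beta1 GAPIC surface
# from google.cloud.gapic.videointelligence.v1beta1 import enums
# from google.cloud.gapic.videointelligence.v1beta1 import (
#     video_intelligence_service_client)
# video_client = (video_intelligence_service_client.
#                 VideoIntelligenceServiceClient())

# After: v1beta2 surface used throughout this commit
from google.cloud import videointelligence_v1beta2
from google.cloud.videointelligence_v1beta2 import enums

video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
features = [enums.Feature.EXPLICIT_CONTENT_DETECTION]
operation = video_client.annotate_video(
    'gs://demomaker/gbikes_dinosaur.mp4', features)
```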
12 changes: 6 additions & 6 deletions videointelligence/samples/analyze/README.rst
@@ -59,25 +59,25 @@ To run this sample:
$ python analyze.py
usage: analyze.py [-h] {faces,labels,labels_file,safe_search,shots} ...
usage: analyze.py [-h] {faces,labels,labels_file,explicit_content,shots} ...
This application demonstrates face detection, label detection, safe search,
and shot change detection using the Google Cloud API.
This application demonstrates face detection, label detection,
explicit content, and shot change detection using the Google Cloud API.
Usage Examples:
python analyze.py faces gs://demomaker/google_gmail.mp4
python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
python analyze.py labels_file resources/cat.mp4
python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
python analyze.py safe_search gs://demomaker/gbikes_dinosaur.mp4
python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
positional arguments:
{faces,labels,labels_file,safe_search,shots}
{faces,labels,labels_file,explicit_content,shots}
faces Detects faces given a GCS path.
labels Detects labels given a GCS path.
labels_file Detects labels given a file path.
safe_search Detects safe search features the GCS path to a video.
explicit_content Detects explicit content from the GCS path to a video.
shots Detects camera shot changes.
optional arguments:
242 changes: 169 additions & 73 deletions videointelligence/samples/analyze/analyze.py
@@ -14,16 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates face detection, label detection, safe search,
and shot change detection using the Google Cloud API.
"""This application demonstrates face detection, label detection,
explicit content, and shot change detection using the Google Cloud API.
Usage Examples:
python analyze.py faces gs://demomaker/google_gmail.mp4
python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
python analyze.py labels_file resources/cat.mp4
python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
python analyze.py safe_search gs://demomaker/gbikes_dinosaur.mp4
python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
"""

@@ -33,18 +33,18 @@
import sys
import time

from google.cloud.gapic.videointelligence.v1beta1 import enums
from google.cloud.gapic.videointelligence.v1beta1 import (
video_intelligence_service_client)
from google.cloud import videointelligence_v1beta2
from google.cloud.videointelligence_v1beta2 import enums
from google.cloud.videointelligence_v1beta2 import types


def analyze_safe_search(path):
""" Detects safe search features the GCS path to a video. """
video_client = (video_intelligence_service_client.
VideoIntelligenceServiceClient())
features = [enums.Feature.SAFE_SEARCH_DETECTION]
def analyze_explicit_content(path):
""" Detects explicit content from the GCS path to a video. """
video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
features = [enums.Feature.EXPLICIT_CONTENT_DETECTION]

operation = video_client.annotate_video(path, features)
print('\nProcessing video for safe search annotations:')
print('\nProcessing video for explicit content annotations:')

while not operation.done():
sys.stdout.write('.')
@@ -54,27 +54,29 @@ def analyze_safe_search(path):
print('\nFinished processing.')

# first result is retrieved because a single video was processed
safe_annotations = (operation.result().annotation_results[0].
safe_search_annotations)
explicit_annotation = (operation.result().annotation_results[0].
explicit_annotation)

likely_string = ("Unknown", "Very unlikely", "Unlikely", "Possible",
"Likely", "Very likely")

for note in safe_annotations:
print('Time: {}s'.format(note.time_offset / 1000000.0))
print('\tadult: {}'.format(likely_string[note.adult]))
print('\tspoof: {}'.format(likely_string[note.spoof]))
print('\tmedical: {}'.format(likely_string[note.medical]))
print('\tracy: {}'.format(likely_string[note.racy]))
print('\tviolent: {}\n'.format(likely_string[note.violent]))
for frame in explicit_annotation.frames:
frame_time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
print('Time: {}s'.format(frame_time))
print('\tpornography: {}'.format(
likely_string[frame.pornography_likelihood]))


def analyze_faces(path):
""" Detects faces given a GCS path. """
video_client = (video_intelligence_service_client.
VideoIntelligenceServiceClient())
video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
features = [enums.Feature.FACE_DETECTION]
operation = video_client.annotate_video(path, features)

config = types.FaceDetectionConfig(include_bounding_boxes=True)
context = types.VideoContext(face_detection_config=config)

operation = video_client.annotate_video(
path, features, video_context=context)
print('\nProcessing video for face annotations:')

while not operation.done():
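
A recurring change in this hunk and the rest of the file: v1beta2 reports time offsets as protobuf Durations (seconds plus nanos) instead of integer microseconds, hence the repeated `seconds + nanos / 1e9` arithmetic. A tiny helper, hypothetical and not part of the sample, that captures the same conversion:

```python
def duration_to_seconds(duration):
    """Convert a protobuf Duration (seconds and nanos fields) to float seconds."""
    return duration.seconds + duration.nanos / 1e9
```

Calling `duration_to_seconds(frame.time_offset)` would replace the inline arithmetic used above and in the functions that follow.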
@@ -89,27 +91,43 @@ def analyze_faces(path):
face_annotations)

for face_id, face in enumerate(face_annotations):
print('Face {}'.format(face_id))
print('Thumbnail size: {}'.format(len(face.thumbnail)))

for segment_id, segment in enumerate(face.segments):
positions = 'Entire video'
if (segment.start_time_offset != -1 or
segment.end_time_offset != -1):
positions = '{}s to {}s'.format(
segment.start_time_offset / 1000000.0,
segment.end_time_offset / 1000000.0)

print('\tTrack {}: {}'.format(segment_id, positions))

start_time = (segment.segment.start_time_offset.seconds +
segment.segment.start_time_offset.nanos / 1e9)
end_time = (segment.segment.end_time_offset.seconds +
segment.segment.end_time_offset.nanos / 1e9)
positions = '{}s to {}s'.format(start_time, end_time)
print('\tSegment {}: {}'.format(segment_id, positions))

# There are typically many frames for each face,
# here we print information on only the first frame.
frame = face.frames[0]
time_offset = (frame.time_offset.seconds +
frame.time_offset.nanos / 1e9)
box = frame.normalized_bounding_boxes[0]
print('First frame time offset: {}s'.format(time_offset))
print('First frame normalized bounding box:')
print('\tleft: {}'.format(box.left))
print('\ttop: {}'.format(box.top))
print('\tright: {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')


def analyze_labels(path):
""" Detects labels given a GCS path. """
video_client = (video_intelligence_service_client.
VideoIntelligenceServiceClient())
video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
features = [enums.Feature.LABEL_DETECTION]
operation = video_client.annotate_video(path, features)

config = types.LabelDetectionConfig(
label_detection_mode=enums.LabelDetectionMode.SHOT_AND_FRAME_MODE)
context = types.VideoContext(label_detection_config=config)

operation = video_client.annotate_video(
path, features, video_context=context)
print('\nProcessing video for label annotations:')

while not operation.done():
@@ -122,26 +140,65 @@ def analyze_labels(path):
# first result is retrieved because a single video was processed
results = operation.result().annotation_results[0]

for i, label in enumerate(results.label_annotations):
print('Label description: {}'.format(label.description))
print('Locations:')
# Process video/segment level label annotations
for i, segment_label in enumerate(results.segment_label_annotations):
print('Video label description: {}'.format(
segment_label.entity.description))
for category_entity in segment_label.category_entities:
print('\tLabel category description: {}'.format(
category_entity.description))

for i, segment in enumerate(segment_label.segments):
start_time = (segment.segment.start_time_offset.seconds +
segment.segment.start_time_offset.nanos / 1e9)
end_time = (segment.segment.end_time_offset.seconds +
segment.segment.end_time_offset.nanos / 1e9)
positions = '{}s to {}s'.format(start_time, end_time)
confidence = segment.confidence
print('\tSegment {}: {}'.format(i, positions))
print('\tConfidence: {}'.format(confidence))
print('\n')

for l, location in enumerate(label.locations):
positions = 'Entire video'
if (location.segment.start_time_offset != -1 or
location.segment.end_time_offset != -1):
positions = '{}s to {}s'.format(
location.segment.start_time_offset / 1000000.0,
location.segment.end_time_offset / 1000000.0)
print('\t{}: {}'.format(l, positions))
# Process shot level label annotations
for i, shot_label in enumerate(results.shot_label_annotations):
print('Shot label description: {}'.format(
shot_label.entity.description))
for category_entity in shot_label.category_entities:
print('\tLabel category description: {}'.format(
category_entity.description))

for i, shot in enumerate(shot_label.segments):
start_time = (shot.segment.start_time_offset.seconds +
shot.segment.start_time_offset.nanos / 1e9)
end_time = (shot.segment.end_time_offset.seconds +
shot.segment.end_time_offset.nanos / 1e9)
positions = '{}s to {}s'.format(start_time, end_time)
confidence = shot.confidence
print('\tSegment {}: {}'.format(i, positions))
print('\tConfidence: {}'.format(confidence))
print('\n')

# Process frame level label annotations
for i, frame_label in enumerate(results.frame_label_annotations):
print('Frame label description: {}'.format(
frame_label.entity.description))
for category_entity in frame_label.category_entities:
print('\tLabel category description: {}'.format(
category_entity.description))

# Each frame_label_annotation has many frames,
# here we print information only about the first frame.
frame = frame_label.frames[0]
time_offset = (frame.time_offset.seconds +
frame.time_offset.nanos / 1e9)
print('\tFirst frame time offset: {}s'.format(time_offset))
print('\tFirst frame confidence: {}'.format(frame.confidence))
print('\n')


def analyze_labels_file(path):
""" Detects labels given a file path. """
video_client = (video_intelligence_service_client.
VideoIntelligenceServiceClient())
video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
features = [enums.Feature.LABEL_DETECTION]

with io.open(path, "rb") as movie:
@@ -161,26 +218,64 @@ def analyze_labels_file(path):
# first result is retrieved because a single video was processed
results = operation.result().annotation_results[0]

for i, label in enumerate(results.label_annotations):
print('Label description: {}'.format(label.description))
print('Locations:')
# Process video/segment level label annotations
for i, segment_label in enumerate(results.segment_label_annotations):
print('Video label description: {}'.format(
segment_label.entity.description))
for category_entity in segment_label.category_entities:
print('\tLabel category description: {}'.format(
category_entity.description))

for i, segment in enumerate(segment_label.segments):
start_time = (segment.segment.start_time_offset.seconds +
segment.segment.start_time_offset.nanos / 1e9)
end_time = (segment.segment.end_time_offset.seconds +
segment.segment.end_time_offset.nanos / 1e9)
positions = '{}s to {}s'.format(start_time, end_time)
confidence = segment.confidence
print('\tSegment {}: {}'.format(i, positions))
print('\tConfidence: {}'.format(confidence))
print('\n')

for l, location in enumerate(label.locations):
positions = 'Entire video'
if (location.segment.start_time_offset != -1 or
location.segment.end_time_offset != -1):
positions = '{} to {}'.format(
location.segment.start_time_offset / 1000000.0,
location.segment.end_time_offset / 1000000.0)
print('\t{}: {}'.format(l, positions))
# Process shot level label annotations
for i, shot_label in enumerate(results.shot_label_annotations):
print('Shot label description: {}'.format(
shot_label.entity.description))
for category_entity in shot_label.category_entities:
print('\tLabel category description: {}'.format(
category_entity.description))

for i, shot in enumerate(shot_label.segments):
start_time = (shot.segment.start_time_offset.seconds +
shot.segment.start_time_offset.nanos / 1e9)
end_time = (shot.segment.end_time_offset.seconds +
shot.segment.end_time_offset.nanos / 1e9)
positions = '{}s to {}s'.format(start_time, end_time)
confidence = shot.confidence
print('\tSegment {}: {}'.format(i, positions))
print('\tConfidence: {}'.format(confidence))
print('\n')

# Process frame level label annotations
for i, frame_label in enumerate(results.frame_label_annotations):
print('Frame label description: {}'.format(
frame_label.entity.description))
for category_entity in frame_label.category_entities:
print('\tLabel category description: {}'.format(
category_entity.description))

# Each frame_label_annotation has many frames,
# here we print information only about the first frame.
frame = frame_label.frames[0]
time_offset = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
print('\tFirst frame time offset: {}s'.format(time_offset))
print('\tFirst frame confidence: {}'.format(frame.confidence))
print('\n')


def analyze_shots(path):
""" Detects camera shot changes. """
video_client = (video_intelligence_service_client.
VideoIntelligenceServiceClient())
video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
features = [enums.Feature.SHOT_CHANGE_DETECTION]
operation = video_client.annotate_video(path, features)
print('\nProcessing video for shot change annotations:')
@@ -193,13 +288,14 @@ def analyze_shots(path):
print('\nFinished processing.')

# first result is retrieved because a single video was processed
shots = operation.result().annotation_results[0]
shots = operation.result().annotation_results[0].shot_annotations

for note, shot in enumerate(shots.shot_annotations):
print('\tScene {}: {} to {}'.format(
note,
shot.start_time_offset / 1000000.0,
shot.end_time_offset / 1000000.0))
for i, shot in enumerate(shots):
start_time = (shot.start_time_offset.seconds +
shot.start_time_offset.nanos / 1e9)
end_time = (shot.end_time_offset.seconds +
shot.end_time_offset.nanos / 1e9)
print('\tShot {}: {} to {}'.format(i, start_time, end_time))


if __name__ == '__main__':
@@ -216,9 +312,9 @@ def analyze_shots(path):
analyze_labels_file_parser = subparsers.add_parser(
'labels_file', help=analyze_labels_file.__doc__)
analyze_labels_file_parser.add_argument('path')
analyze_safe_search_parser = subparsers.add_parser(
'safe_search', help=analyze_safe_search.__doc__)
analyze_safe_search_parser.add_argument('path')
analyze_explicit_content_parser = subparsers.add_parser(
'explicit_content', help=analyze_explicit_content.__doc__)
analyze_explicit_content_parser.add_argument('path')
analyze_shots_parser = subparsers.add_parser(
'shots', help=analyze_shots.__doc__)
analyze_shots_parser.add_argument('path')
@@ -233,5 +329,5 @@ def analyze_shots(path):
analyze_labels_file(args.path)
if args.command == 'shots':
analyze_shots(args.path)
if args.command == 'safe_search':
analyze_safe_search(args.path)
if args.command == 'explicit_content':
analyze_explicit_content(args.path)
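
Both updated label samples above walk the same three result collections (segment, shot, and frame level annotations). A compact sketch of the shared pattern, using only attribute names that appear in the diff:

```python
# Hypothetical condensation of the repeated loops in analyze_labels and
# analyze_labels_file; results is operation.result().annotation_results[0].
for level, annotations in (
        ('segment', results.segment_label_annotations),
        ('shot', results.shot_label_annotations),
        ('frame', results.frame_label_annotations)):
    for label in annotations:
        print('{} label: {}'.format(level, label.entity.description))
```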
