From 6b6cd25e29672d24e4c29f676d2a0799a6a924d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Mon, 23 Oct 2023 12:45:42 -0700 Subject: [PATCH 01/26] wip --- livekit-api/livekit/api/_twirp_client.py | 8 ++++---- livekit-api/livekit/api/access_token.py | 21 +++++++++++++++++---- livekit-api/livekit/api/room_service.py | 6 +++--- livekit-rtc/livekit/rtc/audio_stream.py | 2 +- livekit-rtc/livekit/rtc/video_stream.py | 2 +- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/livekit-api/livekit/api/_twirp_client.py b/livekit-api/livekit/api/_twirp_client.py index 88977297..e97a46ec 100644 --- a/livekit-api/livekit/api/_twirp_client.py +++ b/livekit-api/livekit/api/_twirp_client.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, Optional, Type, TypeVar +from typing import Dict, Type, TypeVar import aiohttp from google.protobuf.message import Message @@ -47,7 +47,7 @@ class TwirpErrorCode: DATA_LOSS = "dataloss" -T = TypeVar('T', bound=Message, type=None) +T = TypeVar('T', bound=Message) class TwirpClient: @@ -63,7 +63,7 @@ async def request( method: str, data: Message, headers: Dict[str, str], - response_class: Type[T] = None + response_class: Type[T], ) -> T: url = f"{self.host}/{self.prefix}/{self.pkg}.{service}/{method}" headers["Content-Type"] = "application/protobuf" @@ -79,5 +79,5 @@ async def request( error_data = await resp.json() raise TwirpError(error_data["code"], error_data["msg"]) - async def close(self): + async def aclose(self): await self.session.close() diff --git a/livekit-api/livekit/api/access_token.py b/livekit-api/livekit/api/access_token.py index f39ce7e8..d988ce4d 100644 --- a/livekit-api/livekit/api/access_token.py +++ b/livekit-api/livekit/api/access_token.py @@ -41,7 +41,7 @@ class VideoGrants: # TrackSource types that a participant may publish. # When set, it supercedes CanPublish. Only sources explicitly set here can be # published - can_publish_sources: list[str] = [] # keys keep track of each source + can_publish_sources: list[str] = dataclasses.field(default_factory=list) # by default, a participant is not allowed to update its own metadata can_update_own_metadata: bool = False @@ -99,14 +99,27 @@ def with_sha256(self, sha256: str) -> 'AccessToken': return self def to_jwt(self) -> str: - claims = { + + def camel_case_dict(data) -> dict: + return { + "".join( + word if i == 0 else word.title() for i, word in enumerate(key.split("_")) + ): value + for key, value in data + if value is not None + } + + claims = dataclasses.asdict(self.claims) + claims.update({ 'sub': self.identity, "iss": self.api_key, "nbf": calendar.timegm(datetime.datetime.utcnow().utctimetuple()), "exp": calendar.timegm( (datetime.datetime.utcnow() + self.ttl).utctimetuple() ), - } + "video": dataclasses.asdict( + self.claims.video, dict_factory=camel_case_dict + ), + }) - claims.update(dataclasses.asdict(self.claims)) return jwt.encode(claims, self.api_secret, algorithm='HS256') diff --git a/livekit-api/livekit/api/room_service.py b/livekit-api/livekit/api/room_service.py index 76373083..d667084c 100644 --- a/livekit-api/livekit/api/room_service.py +++ b/livekit-api/livekit/api/room_service.py @@ -53,13 +53,13 @@ async def get_participant(self, get: proto_room.RoomParticipantIdentity) \ return await self._client.request(SVC, "GetParticipant", get, self._auth_header( VideoGrants(room_admin=True, - jroom=get.room)), + room=get.room)), proto_models.ParticipantInfo) async def remove_participant(self, remove: proto_room.RoomParticipantIdentity) \ - -> None: + -> proto_room.RemoveParticipantResponse: return await self._client.request(SVC, "remove_participant", remove, self._auth_header( VideoGrants(room_admin=True, room=remove.room)), - None) + proto_room.RemoveParticipantResponse) diff --git a/livekit-rtc/livekit/rtc/audio_stream.py b/livekit-rtc/livekit/rtc/audio_stream.py index 415a56c5..ee403b7d 100644 --- a/livekit-rtc/livekit/rtc/audio_stream.py +++ b/livekit-rtc/livekit/rtc/audio_stream.py @@ -59,7 +59,7 @@ async def _run(self): elif audio_event.HasField('eos'): break - async def close(self): + async def aclose(self): ffi_client.queue.unsubscribe(self._ffi_queue) del self._ffi_handle await self._task diff --git a/livekit-rtc/livekit/rtc/video_stream.py b/livekit-rtc/livekit/rtc/video_stream.py index 35e07624..4c92d5df 100644 --- a/livekit-rtc/livekit/rtc/video_stream.py +++ b/livekit-rtc/livekit/rtc/video_stream.py @@ -62,7 +62,7 @@ async def _run(self): elif video_event.HasField('eos'): break - async def close(self): + async def aclose(self): ffi_client.queue.unsubscribe(self._ffi_queue) del self._ffi_handle await self._task From 0f043ebdd4141fa1231a08ec8badda82c3ad91fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Mon, 23 Oct 2023 12:54:43 -0700 Subject: [PATCH 02/26] fix export --- livekit-api/livekit/api/__init__.py | 2 ++ livekit-api/livekit/api/_service.py | 8 +++++--- livekit-api/livekit/api/room_service.py | 1 - 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/livekit-api/livekit/api/__init__.py b/livekit-api/livekit/api/__init__.py index 9f8183f9..82da6b59 100644 --- a/livekit-api/livekit/api/__init__.py +++ b/livekit-api/livekit/api/__init__.py @@ -16,6 +16,8 @@ """ # flake8: noqa +from ._proto import livekit_room_pb2 as proto_room from .version import __version__ from .access_token import VideoGrants, AccessToken from .room_service import RoomService + diff --git a/livekit-api/livekit/api/_service.py b/livekit-api/livekit/api/_service.py index 1fc89ffe..24b3033e 100644 --- a/livekit-api/livekit/api/_service.py +++ b/livekit-api/livekit/api/_service.py @@ -1,13 +1,12 @@ - from typing import Dict - +from abc import ABC from ._twirp_client import TwirpClient from .access_token import AccessToken, VideoGrants AUTHORIZATION = "authorization" -class Service: +class Service(ABC): def __init__(self, host: str, api_key: str, api_secret: str): self._client = TwirpClient(host, "livekit") self.api_key = api_key @@ -20,3 +19,6 @@ def _auth_header(self, grants: VideoGrants) -> Dict[str, str]: headers = {} headers[AUTHORIZATION] = "Bearer {}".format(token) return headers + + async def aclose(self): + await self._client.aclose() diff --git a/livekit-api/livekit/api/room_service.py b/livekit-api/livekit/api/room_service.py index d667084c..46c478b3 100644 --- a/livekit-api/livekit/api/room_service.py +++ b/livekit-api/livekit/api/room_service.py @@ -1,4 +1,3 @@ - from ._proto import livekit_models_pb2 as proto_models from ._proto import livekit_room_pb2 as proto_room from ._service import Service From 216e2b240ade0357fbd476a188d4b76d2bd8d87e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Tue, 24 Oct 2023 15:45:34 -0700 Subject: [PATCH 03/26] requires python 3.9 & more types --- livekit-rtc/livekit/rtc/_event_emitter.py | 40 +++++++++++++++++++++++ livekit-rtc/livekit/rtc/room.py | 24 ++++++++------ livekit-rtc/livekit/rtc/track.py | 10 ++++-- livekit-rtc/setup.py | 12 +++---- 4 files changed, 66 insertions(+), 20 deletions(-) create mode 100644 livekit-rtc/livekit/rtc/_event_emitter.py diff --git a/livekit-rtc/livekit/rtc/_event_emitter.py b/livekit-rtc/livekit/rtc/_event_emitter.py new file mode 100644 index 00000000..49f8b1fe --- /dev/null +++ b/livekit-rtc/livekit/rtc/_event_emitter.py @@ -0,0 +1,40 @@ +from typing import Callable, Dict, Set, Optional, Generic, TypeVar + +T = TypeVar('T') + +class EventEmitter(Generic[T]): + def __init__(self): + self._events: Dict[T, Set[Callable]] = dict() + + def emit(self, event: T, *args, **kwargs) -> None: + if event in self._events: + for callback in self._events[event]: + callback(*args, **kwargs) + + def once(self, event: T, callback: Optional[Callable]) -> Callable: + if callback is not None: + def once_callback(*args, **kwargs): + self.off(event, once_callback) + callback(*args, **kwargs) + return self.on(event, once_callback) + else: + def decorator(callback: Callable) -> Callable: + self.once(event, callback) + return callback + return decorator + + def on(self, event: T, callback: Optional[Callable]) -> Callable: + if callback is not None: + if event not in self._events: + self._events[event] = set() + self._events[event].add(callback) + return callback + else: + def decorator(callback: Callable) -> Callable: + self.on(event, callback) + return callback + return decorator + + def off(self, event: T, callback: Callable) -> None: + if event in self._events: + self._events[event].remove(callback) diff --git a/livekit-rtc/livekit/rtc/room.py b/livekit-rtc/livekit/rtc/room.py index f0cd09aa..0178b568 100644 --- a/livekit-rtc/livekit/rtc/room.py +++ b/livekit-rtc/livekit/rtc/room.py @@ -16,10 +16,7 @@ import ctypes import logging from dataclasses import dataclass, field -from typing import Optional - -from pyee.asyncio import EventEmitter - +from typing import Dict, Optional, Literal from ._ffi_client import FfiHandle, ffi_client from ._proto import ffi_pb2 as proto_ffi from ._proto import participant_pb2 as proto_participant @@ -27,10 +24,17 @@ from ._proto.room_pb2 import ConnectionState from ._proto.track_pb2 import TrackKind from ._utils import BroadcastQueue +from ._event_emitter import EventEmitter from .e2ee import E2EEManager, E2EEOptions from .participant import LocalParticipant, Participant, RemoteParticipant from .track import RemoteAudioTrack, RemoteVideoTrack from .track_publication import RemoteTrackPublication +from .participant import RemoteParticipant, Participant + +EventTypes = Literal['participant_connected', 'participant_disconnected', 'local_track_published', 'local_track_unpublished', 'track_published', 'track_unpublished', + 'track_subscribed', 'track_unsubscribed', 'track_subscription_failed', 'track_muted', 'track_unmuted', 'active_speakers_changed', 'room_metadata_changed', + 'participant_metadata_changed', 'participant_name_changed', 'connection_quality_changed', 'data_received', 'e2ee_state_changed', 'connection_state_changed', + 'connected', 'disconnected', 'reconnecting', 'reconnected'] @dataclass @@ -55,7 +59,7 @@ def __init__(self, message: str): self.message = message -class Room(EventEmitter): +class Room(EventEmitter[EventTypes]): def __init__(self, loop: Optional[asyncio.AbstractEventLoop] = None) -> None: super().__init__() @@ -64,7 +68,7 @@ def __init__(self, loop: Optional[asyncio.AbstractEventLoop] = None) -> None: self._room_queue = BroadcastQueue[proto_ffi.FfiEvent]() self._info = proto_room.RoomInfo() - self.participants: dict[str, RemoteParticipant] = {} + self.participants: Dict[str, RemoteParticipant] = {} self.connection_state = ConnectionState.CONN_DISCONNECTED def __del__(self) -> None: @@ -126,8 +130,8 @@ async def connect(self, # subscribe before connecting so we don't miss any events self._ffi_queue = ffi_client.queue.subscribe(self._loop) + queue = ffi_client.queue.subscribe() try: - queue = ffi_client.queue.subscribe() resp = ffi_client.request(req) cb = await queue.wait_for(lambda e: e.connect.async_id == resp.connect.async_id) @@ -167,8 +171,8 @@ async def disconnect(self) -> None: req = proto_ffi.FfiRequest() req.disconnect.room_handle = self._ffi_handle.handle # type: ignore + queue = ffi_client.queue.subscribe() try: - queue = ffi_client.queue.subscribe() resp = ffi_client.request(req) await queue.wait_for(lambda e: e.disconnect.async_id == resp.disconnect.async_id) @@ -311,8 +315,8 @@ def _on_room_event(self, event: proto_room.RoomEvent): native_data = ctypes.cast(buffer_info.data_ptr, ctypes.POINTER(ctypes.c_byte * buffer_info.data_len)).contents - - data = bytearray(native_data) + + data = bytes(native_data) FfiHandle(owned_buffer_info.handle.id) rparticipant = None if event.data_received.participant_sid: diff --git a/livekit-rtc/livekit/rtc/track.py b/livekit-rtc/livekit/rtc/track.py index 89066d1e..0247d336 100644 --- a/livekit-rtc/livekit/rtc/track.py +++ b/livekit-rtc/livekit/rtc/track.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING - +from typing import TYPE_CHECKING, Union from ._ffi_client import FfiHandle, ffi_client from ._proto import ffi_pb2 as proto_ffi from ._proto import track_pb2 as proto_track @@ -22,7 +21,6 @@ from .audio_source import AudioSource from .video_source import VideoSource - class Track(): def __init__(self, owned_info: proto_track.OwnedTrack): self._info = owned_info.info @@ -88,3 +86,9 @@ def __init__(self, info: proto_track.OwnedTrack): class RemoteVideoTrack(Track): def __init__(self, info: proto_track.OwnedTrack): super().__init__(info) + + +LocalTrack = Union[LocalVideoTrack, LocalAudioTrack] +RemoteTrack = Union[RemoteVideoTrack, RemoteAudioTrack] +AudioTrack = Union[LocalAudioTrack, RemoteAudioTrack] +VideoTrack = Union[LocalVideoTrack, RemoteVideoTrack] diff --git a/livekit-rtc/setup.py b/livekit-rtc/setup.py index c56298f6..58ae221c 100644 --- a/livekit-rtc/setup.py +++ b/livekit-rtc/setup.py @@ -14,7 +14,6 @@ import os import pathlib -import platform import subprocess import setuptools @@ -41,7 +40,8 @@ def run(self): download_script = here / 'rust-sdks' / 'download_ffi.py' output = here / 'livekit' / 'rtc' / 'resources' - cmd = ['python3', str(download_script.absolute()), '--output', str(output.absolute())] + cmd = ['python3', str(download_script.absolute()), + '--output', str(output.absolute())] # cibuildwheel is crosscompiling to arm64 on macos, make sure we download the # right binary (kind of a hack here...) @@ -52,6 +52,7 @@ def run(self): subprocess.run(cmd, check=True) setuptools.command.build_py.build_py.run(self) + setuptools.setup( name="livekit", version=about['__version__'], @@ -70,8 +71,6 @@ def run(self): "Topic :: Multimedia :: Video", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3 :: Only", @@ -79,9 +78,8 @@ def run(self): keywords=["webrtc", "realtime", "audio", "video", "livekit"], license="Apache-2.0", packages=setuptools.find_namespace_packages(include=['livekit.*']), - python_requires=">=3.7.0", - install_requires=["pyee>=11.0.0", - "protobuf>=3.1.0", + python_requires=">=3.9.0", + install_requires=["protobuf>=3.1.0", "types-protobuf>=3.1.0"], package_data={ "livekit.rtc": ['resources/*', '_proto/*.py'], From 948882c01360317d2b170205e4c810a51d805028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Tue, 24 Oct 2023 20:11:30 -0700 Subject: [PATCH 04/26] copy audio data to python gc --- livekit-rtc/livekit/rtc/_utils.py | 14 +++++++ livekit-rtc/livekit/rtc/audio_frame.py | 57 +++++++++++++++----------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/livekit-rtc/livekit/rtc/_utils.py b/livekit-rtc/livekit/rtc/_utils.py index d4d6f8c9..e84bf56c 100644 --- a/livekit-rtc/livekit/rtc/_utils.py +++ b/livekit-rtc/livekit/rtc/_utils.py @@ -1,6 +1,20 @@ import asyncio from collections import deque +import ctypes from typing import Callable, Generic, List, TypeVar +from typing_extensions import Buffer +import numpy + +def get_address(data: bytearray) -> int: + """ Get the address of a buffer using ctypes """ + + view = memoryview(data) + if not view.c_contiguous: + raise ValueError('data must be contiguous') + + buffer = (ctypes.c_int8 * view.nbytes).from_buffer(view) + return ctypes.addressof(buffer) + T = TypeVar('T') diff --git a/livekit-rtc/livekit/rtc/audio_frame.py b/livekit-rtc/livekit/rtc/audio_frame.py index cfd1027c..7cfd9f39 100644 --- a/livekit-rtc/livekit/rtc/audio_frame.py +++ b/livekit-rtc/livekit/rtc/audio_frame.py @@ -13,33 +13,32 @@ # limitations under the License. import ctypes - from ._ffi_client import FfiHandle, ffi_client from ._proto import audio_frame_pb2 as proto_audio from ._proto import ffi_pb2 as proto_ffi +from ._utils import get_address class AudioFrame: - def __init__(self, owned_info: proto_audio.OwnedAudioFrameBuffer) -> None: - self._info = owned_info.info - self._ffi_handle = FfiHandle(owned_info.handle.id) - - data_len = self.num_channels * self.samples_per_channel - self.data = ctypes.cast(self._info.data_ptr, - ctypes.POINTER(ctypes.c_int16 * data_len)).contents + def __init__(self, data: bytearray, + sample_rate: int, + num_channels: int, + samples_per_channel: int) -> None: + if len(data) < num_channels * samples_per_channel * ctypes.sizeof(ctypes.c_int16): + raise ValueError( + 'data length must be >= num_channels * samples_per_channel * sizeof(int16)') + + self._sample_rate = sample_rate + self._num_channels = num_channels + self._samples_per_channel = samples_per_channel + self._data = data @staticmethod - def create(sample_rate: int, num_channels: int, samples_per_channel: int) \ - -> 'AudioFrame': - # TODO(theomonnom): There should be no problem to directly - # send audio data from a Python created ctypes buffer - req = proto_ffi.FfiRequest() - req.alloc_audio_buffer.sample_rate = sample_rate - req.alloc_audio_buffer.num_channels = num_channels - req.alloc_audio_buffer.samples_per_channel = samples_per_channel - - resp = ffi_client.request(req) - return AudioFrame(resp.alloc_audio_buffer.buffer) + def create(sample_rate: int, num_channels: int, samples_per_channel: int) -> 'AudioFrame': + size = num_channels * samples_per_channel * \ + ctypes.sizeof(ctypes.c_int16) + data = bytearray(size) + return AudioFrame(data, sample_rate, num_channels, samples_per_channel) def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame': """ Resample the audio frame to the given sample rate and number of channels.""" @@ -53,31 +52,39 @@ def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame resp.new_audio_resampler.resampler.handle.id) resample_req = proto_ffi.FfiRequest() - resample_req.remix_and_resample.resampler_handle = resampler_handle.handle resample_req.remix_and_resample.buffer.CopyFrom(self._proto_info()) resample_req.remix_and_resample.sample_rate = sample_rate resample_req.remix_and_resample.num_channels = num_channels resp = ffi_client.request(resample_req) - return AudioFrame(resp.remix_and_resample.buffer) + + size = num_channels * self.samples_per_channel * ctypes.sizeof(ctypes.c_int16) + data_ptr = resp.remix_and_resample.buffer.info.data_ptr + data = (ctypes.c_int16 * size).from_address(data_ptr) + + return AudioFrame(bytearray(data), sample_rate, num_channels, self.samples_per_channel) def _proto_info(self) -> proto_audio.AudioFrameBufferInfo: audio_info = proto_audio.AudioFrameBufferInfo() - audio_info.data_ptr = ctypes.addressof(self.data) + audio_info.data_ptr = get_address(self._data) audio_info.sample_rate = self.sample_rate audio_info.num_channels = self.num_channels audio_info.samples_per_channel = self.samples_per_channel return audio_info + @property + def data(self) -> bytearray: + return self._data + @property def sample_rate(self) -> int: - return self._info.sample_rate + return self._sample_rate @property def num_channels(self) -> int: - return self._info.num_channels + return self._num_channels @property def samples_per_channel(self) -> int: - return self._info.samples_per_channel + return self._samples_per_channel From f3ede73b3cb8bd6402cd9cf6c6e9ccd9a82dab5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Tue, 24 Oct 2023 20:26:30 -0700 Subject: [PATCH 05/26] bump setuptools py version to 3.9 --- livekit-rtc/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/livekit-rtc/pyproject.toml b/livekit-rtc/pyproject.toml index 34ece544..0e6cc88f 100644 --- a/livekit-rtc/pyproject.toml +++ b/livekit-rtc/pyproject.toml @@ -7,7 +7,7 @@ requires = [ build-backend = "setuptools.build_meta" [tool.cibuildwheel] -build = "cp37-*" +build = "cp39-*" skip = "*-musllinux_*" # not supported (libwebrtc is using glibc) manylinux-x86_64-image = "manylinux_2_28" From dc8f80ed993628db9b0e4ccb602aec485c9cf0fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Tue, 24 Oct 2023 20:39:19 -0700 Subject: [PATCH 06/26] fix audio_stream --- livekit-rtc/livekit/rtc/audio_frame.py | 15 +++++++++------ livekit-rtc/livekit/rtc/audio_stream.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/livekit-rtc/livekit/rtc/audio_frame.py b/livekit-rtc/livekit/rtc/audio_frame.py index 7cfd9f39..b3ae79ed 100644 --- a/livekit-rtc/livekit/rtc/audio_frame.py +++ b/livekit-rtc/livekit/rtc/audio_frame.py @@ -40,6 +40,14 @@ def create(sample_rate: int, num_channels: int, samples_per_channel: int) -> 'Au data = bytearray(size) return AudioFrame(data, sample_rate, num_channels, samples_per_channel) + @staticmethod + def _from_owned_info(owned_info: proto_audio.OwnedAudioFrameBuffer) -> 'AudioFrame': + info = owned_info.info + size = info.num_channels * info.samples_per_channel * ctypes.sizeof(ctypes.c_int16) + data = (ctypes.c_int16 * size).from_address(info.data_ptr) + FfiHandle(owned_info.handle.id) + return AudioFrame(bytearray(data), info.sample_rate, info.num_channels, info.samples_per_channel) + def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame': """ Resample the audio frame to the given sample rate and number of channels.""" @@ -58,12 +66,7 @@ def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame resample_req.remix_and_resample.num_channels = num_channels resp = ffi_client.request(resample_req) - - size = num_channels * self.samples_per_channel * ctypes.sizeof(ctypes.c_int16) - data_ptr = resp.remix_and_resample.buffer.info.data_ptr - data = (ctypes.c_int16 * size).from_address(data_ptr) - - return AudioFrame(bytearray(data), sample_rate, num_channels, self.samples_per_channel) + return AudioFrame._from_owned_info(resp.remix_and_resample.buffer) def _proto_info(self) -> proto_audio.AudioFrameBufferInfo: audio_info = proto_audio.AudioFrameBufferInfo() diff --git a/livekit-rtc/livekit/rtc/audio_stream.py b/livekit-rtc/livekit/rtc/audio_stream.py index ee403b7d..47decaaa 100644 --- a/livekit-rtc/livekit/rtc/audio_stream.py +++ b/livekit-rtc/livekit/rtc/audio_stream.py @@ -54,7 +54,7 @@ async def _run(self): if audio_event.HasField('frame_received'): owned_buffer_info = audio_event.frame_received.frame - frame = AudioFrame(owned_buffer_info) + frame = AudioFrame._from_owned_info(owned_buffer_info) self._queue.put(frame) elif audio_event.HasField('eos'): break From 9e05f9c39fda9345efc2cbd371ee909827a09a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Tue, 24 Oct 2023 20:54:52 -0700 Subject: [PATCH 07/26] nit --- livekit-rtc/livekit/rtc/_utils.py | 7 +------ livekit-rtc/livekit/rtc/audio_source.py | 2 +- livekit-rtc/livekit/rtc/video_frame.py | 1 + 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/livekit-rtc/livekit/rtc/_utils.py b/livekit-rtc/livekit/rtc/_utils.py index e84bf56c..aa44798e 100644 --- a/livekit-rtc/livekit/rtc/_utils.py +++ b/livekit-rtc/livekit/rtc/_utils.py @@ -7,12 +7,7 @@ def get_address(data: bytearray) -> int: """ Get the address of a buffer using ctypes """ - - view = memoryview(data) - if not view.c_contiguous: - raise ValueError('data must be contiguous') - - buffer = (ctypes.c_int8 * view.nbytes).from_buffer(view) + buffer = (ctypes.c_int8 * len(data)).from_buffer(data) return ctypes.addressof(buffer) diff --git a/livekit-rtc/livekit/rtc/audio_source.py b/livekit-rtc/livekit/rtc/audio_source.py index ed19c2b9..8a26dcbe 100644 --- a/livekit-rtc/livekit/rtc/audio_source.py +++ b/livekit-rtc/livekit/rtc/audio_source.py @@ -36,8 +36,8 @@ async def capture_frame(self, frame: AudioFrame) -> None: req.capture_audio_frame.source_handle = self._ffi_handle.handle req.capture_audio_frame.buffer.CopyFrom(frame._proto_info()) + queue = ffi_client.queue.subscribe() try: - queue = ffi_client.queue.subscribe() resp = ffi_client.request(req) cb = await queue.wait_for(lambda e: e.capture_audio_frame.async_id == resp.capture_audio_frame.async_id) diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index 95f695ac..3be238ec 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -90,6 +90,7 @@ def create(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ raise Exception('Unsupported VideoFrameBufferType') +# TODO(theomonnom): Ability to get GPU texture directly class NativeVideoFrameBuffer(VideoFrameBuffer): def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: super().__init__(owned_info) From a4c9cc15cfa03ba9c9bd43dde1f95ff363cd792a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Wed, 25 Oct 2023 14:39:35 -0700 Subject: [PATCH 08/26] wip python allocated video frames --- livekit-rtc/livekit/rtc/video_frame.py | 361 ++++++++++++++++++------- 1 file changed, 266 insertions(+), 95 deletions(-) diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index 3be238ec..9ddd47d5 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -13,11 +13,13 @@ # limitations under the License. import ctypes +from typing import Union from ._ffi_client import FfiHandle, ffi_client from ._proto import ffi_pb2 as proto_ffi from ._proto import video_frame_pb2 as proto_video_frame -from ._proto.video_frame_pb2 import VideoFormatType, VideoFrameBufferType, VideoRotation +from ._proto.video_frame_pb2 import VideoFormatType, VideoFrameBufferType, VideoFrameReceived, VideoRotation +from abc import ABC class VideoFrame: @@ -29,30 +31,38 @@ def __init__(self, timestamp_us: int, self.rotation = rotation -class VideoFrameBuffer: - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - self._info = owned_info.info - self._ffi_handle = FfiHandle(owned_info.handle.id) +class VideoFrameBuffer(ABC): + + def __init__(self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType) -> None: + self._data = data + self._width = width + self._height = height + self._buffer_type = buffer_type @property def width(self) -> int: - return self._info.width + return self._width @property def height(self) -> int: - return self._info.height + return self._height @property def type(self) -> VideoFrameBufferType.ValueType: - return self._info.buffer_type + return self._buffer_type + # TODO(theomonnom): Need Rust modification def to_i420(self) -> 'I420Buffer': req = proto_ffi.FfiRequest() req.to_i420.yuv_handle = self._ffi_handle.handle - resp = ffi_client.request(req) return I420Buffer(resp.to_i420.buffer) + # TODO(theomonnom): Need Rust modification def to_argb(self, dst: 'ArgbFrame') -> None: req = proto_ffi.FfiRequest() req.to_argb.buffer_handle = self._ffi_handle.handle @@ -61,11 +71,10 @@ def to_argb(self, dst: 'ArgbFrame') -> None: req.to_argb.dst_stride = dst.width * 4 req.to_argb.dst_width = dst.width req.to_argb.dst_height = dst.height - ffi_client.request(req) @staticmethod - def create(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ -> 'VideoFrameBuffer': """ Create the right class instance from the VideoFrameBufferInfo @@ -92,155 +101,317 @@ def create(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ # TODO(theomonnom): Ability to get GPU texture directly class NativeVideoFrameBuffer(VideoFrameBuffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, width: int, height: int) -> None: + super().__init__(bytearray(), width, height, VideoFrameBufferType.NATIVE) -class PlanarYuvBuffer(VideoFrameBuffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) +class PlanarYuvBuffer(VideoFrameBuffer, ABC): + def __init__(self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_u: int, + stride_v: int, + chroma_width: int, + chroma_height: int) -> None: + super().__init__(data, width, height, buffer_type) + self._stride_y = stride_y + self._stride_u = stride_u + self._stride_v = stride_v + self._chroma_width = chroma_width + self._chroma_height = chroma_height @property def chroma_width(self) -> int: - return self._info.yuv.chroma_width + return self._chroma_width @property def chroma_height(self) -> int: - return self._info.yuv.chroma_height + return self._chroma_height @property def stride_y(self) -> int: - return self._info.yuv.stride_y + return self._stride_y @property def stride_u(self) -> int: - return self._info.yuv.stride_u + return self._stride_u @property def stride_v(self) -> int: - return self._info.yuv.stride_v + return self._stride_v -class PlanarYuv8Buffer(PlanarYuvBuffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) +class PlanarYuv8Buffer(PlanarYuvBuffer, ABC): + def __init__(self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_u: int, + stride_v: int, + chroma_width: int, + chroma_height: int) -> None: + super().__init__(data, width, height, buffer_type, stride_u, + stride_y, stride_v, chroma_width, chroma_height) @property - def data_y(self) -> ctypes.Array[ctypes.c_uint8]: - arr = ctypes.cast(self._info.yuv.data_y_ptr, ctypes.POINTER( - ctypes.c_uint8 * (self._info.yuv.stride_y * self._info.height))).contents - return arr + def data_y(self) -> memoryview: + return memoryview(self._data)[0:self._stride_y * self._height] @property - def data_u(self) -> ctypes.Array[ctypes.c_uint8]: - arr = ctypes.cast(self._info.yuv.data_u_ptr, ctypes.POINTER( - ctypes.c_uint8 * (self._info.yuv.stride_u * - self._info.yuv.chroma_height))).contents - return arr + def data_u(self) -> memoryview: + return memoryview(self._data)[self._stride_y * self._height: + self._stride_y * self._height + + self._stride_u * self._chroma_height] @property - def data_v(self) -> ctypes.Array[ctypes.c_uint8]: - arr = ctypes.cast(self._info.yuv.data_v_ptr, ctypes.POINTER( - ctypes.c_uint8 * (self._info.yuv.stride_v * - self._info.yuv.chroma_height))).contents - return arr + def data_v(self) -> memoryview: + return memoryview(self._data)[self._stride_y * self._height + + self._stride_u * self._chroma_height: + self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height] -class PlanarYuv16Buffer(PlanarYuvBuffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) +class PlanarYuv16Buffer(PlanarYuvBuffer, ABC): + def __init__(self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_u: int, + stride_v: int, + chroma_width: int, + chroma_height: int) -> None: + super().__init__(data, width, height, buffer_type, stride_y, + stride_u, stride_v, chroma_width, chroma_height) @property - def data_y(self) -> ctypes.Array[ctypes.c_uint16]: - arr = ctypes.cast(self._info.yuv.data_y_ptr, ctypes.POINTER( - ctypes.c_uint16 * (self._info.yuv.stride_y // 2 * - self._info.height))).contents - return arr + def data_y(self) -> memoryview: + return memoryview(self._data)[0:self._stride_y * self._height].cast('H') @property - def data_u(self) -> ctypes.Array[ctypes.c_uint16]: - arr = ctypes.cast(self._info.yuv.data_u_ptr, ctypes.POINTER( - ctypes.c_uint16 * (self._info.yuv.stride_u // 2 * - self._info.yuv.chroma_height))).contents - return arr + def data_u(self) -> memoryview: + return memoryview(self._data)[self._stride_y * self._height: + self._stride_y * self._height + + self._stride_u * self._chroma_height].cast('H') @property - def data_v(self) -> ctypes.Array[ctypes.c_uint16]: - arr = ctypes.cast(self._info.yuv.data_v_ptr, ctypes.POINTER( - ctypes.c_uint16 * (self._info.yuv.stride_v // 2 * - self._info.yuv.chroma_height))).contents - return arr + def data_v(self) -> memoryview: + return memoryview(self._data)[self._stride_y * self._height + + self._stride_u * self._chroma_height: + self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height].cast('H') -class BiplanaraYuv8Buffer(VideoFrameBuffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) +class BiplanaraYuv8Buffer(VideoFrameBuffer, ABC): + def __init__(self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_uv: int, + chroma_width: int, + chroma_height: int) -> None: + super().__init__(data, width, height, buffer_type) + self._stride_y = stride_y + self._stride_uv = stride_uv + self._chroma_width = chroma_width + self._chroma_height = chroma_height @property - def data_y(self) -> ctypes.Array[ctypes.c_uint8]: - arr = ctypes.cast(self._info.bi_yuv.data_y_ptr, ctypes.POINTER( - ctypes.c_uint8 * (self._info.bi_yuv.stride_y * self._info.height))).contents - return arr + def chroma_width(self) -> int: + return self._chroma_width @property - def data_uv(self) -> ctypes.Array[ctypes.c_uint8]: - arr = ctypes.cast(self._info.bi_yuv.data_uv_ptr, ctypes.POINTER( - ctypes.c_uint8 * (self._info.bi_yuv.stride_uv * - self._info.bi_yuv.chroma_height))).contents - return arr + def chroma_height(self) -> int: + return self._chroma_height + + @property + def stride_y(self) -> int: + return self._stride_y + + @property + def stride_uv(self) -> int: + return self._stride_uv + + @ property + def data_y(self) -> memoryview: + return memoryview(self._data)[0:self._stride_y * self._height] + + @ property + def data_uv(self) -> memoryview: + return memoryview(self._data)[self._stride_y * self._height: + self._stride_y * self._height + + self._stride_uv * self._chroma_height] class I420Buffer(PlanarYuv8Buffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int) -> None: + + if len(data) < I420Buffer.calc_data_size(height, stride_y, stride_u, stride_v): + raise ValueError( + 'buffer too small for I420 data. Expected {} bytes, got {}.'.format( + I420Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + super().__init__(data, width, height, + VideoFrameBufferType.I420, stride_y, stride_u, stride_v, chroma_width, chroma_height) + + @staticmethod + def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: + return stride_y * height + (stride_u + stride_v) * ((height + 1) // 2) + + @staticmethod + def create(width: int, height: int) -> 'I420Buffer': + stride_y = width + stride_u = (width + 1) // 2 + stride_v = (width + 1) // 2 + data_size = I420Buffer.calc_data_size( + height, stride_y, stride_u, stride_v) + data = bytearray(data_size) + return I420Buffer(data, width, height, stride_y, stride_u, stride_v) class I420ABuffer(PlanarYuv8Buffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int, + stride_a: int) -> None: + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + super().__init__(data, width, height, VideoFrameBufferType.I420A, + stride_y, stride_u, stride_v, chroma_width, chroma_height) + self._stride_a = stride_a + + @staticmethod + def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int, stride_a: int) -> int: + return (stride_y + stride_a) * height + (stride_u + stride_v) * ((height + 1) // 2) + + @property + def stride_a(self) -> int: + return self._stride_a @property - def data_a(self) -> ctypes.Array[ctypes.c_uint8]: - arr = ctypes.cast(self._info.yuv.data_a_ptr, ctypes.POINTER( - ctypes.c_uint8 * (self._info.yuv.stride_a * self._info.height))).contents - return arr + def data_a(self) -> memoryview: + return memoryview(self._data)[self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height: + self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height + + self._stride_a * self._height] class I422Buffer(PlanarYuv8Buffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int) -> None: + chroma_width = (width + 1) // 2 + chroma_height = height + super().__init__(data, width, height, VideoFrameBufferType.I422, + stride_y, stride_u, stride_v, chroma_width, chroma_height) + + @staticmethod + def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: + return stride_y * height + stride_u * height + stride_v * height + class I444Buffer(PlanarYuv8Buffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int) -> None: + chroma_width = width + chroma_height = height + super().__init__(data, width, height, VideoFrameBufferType.I444, + stride_y, stride_u, stride_v, chroma_width, chroma_height) + + @staticmethod + def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: + return stride_y * height + stride_u * height + stride_v * height class I010Buffer(PlanarYuv16Buffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int) -> None: + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + super().__init__(data, width, height, VideoFrameBufferType.I010, + stride_y, stride_u, stride_v, chroma_width, chroma_height) + + @staticmethod + def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: + return stride_y * height * 2 + stride_u * ((height + 1) // 2) * 2 + stride_v * ((height + 1) // 2) * 2 + class NV12Buffer(BiplanaraYuv8Buffer): - def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: - super().__init__(owned_info) + def __init__(self, data: bytearray, + width: int, + height: int, + stride_y: int, + stride_uv: int) -> None: + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + super().__init__(data, width, height, VideoFrameBufferType.NV12, + stride_y, stride_uv, chroma_width, chroma_height) + + @staticmethod + def calc_data_size(height: int, stride_y: int, stride_uv: int) -> int: + return stride_y * height + stride_uv * ((height + 1) // 2) -class ArgbFrame: - """ - Mainly used to simplify the usage of to_argb method - So the users don't need to deal with ctypes - """ +class ArgbFrame: def __init__(self, + data: Union[bytes, bytearray, memoryview], format: VideoFormatType.ValueType, width: int, - height: int) -> None: + height: int, + stride: int = 0) -> None: + + if stride == 0: + stride = width * ctypes.sizeof(ctypes.c_uint32) + + if len(data) < stride * height: + raise ValueError("data size does not match stride and height") + + self._data = bytearray(data) self._format = format - self.width = width - self.height = height - self.data = (ctypes.c_uint8 * (width * height * - ctypes.sizeof(ctypes.c_uint32)))() # alloc frame + self._width = width + self._height = height + self._stride = stride def to_i420(self) -> I420Buffer: # TODO(theomonnom): avoid unnecessary buffer allocation @@ -254,6 +425,6 @@ def to_i420(self) -> I420Buffer: res = ffi_client.request(req) return I420Buffer(res.to_i420.buffer) - @property + @ property def format(self) -> VideoFormatType.ValueType: return self._format From 0f1fd40416461477af71a5572897ac155adb3000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Wed, 25 Oct 2023 14:42:24 -0700 Subject: [PATCH 09/26] Update video_frame.py --- livekit-rtc/livekit/rtc/video_frame.py | 33 +++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index 9ddd47d5..64001c45 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -296,6 +296,12 @@ def __init__(self, stride_u: int, stride_v: int, stride_a: int) -> None: + + if len(data) < I420ABuffer.calc_data_size(height, stride_y, stride_u, stride_v, stride_a): + raise ValueError( + 'buffer too small for I420A data. Expected {} bytes, got {}.'.format( + I420ABuffer.calc_data_size(height, stride_y, stride_u, stride_v, stride_a), len(data))) + chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 super().__init__(data, width, height, VideoFrameBufferType.I420A, @@ -329,6 +335,12 @@ def __init__(self, stride_y: int, stride_u: int, stride_v: int) -> None: + + if len(data) < I422Buffer.calc_data_size(height, stride_y, stride_u, stride_v): + raise ValueError( + 'buffer too small for I422 data. Expected {} bytes, got {}.'.format( + I422Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + chroma_width = (width + 1) // 2 chroma_height = height super().__init__(data, width, height, VideoFrameBufferType.I422, @@ -339,7 +351,6 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> return stride_y * height + stride_u * height + stride_v * height - class I444Buffer(PlanarYuv8Buffer): def __init__(self, data: bytearray, @@ -348,6 +359,12 @@ def __init__(self, stride_y: int, stride_u: int, stride_v: int) -> None: + + if len(data) < I444Buffer.calc_data_size(height, stride_y, stride_u, stride_v): + raise ValueError( + 'buffer too small for I444 data. Expected {} bytes, got {}.'.format( + I444Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + chroma_width = width chroma_height = height super().__init__(data, width, height, VideoFrameBufferType.I444, @@ -365,6 +382,12 @@ def __init__(self, data: bytearray, stride_y: int, stride_u: int, stride_v: int) -> None: + + if len(data) < I010Buffer.calc_data_size(height, stride_y, stride_u, stride_v): + raise ValueError( + 'buffer too small for I010 data. Expected {} bytes, got {}.'.format( + I010Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 super().__init__(data, width, height, VideoFrameBufferType.I010, @@ -375,13 +398,18 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> return stride_y * height * 2 + stride_u * ((height + 1) // 2) * 2 + stride_v * ((height + 1) // 2) * 2 - class NV12Buffer(BiplanaraYuv8Buffer): def __init__(self, data: bytearray, width: int, height: int, stride_y: int, stride_uv: int) -> None: + + if len(data) < NV12Buffer.calc_data_size(height, stride_y, stride_uv): + raise ValueError( + 'buffer too small for NV12 data. Expected {} bytes, got {}.'.format( + NV12Buffer.calc_data_size(height, stride_y, stride_uv), len(data))) + chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 super().__init__(data, width, height, VideoFrameBufferType.NV12, @@ -392,7 +420,6 @@ def calc_data_size(height: int, stride_y: int, stride_uv: int) -> int: return stride_y * height + stride_uv * ((height + 1) // 2) - class ArgbFrame: def __init__(self, data: Union[bytes, bytearray, memoryview], From bdd9fbefb3ac182a3e753180feacdbd98dd02caa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Thu, 26 Oct 2023 19:17:02 -0700 Subject: [PATCH 10/26] wip --- .../livekit/rtc/_proto/video_frame_pb2.py | 114 +++++----- .../livekit/rtc/_proto/video_frame_pb2.pyi | 42 ++-- livekit-rtc/livekit/rtc/_utils.py | 7 +- livekit-rtc/livekit/rtc/video_frame.py | 200 +++++++++++++++--- livekit-rtc/rust-sdks | 2 +- 5 files changed, 262 insertions(+), 103 deletions(-) diff --git a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py index 02db57c2..74124674 100644 --- a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py +++ b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py @@ -14,7 +14,7 @@ from . import handle_pb2 as handle__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11video_frame.proto\x12\rlivekit.proto\x1a\x0chandle.proto\"k\n\x17\x41llocVideoBufferRequest\x12\x31\n\x04type\x18\x01 \x01(\x0e\x32#.livekit.proto.VideoFrameBufferType\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\"P\n\x18\x41llocVideoBufferResponse\x12\x34\n\x06\x62uffer\x18\x01 \x01(\x0b\x32$.livekit.proto.OwnedVideoFrameBuffer\"[\n\x15NewVideoStreamRequest\x12\x14\n\x0ctrack_handle\x18\x01 \x01(\x04\x12,\n\x04type\x18\x02 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\"I\n\x16NewVideoStreamResponse\x12/\n\x06stream\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoStream\"\x7f\n\x15NewVideoSourceRequest\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\x12\x38\n\nresolution\x18\x02 \x01(\x0b\x32$.livekit.proto.VideoSourceResolution\"I\n\x16NewVideoSourceResponse\x12/\n\x06source\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoSource\"v\n\x18\x43\x61ptureVideoFrameRequest\x12\x15\n\rsource_handle\x18\x01 \x01(\x04\x12,\n\x05\x66rame\x18\x02 \x01(\x0b\x32\x1d.livekit.proto.VideoFrameInfo\x12\x15\n\rbuffer_handle\x18\x03 \x01(\x04\"\x1b\n\x19\x43\x61ptureVideoFrameResponse\"l\n\rToI420Request\x12\x0e\n\x06\x66lip_y\x18\x01 \x01(\x08\x12-\n\x04\x61rgb\x18\x02 \x01(\x0b\x32\x1d.livekit.proto.ArgbBufferInfoH\x00\x12\x14\n\nyuv_handle\x18\x03 \x01(\x04H\x00\x42\x06\n\x04\x66rom\"F\n\x0eToI420Response\x12\x34\n\x06\x62uffer\x18\x01 \x01(\x0b\x32$.livekit.proto.OwnedVideoFrameBuffer\"\xb6\x01\n\rToArgbRequest\x12\x15\n\rbuffer_handle\x18\x01 \x01(\x04\x12\x0f\n\x07\x64st_ptr\x18\x02 \x01(\x04\x12\x32\n\ndst_format\x18\x03 \x01(\x0e\x32\x1e.livekit.proto.VideoFormatType\x12\x12\n\ndst_stride\x18\x04 \x01(\r\x12\x11\n\tdst_width\x18\x05 \x01(\r\x12\x12\n\ndst_height\x18\x06 \x01(\r\x12\x0e\n\x06\x66lip_y\x18\x07 \x01(\x08\"\x10\n\x0eToArgbResponse\"D\n\x0fVideoResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\x12\x12\n\nframe_rate\x18\x03 \x01(\x01\"|\n\x0e\x41rgbBufferInfo\x12\x0b\n\x03ptr\x18\x01 \x01(\x04\x12.\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x1e.livekit.proto.VideoFormatType\x12\x0e\n\x06stride\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12\x0e\n\x06height\x18\x05 \x01(\r\"V\n\x0eVideoFrameInfo\x12\x14\n\x0ctimestamp_us\x18\x01 \x01(\x03\x12.\n\x08rotation\x18\x02 \x01(\x0e\x32\x1c.livekit.proto.VideoRotation\"\x97\x02\n\x14VideoFrameBufferInfo\x12\x38\n\x0b\x62uffer_type\x18\x01 \x01(\x0e\x32#.livekit.proto.VideoFrameBufferType\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x31\n\x03yuv\x18\x04 \x01(\x0b\x32\".livekit.proto.PlanarYuvBufferInfoH\x00\x12\x36\n\x06\x62i_yuv\x18\x05 \x01(\x0b\x32$.livekit.proto.BiplanarYuvBufferInfoH\x00\x12\x31\n\x06native\x18\x06 \x01(\x0b\x32\x1f.livekit.proto.NativeBufferInfoH\x00\x42\x08\n\x06\x62uffer\"y\n\x15OwnedVideoFrameBuffer\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12\x31\n\x04info\x18\x02 \x01(\x0b\x32#.livekit.proto.VideoFrameBufferInfo\"\xda\x01\n\x13PlanarYuvBufferInfo\x12\x14\n\x0c\x63hroma_width\x18\x01 \x01(\r\x12\x15\n\rchroma_height\x18\x02 \x01(\r\x12\x10\n\x08stride_y\x18\x03 \x01(\r\x12\x10\n\x08stride_u\x18\x04 \x01(\r\x12\x10\n\x08stride_v\x18\x05 \x01(\r\x12\x10\n\x08stride_a\x18\x06 \x01(\r\x12\x12\n\ndata_y_ptr\x18\x07 \x01(\x04\x12\x12\n\ndata_u_ptr\x18\x08 \x01(\x04\x12\x12\n\ndata_v_ptr\x18\t \x01(\x04\x12\x12\n\ndata_a_ptr\x18\n \x01(\x04\"\x92\x01\n\x15\x42iplanarYuvBufferInfo\x12\x14\n\x0c\x63hroma_width\x18\x01 \x01(\r\x12\x15\n\rchroma_height\x18\x02 \x01(\r\x12\x10\n\x08stride_y\x18\x03 \x01(\r\x12\x11\n\tstride_uv\x18\x04 \x01(\r\x12\x12\n\ndata_y_ptr\x18\x05 \x01(\x04\x12\x13\n\x0b\x64\x61ta_uv_ptr\x18\x06 \x01(\x04\"\x12\n\x10NativeBufferInfo\"?\n\x0fVideoStreamInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\"o\n\x10OwnedVideoStream\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoStreamInfo\"\x9f\x01\n\x10VideoStreamEvent\x12\x15\n\rstream_handle\x18\x01 \x01(\x04\x12;\n\x0e\x66rame_received\x18\x02 \x01(\x0b\x32!.livekit.proto.VideoFrameReceivedH\x00\x12,\n\x03\x65os\x18\x03 \x01(\x0b\x32\x1d.livekit.proto.VideoStreamEOSH\x00\x42\t\n\x07message\"x\n\x12VideoFrameReceived\x12,\n\x05\x66rame\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.VideoFrameInfo\x12\x34\n\x06\x62uffer\x18\x02 \x01(\x0b\x32$.livekit.proto.OwnedVideoFrameBuffer\"\x10\n\x0eVideoStreamEOS\"6\n\x15VideoSourceResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\"?\n\x0fVideoSourceInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\"o\n\x10OwnedVideoSource\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoSourceInfo*(\n\nVideoCodec\x12\x07\n\x03VP8\x10\x00\x12\x08\n\x04H264\x10\x01\x12\x07\n\x03\x41V1\x10\x02*l\n\rVideoRotation\x12\x14\n\x10VIDEO_ROTATION_0\x10\x00\x12\x15\n\x11VIDEO_ROTATION_90\x10\x01\x12\x16\n\x12VIDEO_ROTATION_180\x10\x02\x12\x16\n\x12VIDEO_ROTATION_270\x10\x03*U\n\x0fVideoFormatType\x12\x0f\n\x0b\x46ORMAT_ARGB\x10\x00\x12\x0f\n\x0b\x46ORMAT_BGRA\x10\x01\x12\x0f\n\x0b\x46ORMAT_ABGR\x10\x02\x12\x0f\n\x0b\x46ORMAT_RGBA\x10\x03*_\n\x14VideoFrameBufferType\x12\n\n\x06NATIVE\x10\x00\x12\x08\n\x04I420\x10\x01\x12\t\n\x05I420A\x10\x02\x12\x08\n\x04I422\x10\x03\x12\x08\n\x04I444\x10\x04\x12\x08\n\x04I010\x10\x05\x12\x08\n\x04NV12\x10\x06*Y\n\x0fVideoStreamType\x12\x17\n\x13VIDEO_STREAM_NATIVE\x10\x00\x12\x16\n\x12VIDEO_STREAM_WEBGL\x10\x01\x12\x15\n\x11VIDEO_STREAM_HTML\x10\x02**\n\x0fVideoSourceType\x12\x17\n\x13VIDEO_SOURCE_NATIVE\x10\x00\x42\x10\xaa\x02\rLiveKit.Protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11video_frame.proto\x12\rlivekit.proto\x1a\x0chandle.proto\"k\n\x17\x41llocVideoBufferRequest\x12\x31\n\x04type\x18\x01 \x01(\x0e\x32#.livekit.proto.VideoFrameBufferType\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\"P\n\x18\x41llocVideoBufferResponse\x12\x34\n\x06\x62uffer\x18\x01 \x01(\x0b\x32$.livekit.proto.OwnedVideoFrameBuffer\"[\n\x15NewVideoStreamRequest\x12\x14\n\x0ctrack_handle\x18\x01 \x01(\x04\x12,\n\x04type\x18\x02 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\"I\n\x16NewVideoStreamResponse\x12/\n\x06stream\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoStream\"\x7f\n\x15NewVideoSourceRequest\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\x12\x38\n\nresolution\x18\x02 \x01(\x0b\x32$.livekit.proto.VideoSourceResolution\"I\n\x16NewVideoSourceResponse\x12/\n\x06source\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoSource\"\xae\x01\n\x18\x43\x61ptureVideoFrameRequest\x12\x15\n\rsource_handle\x18\x01 \x01(\x04\x12,\n\x05\x66rame\x18\x02 \x01(\x0b\x32\x1d.livekit.proto.VideoFrameInfo\x12\x33\n\x04info\x18\x03 \x01(\x0b\x32#.livekit.proto.VideoFrameBufferInfoH\x00\x12\x10\n\x06handle\x18\x04 \x01(\x04H\x00\x42\x06\n\x04\x66rom\"\x1b\n\x19\x43\x61ptureVideoFrameResponse\"\x9f\x01\n\rToI420Request\x12\x0e\n\x06\x66lip_y\x18\x01 \x01(\x08\x12-\n\x04\x61rgb\x18\x02 \x01(\x0b\x32\x1d.livekit.proto.ArgbBufferInfoH\x00\x12\x35\n\x06\x62uffer\x18\x03 \x01(\x0b\x32#.livekit.proto.VideoFrameBufferInfoH\x00\x12\x10\n\x06handle\x18\x04 \x01(\x04H\x00\x42\x06\n\x04\x66rom\"F\n\x0eToI420Response\x12\x34\n\x06\x62uffer\x18\x01 \x01(\x0b\x32$.livekit.proto.OwnedVideoFrameBuffer\"\xd4\x01\n\rToArgbRequest\x12\x33\n\x06\x62uffer\x18\x01 \x01(\x0b\x32#.livekit.proto.VideoFrameBufferInfo\x12\x0f\n\x07\x64st_ptr\x18\x02 \x01(\x04\x12\x32\n\ndst_format\x18\x03 \x01(\x0e\x32\x1e.livekit.proto.VideoFormatType\x12\x12\n\ndst_stride\x18\x04 \x01(\r\x12\x11\n\tdst_width\x18\x05 \x01(\r\x12\x12\n\ndst_height\x18\x06 \x01(\r\x12\x0e\n\x06\x66lip_y\x18\x07 \x01(\x08\"\x10\n\x0eToArgbResponse\"D\n\x0fVideoResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\x12\x12\n\nframe_rate\x18\x03 \x01(\x01\"|\n\x0e\x41rgbBufferInfo\x12\x0b\n\x03ptr\x18\x01 \x01(\x04\x12.\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x1e.livekit.proto.VideoFormatType\x12\x0e\n\x06stride\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12\x0e\n\x06height\x18\x05 \x01(\r\"V\n\x0eVideoFrameInfo\x12\x14\n\x0ctimestamp_us\x18\x01 \x01(\x03\x12.\n\x08rotation\x18\x02 \x01(\x0e\x32\x1c.livekit.proto.VideoRotation\"\x97\x02\n\x14VideoFrameBufferInfo\x12\x38\n\x0b\x62uffer_type\x18\x01 \x01(\x0e\x32#.livekit.proto.VideoFrameBufferType\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x31\n\x03yuv\x18\x04 \x01(\x0b\x32\".livekit.proto.PlanarYuvBufferInfoH\x00\x12\x36\n\x06\x62i_yuv\x18\x05 \x01(\x0b\x32$.livekit.proto.BiplanarYuvBufferInfoH\x00\x12\x31\n\x06native\x18\x06 \x01(\x0b\x32\x1f.livekit.proto.NativeBufferInfoH\x00\x42\x08\n\x06\x62uffer\"y\n\x15OwnedVideoFrameBuffer\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12\x31\n\x04info\x18\x02 \x01(\x0b\x32#.livekit.proto.VideoFrameBufferInfo\"\xda\x01\n\x13PlanarYuvBufferInfo\x12\x14\n\x0c\x63hroma_width\x18\x01 \x01(\r\x12\x15\n\rchroma_height\x18\x02 \x01(\r\x12\x10\n\x08stride_y\x18\x03 \x01(\r\x12\x10\n\x08stride_u\x18\x04 \x01(\r\x12\x10\n\x08stride_v\x18\x05 \x01(\r\x12\x10\n\x08stride_a\x18\x06 \x01(\r\x12\x12\n\ndata_y_ptr\x18\x07 \x01(\x04\x12\x12\n\ndata_u_ptr\x18\x08 \x01(\x04\x12\x12\n\ndata_v_ptr\x18\t \x01(\x04\x12\x12\n\ndata_a_ptr\x18\n \x01(\x04\"\x92\x01\n\x15\x42iplanarYuvBufferInfo\x12\x14\n\x0c\x63hroma_width\x18\x01 \x01(\r\x12\x15\n\rchroma_height\x18\x02 \x01(\r\x12\x10\n\x08stride_y\x18\x03 \x01(\r\x12\x11\n\tstride_uv\x18\x04 \x01(\r\x12\x12\n\ndata_y_ptr\x18\x05 \x01(\x04\x12\x13\n\x0b\x64\x61ta_uv_ptr\x18\x06 \x01(\x04\"\x12\n\x10NativeBufferInfo\"?\n\x0fVideoStreamInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\"o\n\x10OwnedVideoStream\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoStreamInfo\"\x9f\x01\n\x10VideoStreamEvent\x12\x15\n\rstream_handle\x18\x01 \x01(\x04\x12;\n\x0e\x66rame_received\x18\x02 \x01(\x0b\x32!.livekit.proto.VideoFrameReceivedH\x00\x12,\n\x03\x65os\x18\x03 \x01(\x0b\x32\x1d.livekit.proto.VideoStreamEOSH\x00\x42\t\n\x07message\"x\n\x12VideoFrameReceived\x12,\n\x05\x66rame\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.VideoFrameInfo\x12\x34\n\x06\x62uffer\x18\x02 \x01(\x0b\x32$.livekit.proto.OwnedVideoFrameBuffer\"\x10\n\x0eVideoStreamEOS\"6\n\x15VideoSourceResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\"?\n\x0fVideoSourceInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\"o\n\x10OwnedVideoSource\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoSourceInfo*(\n\nVideoCodec\x12\x07\n\x03VP8\x10\x00\x12\x08\n\x04H264\x10\x01\x12\x07\n\x03\x41V1\x10\x02*l\n\rVideoRotation\x12\x14\n\x10VIDEO_ROTATION_0\x10\x00\x12\x15\n\x11VIDEO_ROTATION_90\x10\x01\x12\x16\n\x12VIDEO_ROTATION_180\x10\x02\x12\x16\n\x12VIDEO_ROTATION_270\x10\x03*U\n\x0fVideoFormatType\x12\x0f\n\x0b\x46ORMAT_ARGB\x10\x00\x12\x0f\n\x0b\x46ORMAT_BGRA\x10\x01\x12\x0f\n\x0b\x46ORMAT_ABGR\x10\x02\x12\x0f\n\x0b\x46ORMAT_RGBA\x10\x03*_\n\x14VideoFrameBufferType\x12\n\n\x06NATIVE\x10\x00\x12\x08\n\x04I420\x10\x01\x12\t\n\x05I420A\x10\x02\x12\x08\n\x04I422\x10\x03\x12\x08\n\x04I444\x10\x04\x12\x08\n\x04I010\x10\x05\x12\x08\n\x04NV12\x10\x06*Y\n\x0fVideoStreamType\x12\x17\n\x13VIDEO_STREAM_NATIVE\x10\x00\x12\x16\n\x12VIDEO_STREAM_WEBGL\x10\x01\x12\x15\n\x11VIDEO_STREAM_HTML\x10\x02**\n\x0fVideoSourceType\x12\x17\n\x13VIDEO_SOURCE_NATIVE\x10\x00\x42\x10\xaa\x02\rLiveKit.Protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,18 +23,18 @@ DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\252\002\rLiveKit.Proto' - _globals['_VIDEOCODEC']._serialized_start=2940 - _globals['_VIDEOCODEC']._serialized_end=2980 - _globals['_VIDEOROTATION']._serialized_start=2982 - _globals['_VIDEOROTATION']._serialized_end=3090 - _globals['_VIDEOFORMATTYPE']._serialized_start=3092 - _globals['_VIDEOFORMATTYPE']._serialized_end=3177 - _globals['_VIDEOFRAMEBUFFERTYPE']._serialized_start=3179 - _globals['_VIDEOFRAMEBUFFERTYPE']._serialized_end=3274 - _globals['_VIDEOSTREAMTYPE']._serialized_start=3276 - _globals['_VIDEOSTREAMTYPE']._serialized_end=3365 - _globals['_VIDEOSOURCETYPE']._serialized_start=3367 - _globals['_VIDEOSOURCETYPE']._serialized_end=3409 + _globals['_VIDEOCODEC']._serialized_start=3079 + _globals['_VIDEOCODEC']._serialized_end=3119 + _globals['_VIDEOROTATION']._serialized_start=3121 + _globals['_VIDEOROTATION']._serialized_end=3229 + _globals['_VIDEOFORMATTYPE']._serialized_start=3231 + _globals['_VIDEOFORMATTYPE']._serialized_end=3316 + _globals['_VIDEOFRAMEBUFFERTYPE']._serialized_start=3318 + _globals['_VIDEOFRAMEBUFFERTYPE']._serialized_end=3413 + _globals['_VIDEOSTREAMTYPE']._serialized_start=3415 + _globals['_VIDEOSTREAMTYPE']._serialized_end=3504 + _globals['_VIDEOSOURCETYPE']._serialized_start=3506 + _globals['_VIDEOSOURCETYPE']._serialized_end=3548 _globals['_ALLOCVIDEOBUFFERREQUEST']._serialized_start=50 _globals['_ALLOCVIDEOBUFFERREQUEST']._serialized_end=157 _globals['_ALLOCVIDEOBUFFERRESPONSE']._serialized_start=159 @@ -47,48 +47,48 @@ _globals['_NEWVIDEOSOURCEREQUEST']._serialized_end=536 _globals['_NEWVIDEOSOURCERESPONSE']._serialized_start=538 _globals['_NEWVIDEOSOURCERESPONSE']._serialized_end=611 - _globals['_CAPTUREVIDEOFRAMEREQUEST']._serialized_start=613 - _globals['_CAPTUREVIDEOFRAMEREQUEST']._serialized_end=731 - _globals['_CAPTUREVIDEOFRAMERESPONSE']._serialized_start=733 - _globals['_CAPTUREVIDEOFRAMERESPONSE']._serialized_end=760 - _globals['_TOI420REQUEST']._serialized_start=762 - _globals['_TOI420REQUEST']._serialized_end=870 - _globals['_TOI420RESPONSE']._serialized_start=872 - _globals['_TOI420RESPONSE']._serialized_end=942 - _globals['_TOARGBREQUEST']._serialized_start=945 - _globals['_TOARGBREQUEST']._serialized_end=1127 - _globals['_TOARGBRESPONSE']._serialized_start=1129 - _globals['_TOARGBRESPONSE']._serialized_end=1145 - _globals['_VIDEORESOLUTION']._serialized_start=1147 - _globals['_VIDEORESOLUTION']._serialized_end=1215 - _globals['_ARGBBUFFERINFO']._serialized_start=1217 - _globals['_ARGBBUFFERINFO']._serialized_end=1341 - _globals['_VIDEOFRAMEINFO']._serialized_start=1343 - _globals['_VIDEOFRAMEINFO']._serialized_end=1429 - _globals['_VIDEOFRAMEBUFFERINFO']._serialized_start=1432 - _globals['_VIDEOFRAMEBUFFERINFO']._serialized_end=1711 - _globals['_OWNEDVIDEOFRAMEBUFFER']._serialized_start=1713 - _globals['_OWNEDVIDEOFRAMEBUFFER']._serialized_end=1834 - _globals['_PLANARYUVBUFFERINFO']._serialized_start=1837 - _globals['_PLANARYUVBUFFERINFO']._serialized_end=2055 - _globals['_BIPLANARYUVBUFFERINFO']._serialized_start=2058 - _globals['_BIPLANARYUVBUFFERINFO']._serialized_end=2204 - _globals['_NATIVEBUFFERINFO']._serialized_start=2206 - _globals['_NATIVEBUFFERINFO']._serialized_end=2224 - _globals['_VIDEOSTREAMINFO']._serialized_start=2226 - _globals['_VIDEOSTREAMINFO']._serialized_end=2289 - _globals['_OWNEDVIDEOSTREAM']._serialized_start=2291 - _globals['_OWNEDVIDEOSTREAM']._serialized_end=2402 - _globals['_VIDEOSTREAMEVENT']._serialized_start=2405 - _globals['_VIDEOSTREAMEVENT']._serialized_end=2564 - _globals['_VIDEOFRAMERECEIVED']._serialized_start=2566 - _globals['_VIDEOFRAMERECEIVED']._serialized_end=2686 - _globals['_VIDEOSTREAMEOS']._serialized_start=2688 - _globals['_VIDEOSTREAMEOS']._serialized_end=2704 - _globals['_VIDEOSOURCERESOLUTION']._serialized_start=2706 - _globals['_VIDEOSOURCERESOLUTION']._serialized_end=2760 - _globals['_VIDEOSOURCEINFO']._serialized_start=2762 - _globals['_VIDEOSOURCEINFO']._serialized_end=2825 - _globals['_OWNEDVIDEOSOURCE']._serialized_start=2827 - _globals['_OWNEDVIDEOSOURCE']._serialized_end=2938 + _globals['_CAPTUREVIDEOFRAMEREQUEST']._serialized_start=614 + _globals['_CAPTUREVIDEOFRAMEREQUEST']._serialized_end=788 + _globals['_CAPTUREVIDEOFRAMERESPONSE']._serialized_start=790 + _globals['_CAPTUREVIDEOFRAMERESPONSE']._serialized_end=817 + _globals['_TOI420REQUEST']._serialized_start=820 + _globals['_TOI420REQUEST']._serialized_end=979 + _globals['_TOI420RESPONSE']._serialized_start=981 + _globals['_TOI420RESPONSE']._serialized_end=1051 + _globals['_TOARGBREQUEST']._serialized_start=1054 + _globals['_TOARGBREQUEST']._serialized_end=1266 + _globals['_TOARGBRESPONSE']._serialized_start=1268 + _globals['_TOARGBRESPONSE']._serialized_end=1284 + _globals['_VIDEORESOLUTION']._serialized_start=1286 + _globals['_VIDEORESOLUTION']._serialized_end=1354 + _globals['_ARGBBUFFERINFO']._serialized_start=1356 + _globals['_ARGBBUFFERINFO']._serialized_end=1480 + _globals['_VIDEOFRAMEINFO']._serialized_start=1482 + _globals['_VIDEOFRAMEINFO']._serialized_end=1568 + _globals['_VIDEOFRAMEBUFFERINFO']._serialized_start=1571 + _globals['_VIDEOFRAMEBUFFERINFO']._serialized_end=1850 + _globals['_OWNEDVIDEOFRAMEBUFFER']._serialized_start=1852 + _globals['_OWNEDVIDEOFRAMEBUFFER']._serialized_end=1973 + _globals['_PLANARYUVBUFFERINFO']._serialized_start=1976 + _globals['_PLANARYUVBUFFERINFO']._serialized_end=2194 + _globals['_BIPLANARYUVBUFFERINFO']._serialized_start=2197 + _globals['_BIPLANARYUVBUFFERINFO']._serialized_end=2343 + _globals['_NATIVEBUFFERINFO']._serialized_start=2345 + _globals['_NATIVEBUFFERINFO']._serialized_end=2363 + _globals['_VIDEOSTREAMINFO']._serialized_start=2365 + _globals['_VIDEOSTREAMINFO']._serialized_end=2428 + _globals['_OWNEDVIDEOSTREAM']._serialized_start=2430 + _globals['_OWNEDVIDEOSTREAM']._serialized_end=2541 + _globals['_VIDEOSTREAMEVENT']._serialized_start=2544 + _globals['_VIDEOSTREAMEVENT']._serialized_end=2703 + _globals['_VIDEOFRAMERECEIVED']._serialized_start=2705 + _globals['_VIDEOFRAMERECEIVED']._serialized_end=2825 + _globals['_VIDEOSTREAMEOS']._serialized_start=2827 + _globals['_VIDEOSTREAMEOS']._serialized_end=2843 + _globals['_VIDEOSOURCERESOLUTION']._serialized_start=2845 + _globals['_VIDEOSOURCERESOLUTION']._serialized_end=2899 + _globals['_VIDEOSOURCEINFO']._serialized_start=2901 + _globals['_VIDEOSOURCEINFO']._serialized_end=2964 + _globals['_OWNEDVIDEOSOURCE']._serialized_start=2966 + _globals['_OWNEDVIDEOSOURCE']._serialized_end=3077 # @@protoc_insertion_point(module_scope) diff --git a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi index 9b523223..106c3a21 100644 --- a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi +++ b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi @@ -275,20 +275,25 @@ class CaptureVideoFrameRequest(google.protobuf.message.Message): SOURCE_HANDLE_FIELD_NUMBER: builtins.int FRAME_FIELD_NUMBER: builtins.int - BUFFER_HANDLE_FIELD_NUMBER: builtins.int + INFO_FIELD_NUMBER: builtins.int + HANDLE_FIELD_NUMBER: builtins.int source_handle: builtins.int @property def frame(self) -> global___VideoFrameInfo: ... - buffer_handle: builtins.int + @property + def info(self) -> global___VideoFrameBufferInfo: ... + handle: builtins.int def __init__( self, *, source_handle: builtins.int = ..., frame: global___VideoFrameInfo | None = ..., - buffer_handle: builtins.int = ..., + info: global___VideoFrameBufferInfo | None = ..., + handle: builtins.int = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["frame", b"frame"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["buffer_handle", b"buffer_handle", "frame", b"frame", "source_handle", b"source_handle"]) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["frame", b"frame", "from", b"from", "handle", b"handle", "info", b"info"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["frame", b"frame", "from", b"from", "handle", b"handle", "info", b"info", "source_handle", b"source_handle"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["from", b"from"]) -> typing_extensions.Literal["info", "handle"] | None: ... global___CaptureVideoFrameRequest = CaptureVideoFrameRequest @@ -312,22 +317,25 @@ class ToI420Request(google.protobuf.message.Message): FLIP_Y_FIELD_NUMBER: builtins.int ARGB_FIELD_NUMBER: builtins.int - YUV_HANDLE_FIELD_NUMBER: builtins.int + BUFFER_FIELD_NUMBER: builtins.int + HANDLE_FIELD_NUMBER: builtins.int flip_y: builtins.bool @property def argb(self) -> global___ArgbBufferInfo: ... - yuv_handle: builtins.int - """Another yuv buffer""" + @property + def buffer(self) -> global___VideoFrameBufferInfo: ... + handle: builtins.int def __init__( self, *, flip_y: builtins.bool = ..., argb: global___ArgbBufferInfo | None = ..., - yuv_handle: builtins.int = ..., + buffer: global___VideoFrameBufferInfo | None = ..., + handle: builtins.int = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["argb", b"argb", "from", b"from", "yuv_handle", b"yuv_handle"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["argb", b"argb", "flip_y", b"flip_y", "from", b"from", "yuv_handle", b"yuv_handle"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["from", b"from"]) -> typing_extensions.Literal["argb", "yuv_handle"] | None: ... + def HasField(self, field_name: typing_extensions.Literal["argb", b"argb", "buffer", b"buffer", "from", b"from", "handle", b"handle"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["argb", b"argb", "buffer", b"buffer", "flip_y", b"flip_y", "from", b"from", "handle", b"handle"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["from", b"from"]) -> typing_extensions.Literal["argb", "buffer", "handle"] | None: ... global___ToI420Request = ToI420Request @@ -356,14 +364,15 @@ class ToArgbRequest(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - BUFFER_HANDLE_FIELD_NUMBER: builtins.int + BUFFER_FIELD_NUMBER: builtins.int DST_PTR_FIELD_NUMBER: builtins.int DST_FORMAT_FIELD_NUMBER: builtins.int DST_STRIDE_FIELD_NUMBER: builtins.int DST_WIDTH_FIELD_NUMBER: builtins.int DST_HEIGHT_FIELD_NUMBER: builtins.int FLIP_Y_FIELD_NUMBER: builtins.int - buffer_handle: builtins.int + @property + def buffer(self) -> global___VideoFrameBufferInfo: ... dst_ptr: builtins.int dst_format: global___VideoFormatType.ValueType dst_stride: builtins.int @@ -373,7 +382,7 @@ class ToArgbRequest(google.protobuf.message.Message): def __init__( self, *, - buffer_handle: builtins.int = ..., + buffer: global___VideoFrameBufferInfo | None = ..., dst_ptr: builtins.int = ..., dst_format: global___VideoFormatType.ValueType = ..., dst_stride: builtins.int = ..., @@ -381,7 +390,8 @@ class ToArgbRequest(google.protobuf.message.Message): dst_height: builtins.int = ..., flip_y: builtins.bool = ..., ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["buffer_handle", b"buffer_handle", "dst_format", b"dst_format", "dst_height", b"dst_height", "dst_ptr", b"dst_ptr", "dst_stride", b"dst_stride", "dst_width", b"dst_width", "flip_y", b"flip_y"]) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["buffer", b"buffer"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["buffer", b"buffer", "dst_format", b"dst_format", "dst_height", b"dst_height", "dst_ptr", b"dst_ptr", "dst_stride", b"dst_stride", "dst_width", b"dst_width", "flip_y", b"flip_y"]) -> None: ... global___ToArgbRequest = ToArgbRequest diff --git a/livekit-rtc/livekit/rtc/_utils.py b/livekit-rtc/livekit/rtc/_utils.py index aa44798e..8d80441e 100644 --- a/livekit-rtc/livekit/rtc/_utils.py +++ b/livekit-rtc/livekit/rtc/_utils.py @@ -2,12 +2,11 @@ from collections import deque import ctypes from typing import Callable, Generic, List, TypeVar -from typing_extensions import Buffer -import numpy -def get_address(data: bytearray) -> int: +def get_address(data: memoryview) -> int: """ Get the address of a buffer using ctypes """ - buffer = (ctypes.c_int8 * len(data)).from_buffer(data) + nbytes = data.nbytes + buffer = (ctypes.c_int8 * nbytes).from_buffer(data) return ctypes.addressof(buffer) diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index 64001c45..a4a08d35 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -17,9 +17,10 @@ from ._ffi_client import FfiHandle, ffi_client from ._proto import ffi_pb2 as proto_ffi +from ._utils import get_address from ._proto import video_frame_pb2 as proto_video_frame -from ._proto.video_frame_pb2 import VideoFormatType, VideoFrameBufferType, VideoFrameReceived, VideoRotation -from abc import ABC +from ._proto.video_frame_pb2 import VideoFormatType, VideoFrameBufferType, VideoRotation +from abc import ABC, abstractmethod class VideoFrame: @@ -51,24 +52,30 @@ def width(self) -> int: def height(self) -> int: return self._height + @property + def data(self) -> bytearray: + return self._data + @property def type(self) -> VideoFrameBufferType.ValueType: return self._buffer_type - # TODO(theomonnom): Need Rust modification + @abstractmethod + def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: + pass + def to_i420(self) -> 'I420Buffer': req = proto_ffi.FfiRequest() - req.to_i420.yuv_handle = self._ffi_handle.handle + req.to_i420.buffer.CopyFrom(self._proto_info()) resp = ffi_client.request(req) - return I420Buffer(resp.to_i420.buffer) + return I420Buffer._from_owned_info(resp.to_i420.buffer) - # TODO(theomonnom): Need Rust modification def to_argb(self, dst: 'ArgbFrame') -> None: req = proto_ffi.FfiRequest() - req.to_argb.buffer_handle = self._ffi_handle.handle - req.to_argb.dst_ptr = ctypes.addressof(dst.data) + req.to_argb.buffer.CopyFrom(self._proto_info()) + req.to_argb.dst_ptr = get_address(memoryview(self._data)) req.to_argb.dst_format = dst.format - req.to_argb.dst_stride = dst.width * 4 + req.to_argb.dst_stride = dst.stride req.to_argb.dst_width = dst.width req.to_argb.dst_height = dst.height ffi_client.request(req) @@ -82,27 +89,48 @@ def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ info = owned_info.info if info.buffer_type == VideoFrameBufferType.NATIVE: - return NativeVideoFrameBuffer(owned_info) + return NativeVideoBuffer._from_owned_info(owned_info) elif info.buffer_type == VideoFrameBufferType.I420: - return I420Buffer(owned_info) + return I420Buffer._from_owned_info(owned_info) elif info.buffer_type == VideoFrameBufferType.I420A: - return I420ABuffer(owned_info) + return I420ABuffer._from_owned_info(owned_info) elif info.buffer_type == VideoFrameBufferType.I422: - return I422Buffer(owned_info) + return I422Buffer._from_owned_info(owned_info) elif info.buffer_type == VideoFrameBufferType.I444: - return I444Buffer(owned_info) + return I444Buffer._from_owned_info(owned_info) elif info.buffer_type == VideoFrameBufferType.I010: - return I010Buffer(owned_info) + return I010Buffer._from_owned_info(owned_info) elif info.buffer_type == VideoFrameBufferType.NV12: - return NV12Buffer(owned_info) + return NV12Buffer._from_owned_info(owned_info) else: raise Exception('Unsupported VideoFrameBufferType') # TODO(theomonnom): Ability to get GPU texture directly -class NativeVideoFrameBuffer(VideoFrameBuffer): - def __init__(self, width: int, height: int) -> None: - super().__init__(bytearray(), width, height, VideoFrameBufferType.NATIVE) +class NativeVideoBuffer(VideoFrameBuffer): + def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: + self._info = owned_info.info + self._ffi_handle = FfiHandle(owned_info.handle.id) + super().__init__(bytearray(), self._info.width, + self._info.height, VideoFrameBufferType.NATIVE) + + def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: + return self._info + + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ + -> 'NativeVideoBuffer': + return NativeVideoBuffer(owned_info) + + def to_i420(self) -> 'I420Buffer': + req = proto_ffi.FfiRequest() + req.to_i420.handle = self._ffi_handle.handle + resp = ffi_client.request(req) + return I420Buffer._from_owned_info(resp.to_i420.buffer) + + def to_argb(self, dst: 'ArgbFrame') -> None: + self.to_i420().to_argb(dst) + class PlanarYuvBuffer(VideoFrameBuffer, ABC): @@ -123,6 +151,16 @@ def __init__(self, self._chroma_width = chroma_width self._chroma_height = chroma_height + def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: + info = proto_video_frame.VideoFrameBufferInfo() + info.width = self._width + info.height = self._height + info.buffer_type = self._buffer_type + info.yuv.stride_y = self._stride_y + info.yuv.stride_u = self._stride_u + info.yuv.stride_v = self._stride_v + return info + @property def chroma_width(self) -> int: return self._chroma_width @@ -158,6 +196,13 @@ def __init__(self, super().__init__(data, width, height, buffer_type, stride_u, stride_y, stride_v, chroma_width, chroma_height) + def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: + info = super()._proto_info() + info.yuv.data_y_ptr = get_address(self.data_y) + info.yuv.data_u_ptr = get_address(self.data_u) + info.yuv.data_v_ptr = get_address(self.data_v) + return info + @property def data_y(self) -> memoryview: return memoryview(self._data)[0:self._stride_y * self._height] @@ -191,6 +236,13 @@ def __init__(self, super().__init__(data, width, height, buffer_type, stride_y, stride_u, stride_v, chroma_width, chroma_height) + def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: + info = super()._proto_info() + info.yuv.data_y_ptr = get_address(self.data_y) + info.yuv.data_u_ptr = get_address(self.data_u) + info.yuv.data_v_ptr = get_address(self.data_v) + return info + @property def data_y(self) -> memoryview: return memoryview(self._data)[0:self._stride_y * self._height].cast('H') @@ -226,6 +278,17 @@ def __init__(self, self._chroma_width = chroma_width self._chroma_height = chroma_height + def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: + info = proto_video_frame.VideoFrameBufferInfo() + info.width = self._width + info.height = self._height + info.buffer_type = self._buffer_type + info.bi_yuv.stride_y = self._stride_y + info.bi_yuv.stride_uv = self._stride_uv + info.bi_yuv.data_y_ptr = get_address(self.data_y) + info.bi_yuv.data_uv_ptr = get_address(self.data_uv) + return info + @property def chroma_width(self) -> int: return self._chroma_width @@ -272,6 +335,18 @@ def __init__(self, super().__init__(data, width, height, VideoFrameBufferType.I420, stride_y, stride_u, stride_v, chroma_width, chroma_height) + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I420Buffer': + info = owned_info.info + stride_y = info.yuv.stride_y + stride_u = info.yuv.stride_u + stride_v = info.yuv.stride_v + cdata = (ctypes.c_uint8 * I420Buffer.calc_data_size(info.height, + stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + data = bytearray(cdata) + FfiHandle(owned_info.handle.id) + return I420Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) + @staticmethod def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: return stride_y * height + (stride_u + stride_v) * ((height + 1) // 2) @@ -308,6 +383,19 @@ def __init__(self, stride_y, stride_u, stride_v, chroma_width, chroma_height) self._stride_a = stride_a + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I420ABuffer': + info = owned_info.info + stride_y = info.yuv.stride_y + stride_u = info.yuv.stride_u + stride_v = info.yuv.stride_v + stride_a = info.yuv.stride_a + cdata = (ctypes.c_uint8 * I420ABuffer.calc_data_size(info.height, + stride_y, stride_u, stride_v, stride_a)).from_address(info.yuv.data_y_ptr) + data = bytearray(cdata) + FfiHandle(owned_info.handle.id) + return I420ABuffer(data, info.width, info.height, stride_y, stride_u, stride_v, stride_a) + @staticmethod def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int, stride_a: int) -> int: return (stride_y + stride_a) * height + (stride_u + stride_v) * ((height + 1) // 2) @@ -346,6 +434,18 @@ def __init__(self, super().__init__(data, width, height, VideoFrameBufferType.I422, stride_y, stride_u, stride_v, chroma_width, chroma_height) + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I422Buffer': + info = owned_info.info + stride_y = info.yuv.stride_y + stride_u = info.yuv.stride_u + stride_v = info.yuv.stride_v + cdata = (ctypes.c_uint8 * I422Buffer.calc_data_size(info.height, + stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + data = bytearray(cdata) + FfiHandle(owned_info.handle.id) + return I422Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) + @staticmethod def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: return stride_y * height + stride_u * height + stride_v * height @@ -370,6 +470,18 @@ def __init__(self, super().__init__(data, width, height, VideoFrameBufferType.I444, stride_y, stride_u, stride_v, chroma_width, chroma_height) + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I444Buffer': + info = owned_info.info + stride_y = info.yuv.stride_y + stride_u = info.yuv.stride_u + stride_v = info.yuv.stride_v + cdata = (ctypes.c_uint8 * I444Buffer.calc_data_size(info.height, + stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + data = bytearray(cdata) + FfiHandle(owned_info.handle.id) + return I444Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) + @staticmethod def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: return stride_y * height + stride_u * height + stride_v * height @@ -393,6 +505,18 @@ def __init__(self, data: bytearray, super().__init__(data, width, height, VideoFrameBufferType.I010, stride_y, stride_u, stride_v, chroma_width, chroma_height) + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I010Buffer': + info = owned_info.info + stride_y = info.yuv.stride_y + stride_u = info.yuv.stride_u + stride_v = info.yuv.stride_v + cdata = (ctypes.c_uint8 * I010Buffer.calc_data_size(info.height, + stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + data = bytearray(cdata) + FfiHandle(owned_info.handle.id) + return I010Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) + @staticmethod def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: return stride_y * height * 2 + stride_u * ((height + 1) // 2) * 2 + stride_v * ((height + 1) // 2) * 2 @@ -415,6 +539,17 @@ def __init__(self, data: bytearray, super().__init__(data, width, height, VideoFrameBufferType.NV12, stride_y, stride_uv, chroma_width, chroma_height) + @staticmethod + def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'NV12Buffer': + info = owned_info.info + stride_y = info.bi_yuv.stride_y + stride_uv = info.bi_yuv.stride_uv + cdata = (ctypes.c_uint8 * NV12Buffer.calc_data_size(info.height, + stride_y, stride_uv)).from_address(info.yuv.data_y_ptr) + data = bytearray(cdata) + FfiHandle(owned_info.handle.id) + return NV12Buffer(data, info.width, info.height, stride_y, stride_uv) + @staticmethod def calc_data_size(height: int, stride_y: int, stride_uv: int) -> int: return stride_y * height + stride_uv * ((height + 1) // 2) @@ -443,15 +578,30 @@ def __init__(self, def to_i420(self) -> I420Buffer: # TODO(theomonnom): avoid unnecessary buffer allocation req = proto_ffi.FfiRequest() - req.to_i420.argb.format = self._format + req.to_i420.argb.format = self.format req.to_i420.argb.width = self.width req.to_i420.argb.height = self.height - req.to_i420.argb.stride = self.width * 4 - req.to_i420.argb.ptr = ctypes.addressof(self.data) - + req.to_i420.argb.stride = self.stride + req.to_i420.argb.ptr = get_address(memoryview(self._data)) res = ffi_client.request(req) - return I420Buffer(res.to_i420.buffer) + return I420Buffer._from_owned_info(res.to_i420.buffer) - @ property + @property + def data(self) -> bytearray: + return self._data + + @property + def width(self) -> int: + return self._width + + @property + def height(self) -> int: + return self._height + + @property + def stride(self) -> int: + return self._stride + + @property def format(self) -> VideoFormatType.ValueType: return self._format diff --git a/livekit-rtc/rust-sdks b/livekit-rtc/rust-sdks index 327472c8..06b024af 160000 --- a/livekit-rtc/rust-sdks +++ b/livekit-rtc/rust-sdks @@ -1 +1 @@ -Subproject commit 327472c8fc4d5d66c97a9cc20b7f48d754bb2d01 +Subproject commit 06b024af8975741e97c1450f9a1de6993479173b From 0d5fb791fea49ba7470db67a6edbdb41739481c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Thu, 26 Oct 2023 19:20:23 -0700 Subject: [PATCH 11/26] fixes --- livekit-rtc/livekit/rtc/_ffi_client.py | 5 ++++- livekit-rtc/livekit/rtc/audio_stream.py | 5 +++-- livekit-rtc/livekit/rtc/video_stream.py | 7 ++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/livekit-rtc/livekit/rtc/_ffi_client.py b/livekit-rtc/livekit/rtc/_ffi_client.py index f685be18..155af19e 100644 --- a/livekit-rtc/livekit/rtc/_ffi_client.py +++ b/livekit-rtc/livekit/rtc/_ffi_client.py @@ -69,10 +69,13 @@ def __init__(self, handle: int) -> None: self.handle = handle def __del__(self): + self.dispose() + + def dispose(self) -> None: if self.handle != INVALID_HANDLE: assert ffi_lib.livekit_ffi_drop_handle( ctypes.c_uint64(self.handle)) - + self.handle = INVALID_HANDLE T = TypeVar('T') diff --git a/livekit-rtc/livekit/rtc/audio_stream.py b/livekit-rtc/livekit/rtc/audio_stream.py index 47decaaa..0584d421 100644 --- a/livekit-rtc/livekit/rtc/audio_stream.py +++ b/livekit-rtc/livekit/rtc/audio_stream.py @@ -59,9 +59,10 @@ async def _run(self): elif audio_event.HasField('eos'): break - async def aclose(self): ffi_client.queue.unsubscribe(self._ffi_queue) - del self._ffi_handle + + async def aclose(self): + self._ffi_handle.dispose() await self._task def __aiter__(self): diff --git a/livekit-rtc/livekit/rtc/video_stream.py b/livekit-rtc/livekit/rtc/video_stream.py index 4c92d5df..cd474a95 100644 --- a/livekit-rtc/livekit/rtc/video_stream.py +++ b/livekit-rtc/livekit/rtc/video_stream.py @@ -57,14 +57,15 @@ async def _run(self): owned_buffer_info = video_event.frame_received.buffer frame = VideoFrame(frame_info.timestamp_us, frame_info.rotation, - VideoFrameBuffer.create(owned_buffer_info)) + VideoFrameBuffer._from_owned_info(owned_buffer_info)) self._queue.put(frame) elif video_event.HasField('eos'): break - async def aclose(self): ffi_client.queue.unsubscribe(self._ffi_queue) - del self._ffi_handle + + async def aclose(self): + self._ffi_handle.dispose() await self._task def __aiter__(self): From bb7bc6f2525723a33746c785a61c9663ac993d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 10:07:35 -0700 Subject: [PATCH 12/26] fix errors --- examples/basic_room.py | 38 +++++++++++------------ examples/publish_wave.py | 2 +- livekit-rtc/livekit/rtc/__init__.py | 2 +- livekit-rtc/livekit/rtc/_event_emitter.py | 4 +-- livekit-rtc/livekit/rtc/audio_frame.py | 2 +- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/examples/basic_room.py b/examples/basic_room.py index dd80dc5f..9393ab86 100644 --- a/examples/basic_room.py +++ b/examples/basic_room.py @@ -11,42 +11,42 @@ async def main(room: rtc.Room) -> None: - @room.listens_to("participant_connected") + @room.on("participant_connected") def on_participant_connected(participant: rtc.RemoteParticipant) -> None: logging.info( "participant connected: %s %s", participant.sid, participant.identity) - @room.listens_to("participant_disconnected") + @room.on("participant_disconnected") def on_participant_disconnected(participant: rtc.RemoteParticipant): logging.info("participant disconnected: %s %s", participant.sid, participant.identity) - @room.listens_to("local_track_published") + @room.on("local_track_published") def on_local_track_published(publication: rtc.LocalTrackPublication, track: Union[rtc.LocalAudioTrack, rtc.LocalVideoTrack]): logging.info("local track published: %s", publication.sid) - @room.listens_to("active_speakers_changed") + @room.on("active_speakers_changed") def on_active_speakers_changed(speakers: list[rtc.Participant]): logging.info("active speakers changed: %s", speakers) - @room.listens_to("local_track_unpublished") + @room.on("local_track_unpublished") def on_local_track_unpublished(publication: rtc.LocalTrackPublication): logging.info("local track unpublished: %s", publication.sid) - @room.listens_to("track_published") + @room.on("track_published") def on_track_published(publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): logging.info("track published: %s from participant %s (%s)", publication.sid, participant.sid, participant.identity) - @room.listens_to("track_unpublished") + @room.on("track_unpublished") def on_track_unpublished(publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): logging.info("track unpublished: %s", publication.sid) - @room.listens_to("track_subscribed") + @room.on("track_subscribed") def on_track_subscribed(track: rtc.Track, publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): @@ -59,57 +59,57 @@ def on_track_subscribed(track: rtc.Track, _audio_stream = rtc.AudioStream(track) # audio_stream is an async iterator that yields AudioFrame - @room.listens_to("track_unsubscribed") + @room.on("track_unsubscribed") def on_track_unsubscribed(track: rtc.Track, publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): logging.info("track unsubscribed: %s", publication.sid) - @room.listens_to("track_muted") + @room.on("track_muted") def on_track_muted(publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): logging.info("track muted: %s", publication.sid) - @room.listens_to("track_unmuted") + @room.on("track_unmuted") def on_track_unmuted(publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): logging.info("track unmuted: %s", publication.sid) - @room.listens_to("data_received") + @room.on("data_received") def on_data_received(data: bytes, kind: rtc.DataPacketKind, participant: rtc.Participant): logging.info("received data from %s: %s", participant.identity, data) - @room.listens_to("connection_quality_changed") + @room.on("connection_quality_changed") def on_connection_quality_changed(participant: rtc.Participant, quality: rtc.ConnectionQuality): logging.info("connection quality changed for %s", participant.identity) - @room.listens_to("track_subscription_failed") + @room.on("track_subscription_failed") def on_track_subscription_failed(participant: rtc.RemoteParticipant, track_sid: str, error: str): logging.info("track subscription failed: %s %s", participant.identity, error) - @room.listens_to("connection_state_changed") + @room.on("connection_state_changed") def on_connection_state_changed(state: rtc.ConnectionState): logging.info("connection state changed: %s", state) - @room.listens_to("connected") + @room.on("connected") def on_connected() -> None: logging.info("connected") - @room.listens_to("disconnected") + @room.on("disconnected") def on_disconnected() -> None: logging.info("disconnected") - @room.listens_to("reconnecting") + @room.on("reconnecting") def on_reconnecting() -> None: logging.info("reconnecting") - @room.listens_to("reconnected") + @room.on("reconnected") def on_reconnected() -> None: logging.info("reconnected") diff --git a/examples/publish_wave.py b/examples/publish_wave.py index a742a11a..58f56e4a 100644 --- a/examples/publish_wave.py +++ b/examples/publish_wave.py @@ -19,7 +19,7 @@ async def publish_frames(source: rtc.AudioSource, frequency: int): total_samples = 0 audio_frame = rtc.AudioFrame.create( SAMPLE_RATE, NUM_CHANNELS, samples_per_channel) - audio_data = np.ctypeslib.as_array(audio_frame.data) + audio_data = np.frombuffer(audio_frame.data, dtype=np.int16) while True: time = (total_samples + np.arange(samples_per_channel)) / SAMPLE_RATE sine_wave = (amplitude * np.sin(2 * np.pi * diff --git a/livekit-rtc/livekit/rtc/__init__.py b/livekit-rtc/livekit/rtc/__init__.py index 3c83ea74..d825217a 100644 --- a/livekit-rtc/livekit/rtc/__init__.py +++ b/livekit-rtc/livekit/rtc/__init__.py @@ -58,7 +58,7 @@ I420ABuffer, I420Buffer, I422Buffer, - NativeVideoFrameBuffer, + NativeVideoBuffer, NV12Buffer, PlanarYuv8Buffer, PlanarYuv16Buffer, diff --git a/livekit-rtc/livekit/rtc/_event_emitter.py b/livekit-rtc/livekit/rtc/_event_emitter.py index 49f8b1fe..265a6380 100644 --- a/livekit-rtc/livekit/rtc/_event_emitter.py +++ b/livekit-rtc/livekit/rtc/_event_emitter.py @@ -11,7 +11,7 @@ def emit(self, event: T, *args, **kwargs) -> None: for callback in self._events[event]: callback(*args, **kwargs) - def once(self, event: T, callback: Optional[Callable]) -> Callable: + def once(self, event: T, callback: Optional[Callable] = None) -> Callable: if callback is not None: def once_callback(*args, **kwargs): self.off(event, once_callback) @@ -23,7 +23,7 @@ def decorator(callback: Callable) -> Callable: return callback return decorator - def on(self, event: T, callback: Optional[Callable]) -> Callable: + def on(self, event: T, callback: Optional[Callable] = None) -> Callable: if callback is not None: if event not in self._events: self._events[event] = set() diff --git a/livekit-rtc/livekit/rtc/audio_frame.py b/livekit-rtc/livekit/rtc/audio_frame.py index b3ae79ed..da8f8f8c 100644 --- a/livekit-rtc/livekit/rtc/audio_frame.py +++ b/livekit-rtc/livekit/rtc/audio_frame.py @@ -70,7 +70,7 @@ def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame def _proto_info(self) -> proto_audio.AudioFrameBufferInfo: audio_info = proto_audio.AudioFrameBufferInfo() - audio_info.data_ptr = get_address(self._data) + audio_info.data_ptr = get_address(memoryview(self._data)) audio_info.sample_rate = self.sample_rate audio_info.num_channels = self.num_channels audio_info.samples_per_channel = self.samples_per_channel From b2dc10b9e911f87170bbcb438f7996637b6e077f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 11:31:24 -0700 Subject: [PATCH 13/26] working video sources --- examples/publish_hue.py | 8 +++++--- livekit-rtc/livekit/rtc/video_frame.py | 27 ++++++++++++++++--------- livekit-rtc/livekit/rtc/video_source.py | 2 +- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/examples/publish_hue.py b/examples/publish_hue.py index a59cae44..f11455b1 100644 --- a/examples/publish_hue.py +++ b/examples/publish_hue.py @@ -3,6 +3,8 @@ import logging from signal import SIGINT, SIGTERM +import cv2 + import numpy as np from livekit import rtc @@ -11,10 +13,9 @@ async def draw_color_cycle(source: rtc.VideoSource): - argb_frame = rtc.ArgbFrame( + argb_frame = rtc.ArgbFrame.create( rtc.VideoFormatType.FORMAT_ARGB, 1280, 720) - - arr = np.ctypeslib.as_array(argb_frame.data) + arr = np.frombuffer(argb_frame.data, dtype=np.uint8) framerate = 1 / 30 hue = 0.0 @@ -33,6 +34,7 @@ async def draw_color_cycle(source: rtc.VideoSource): frame = rtc.VideoFrame( 0, rtc.VideoRotation.VIDEO_ROTATION_0, argb_frame.to_i420()) + source.capture_frame(frame) hue = (hue + framerate / 3) % 1.0 diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index a4a08d35..c5a29498 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -132,7 +132,6 @@ def to_argb(self, dst: 'ArgbFrame') -> None: self.to_i420().to_argb(dst) - class PlanarYuvBuffer(VideoFrameBuffer, ABC): def __init__(self, data: bytearray, @@ -153,12 +152,14 @@ def __init__(self, def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: info = proto_video_frame.VideoFrameBufferInfo() - info.width = self._width - info.height = self._height - info.buffer_type = self._buffer_type - info.yuv.stride_y = self._stride_y - info.yuv.stride_u = self._stride_u - info.yuv.stride_v = self._stride_v + info.width = self.width + info.height = self.height + info.yuv.chroma_width = self.chroma_width + info.yuv.chroma_height = self.chroma_height + info.buffer_type = self.type + info.yuv.stride_y = self.stride_y + info.yuv.stride_u = self.stride_u + info.yuv.stride_v = self.stride_v return info @property @@ -193,8 +194,8 @@ def __init__(self, stride_v: int, chroma_width: int, chroma_height: int) -> None: - super().__init__(data, width, height, buffer_type, stride_u, - stride_y, stride_v, chroma_width, chroma_height) + super().__init__(data, width, height, buffer_type, stride_y, + stride_u, stride_v, chroma_width, chroma_height) def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: info = super()._proto_info() @@ -282,6 +283,8 @@ def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: info = proto_video_frame.VideoFrameBufferInfo() info.width = self._width info.height = self._height + info.bi_yuv.chroma_width = self.chroma_width + info.bi_yuv.chroma_height = self.chroma_height info.buffer_type = self._buffer_type info.bi_yuv.stride_y = self._stride_y info.bi_yuv.stride_uv = self._stride_uv @@ -575,8 +578,12 @@ def __init__(self, self._height = height self._stride = stride + @staticmethod + def create(format: VideoFormatType.ValueType, width: int, height: int) -> 'ArgbFrame': + data = bytearray(width * height * ctypes.sizeof(ctypes.c_uint32)) + return ArgbFrame(data, format, width, height) + def to_i420(self) -> I420Buffer: - # TODO(theomonnom): avoid unnecessary buffer allocation req = proto_ffi.FfiRequest() req.to_i420.argb.format = self.format req.to_i420.argb.width = self.width diff --git a/livekit-rtc/livekit/rtc/video_source.py b/livekit-rtc/livekit/rtc/video_source.py index 866ae2c1..d1364585 100644 --- a/livekit-rtc/livekit/rtc/video_source.py +++ b/livekit-rtc/livekit/rtc/video_source.py @@ -31,7 +31,7 @@ def __init__(self) -> None: def capture_frame(self, frame: VideoFrame) -> None: req = proto_ffi.FfiRequest() req.capture_video_frame.source_handle = self._ffi_handle.handle - req.capture_video_frame.buffer_handle = frame.buffer._ffi_handle.handle + req.capture_video_frame.info.CopyFrom(frame.buffer._proto_info()) req.capture_video_frame.frame.rotation = frame.rotation req.capture_video_frame.frame.timestamp_us = frame.timestamp_us ffi_client.request(req) From cca48cb63a7e8664c78dbc47b4c693eb9e07d0c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 12:33:57 -0700 Subject: [PATCH 14/26] fix segfault --- examples/face_landmark/face_landmark.py | 4 ++-- examples/publish_hue.py | 2 -- livekit-rtc/livekit/rtc/video_frame.py | 6 +++--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/face_landmark/face_landmark.py b/examples/face_landmark/face_landmark.py index 3843d014..05ee8d79 100644 --- a/examples/face_landmark/face_landmark.py +++ b/examples/face_landmark/face_landmark.py @@ -79,12 +79,12 @@ async def frame_loop(video_stream: rtc.VideoStream) -> None: if argb_frame is None or argb_frame.width != buffer.width \ or argb_frame.height != buffer.height: - argb_frame = rtc.ArgbFrame( + argb_frame = rtc.ArgbFrame.create( rtc.VideoFormatType.FORMAT_ABGR, buffer.width, buffer.height) buffer.to_argb(argb_frame) - arr = np.ctypeslib.as_array(argb_frame.data) + arr = np.frombuffer(argb_frame.data, dtype=np.uint8) arr = arr.reshape((argb_frame.height, argb_frame.width, 4)) arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB) diff --git a/examples/publish_hue.py b/examples/publish_hue.py index f11455b1..8543d35e 100644 --- a/examples/publish_hue.py +++ b/examples/publish_hue.py @@ -3,8 +3,6 @@ import logging from signal import SIGINT, SIGTERM -import cv2 - import numpy as np from livekit import rtc diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index c5a29498..8f79088a 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -73,7 +73,7 @@ def to_i420(self) -> 'I420Buffer': def to_argb(self, dst: 'ArgbFrame') -> None: req = proto_ffi.FfiRequest() req.to_argb.buffer.CopyFrom(self._proto_info()) - req.to_argb.dst_ptr = get_address(memoryview(self._data)) + req.to_argb.dst_ptr = get_address(memoryview(dst.data)) req.to_argb.dst_format = dst.format req.to_argb.dst_stride = dst.stride req.to_argb.dst_width = dst.width @@ -344,8 +344,8 @@ def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I4 stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v - cdata = (ctypes.c_uint8 * I420Buffer.calc_data_size(info.height, - stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + nbytes = I420Buffer.calc_data_size(info.height, stride_y, stride_u, stride_v) + cdata = (ctypes.c_uint8 * nbytes).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) return I420Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) From fdf862961ab756f107b1e91517472eaabd6d81f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 12:39:02 -0700 Subject: [PATCH 15/26] fix stuck tasks --- livekit-rtc/livekit/rtc/_ffi_client.py | 10 ++++++++-- livekit-rtc/livekit/rtc/video_frame.py | 7 ++++--- livekit-rtc/livekit/rtc/video_stream.py | 1 - 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/livekit-rtc/livekit/rtc/_ffi_client.py b/livekit-rtc/livekit/rtc/_ffi_client.py index 155af19e..181678dd 100644 --- a/livekit-rtc/livekit/rtc/_ffi_client.py +++ b/livekit-rtc/livekit/rtc/_ffi_client.py @@ -67,15 +67,21 @@ def get_ffi_lib_path(): class FfiHandle: def __init__(self, handle: int) -> None: self.handle = handle + self._disposed = False def __del__(self): self.dispose() + @property + def disposed(self) -> bool: + return self._disposed + def dispose(self) -> None: - if self.handle != INVALID_HANDLE: + if self.handle != INVALID_HANDLE and not self._disposed: + self._disposed = True assert ffi_lib.livekit_ffi_drop_handle( ctypes.c_uint64(self.handle)) - self.handle = INVALID_HANDLE + T = TypeVar('T') diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index 8f79088a..d5cff97b 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -308,11 +308,11 @@ def stride_y(self) -> int: def stride_uv(self) -> int: return self._stride_uv - @ property + @property def data_y(self) -> memoryview: return memoryview(self._data)[0:self._stride_y * self._height] - @ property + @property def data_uv(self) -> memoryview: return memoryview(self._data)[self._stride_y * self._height: self._stride_y * self._height + @@ -344,7 +344,8 @@ def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I4 stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v - nbytes = I420Buffer.calc_data_size(info.height, stride_y, stride_u, stride_v) + nbytes = I420Buffer.calc_data_size( + info.height, stride_y, stride_u, stride_v) cdata = (ctypes.c_uint8 * nbytes).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) diff --git a/livekit-rtc/livekit/rtc/video_stream.py b/livekit-rtc/livekit/rtc/video_stream.py index cd474a95..efd975d2 100644 --- a/livekit-rtc/livekit/rtc/video_stream.py +++ b/livekit-rtc/livekit/rtc/video_stream.py @@ -41,7 +41,6 @@ def __init__(self, track: Track, stream_info = resp.new_video_stream.stream self._ffi_handle = FfiHandle(stream_info.handle.id) self._info = stream_info.info - self._task = self._loop.create_task(self._run()) def __del__(self) -> None: From fbc38231c7210b43a8aa6d7f6e413a4907cd262a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 13:04:55 -0700 Subject: [PATCH 16/26] fix audio segfault --- examples/face_landmark/face_landmark.py | 2 +- examples/whisper/whisper.py | 7 ++++--- livekit-rtc/livekit/rtc/audio_frame.py | 11 ++++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/face_landmark/face_landmark.py b/examples/face_landmark/face_landmark.py index 05ee8d79..25ac3e2d 100644 --- a/examples/face_landmark/face_landmark.py +++ b/examples/face_landmark/face_landmark.py @@ -72,7 +72,7 @@ def draw_landmarks_on_image(rgb_image, detection_result): async def frame_loop(video_stream: rtc.VideoStream) -> None: landmarker = FaceLandmarker.create_from_options(options) argb_frame = None - cv2.namedWindow('rtc_video', cv2.WINDOW_AUTOSIZE) + cv2.namedWindow('livekit_video', cv2.WINDOW_AUTOSIZE) cv2.startWindowThread() async for frame in video_stream: buffer = frame.buffer diff --git a/examples/whisper/whisper.py b/examples/whisper/whisper.py index 17c5c203..e3c3e581 100644 --- a/examples/whisper/whisper.py +++ b/examples/whisper/whisper.py @@ -105,10 +105,11 @@ async def whisper_task(stream: rtc.AudioStream): async for frame in stream: # whisper requires 16kHz mono, so resample the data # also convert the samples from int16 to float32 + frame = frame.remix_and_resample( WHISPER_SAMPLE_RATE, 1) - data = np.array(frame.data, dtype=np.float32) / 32768.0 + data = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32) / 32768.0 # write the data inside data_30_secs at written_samples data_start = SAMPLES_KEEP + written_samples @@ -150,7 +151,7 @@ async def whisper_task(stream: rtc.AudioStream): async def main(room: rtc.Room): - @room.listens_to("track_published") + @room.on("track_published") def on_track_published(publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): # Only subscribe to the audio tracks coming from the microphone @@ -161,7 +162,7 @@ def on_track_published(publication: rtc.RemoteTrackPublication, publication.set_subscribed(True) - @room.listens_to("track_subscribed") + @room.on("track_subscribed") def on_track_subscribed(track: rtc.Track, publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant): diff --git a/livekit-rtc/livekit/rtc/audio_frame.py b/livekit-rtc/livekit/rtc/audio_frame.py index da8f8f8c..f49829e4 100644 --- a/livekit-rtc/livekit/rtc/audio_frame.py +++ b/livekit-rtc/livekit/rtc/audio_frame.py @@ -43,10 +43,11 @@ def create(sample_rate: int, num_channels: int, samples_per_channel: int) -> 'Au @staticmethod def _from_owned_info(owned_info: proto_audio.OwnedAudioFrameBuffer) -> 'AudioFrame': info = owned_info.info - size = info.num_channels * info.samples_per_channel * ctypes.sizeof(ctypes.c_int16) - data = (ctypes.c_int16 * size).from_address(info.data_ptr) + size = info.num_channels * info.samples_per_channel + cdata = (ctypes.c_int16 * size).from_address(info.data_ptr) + data = bytearray(cdata) FfiHandle(owned_info.handle.id) - return AudioFrame(bytearray(data), info.sample_rate, info.num_channels, info.samples_per_channel) + return AudioFrame(data, info.sample_rate, info.num_channels, info.samples_per_channel) def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame': """ Resample the audio frame to the given sample rate and number of channels.""" @@ -77,8 +78,8 @@ def _proto_info(self) -> proto_audio.AudioFrameBufferInfo: return audio_info @property - def data(self) -> bytearray: - return self._data + def data(self) -> memoryview: + return memoryview(self._data).cast('h') @property def sample_rate(self) -> int: From a61d2b1d0a42e27222ce9b77c5db4b9b172f5d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 13:20:01 -0700 Subject: [PATCH 17/26] nit --- livekit-rtc/livekit/rtc/participant.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/livekit-rtc/livekit/rtc/participant.py b/livekit-rtc/livekit/rtc/participant.py index 7ef4b2be..a0fddf3a 100644 --- a/livekit-rtc/livekit/rtc/participant.py +++ b/livekit-rtc/livekit/rtc/participant.py @@ -102,8 +102,8 @@ async def publish_data(self, req.publish_data.destination_sids.extend(sids) + queue = ffi_client.queue.subscribe() try: - queue = ffi_client.queue.subscribe() resp = ffi_client.request(req) cb = await queue.wait_for(lambda e: e.publish_data.async_id == resp.publish_data.async_id) @@ -118,10 +118,10 @@ async def update_metadata(self, metadata: str) -> None: req.update_local_metadata.local_participant_handle = self._ffi_handle.handle req.update_local_metadata.metadata = metadata + queue = ffi_client.queue.subscribe() try: - queue = ffi_client.queue.subscribe() resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.update_local_metadata.async_id == + await queue.wait_for(lambda e: e.update_local_metadata.async_id == resp.update_local_metadata.async_id) finally: ffi_client.queue.unsubscribe(queue) @@ -131,10 +131,10 @@ async def update_name(self, name: str) -> None: req.update_local_name.local_participant_handle = self._ffi_handle.handle req.update_local_name.name = name + queue = ffi_client.queue.subscribe() try: - queue = ffi_client.queue.subscribe() resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.update_local_name.async_id == + await queue.wait_for(lambda e: e.update_local_name.async_id == resp.update_local_name.async_id) finally: ffi_client.queue.unsubscribe(queue) @@ -151,8 +151,8 @@ async def publish_track(self, track: Track, options: TrackPublishOptions) \ req.publish_track.local_participant_handle = self._ffi_handle.handle req.publish_track.options.CopyFrom(options) + queue = ffi_client.queue.subscribe() try: - queue = self._room_queue.subscribe() resp = ffi_client.request(req) cb = await queue.wait_for(lambda e: e.publish_track.async_id == resp.publish_track.async_id) @@ -175,8 +175,8 @@ async def unpublish_track(self, track_sid: str) -> None: req.unpublish_track.local_participant_handle = self._ffi_handle.handle req.unpublish_track.track_sid = track_sid + queue = self._room_queue.subscribe() try: - queue = self._room_queue.subscribe() resp = ffi_client.request(req) cb = await queue.wait_for(lambda e: e.unpublish_track.async_id == resp.unpublish_track.async_id) From ba0952bbae2831598a1fede35d159bf16e1b132f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 13:20:45 -0700 Subject: [PATCH 18/26] Update participant.py --- livekit-rtc/livekit/rtc/participant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/livekit-rtc/livekit/rtc/participant.py b/livekit-rtc/livekit/rtc/participant.py index a0fddf3a..95866420 100644 --- a/livekit-rtc/livekit/rtc/participant.py +++ b/livekit-rtc/livekit/rtc/participant.py @@ -151,7 +151,7 @@ async def publish_track(self, track: Track, options: TrackPublishOptions) \ req.publish_track.local_participant_handle = self._ffi_handle.handle req.publish_track.options.CopyFrom(options) - queue = ffi_client.queue.subscribe() + queue = self._room_queue.subscribe() try: resp = ffi_client.request(req) cb = await queue.wait_for(lambda e: e.publish_track.async_id == From 6e8c479fa7c8b2e9d18f2fa859ef7482438056a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 15:59:17 -0700 Subject: [PATCH 19/26] use ruff, format & include pyi files --- .github/workflows/ruff.yml | 23 + examples/basic_room.py | 94 ++-- examples/e2ee.py | 103 ++-- examples/face_landmark/face_landmark.py | 69 +-- examples/publish_hue.py | 20 +- examples/publish_wave.py | 34 +- examples/whisper/whisper.py | 175 +++--- livekit-api/livekit/api/__init__.py | 1 - livekit-api/livekit/api/_service.py | 3 +- livekit-api/livekit/api/_twirp_client.py | 8 +- livekit-api/livekit/api/access_token.py | 44 +- livekit-api/livekit/api/room_service.py | 116 ++-- livekit-api/setup.py | 23 +- livekit-rtc/livekit/rtc/__init__.py | 6 +- livekit-rtc/livekit/rtc/_event_emitter.py | 11 +- livekit-rtc/livekit/rtc/_ffi_client.py | 35 +- livekit-rtc/livekit/rtc/_utils.py | 14 +- livekit-rtc/livekit/rtc/audio_frame.py | 43 +- livekit-rtc/livekit/rtc/audio_source.py | 9 +- livekit-rtc/livekit/rtc/audio_stream.py | 13 +- livekit-rtc/livekit/rtc/e2ee.py | 27 +- livekit-rtc/livekit/rtc/participant.py | 65 ++- livekit-rtc/livekit/rtc/room.py | 256 +++++---- livekit-rtc/livekit/rtc/track.py | 7 +- livekit-rtc/livekit/rtc/track_publication.py | 2 +- livekit-rtc/livekit/rtc/video_frame.py | 545 ++++++++++++------- livekit-rtc/livekit/rtc/video_source.py | 3 +- livekit-rtc/livekit/rtc/video_stream.py | 20 +- livekit-rtc/setup.py | 41 +- ruff.toml | 5 + 30 files changed, 1093 insertions(+), 722 deletions(-) create mode 100644 .github/workflows/ruff.yml create mode 100644 ruff.toml diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 00000000..74d987d8 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,23 @@ +name: Ruff - Checks + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.9" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + + - name: Ruff livekit-api + run: ruff check --output-format=github livekit-api/ + + - name: Ruff livekit-rtc + run: ruff check --output-format=github livekit-rtc/ diff --git a/examples/basic_room.py b/examples/basic_room.py index 9393ab86..3859f755 100644 --- a/examples/basic_room.py +++ b/examples/basic_room.py @@ -5,26 +5,28 @@ from livekit import rtc -URL = 'ws://localhost:7880' -TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY' # noqa +URL = "ws://localhost:7880" +TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY" # noqa async def main(room: rtc.Room) -> None: - @room.on("participant_connected") def on_participant_connected(participant: rtc.RemoteParticipant) -> None: logging.info( - "participant connected: %s %s", participant.sid, participant.identity) + "participant connected: %s %s", participant.sid, participant.identity + ) @room.on("participant_disconnected") def on_participant_disconnected(participant: rtc.RemoteParticipant): - logging.info("participant disconnected: %s %s", - participant.sid, participant.identity) + logging.info( + "participant disconnected: %s %s", participant.sid, participant.identity + ) @room.on("local_track_published") - def on_local_track_published(publication: rtc.LocalTrackPublication, - track: Union[rtc.LocalAudioTrack, - rtc.LocalVideoTrack]): + def on_local_track_published( + publication: rtc.LocalTrackPublication, + track: Union[rtc.LocalAudioTrack, rtc.LocalVideoTrack], + ): logging.info("local track published: %s", publication.sid) @room.on("active_speakers_changed") @@ -36,20 +38,28 @@ def on_local_track_unpublished(publication: rtc.LocalTrackPublication): logging.info("local track unpublished: %s", publication.sid) @room.on("track_published") - def on_track_published(publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): - logging.info("track published: %s from participant %s (%s)", - publication.sid, participant.sid, participant.identity) + def on_track_published( + publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant + ): + logging.info( + "track published: %s from participant %s (%s)", + publication.sid, + participant.sid, + participant.identity, + ) @room.on("track_unpublished") - def on_track_unpublished(publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_unpublished( + publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant + ): logging.info("track unpublished: %s", publication.sid) @room.on("track_subscribed") - def on_track_subscribed(track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_subscribed( + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: rtc.RemoteParticipant, + ): logging.info("track subscribed: %s", publication.sid) if track.kind == rtc.TrackKind.KIND_VIDEO: _video_stream = rtc.VideoStream(track) @@ -60,38 +70,42 @@ def on_track_subscribed(track: rtc.Track, # audio_stream is an async iterator that yields AudioFrame @room.on("track_unsubscribed") - def on_track_unsubscribed(track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_unsubscribed( + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: rtc.RemoteParticipant, + ): logging.info("track unsubscribed: %s", publication.sid) @room.on("track_muted") - def on_track_muted(publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_muted( + publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant + ): logging.info("track muted: %s", publication.sid) @room.on("track_unmuted") - def on_track_unmuted(publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_unmuted( + publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant + ): logging.info("track unmuted: %s", publication.sid) @room.on("data_received") - def on_data_received(data: bytes, - kind: rtc.DataPacketKind, - participant: rtc.Participant): + def on_data_received( + data: bytes, kind: rtc.DataPacketKind, participant: rtc.Participant + ): logging.info("received data from %s: %s", participant.identity, data) @room.on("connection_quality_changed") - def on_connection_quality_changed(participant: rtc.Participant, - quality: rtc.ConnectionQuality): + def on_connection_quality_changed( + participant: rtc.Participant, quality: rtc.ConnectionQuality + ): logging.info("connection quality changed for %s", participant.identity) @room.on("track_subscription_failed") - def on_track_subscription_failed(participant: rtc.RemoteParticipant, - track_sid: str, - error: str): - logging.info("track subscription failed: %s %s", - participant.identity, error) + def on_track_subscription_failed( + participant: rtc.RemoteParticipant, track_sid: str, error: str + ): + logging.info("track subscription failed: %s %s", participant.identity, error) @room.on("connection_state_changed") def on_connection_state_changed(state: rtc.ConnectionState): @@ -122,9 +136,10 @@ def on_reconnected() -> None: if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, handlers=[ - logging.FileHandler("basic_room.log"), - logging.StreamHandler()]) + logging.basicConfig( + level=logging.INFO, + handlers=[logging.FileHandler("basic_room.log"), logging.StreamHandler()], + ) loop = asyncio.get_event_loop() room = rtc.Room(loop=loop) @@ -135,8 +150,7 @@ async def cleanup(): asyncio.ensure_future(main(room)) for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler( - signal, lambda: asyncio.ensure_future(cleanup())) + loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup())) try: loop.run_forever() diff --git a/examples/e2ee.py b/examples/e2ee.py index 99403784..c8e86a4f 100644 --- a/examples/e2ee.py +++ b/examples/e2ee.py @@ -5,8 +5,8 @@ import numpy as np from livekit import rtc -URL = 'ws://localhost:7880' -TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY' # noqa +URL = "ws://localhost:7880" +TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY" # noqa # ("livekitrocks") this is our shared key, it must match the one used by your clients SHARED_KEY = b"liveitrocks" @@ -15,40 +15,78 @@ async def draw_cube(source: rtc.VideoSource): W, H, MID_W, MID_H = 1280, 720, 640, 360 cube_size = 60 - vertices = (np.array([[-1, -1, -1], [1, -1, -1], [1, 1, -1], [-1, 1, -1], - [-1, -1, 1], [1, -1, 1], [1, 1, 1], [-1, 1, 1]]) * cube_size) - edges = [[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], - [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]] - - frame = rtc.ArgbFrame(livekit.VideoFormatType.FORMAT_ARGB, W, H) + vertices = ( + np.array( + [ + [-1, -1, -1], + [1, -1, -1], + [1, 1, -1], + [-1, 1, -1], + [-1, -1, 1], + [1, -1, 1], + [1, 1, 1], + [-1, 1, 1], + ] + ) + * cube_size + ) + edges = [ + [0, 1], + [1, 2], + [2, 3], + [3, 0], + [4, 5], + [5, 6], + [6, 7], + [7, 4], + [0, 4], + [1, 5], + [2, 6], + [3, 7], + ] + + frame = rtc.ArgbFrame(rtc.VideoFormatType.FORMAT_ARGB, W, H) arr = np.ctypeslib.as_array(frame.data) angle = 0 while True: start_time = asyncio.get_event_loop().time() arr.fill(0) - rot = np.dot(np.array([[1, 0, 0], - [0, np.cos(angle), -np.sin(angle)], - [0, np.sin(angle), np.cos(angle)]]), - np.array([[np.cos(angle), 0, np.sin(angle)], - [0, 1, 0], - [-np.sin(angle), 0, np.cos(angle)]])) - proj_points = [[int(pt[0] / (pt[2] / 200 + 1)), int(pt[1] / (pt[2] / 200 + 1))] - for pt in np.dot(vertices, rot)] + rot = np.dot( + np.array( + [ + [1, 0, 0], + [0, np.cos(angle), -np.sin(angle)], + [0, np.sin(angle), np.cos(angle)], + ] + ), + np.array( + [ + [np.cos(angle), 0, np.sin(angle)], + [0, 1, 0], + [-np.sin(angle), 0, np.cos(angle)], + ] + ), + ) + proj_points = [ + [int(pt[0] / (pt[2] / 200 + 1)), int(pt[1] / (pt[2] / 200 + 1))] + for pt in np.dot(vertices, rot) + ] for e in edges: x1, y1, x2, y2 = *proj_points[e[0]], *proj_points[e[1]] for t in np.linspace(0, 1, 100): - x, y = int(MID_W + (1 - t) * x1 + t * - x2), int(MID_H + (1 - t) * y1 + t * y2) + x, y = ( + int(MID_W + (1 - t) * x1 + t * x2), + int(MID_H + (1 - t) * y1 + t * y2), + ) for dx in [-1, 0, 1]: for dy in [-1, 0, 1]: if 0 <= x + dx < W and 0 <= y + dy < H: idx = (y + dy) * W * 4 + (x + dx) * 4 - arr[idx:idx+4] = [255, 255, 255, 255] + arr[idx : idx + 4] = [255, 255, 255, 255] - f = rtc.VideoFrame( - 0, rtc.VideoRotation.VIDEO_ROTATION_0, frame.to_i420()) + f = rtc.VideoFrame(0, rtc.VideoRotation.VIDEO_ROTATION_0, frame.to_i420()) source.capture_frame(f) angle += 0.02 @@ -58,8 +96,9 @@ async def draw_cube(source: rtc.VideoSource): async def main(room: rtc.Room): @room.listens_to("e2ee_state_changed") - def on_e2ee_state_changed(participant: rtc.Participant, - state: rtc.EncryptionState) -> None: + def on_e2ee_state_changed( + participant: rtc.Participant, state: rtc.EncryptionState + ) -> None: logging.info("e2ee state changed: %s %s", participant.identity, state) logging.info("connecting to %s", URL) @@ -67,10 +106,9 @@ def on_e2ee_state_changed(participant: rtc.Participant, e2ee_options = rtc.E2EEOptions() e2ee_options.key_provider_options.shared_key = SHARED_KEY - await room.connect(URL, TOKEN, options=rtc.RoomOptions( - auto_subscribe=True, - e2ee=e2ee_options - )) + await room.connect( + URL, TOKEN, options=rtc.RoomOptions(auto_subscribe=True, e2ee=e2ee_options) + ) logging.info("connected to room %s", room.name) except rtc.ConnectError as e: @@ -87,10 +125,12 @@ def on_e2ee_state_changed(participant: rtc.Participant, asyncio.ensure_future(draw_cube(source)) + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, handlers=[ - logging.FileHandler("e2ee.log"), - logging.StreamHandler()]) + logging.basicConfig( + level=logging.INFO, + handlers=[logging.FileHandler("e2ee.log"), logging.StreamHandler()], + ) loop = asyncio.get_event_loop() room = rtc.Room(loop=loop) @@ -101,8 +141,7 @@ async def cleanup(): asyncio.ensure_future(main(room)) for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler( - signal, lambda: asyncio.ensure_future(cleanup())) + loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup())) try: loop.run_forever() diff --git a/examples/face_landmark/face_landmark.py b/examples/face_landmark/face_landmark.py index 25ac3e2d..4a209e74 100644 --- a/examples/face_landmark/face_landmark.py +++ b/examples/face_landmark/face_landmark.py @@ -11,14 +11,14 @@ from livekit import rtc -URL = 'ws://localhost:7880' -TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY' # noqa +URL = "ws://localhost:7880" +TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY" # noqa tasks = set() # You can download a face landmark model file from https://developers.google.com/mediapipe/solutions/vision/face_landmarker#models -model_file = 'face_landmarker.task' -model_path = os.path.dirname(os.path.realpath(__file__)) + '/' + model_file +model_file = "face_landmarker.task" +model_path = os.path.dirname(os.path.realpath(__file__)) + "/" + model_file BaseOptions = mp.tasks.BaseOptions FaceLandmarker = mp.tasks.vision.FaceLandmarker @@ -27,7 +27,8 @@ options = FaceLandmarkerOptions( base_options=BaseOptions(model_asset_path=model_path), - running_mode=VisionRunningMode.VIDEO) + running_mode=VisionRunningMode.VIDEO, +) # from https://github.com/googlesamples/mediapipe/blob/main/examples/face_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Face_Landmarker.ipynb @@ -37,50 +38,56 @@ def draw_landmarks_on_image(rgb_image, detection_result): # Loop through the detected faces to visualize. for face_landmarks in face_landmarks_list: - # Draw the face landmarks. face_landmarks_proto = landmark_pb2.NormalizedLandmarkList() - face_landmarks_proto.landmark.extend([ - landmark_pb2.NormalizedLandmark( - x=landmark.x, y=landmark.y, z=landmark.z) - for landmark in face_landmarks - ]) + face_landmarks_proto.landmark.extend( + [ + landmark_pb2.NormalizedLandmark( + x=landmark.x, y=landmark.y, z=landmark.z + ) + for landmark in face_landmarks + ] + ) solutions.drawing_utils.draw_landmarks( image=rgb_image, landmark_list=face_landmarks_proto, connections=mp.solutions.face_mesh.FACEMESH_TESSELATION, landmark_drawing_spec=None, - connection_drawing_spec=mp.solutions.drawing_styles - .get_default_face_mesh_tesselation_style()) + connection_drawing_spec=mp.solutions.drawing_styles.get_default_face_mesh_tesselation_style(), + ) solutions.drawing_utils.draw_landmarks( image=rgb_image, landmark_list=face_landmarks_proto, connections=mp.solutions.face_mesh.FACEMESH_CONTOURS, landmark_drawing_spec=None, - connection_drawing_spec=mp.solutions.drawing_styles - .get_default_face_mesh_contours_style()) + connection_drawing_spec=mp.solutions.drawing_styles.get_default_face_mesh_contours_style(), + ) solutions.drawing_utils.draw_landmarks( image=rgb_image, landmark_list=face_landmarks_proto, connections=mp.solutions.face_mesh.FACEMESH_IRISES, landmark_drawing_spec=None, - connection_drawing_spec=mp.solutions.drawing_styles - .get_default_face_mesh_iris_connections_style()) + connection_drawing_spec=mp.solutions.drawing_styles.get_default_face_mesh_iris_connections_style(), + ) async def frame_loop(video_stream: rtc.VideoStream) -> None: landmarker = FaceLandmarker.create_from_options(options) argb_frame = None - cv2.namedWindow('livekit_video', cv2.WINDOW_AUTOSIZE) + cv2.namedWindow("livekit_video", cv2.WINDOW_AUTOSIZE) cv2.startWindowThread() async for frame in video_stream: buffer = frame.buffer - if argb_frame is None or argb_frame.width != buffer.width \ - or argb_frame.height != buffer.height: + if ( + argb_frame is None + or argb_frame.width != buffer.width + or argb_frame.height != buffer.height + ): argb_frame = rtc.ArgbFrame.create( - rtc.VideoFormatType.FORMAT_ABGR, buffer.width, buffer.height) + rtc.VideoFormatType.FORMAT_ABGR, buffer.width, buffer.height + ) buffer.to_argb(argb_frame) @@ -88,18 +95,16 @@ async def frame_loop(video_stream: rtc.VideoStream) -> None: arr = arr.reshape((argb_frame.height, argb_frame.width, 4)) arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB) - mp_image = mp.Image( - image_format=mp.ImageFormat.SRGB, data=arr) + mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=arr) - detection_result = landmarker.detect_for_video( - mp_image, frame.timestamp_us) + detection_result = landmarker.detect_for_video(mp_image, frame.timestamp_us) draw_landmarks_on_image(arr, detection_result) arr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR) - cv2.imshow('livekit_video', arr) - if cv2.waitKey(1) & 0xFF == ord('q'): + cv2.imshow("livekit_video", arr) + if cv2.waitKey(1) & 0xFF == ord("q"): break landmarker.close() @@ -128,9 +133,10 @@ def on_track_subscribed(track: rtc.Track, *_): if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, handlers=[ - logging.FileHandler("face_landmark.log"), - logging.StreamHandler()]) + logging.basicConfig( + level=logging.INFO, + handlers=[logging.FileHandler("face_landmark.log"), logging.StreamHandler()], + ) loop = asyncio.get_event_loop() room = rtc.Room(loop=loop) @@ -141,8 +147,7 @@ async def cleanup(): asyncio.ensure_future(main(room)) for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler( - signal, lambda: asyncio.ensure_future(cleanup())) + loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup())) try: loop.run_forever() diff --git a/examples/publish_hue.py b/examples/publish_hue.py index 8543d35e..01d43565 100644 --- a/examples/publish_hue.py +++ b/examples/publish_hue.py @@ -6,13 +6,12 @@ import numpy as np from livekit import rtc -URL = 'ws://localhost:7880' -TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY' # noqa +URL = "ws://localhost:7880" +TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY" # noqa async def draw_color_cycle(source: rtc.VideoSource): - argb_frame = rtc.ArgbFrame.create( - rtc.VideoFormatType.FORMAT_ARGB, 1280, 720) + argb_frame = rtc.ArgbFrame.create(rtc.VideoFormatType.FORMAT_ARGB, 1280, 720) arr = np.frombuffer(argb_frame.data, dtype=np.uint8) framerate = 1 / 30 @@ -31,7 +30,8 @@ async def draw_color_cycle(source: rtc.VideoSource): arr.flat[3::4] = argb_color[3] frame = rtc.VideoFrame( - 0, rtc.VideoRotation.VIDEO_ROTATION_0, argb_frame.to_i420()) + 0, rtc.VideoRotation.VIDEO_ROTATION_0, argb_frame.to_i420() + ) source.capture_frame(frame) hue = (hue + framerate / 3) % 1.0 @@ -61,9 +61,10 @@ async def main(room: rtc.Room): if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, handlers=[ - logging.FileHandler("publish_hue.log"), - logging.StreamHandler()]) + logging.basicConfig( + level=logging.INFO, + handlers=[logging.FileHandler("publish_hue.log"), logging.StreamHandler()], + ) loop = asyncio.get_event_loop() room = rtc.Room(loop=loop) @@ -74,8 +75,7 @@ async def cleanup(): asyncio.ensure_future(main(room)) for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler( - signal, lambda: asyncio.ensure_future(cleanup())) + loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup())) try: loop.run_forever() diff --git a/examples/publish_wave.py b/examples/publish_wave.py index 58f56e4a..10e4c928 100644 --- a/examples/publish_wave.py +++ b/examples/publish_wave.py @@ -1,44 +1,46 @@ import asyncio -import time import logging from signal import SIGINT, SIGTERM import numpy as np from livekit import rtc -URL = 'ws://localhost:7880' -TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY' # noqa +URL = "ws://localhost:7880" +TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY" # noqa SAMPLE_RATE = 48000 NUM_CHANNELS = 1 + async def publish_frames(source: rtc.AudioSource, frequency: int): amplitude = 32767 # for 16-bit audio samples_per_channel = 480 # 10ms at 48kHz time = np.arange(samples_per_channel) / SAMPLE_RATE total_samples = 0 - audio_frame = rtc.AudioFrame.create( - SAMPLE_RATE, NUM_CHANNELS, samples_per_channel) + audio_frame = rtc.AudioFrame.create(SAMPLE_RATE, NUM_CHANNELS, samples_per_channel) audio_data = np.frombuffer(audio_frame.data, dtype=np.int16) while True: time = (total_samples + np.arange(samples_per_channel)) / SAMPLE_RATE - sine_wave = (amplitude * np.sin(2 * np.pi * - frequency * time)).astype(np.int16) + sine_wave = (amplitude * np.sin(2 * np.pi * frequency * time)).astype(np.int16) np.copyto(audio_data, sine_wave) await source.capture_frame(audio_frame) total_samples += samples_per_channel -async def main(room: rtc.Room) -> None: +async def main(room: rtc.Room) -> None: @room.on("participant_disconnected") def on_participant_disconnect(participant: rtc.Participant, *_): logging.info("participant disconnected: %s", participant.identity) logging.info("connecting to %s", URL) try: - await room.connect(URL, TOKEN, options=rtc.RoomOptions( - auto_subscribe=True, - )) + await room.connect( + URL, + TOKEN, + options=rtc.RoomOptions( + auto_subscribe=True, + ), + ) logging.info("connected to room %s", room.name) except rtc.ConnectError as e: logging.error("failed to connect to the room: %s", e) @@ -56,9 +58,10 @@ def on_participant_disconnect(participant: rtc.Participant, *_): if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, handlers=[ - logging.FileHandler("publish_wave.log"), - logging.StreamHandler()]) + logging.basicConfig( + level=logging.INFO, + handlers=[logging.FileHandler("publish_wave.log"), logging.StreamHandler()], + ) loop = asyncio.get_event_loop() room = rtc.Room(loop=loop) @@ -69,8 +72,7 @@ async def cleanup(): asyncio.ensure_future(main(room)) for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler( - signal, lambda: asyncio.ensure_future(cleanup())) + loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup())) try: loop.run_forever() diff --git a/examples/whisper/whisper.py b/examples/whisper/whisper.py index e3c3e581..ced42b02 100644 --- a/examples/whisper/whisper.py +++ b/examples/whisper/whisper.py @@ -10,18 +10,18 @@ os = platform.system().lower() if os == "windows": - lib_file = 'whisper.dll' + lib_file = "whisper.dll" elif os == "darwin": - lib_file = 'libwhisper.dylib' + lib_file = "libwhisper.dylib" else: - lib_file = 'libwhisper.so' + lib_file = "libwhisper.so" whisper_dir = pathlib.Path(__file__).parent.absolute() / "whisper.cpp" libname = str(whisper_dir / lib_file) fname_model = str(whisper_dir / "models/ggml-tiny.en.bin") -URL = 'ws://localhost:7880' -TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY' # noqa +URL = "ws://localhost:7880" +TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE5MDY2MTMyODgsImlzcyI6IkFQSVRzRWZpZFpqclFvWSIsIm5hbWUiOiJuYXRpdmUiLCJuYmYiOjE2NzI2MTMyODgsInN1YiI6Im5hdGl2ZSIsInZpZGVvIjp7InJvb20iOiJ0ZXN0Iiwicm9vbUFkbWluIjp0cnVlLCJyb29tQ3JlYXRlIjp0cnVlLCJyb29tSm9pbiI6dHJ1ZSwicm9vbUxpc3QiOnRydWV9fQ.uSNIangMRu8jZD5mnRYoCHjcsQWCrJXgHCs0aNIgBFY" # noqa # declare the Whisper C API (Only what we need, keep things simple) @@ -36,52 +36,52 @@ class WhisperSamplingStrategy(ctypes.c_int): class WhisperFullParams(ctypes.Structure): _fields_ = [ - ('strategy', ctypes.c_int), - ('n_threads', ctypes.c_int), - ('n_max_text_ctx', ctypes.c_int), - ('offset_ms', ctypes.c_int), - ('duration_ms', ctypes.c_int), - ('translate', ctypes.c_bool), - ('no_context', ctypes.c_bool), - ('single_segment', ctypes.c_bool), - ('print_special', ctypes.c_bool), - ('print_progress', ctypes.c_bool), - ('print_realtime', ctypes.c_bool), - ('print_timestamps', ctypes.c_bool), - ('token_timestamps', ctypes.c_bool), - ('thold_pt', ctypes.c_float), - ('thold_ptsum', ctypes.c_float), - ('max_len', ctypes.c_int), - ('split_on_word', ctypes.c_bool), - ('max_tokens', ctypes.c_int), - ('speed_up', ctypes. c_bool), - ('audio_ctx', ctypes. c_int), - ('tdrz_enable', ctypes. c_bool), - ('initial_prompt', ctypes.c_char_p), - ('prompt_tokens', ctypes.c_void_p), - ('prompt_n_tokens', ctypes.c_int), - ('language', ctypes.c_char_p), - ('detect_language', ctypes.c_bool), - ('suppress_blank', ctypes.c_bool), - ('suppress_non_speech_tokens', ctypes.c_bool), - ('temperature', ctypes.c_float), - ('max_initial_ts', ctypes.c_float), - ('length_penalty', ctypes.c_float), - ('temperature_inc', ctypes. c_float), - ('entropy_thold', ctypes. c_float), - ('logprob_thold', ctypes. c_float), - ('no_speech_thold', ctypes. c_float), - ('greedy', ctypes.c_int), - ('beam_size', ctypes.c_int), - ('patience', ctypes.c_float), - ('new_segment_callback', ctypes.c_void_p), - ('new_segment_callback_user_data', ctypes.c_void_p), - ('progress_callback', ctypes.c_void_p), - ('progress_callback_user_data', ctypes.c_void_p), - ('encoder_begin_callback', ctypes.c_void_p), - ('encoder_begin_callback_user_data', ctypes.c_void_p), - ('logits_filter_callback', ctypes.c_void_p), - ('logits_filter_callback_user_data', ctypes.c_void_p), + ("strategy", ctypes.c_int), + ("n_threads", ctypes.c_int), + ("n_max_text_ctx", ctypes.c_int), + ("offset_ms", ctypes.c_int), + ("duration_ms", ctypes.c_int), + ("translate", ctypes.c_bool), + ("no_context", ctypes.c_bool), + ("single_segment", ctypes.c_bool), + ("print_special", ctypes.c_bool), + ("print_progress", ctypes.c_bool), + ("print_realtime", ctypes.c_bool), + ("print_timestamps", ctypes.c_bool), + ("token_timestamps", ctypes.c_bool), + ("thold_pt", ctypes.c_float), + ("thold_ptsum", ctypes.c_float), + ("max_len", ctypes.c_int), + ("split_on_word", ctypes.c_bool), + ("max_tokens", ctypes.c_int), + ("speed_up", ctypes.c_bool), + ("audio_ctx", ctypes.c_int), + ("tdrz_enable", ctypes.c_bool), + ("initial_prompt", ctypes.c_char_p), + ("prompt_tokens", ctypes.c_void_p), + ("prompt_n_tokens", ctypes.c_int), + ("language", ctypes.c_char_p), + ("detect_language", ctypes.c_bool), + ("suppress_blank", ctypes.c_bool), + ("suppress_non_speech_tokens", ctypes.c_bool), + ("temperature", ctypes.c_float), + ("max_initial_ts", ctypes.c_float), + ("length_penalty", ctypes.c_float), + ("temperature_inc", ctypes.c_float), + ("entropy_thold", ctypes.c_float), + ("logprob_thold", ctypes.c_float), + ("no_speech_thold", ctypes.c_float), + ("greedy", ctypes.c_int), + ("beam_size", ctypes.c_int), + ("patience", ctypes.c_float), + ("new_segment_callback", ctypes.c_void_p), + ("new_segment_callback_user_data", ctypes.c_void_p), + ("progress_callback", ctypes.c_void_p), + ("progress_callback_user_data", ctypes.c_void_p), + ("encoder_begin_callback", ctypes.c_void_p), + ("encoder_begin_callback_user_data", ctypes.c_void_p), + ("logits_filter_callback", ctypes.c_void_p), + ("logits_filter_callback_user_data", ctypes.c_void_p), ] @@ -95,7 +95,7 @@ class WhisperFullParams(ctypes.Structure): whisper.whisper_init_from_file.restype = ctypes.c_void_p whisper.whisper_full_default_params.restype = WhisperFullParams whisper.whisper_full_get_segment_text.restype = ctypes.c_char_p -ctx = whisper.whisper_init_from_file(fname_model.encode('utf-8')) +ctx = whisper.whisper_init_from_file(fname_model.encode("utf-8")) async def whisper_task(stream: rtc.AudioStream): @@ -106,29 +106,27 @@ async def whisper_task(stream: rtc.AudioStream): # whisper requires 16kHz mono, so resample the data # also convert the samples from int16 to float32 - frame = frame.remix_and_resample( - WHISPER_SAMPLE_RATE, 1) + frame = frame.remix_and_resample(WHISPER_SAMPLE_RATE, 1) data = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32) / 32768.0 # write the data inside data_30_secs at written_samples data_start = SAMPLES_KEEP + written_samples - data_30_secs[data_start:data_start+len(data)] = data + data_30_secs[data_start : data_start + len(data)] = data written_samples += len(data) if written_samples >= SAMPLES_STEP: params = whisper.whisper_full_default_params( - WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) + WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY + ) params.print_realtime = False params.print_progress = False ctx_ptr = ctypes.c_void_p(ctx) - data_ptr = data_30_secs.ctypes.data_as( - ctypes.POINTER(ctypes.c_float)) - res = whisper.whisper_full(ctx_ptr, - params, - data_ptr, - written_samples + SAMPLES_KEEP) + data_ptr = data_30_secs.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) + res = whisper.whisper_full( + ctx_ptr, params, data_ptr, written_samples + SAMPLES_KEEP + ) if res != 0: logging.error("error while running inference: %s", res) @@ -141,31 +139,42 @@ async def whisper_task(stream: rtc.AudioStream): txt = whisper.whisper_full_get_segment_text(ctx_ptr, i) logging.info( - f"{t0/1000.0:.3f} - {t1/1000.0:.3f} : {txt.decode('utf-8')}") + f"{t0/1000.0:.3f} - {t1/1000.0:.3f} : {txt.decode('utf-8')}" + ) # write old data to the beginning of the buffer (SAMPLES_KEEP) - data_30_secs[:SAMPLES_KEEP] = data_30_secs[data_start + - written_samples - SAMPLES_KEEP: - data_start + written_samples] + data_30_secs[:SAMPLES_KEEP] = data_30_secs[ + data_start + written_samples - SAMPLES_KEEP : data_start + + written_samples + ] written_samples = 0 async def main(room: rtc.Room): @room.on("track_published") - def on_track_published(publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_published( + publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant + ): # Only subscribe to the audio tracks coming from the microphone - if publication.kind == rtc.TrackKind.KIND_AUDIO \ - and publication.source == rtc.TrackSource.SOURCE_MICROPHONE: - logging.info("track published: %s from participant %s (%s), subscribing...", - publication.sid, participant.sid, participant.identity) + if ( + publication.kind == rtc.TrackKind.KIND_AUDIO + and publication.source == rtc.TrackSource.SOURCE_MICROPHONE + ): + logging.info( + "track published: %s from participant %s (%s), subscribing...", + publication.sid, + participant.sid, + participant.identity, + ) publication.set_subscribed(True) @room.on("track_subscribed") - def on_track_subscribed(track: rtc.Track, - publication: rtc.RemoteTrackPublication, - participant: rtc.RemoteParticipant): + def on_track_subscribed( + track: rtc.Track, + publication: rtc.RemoteTrackPublication, + participant: rtc.RemoteParticipant, + ): logging.info("starting listening to: %s", participant.identity) audio_stream = rtc.AudioStream(track) asyncio.create_task(whisper_task(audio_stream)) @@ -176,15 +185,18 @@ def on_track_subscribed(track: rtc.Track, # check if there are already published audio tracks for participant in room.participants.values(): for track in participant.tracks.values(): - if track.kind == rtc.TrackKind.KIND_AUDIO \ - and track.source == rtc.TrackSource.SOURCE_MICROPHONE: + if ( + track.kind == rtc.TrackKind.KIND_AUDIO + and track.source == rtc.TrackSource.SOURCE_MICROPHONE + ): track.set_subscribed(True) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, handlers=[ - logging.FileHandler("whisper.log"), - logging.StreamHandler()]) + logging.basicConfig( + level=logging.INFO, + handlers=[logging.FileHandler("whisper.log"), logging.StreamHandler()], + ) loop = asyncio.get_event_loop() room = rtc.Room(loop=loop) @@ -195,8 +207,7 @@ async def cleanup(): asyncio.ensure_future(main(room)) for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler( - signal, lambda: asyncio.ensure_future(cleanup())) + loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup())) try: loop.run_forever() diff --git a/livekit-api/livekit/api/__init__.py b/livekit-api/livekit/api/__init__.py index 82da6b59..7c9c3673 100644 --- a/livekit-api/livekit/api/__init__.py +++ b/livekit-api/livekit/api/__init__.py @@ -20,4 +20,3 @@ from .version import __version__ from .access_token import VideoGrants, AccessToken from .room_service import RoomService - diff --git a/livekit-api/livekit/api/_service.py b/livekit-api/livekit/api/_service.py index 24b3033e..b845cedd 100644 --- a/livekit-api/livekit/api/_service.py +++ b/livekit-api/livekit/api/_service.py @@ -13,8 +13,7 @@ def __init__(self, host: str, api_key: str, api_secret: str): self.api_secret = api_secret def _auth_header(self, grants: VideoGrants) -> Dict[str, str]: - token = AccessToken( - self.api_key, self.api_secret).with_grants(grants).to_jwt() + token = AccessToken(self.api_key, self.api_secret).with_grants(grants).to_jwt() headers = {} headers[AUTHORIZATION] = "Bearer {}".format(token) diff --git a/livekit-api/livekit/api/_twirp_client.py b/livekit-api/livekit/api/_twirp_client.py index e97a46ec..9182afe0 100644 --- a/livekit-api/livekit/api/_twirp_client.py +++ b/livekit-api/livekit/api/_twirp_client.py @@ -47,7 +47,7 @@ class TwirpErrorCode: DATA_LOSS = "dataloss" -T = TypeVar('T', bound=Message) +T = TypeVar("T", bound=Message) class TwirpClient: @@ -69,9 +69,9 @@ async def request( headers["Content-Type"] = "application/protobuf" serialized_data = data.SerializeToString() - async with self.session.post(url, - headers=headers, - data=serialized_data) as resp: + async with self.session.post( + url, headers=headers, data=serialized_data + ) as resp: if resp.status == 200: return response_class.FromString(await resp.read()) else: diff --git a/livekit-api/livekit/api/access_token.py b/livekit-api/livekit/api/access_token.py index d988ce4d..9a524d05 100644 --- a/livekit-api/livekit/api/access_token.py +++ b/livekit-api/livekit/api/access_token.py @@ -74,52 +74,54 @@ def __init__(self, api_key: str, api_secret: str) -> None: self.identity = "" # sub self.ttl = DEFAULT_TTL # exp - def with_ttl(self, ttl: datetime.timedelta) -> 'AccessToken': + def with_ttl(self, ttl: datetime.timedelta) -> "AccessToken": self.ttl = ttl return self - def with_grants(self, grants: VideoGrants) -> 'AccessToken': + def with_grants(self, grants: VideoGrants) -> "AccessToken": self.claims.video = grants return self - def with_identity(self, identity: str) -> 'AccessToken': + def with_identity(self, identity: str) -> "AccessToken": self.identity = identity return self - def with_name(self, name: str) -> 'AccessToken': + def with_name(self, name: str) -> "AccessToken": self.claims.name = name return self - def with_metadata(self, metadata: str) -> 'AccessToken': + def with_metadata(self, metadata: str) -> "AccessToken": self.claims.metadata = metadata return self - def with_sha256(self, sha256: str) -> 'AccessToken': + def with_sha256(self, sha256: str) -> "AccessToken": self.claims.sha256 = sha256 return self def to_jwt(self) -> str: - def camel_case_dict(data) -> dict: return { "".join( - word if i == 0 else word.title() for i, word in enumerate(key.split("_")) + word if i == 0 else word.title() + for i, word in enumerate(key.split("_")) ): value for key, value in data if value is not None } claims = dataclasses.asdict(self.claims) - claims.update({ - 'sub': self.identity, - "iss": self.api_key, - "nbf": calendar.timegm(datetime.datetime.utcnow().utctimetuple()), - "exp": calendar.timegm( - (datetime.datetime.utcnow() + self.ttl).utctimetuple() - ), - "video": dataclasses.asdict( - self.claims.video, dict_factory=camel_case_dict - ), - }) - - return jwt.encode(claims, self.api_secret, algorithm='HS256') + claims.update( + { + "sub": self.identity, + "iss": self.api_key, + "nbf": calendar.timegm(datetime.datetime.utcnow().utctimetuple()), + "exp": calendar.timegm( + (datetime.datetime.utcnow() + self.ttl).utctimetuple() + ), + "video": dataclasses.asdict( + self.claims.video, dict_factory=camel_case_dict + ), + } + ) + + return jwt.encode(claims, self.api_secret, algorithm="HS256") diff --git a/livekit-api/livekit/api/room_service.py b/livekit-api/livekit/api/room_service.py index 46c478b3..195c614f 100644 --- a/livekit-api/livekit/api/room_service.py +++ b/livekit-api/livekit/api/room_service.py @@ -10,55 +10,79 @@ class RoomService(Service): def __init__(self, host: str, api_key: str, api_secret: str): super().__init__(host, api_key, api_secret) - async def create_room(self, create: proto_room.CreateRoomRequest) \ - -> proto_models.Room: - return await self._client.request(SVC, "CreateRoom", create, - self._auth_header( - VideoGrants(room_create=True)), - proto_models.Room) + async def create_room( + self, create: proto_room.CreateRoomRequest + ) -> proto_models.Room: + return await self._client.request( + SVC, + "CreateRoom", + create, + self._auth_header(VideoGrants(room_create=True)), + proto_models.Room, + ) - async def list_rooms(self, list: proto_room.ListRoomsRequest) \ - -> proto_room.ListRoomsResponse: - return await self._client.request(SVC, "ListRooms", list, - self._auth_header( - VideoGrants(room_list=True)), - proto_room.ListRoomsResponse) + async def list_rooms( + self, list: proto_room.ListRoomsRequest + ) -> proto_room.ListRoomsResponse: + return await self._client.request( + SVC, + "ListRooms", + list, + self._auth_header(VideoGrants(room_list=True)), + proto_room.ListRoomsResponse, + ) - async def delete_room(self, delete: proto_room.DeleteRoomRequest) \ - -> proto_room.DeleteRoomResponse: - return await self._client.request(SVC, "DeleteRoom", delete, - self._auth_header( - VideoGrants(room_create=True)), - proto_room.DeleteRoomResponse) + async def delete_room( + self, delete: proto_room.DeleteRoomRequest + ) -> proto_room.DeleteRoomResponse: + return await self._client.request( + SVC, + "DeleteRoom", + delete, + self._auth_header(VideoGrants(room_create=True)), + proto_room.DeleteRoomResponse, + ) - async def update_room_metadata(self, update: proto_room.UpdateRoomMetadataRequest) \ - -> proto_models.Room: - return await self._client.request(SVC, "UpdateRoomMetadata", update, - self._auth_header( - VideoGrants(room_admin=True, - room=update.room)), - proto_models.Room) + async def update_room_metadata( + self, update: proto_room.UpdateRoomMetadataRequest + ) -> proto_models.Room: + return await self._client.request( + SVC, + "UpdateRoomMetadata", + update, + self._auth_header(VideoGrants(room_admin=True, room=update.room)), + proto_models.Room, + ) - async def list_participants(self, list: proto_room.ListParticipantsRequest) \ - -> proto_room.ListParticipantsResponse: - return await self._client.request(SVC, "ListParticipants", list, - self._auth_header( - VideoGrants(room_admin=True, - room=list.room)), - proto_room.ListParticipantsResponse) + async def list_participants( + self, list: proto_room.ListParticipantsRequest + ) -> proto_room.ListParticipantsResponse: + return await self._client.request( + SVC, + "ListParticipants", + list, + self._auth_header(VideoGrants(room_admin=True, room=list.room)), + proto_room.ListParticipantsResponse, + ) - async def get_participant(self, get: proto_room.RoomParticipantIdentity) \ - -> proto_models.ParticipantInfo: - return await self._client.request(SVC, "GetParticipant", get, - self._auth_header( - VideoGrants(room_admin=True, - room=get.room)), - proto_models.ParticipantInfo) + async def get_participant( + self, get: proto_room.RoomParticipantIdentity + ) -> proto_models.ParticipantInfo: + return await self._client.request( + SVC, + "GetParticipant", + get, + self._auth_header(VideoGrants(room_admin=True, room=get.room)), + proto_models.ParticipantInfo, + ) - async def remove_participant(self, remove: proto_room.RoomParticipantIdentity) \ - -> proto_room.RemoveParticipantResponse: - return await self._client.request(SVC, "remove_participant", remove, - self._auth_header( - VideoGrants(room_admin=True, - room=remove.room)), - proto_room.RemoveParticipantResponse) + async def remove_participant( + self, remove: proto_room.RoomParticipantIdentity + ) -> proto_room.RemoveParticipantResponse: + return await self._client.request( + SVC, + "remove_participant", + remove, + self._auth_header(VideoGrants(room_admin=True, room=remove.room)), + proto_room.RemoveParticipantResponse, + ) diff --git a/livekit-api/setup.py b/livekit-api/setup.py index 541c583a..cceb517a 100644 --- a/livekit-api/setup.py +++ b/livekit-api/setup.py @@ -19,14 +19,14 @@ here = pathlib.Path(__file__).parent.resolve() about = {} -with open(os.path.join(here, 'livekit', 'api', 'version.py'), 'r') as f: +with open(os.path.join(here, "livekit", "api", "version.py"), "r") as f: exec(f.read(), about) setuptools.setup( name="livekit-api", - version=about['__version__'], - description="LiveKit Python Server for LiveKit", + version=about["__version__"], + description="Python Server SDK for LiveKit", long_description=(here / "README.md").read_text(encoding="utf-8"), long_description_content_type="text/markdown", url="https://github.com/livekit/client-sdk-python", @@ -45,15 +45,20 @@ ], keywords=["webrtc", "realtime", "audio", "video", "livekit"], license="Apache-2.0", - packages=setuptools.find_namespace_packages(include=['livekit.*']), + packages=setuptools.find_namespace_packages(include=["livekit.*"]), python_requires=">=3.7.0", - install_requires=["pyjwt>=2.0.0", - "aiohttp>=3.8.0", - "protobuf>=3.1.0", - "types-protobuf>=3.1.0"], + install_requires=[ + "pyjwt>=2.0.0", + "aiohttp>=3.8.0", + "protobuf>=3.1.0", + "types-protobuf>=3.1.0", + ], + package_data={ + "livekit.api": ["_proto/*.py", "py.typed", "*.pyi", "**/*.pyi"], + }, project_urls={ "Documentation": "https://docs.livekit.io", "Website": "https://livekit.io/", - "Source": "https://github.com/livekit/client-sdk-python/", + "Source": "https://github.com/livekit/python-sdks/", }, ) diff --git a/livekit-rtc/livekit/rtc/__init__.py b/livekit-rtc/livekit/rtc/__init__.py index d825217a..5925fa9c 100644 --- a/livekit-rtc/livekit/rtc/__init__.py +++ b/livekit-rtc/livekit/rtc/__init__.py @@ -23,9 +23,9 @@ TrackPublishOptions, IceTransportType, ContinualGatheringPolicy, - IceServer + IceServer, ) -from ._proto.e2ee_pb2 import (EncryptionType, EncryptionState) +from ._proto.e2ee_pb2 import EncryptionType, EncryptionState from ._proto.track_pb2 import StreamState, TrackKind, TrackSource from ._proto.video_frame_pb2 import VideoFormatType, VideoFrameBufferType, VideoRotation from .audio_frame import AudioFrame @@ -45,7 +45,7 @@ E2EEOptions, KeyProviderOptions, KeyProvider, - FrameCryptor + FrameCryptor, ) from .track_publication import ( LocalTrackPublication, diff --git a/livekit-rtc/livekit/rtc/_event_emitter.py b/livekit-rtc/livekit/rtc/_event_emitter.py index 265a6380..540f1fdf 100644 --- a/livekit-rtc/livekit/rtc/_event_emitter.py +++ b/livekit-rtc/livekit/rtc/_event_emitter.py @@ -1,9 +1,10 @@ from typing import Callable, Dict, Set, Optional, Generic, TypeVar -T = TypeVar('T') +T = TypeVar("T") + class EventEmitter(Generic[T]): - def __init__(self): + def __init__(self) -> None: self._events: Dict[T, Set[Callable]] = dict() def emit(self, event: T, *args, **kwargs) -> None: @@ -13,14 +14,18 @@ def emit(self, event: T, *args, **kwargs) -> None: def once(self, event: T, callback: Optional[Callable] = None) -> Callable: if callback is not None: + def once_callback(*args, **kwargs): self.off(event, once_callback) callback(*args, **kwargs) + return self.on(event, once_callback) else: + def decorator(callback: Callable) -> Callable: self.once(event, callback) return callback + return decorator def on(self, event: T, callback: Optional[Callable] = None) -> Callable: @@ -30,9 +35,11 @@ def on(self, event: T, callback: Optional[Callable] = None) -> Callable: self._events[event].add(callback) return callback else: + def decorator(callback: Callable) -> Callable: self.on(event, callback) return callback + return decorator def off(self, event: T, callback: Callable) -> None: diff --git a/livekit-rtc/livekit/rtc/_ffi_client.py b/livekit-rtc/livekit/rtc/_ffi_client.py index 181678dd..7cf0604d 100644 --- a/livekit-rtc/livekit/rtc/_ffi_client.py +++ b/livekit-rtc/livekit/rtc/_ffi_client.py @@ -40,10 +40,12 @@ def get_ffi_lib_path(): else: raise Exception( f"no ffi library found for platform {platform.system()}. \ - Set LIVEKIT_LIB_PATH to specify a the lib path") + Set LIVEKIT_LIB_PATH to specify a the lib path" + ) libpath = pkg_resources.resource_filename( - 'livekit.rtc', os.path.join('resources', libname)) + "livekit.rtc", os.path.join("resources", libname) + ) return libpath @@ -54,7 +56,7 @@ def get_ffi_lib_path(): ctypes.POINTER(ctypes.c_ubyte), ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_ubyte)), - ctypes.POINTER(ctypes.c_size_t) + ctypes.POINTER(ctypes.c_size_t), ] ffi_lib.livekit_ffi_request.restype = ctypes.c_uint64 @@ -79,26 +81,23 @@ def disposed(self) -> bool: def dispose(self) -> None: if self.handle != INVALID_HANDLE and not self._disposed: self._disposed = True - assert ffi_lib.livekit_ffi_drop_handle( - ctypes.c_uint64(self.handle)) + assert ffi_lib.livekit_ffi_drop_handle(ctypes.c_uint64(self.handle)) -T = TypeVar('T') +T = TypeVar("T") class FfiQueue(Generic[T]): def __init__(self) -> None: self._lock = threading.RLock() - self._subscribers: List[tuple[ - Queue[T], asyncio.AbstractEventLoop]] = [] + self._subscribers: List[tuple[Queue[T], asyncio.AbstractEventLoop]] = [] def put(self, item: T) -> None: with self._lock: for queue, loop in self._subscribers: loop.call_soon_threadsafe(queue.put_nowait, item) - def subscribe(self, loop: Optional[asyncio.AbstractEventLoop] = None) \ - -> Queue[T]: + def subscribe(self, loop: Optional[asyncio.AbstractEventLoop] = None) -> Queue[T]: with self._lock: queue = Queue[T]() loop = loop or asyncio.get_event_loop() @@ -115,9 +114,11 @@ def unsubscribe(self, queue: Queue[T]) -> None: @ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.POINTER(ctypes.c_uint8), ctypes.c_size_t) -def ffi_event_callback(data_ptr: ctypes.POINTER(ctypes.c_uint8), # type: ignore - data_len: ctypes.c_size_t) -> None: - event_data = bytes(data_ptr[:int(data_len)]) +def ffi_event_callback( + data_ptr: ctypes.POINTER(ctypes.c_uint8), # type: ignore + data_len: ctypes.c_size_t, +) -> None: + event_data = bytes(data_ptr[: int(data_len)]) event = proto_ffi.FfiEvent() event.ParseFromString(event_data) ffi_client.queue.put(event) @@ -130,8 +131,7 @@ def __init__(self) -> None: # initialize request req = proto_ffi.FfiRequest() - cb_callback = int(ctypes.cast( - ffi_event_callback, ctypes.c_void_p).value) # type: ignore + cb_callback = int(ctypes.cast(ffi_event_callback, ctypes.c_void_p).value) # type: ignore req.initialize.event_callback_ptr = cb_callback self.request(req) @@ -147,9 +147,10 @@ def request(self, req: proto_ffi.FfiRequest) -> proto_ffi.FfiResponse: resp_ptr = ctypes.POINTER(ctypes.c_ubyte)() resp_len = ctypes.c_size_t() handle = ffi_lib.livekit_ffi_request( - data, proto_len, ctypes.byref(resp_ptr), ctypes.byref(resp_len)) + data, proto_len, ctypes.byref(resp_ptr), ctypes.byref(resp_len) + ) - resp_data = bytes(resp_ptr[:resp_len.value]) + resp_data = bytes(resp_ptr[: resp_len.value]) resp = proto_ffi.FfiResponse() resp.ParseFromString(resp_data) diff --git a/livekit-rtc/livekit/rtc/_utils.py b/livekit-rtc/livekit/rtc/_utils.py index 8d80441e..f187e537 100644 --- a/livekit-rtc/livekit/rtc/_utils.py +++ b/livekit-rtc/livekit/rtc/_utils.py @@ -3,14 +3,15 @@ import ctypes from typing import Callable, Generic, List, TypeVar + def get_address(data: memoryview) -> int: - """ Get the address of a buffer using ctypes """ + """Get the address of a buffer using ctypes""" nbytes = data.nbytes buffer = (ctypes.c_int8 * nbytes).from_buffer(data) return ctypes.addressof(buffer) -T = TypeVar('T') +T = TypeVar("T") class RingQueue(Generic[T]): @@ -33,14 +34,13 @@ async def get(self) -> T: class Queue(asyncio.Queue[T]): - """ asyncio.Queue with utility functions. """ + """asyncio.Queue with utility functions.""" def __init__(self, maxsize: int = 0) -> None: super().__init__(maxsize) - async def wait_for(self, fnc: Callable[[T], bool]) \ - -> T: - """ Wait for an event that matches the given function. + async def wait_for(self, fnc: Callable[[T], bool]) -> T: + """Wait for an event that matches the given function. The previous events are discarded. """ @@ -54,7 +54,7 @@ async def wait_for(self, fnc: Callable[[T], bool]) \ class BroadcastQueue(Generic[T]): - """ Queue with multiple subscribers. """ + """Queue with multiple subscribers.""" def __init__(self) -> None: self._lock = asyncio.Lock() diff --git a/livekit-rtc/livekit/rtc/audio_frame.py b/livekit-rtc/livekit/rtc/audio_frame.py index f49829e4..7a31fbb5 100644 --- a/livekit-rtc/livekit/rtc/audio_frame.py +++ b/livekit-rtc/livekit/rtc/audio_frame.py @@ -20,13 +20,19 @@ class AudioFrame: - def __init__(self, data: bytearray, - sample_rate: int, - num_channels: int, - samples_per_channel: int) -> None: - if len(data) < num_channels * samples_per_channel * ctypes.sizeof(ctypes.c_int16): + def __init__( + self, + data: bytearray, + sample_rate: int, + num_channels: int, + samples_per_channel: int, + ) -> None: + if len(data) < num_channels * samples_per_channel * ctypes.sizeof( + ctypes.c_int16 + ): raise ValueError( - 'data length must be >= num_channels * samples_per_channel * sizeof(int16)') + "data length must be >= num_channels * samples_per_channel * sizeof(int16)" + ) self._sample_rate = sample_rate self._num_channels = num_channels @@ -34,31 +40,32 @@ def __init__(self, data: bytearray, self._data = data @staticmethod - def create(sample_rate: int, num_channels: int, samples_per_channel: int) -> 'AudioFrame': - size = num_channels * samples_per_channel * \ - ctypes.sizeof(ctypes.c_int16) + def create( + sample_rate: int, num_channels: int, samples_per_channel: int + ) -> "AudioFrame": + size = num_channels * samples_per_channel * ctypes.sizeof(ctypes.c_int16) data = bytearray(size) return AudioFrame(data, sample_rate, num_channels, samples_per_channel) @staticmethod - def _from_owned_info(owned_info: proto_audio.OwnedAudioFrameBuffer) -> 'AudioFrame': + def _from_owned_info(owned_info: proto_audio.OwnedAudioFrameBuffer) -> "AudioFrame": info = owned_info.info size = info.num_channels * info.samples_per_channel cdata = (ctypes.c_int16 * size).from_address(info.data_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) - return AudioFrame(data, info.sample_rate, info.num_channels, info.samples_per_channel) + return AudioFrame( + data, info.sample_rate, info.num_channels, info.samples_per_channel + ) - def remix_and_resample(self, sample_rate: int, num_channels: int) -> 'AudioFrame': - """ Resample the audio frame to the given sample rate and number of channels.""" + def remix_and_resample(self, sample_rate: int, num_channels: int) -> "AudioFrame": + """Resample the audio frame to the given sample rate and number of channels.""" req = proto_ffi.FfiRequest() - req.new_audio_resampler.CopyFrom( - proto_audio.NewAudioResamplerRequest()) + req.new_audio_resampler.CopyFrom(proto_audio.NewAudioResamplerRequest()) resp = ffi_client.request(req) - resampler_handle = FfiHandle( - resp.new_audio_resampler.resampler.handle.id) + resampler_handle = FfiHandle(resp.new_audio_resampler.resampler.handle.id) resample_req = proto_ffi.FfiRequest() resample_req.remix_and_resample.resampler_handle = resampler_handle.handle @@ -79,7 +86,7 @@ def _proto_info(self) -> proto_audio.AudioFrameBufferInfo: @property def data(self) -> memoryview: - return memoryview(self._data).cast('h') + return memoryview(self._data).cast("h") @property def sample_rate(self) -> int: diff --git a/livekit-rtc/livekit/rtc/audio_source.py b/livekit-rtc/livekit/rtc/audio_source.py index 8a26dcbe..bb1ef865 100644 --- a/livekit-rtc/livekit/rtc/audio_source.py +++ b/livekit-rtc/livekit/rtc/audio_source.py @@ -21,8 +21,9 @@ class AudioSource: def __init__(self, sample_rate: int, num_channels: int) -> None: req = proto_ffi.FfiRequest() - req.new_audio_source.type = \ + req.new_audio_source.type = ( proto_audio_frame.AudioSourceType.AUDIO_SOURCE_NATIVE + ) req.new_audio_source.sample_rate = sample_rate req.new_audio_source.num_channels = num_channels @@ -39,8 +40,10 @@ async def capture_frame(self, frame: AudioFrame) -> None: queue = ffi_client.queue.subscribe() try: resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.capture_audio_frame.async_id == - resp.capture_audio_frame.async_id) + cb = await queue.wait_for( + lambda e: e.capture_audio_frame.async_id + == resp.capture_audio_frame.async_id + ) finally: ffi_client.queue.unsubscribe(queue) diff --git a/livekit-rtc/livekit/rtc/audio_stream.py b/livekit-rtc/livekit/rtc/audio_stream.py index 0584d421..b6d63fbb 100644 --- a/livekit-rtc/livekit/rtc/audio_stream.py +++ b/livekit-rtc/livekit/rtc/audio_stream.py @@ -24,9 +24,12 @@ class AudioStream: - def __init__(self, track: Track, - loop: Optional[asyncio.AbstractEventLoop] = None, - capacity: int = 0) -> None: + def __init__( + self, + track: Track, + loop: Optional[asyncio.AbstractEventLoop] = None, + capacity: int = 0, + ) -> None: self._track = track self._loop = loop or asyncio.get_event_loop() self._ffi_queue = ffi_client.queue.subscribe(self._loop) @@ -52,11 +55,11 @@ async def _run(self): event = await self._ffi_queue.wait_for(self._is_event) audio_event = event.audio_stream_event - if audio_event.HasField('frame_received'): + if audio_event.HasField("frame_received"): owned_buffer_info = audio_event.frame_received.frame frame = AudioFrame._from_owned_info(owned_buffer_info) self._queue.put(frame) - elif audio_event.HasField('eos'): + elif audio_event.HasField("eos"): break ffi_client.queue.unsubscribe(self._ffi_queue) diff --git a/livekit-rtc/livekit/rtc/e2ee.py b/livekit-rtc/livekit/rtc/e2ee.py index e9ad3479..2db86ef7 100644 --- a/livekit-rtc/livekit/rtc/e2ee.py +++ b/livekit-rtc/livekit/rtc/e2ee.py @@ -34,8 +34,7 @@ class KeyProviderOptions: @dataclass class E2EEOptions: - key_provider_options: KeyProviderOptions = field( - default_factory=KeyProviderOptions) + key_provider_options: KeyProviderOptions = field(default_factory=KeyProviderOptions) encryption_type: proto_e2ee.EncryptionType.ValueType = proto_e2ee.EncryptionType.GCM @@ -104,10 +103,9 @@ def ratchet_key(self, participant_identity: str, key_index: int) -> bytes: class FrameCryptor: - def __init__(self, room_handle: int, - participant_identity: str, - key_index: int, - enabled: bool): + def __init__( + self, room_handle: int, participant_identity: str, key_index: int, enabled: bool + ): self._room_handle = room_handle self._enabled = enabled self._participant_identity = participant_identity @@ -150,7 +148,8 @@ def __init__(self, room_handle: int, options: Optional[E2EEOptions]): if options is not None: self._key_provider = KeyProvider( - self._room_handle, options.key_provider_options) + self._room_handle, options.key_provider_options + ) @property def key_provider(self) -> Optional[KeyProvider]: @@ -174,10 +173,12 @@ def frame_cryptors(self) -> List[FrameCryptor]: resp = ffi_client.request(req) frame_cryptors = [] for frame_cryptor in resp.e2ee.manager_get_frame_cryptors.frame_cryptors: - frame_cryptors.append(FrameCryptor( - self._room_handle, - frame_cryptor.participant_identity, - frame_cryptor.key_index, - frame_cryptor.enabled - )) + frame_cryptors.append( + FrameCryptor( + self._room_handle, + frame_cryptor.participant_identity, + frame_cryptor.key_index, + frame_cryptor.enabled, + ) + ) return frame_cryptors diff --git a/livekit-rtc/livekit/rtc/participant.py b/livekit-rtc/livekit/rtc/participant.py index 95866420..cce64058 100644 --- a/livekit-rtc/livekit/rtc/participant.py +++ b/livekit-rtc/livekit/rtc/participant.py @@ -67,21 +67,23 @@ def metadata(self) -> str: class LocalParticipant(Participant): - def __init__(self, - room_queue: BroadcastQueue[proto_ffi.FfiEvent], - owned_info: proto_participant.OwnedParticipant) -> None: + def __init__( + self, + room_queue: BroadcastQueue[proto_ffi.FfiEvent], + owned_info: proto_participant.OwnedParticipant, + ) -> None: super().__init__(owned_info) self._room_queue = room_queue self.tracks: dict[str, LocalTrackPublication] = {} # type: ignore - async def publish_data(self, - payload: Union[bytes, str], - kind: DataPacketKind.ValueType - = DataPacketKind.KIND_RELIABLE, - destination_sids: Optional[ - List[Union[str, 'RemoteParticipant']]] = None) -> None: + async def publish_data( + self, + payload: Union[bytes, str], + kind: DataPacketKind.ValueType = DataPacketKind.KIND_RELIABLE, + destination_sids: Optional[List[Union[str, "RemoteParticipant"]]] = None, + ) -> None: if isinstance(payload, str): - payload = payload.encode('utf-8') + payload = payload.encode("utf-8") data_len = len(payload) cdata = (ctypes.c_byte * data_len)(*payload) @@ -105,8 +107,9 @@ async def publish_data(self, queue = ffi_client.queue.subscribe() try: resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.publish_data.async_id == - resp.publish_data.async_id) + cb = await queue.wait_for( + lambda e: e.publish_data.async_id == resp.publish_data.async_id + ) finally: ffi_client.queue.unsubscribe(queue) @@ -121,8 +124,10 @@ async def update_metadata(self, metadata: str) -> None: queue = ffi_client.queue.subscribe() try: resp = ffi_client.request(req) - await queue.wait_for(lambda e: e.update_local_metadata.async_id == - resp.update_local_metadata.async_id) + await queue.wait_for( + lambda e: e.update_local_metadata.async_id + == resp.update_local_metadata.async_id + ) finally: ffi_client.queue.unsubscribe(queue) @@ -134,17 +139,20 @@ async def update_name(self, name: str) -> None: queue = ffi_client.queue.subscribe() try: resp = ffi_client.request(req) - await queue.wait_for(lambda e: e.update_local_name.async_id == - resp.update_local_name.async_id) + await queue.wait_for( + lambda e: e.update_local_name.async_id + == resp.update_local_name.async_id + ) finally: ffi_client.queue.unsubscribe(queue) - - async def publish_track(self, track: Track, options: TrackPublishOptions) \ - -> TrackPublication: - if not isinstance(track, LocalAudioTrack) \ - and not isinstance(track, LocalVideoTrack): - raise Exception('cannot publish a remote track') + async def publish_track( + self, track: Track, options: TrackPublishOptions + ) -> TrackPublication: + if not isinstance(track, LocalAudioTrack) and not isinstance( + track, LocalVideoTrack + ): + raise Exception("cannot publish a remote track") req = proto_ffi.FfiRequest() req.publish_track.track_handle = track._ffi_handle.handle @@ -154,14 +162,14 @@ async def publish_track(self, track: Track, options: TrackPublishOptions) \ queue = self._room_queue.subscribe() try: resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.publish_track.async_id == - resp.publish_track.async_id) + cb = await queue.wait_for( + lambda e: e.publish_track.async_id == resp.publish_track.async_id + ) if cb.publish_track.error: raise PublishTrackError(cb.publish_track.error) - track_publication = LocalTrackPublication( - cb.publish_track.publication) + track_publication = LocalTrackPublication(cb.publish_track.publication) track_publication.track = track self.tracks[track_publication.sid] = track_publication @@ -178,8 +186,9 @@ async def unpublish_track(self, track_sid: str) -> None: queue = self._room_queue.subscribe() try: resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.unpublish_track.async_id == - resp.unpublish_track.async_id) + cb = await queue.wait_for( + lambda e: e.unpublish_track.async_id == resp.unpublish_track.async_id + ) if cb.unpublish_track.error: raise UnpublishTrackError(cb.unpublish_track.error) diff --git a/livekit-rtc/livekit/rtc/room.py b/livekit-rtc/livekit/rtc/room.py index 0178b568..0fa532af 100644 --- a/livekit-rtc/livekit/rtc/room.py +++ b/livekit-rtc/livekit/rtc/room.py @@ -31,18 +31,41 @@ from .track_publication import RemoteTrackPublication from .participant import RemoteParticipant, Participant -EventTypes = Literal['participant_connected', 'participant_disconnected', 'local_track_published', 'local_track_unpublished', 'track_published', 'track_unpublished', - 'track_subscribed', 'track_unsubscribed', 'track_subscription_failed', 'track_muted', 'track_unmuted', 'active_speakers_changed', 'room_metadata_changed', - 'participant_metadata_changed', 'participant_name_changed', 'connection_quality_changed', 'data_received', 'e2ee_state_changed', 'connection_state_changed', - 'connected', 'disconnected', 'reconnecting', 'reconnected'] +EventTypes = Literal[ + "participant_connected", + "participant_disconnected", + "local_track_published", + "local_track_unpublished", + "track_published", + "track_unpublished", + "track_subscribed", + "track_unsubscribed", + "track_subscription_failed", + "track_muted", + "track_unmuted", + "active_speakers_changed", + "room_metadata_changed", + "participant_metadata_changed", + "participant_name_changed", + "connection_quality_changed", + "data_received", + "e2ee_state_changed", + "connection_state_changed", + "connected", + "disconnected", + "reconnecting", + "reconnected", +] @dataclass class RtcConfiguration: - ice_transport_type: proto_room.IceTransportType.ValueType = \ + ice_transport_type: proto_room.IceTransportType.ValueType = ( proto_room.IceTransportType.TRANSPORT_ALL - continual_gathering_policy: proto_room.ContinualGatheringPolicy.ValueType = \ + ) + continual_gathering_policy: proto_room.ContinualGatheringPolicy.ValueType = ( proto_room.ContinualGatheringPolicy.GATHER_CONTINUALLY + ) ice_servers: list[proto_room.IceServer] = field(default_factory=list) @@ -92,13 +115,14 @@ def e2ee_manager(self) -> E2EEManager: return self._e2ee_manager def isconnected(self) -> bool: - return self._ffi_handle is not None and \ - self.connection_state != ConnectionState.CONN_DISCONNECTED - - async def connect(self, - url: str, - token: str, - options: RoomOptions = RoomOptions()) -> None: + return ( + self._ffi_handle is not None + and self.connection_state != ConnectionState.CONN_DISCONNECTED + ) + + async def connect( + self, url: str, token: str, options: RoomOptions = RoomOptions() + ) -> None: req = proto_ffi.FfiRequest() req.connect.url = url req.connect.token = token @@ -108,24 +132,30 @@ async def connect(self, req.connect.options.dynacast = options.dynacast if options.e2ee: - req.connect.options.e2ee.encryption_type = \ - options.e2ee.encryption_type - req.connect.options.e2ee.key_provider_options.shared_key = \ - options.e2ee.key_provider_options.shared_key # type: ignore - req.connect.options.e2ee.key_provider_options.ratchet_salt = \ + req.connect.options.e2ee.encryption_type = options.e2ee.encryption_type + req.connect.options.e2ee.key_provider_options.shared_key = ( + options.e2ee.key_provider_options.shared_key + ) # type: ignore + req.connect.options.e2ee.key_provider_options.ratchet_salt = ( options.e2ee.key_provider_options.ratchet_salt - req.connect.options.e2ee.key_provider_options.failure_tolerance = \ + ) + req.connect.options.e2ee.key_provider_options.failure_tolerance = ( options.e2ee.key_provider_options.failure_tolerance - req.connect.options.e2ee.key_provider_options.ratchet_window_size = \ + ) + req.connect.options.e2ee.key_provider_options.ratchet_window_size = ( options.e2ee.key_provider_options.ratchet_window_size + ) if options.rtc_config: - req.connect.options.rtc_config.ice_transport_type = \ - options.rtc_config.ice_transport_type # type: ignore - req.connect.options.rtc_config.continual_gathering_policy = \ - options.rtc_config.continual_gathering_policy # type: ignore + req.connect.options.rtc_config.ice_transport_type = ( + options.rtc_config.ice_transport_type + ) # type: ignore + req.connect.options.rtc_config.continual_gathering_policy = ( + options.rtc_config.continual_gathering_policy + ) # type: ignore req.connect.options.rtc_config.ice_servers.extend( - options.rtc_config.ice_servers) + options.rtc_config.ice_servers + ) # subscribe before connecting so we don't miss any events self._ffi_queue = ffi_client.queue.subscribe(self._loop) @@ -133,8 +163,9 @@ async def connect(self, queue = ffi_client.queue.subscribe() try: resp = ffi_client.request(req) - cb = await queue.wait_for(lambda e: e.connect.async_id == - resp.connect.async_id) + cb = await queue.wait_for( + lambda e: e.connect.async_id == resp.connect.async_id + ) finally: ffi_client.queue.unsubscribe(queue) @@ -144,14 +175,14 @@ async def connect(self, self._ffi_handle = FfiHandle(cb.connect.room.handle.id) - self._e2ee_manager = E2EEManager( - self._ffi_handle.handle, options.e2ee) + self._e2ee_manager = E2EEManager(self._ffi_handle.handle, options.e2ee) self._info = cb.connect.room.info self.connection_state = ConnectionState.CONN_CONNECTED self.local_participant = LocalParticipant( - self._room_queue, cb.connect.local_participant) + self._room_queue, cb.connect.local_participant + ) for pt in cb.connect.participants: rp = self._create_remote_participant(pt.participant) @@ -174,8 +205,9 @@ async def disconnect(self) -> None: queue = ffi_client.queue.subscribe() try: resp = ffi_client.request(req) - await queue.wait_for(lambda e: e.disconnect.async_id == - resp.disconnect.async_id) + await queue.wait_for( + lambda e: e.disconnect.async_id == resp.disconnect.async_id + ) finally: ffi_client.queue.unsubscribe(queue) @@ -187,15 +219,17 @@ async def _listen_task(self) -> None: while True: event = await self._ffi_queue.get() if event.room_event.room_handle == self._ffi_handle.handle: # type: ignore - if event.room_event.HasField('eos'): + if event.room_event.HasField("eos"): break try: self._on_room_event(event.room_event) except Exception: logging.exception( - 'error running user callback for %s: %s', - event.room_event.WhichOneof('message'), event.room_event) + "error running user callback for %s: %s", + event.room_event.WhichOneof("message"), + event.room_event, + ) # wait for the subscribers to process the event # before processing the next one @@ -203,36 +237,37 @@ async def _listen_task(self) -> None: await self._room_queue.join() def _on_room_event(self, event: proto_room.RoomEvent): - which = event.WhichOneof('message') - if which == 'participant_connected': + which = event.WhichOneof("message") + if which == "participant_connected": rparticipant = self._create_remote_participant( - event.participant_connected.info) - self.emit('participant_connected', rparticipant) - elif which == 'participant_disconnected': + event.participant_connected.info + ) + self.emit("participant_connected", rparticipant) + elif which == "participant_disconnected": sid = event.participant_disconnected.participant_sid rparticipant = self.participants.pop(sid) - self.emit('participant_disconnected', rparticipant) - elif which == 'local_track_published': + self.emit("participant_disconnected", rparticipant) + elif which == "local_track_published": sid = event.local_track_published.track_sid lpublication = self.local_participant.tracks[sid] track = lpublication.track - self.emit('local_track_published', lpublication, track) - elif which == 'local_track_unpublished': + self.emit("local_track_published", lpublication, track) + elif which == "local_track_unpublished": sid = event.local_track_unpublished.publication_sid lpublication = self.local_participant.tracks[sid] - self.emit('local_track_unpublished', lpublication) - elif which == 'track_published': + self.emit("local_track_unpublished", lpublication) + elif which == "track_published": rparticipant = self.participants[event.track_published.participant_sid] - rpublication = RemoteTrackPublication( - event.track_published.publication) + rpublication = RemoteTrackPublication(event.track_published.publication) rparticipant.tracks[rpublication.sid] = rpublication - self.emit('track_published', rpublication, rparticipant) - elif which == 'track_unpublished': + self.emit("track_published", rpublication, rparticipant) + elif which == "track_unpublished": rparticipant = self.participants[event.track_unpublished.participant_sid] rpublication = rparticipant.tracks.pop( - event.track_unpublished.publication_sid) - self.emit('track_unpublished', rpublication, rparticipant) - elif which == 'track_subscribed': + event.track_unpublished.publication_sid + ) + self.emit("track_unpublished", rpublication, rparticipant) + elif which == "track_subscribed": owned_track_info = event.track_subscribed.track track_info = owned_track_info.info rparticipant = self.participants[event.track_subscribed.participant_sid] @@ -241,28 +276,34 @@ def _on_room_event(self, event: proto_room.RoomEvent): if track_info.kind == TrackKind.KIND_VIDEO: remote_video_track = RemoteVideoTrack(owned_track_info) rpublication.track = remote_video_track - self.emit('track_subscribed', - remote_video_track, rpublication, rparticipant) + self.emit( + "track_subscribed", remote_video_track, rpublication, rparticipant + ) elif track_info.kind == TrackKind.KIND_AUDIO: remote_audio_track = RemoteAudioTrack(owned_track_info) rpublication.track = remote_audio_track - self.emit('track_subscribed', remote_audio_track, - rpublication, rparticipant) - elif which == 'track_unsubscribed': + self.emit( + "track_subscribed", remote_audio_track, rpublication, rparticipant + ) + elif which == "track_unsubscribed": sid = event.track_unsubscribed.participant_sid rparticipant = self.participants[sid] rpublication = rparticipant.tracks[event.track_unsubscribed.track_sid] track = rpublication.track rpublication.track = None rpublication.subscribed = False - self.emit('track_unsubscribed', track, rpublication, rparticipant) - elif which == 'track_subscription_failed': + self.emit("track_unsubscribed", track, rpublication, rparticipant) + elif which == "track_subscription_failed": sid = event.track_subscription_failed.participant_sid rparticipant = self.participants[sid] error = event.track_subscription_failed.error - self.emit('track_subscription_failed', rparticipant, - event.track_subscription_failed.track_sid, error) - elif which == 'track_muted': + self.emit( + "track_subscription_failed", + rparticipant, + event.track_subscription_failed.track_sid, + error, + ) + elif which == "track_muted": sid = event.track_muted.participant_sid participant = self._retrieve_participant(sid) publication = participant.tracks[event.track_muted.track_sid] @@ -270,8 +311,8 @@ def _on_room_event(self, event: proto_room.RoomEvent): if publication.track: publication.track._info.muted = True - self.emit('track_muted', participant, publication) - elif which == 'track_unmuted': + self.emit("track_muted", participant, publication) + elif which == "track_unmuted": sid = event.track_unmuted.participant_sid participant = self._retrieve_participant(sid) publication = participant.tracks[event.track_unmuted.track_sid] @@ -279,81 +320,88 @@ def _on_room_event(self, event: proto_room.RoomEvent): if publication.track: publication.track._info.muted = False - self.emit('track_unmuted', participant, publication) - elif which == 'active_speakers_changed': + self.emit("track_unmuted", participant, publication) + elif which == "active_speakers_changed": speakers: list[Participant] = [] for sid in event.active_speakers_changed.participant_sids: speakers.append(self._retrieve_participant(sid)) - self.emit('active_speakers_changed', speakers) - elif which == 'room_metadata_changed': + self.emit("active_speakers_changed", speakers) + elif which == "room_metadata_changed": old_metadata = self.metadata self._info.metadata = event.room_metadata_changed.metadata - self.emit('room_metadata_changed', old_metadata, self.metadata) - elif which == 'participant_metadata_changed': + self.emit("room_metadata_changed", old_metadata, self.metadata) + elif which == "participant_metadata_changed": sid = event.participant_metadata_changed.participant_sid participant = self._retrieve_participant(sid) old_metadata = participant.metadata participant._info.metadata = event.participant_metadata_changed.metadata - self.emit('participant_metadata_changed', - participant, old_metadata, participant.metadata) - elif which == 'participant_name_changed': + self.emit( + "participant_metadata_changed", + participant, + old_metadata, + participant.metadata, + ) + elif which == "participant_name_changed": sid = event.participant_name_changed.participant_sid participant = self._retrieve_participant(sid) old_name = participant.name participant._info.name = event.participant_name_changed.name - self.emit('participant_name_changed', - participant, old_name, participant.name) - elif which == 'connection_quality_changed': + self.emit( + "participant_name_changed", participant, old_name, participant.name + ) + elif which == "connection_quality_changed": sid = event.connection_quality_changed.participant_sid participant = self._retrieve_participant(sid) - self.emit('connection_quality_changed', - participant, event.connection_quality_changed.quality) - elif which == 'data_received': + self.emit( + "connection_quality_changed", + participant, + event.connection_quality_changed.quality, + ) + elif which == "data_received": owned_buffer_info = event.data_received.data buffer_info = owned_buffer_info.data - native_data = ctypes.cast(buffer_info.data_ptr, - ctypes.POINTER(ctypes.c_byte - * buffer_info.data_len)).contents + native_data = ctypes.cast( + buffer_info.data_ptr, + ctypes.POINTER(ctypes.c_byte * buffer_info.data_len), + ).contents data = bytes(native_data) FfiHandle(owned_buffer_info.handle.id) rparticipant = None if event.data_received.participant_sid: rparticipant = self.participants[event.data_received.participant_sid] - self.emit('data_received', data, - event.data_received.kind, rparticipant) - elif which == 'e2ee_state_changed': + self.emit("data_received", data, event.data_received.kind, rparticipant) + elif which == "e2ee_state_changed": sid = event.e2ee_state_changed.participant_sid e2ee_state = event.e2ee_state_changed.state - self.emit('e2ee_state_changed', - self._retrieve_participant(sid), e2ee_state) - elif which == 'connection_state_changed': + self.emit("e2ee_state_changed", self._retrieve_participant(sid), e2ee_state) + elif which == "connection_state_changed": connection_state = event.connection_state_changed.state self.connection_state = connection_state - self.emit('connection_state_changed', connection_state) - elif which == 'connected': - self.emit('connected') - elif which == 'disconnected': - self.emit('disconnected') - elif which == 'reconnecting': - self.emit('reconnecting') - elif which == 'reconnected': - self.emit('reconnected') + self.emit("connection_state_changed", connection_state) + elif which == "connected": + self.emit("connected") + elif which == "disconnected": + self.emit("disconnected") + elif which == "reconnecting": + self.emit("reconnecting") + elif which == "reconnected": + self.emit("reconnected") def _retrieve_participant(self, sid: str) -> Participant: - """ Retrieve a participant by sid, returns the LocalParticipant - if sid matches """ + """Retrieve a participant by sid, returns the LocalParticipant + if sid matches""" if sid == self.local_participant.sid: return self.local_participant else: return self.participants[sid] - def _create_remote_participant(self, - owned_info: proto_participant.OwnedParticipant) \ - -> RemoteParticipant: + def _create_remote_participant( + self, owned_info: proto_participant.OwnedParticipant + ) -> RemoteParticipant: if owned_info.info.sid in self.participants: - raise Exception('participant already exists') + raise Exception("participant already exists") participant = RemoteParticipant(owned_info) self.participants[participant.sid] = participant diff --git a/livekit-rtc/livekit/rtc/track.py b/livekit-rtc/livekit/rtc/track.py index 0247d336..788de663 100644 --- a/livekit-rtc/livekit/rtc/track.py +++ b/livekit-rtc/livekit/rtc/track.py @@ -21,7 +21,8 @@ from .audio_source import AudioSource from .video_source import VideoSource -class Track(): + +class Track: def __init__(self, owned_info: proto_track.OwnedTrack): self._info = owned_info.info self._ffi_handle = FfiHandle(owned_info.handle.id) @@ -55,7 +56,7 @@ def __init__(self, info: proto_track.OwnedTrack): super().__init__(info) @staticmethod - def create_audio_track(name: str, source: 'AudioSource') -> 'LocalAudioTrack': + def create_audio_track(name: str, source: "AudioSource") -> "LocalAudioTrack": req = proto_ffi.FfiRequest() req.create_audio_track.name = name req.create_audio_track.source_handle = source._ffi_handle.handle @@ -69,7 +70,7 @@ def __init__(self, info: proto_track.OwnedTrack): super().__init__(info) @staticmethod - def create_video_track(name: str, source: 'VideoSource') -> 'LocalVideoTrack': + def create_video_track(name: str, source: "VideoSource") -> "LocalVideoTrack": req = proto_ffi.FfiRequest() req.create_video_track.name = name req.create_video_track.source_handle = source._ffi_handle.handle diff --git a/livekit-rtc/livekit/rtc/track_publication.py b/livekit-rtc/livekit/rtc/track_publication.py index 4306794c..ddf67a00 100644 --- a/livekit-rtc/livekit/rtc/track_publication.py +++ b/livekit-rtc/livekit/rtc/track_publication.py @@ -21,7 +21,7 @@ from .track import Track -class TrackPublication(): +class TrackPublication: def __init__(self, owned_info: proto_track.OwnedTrackPublication): self._info = owned_info.info self.track: Optional[Track] = None diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index d5cff97b..b1acea00 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -24,21 +24,25 @@ class VideoFrame: - def __init__(self, timestamp_us: int, - rotation: VideoRotation.ValueType, - buffer: 'VideoFrameBuffer') -> None: + def __init__( + self, + timestamp_us: int, + rotation: VideoRotation.ValueType, + buffer: "VideoFrameBuffer", + ) -> None: self.buffer = buffer self.timestamp_us = timestamp_us self.rotation = rotation class VideoFrameBuffer(ABC): - - def __init__(self, - data: bytearray, - width: int, - height: int, - buffer_type: VideoFrameBufferType.ValueType) -> None: + def __init__( + self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + ) -> None: self._data = data self._width = width self._height = height @@ -64,13 +68,13 @@ def type(self) -> VideoFrameBufferType.ValueType: def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: pass - def to_i420(self) -> 'I420Buffer': + def to_i420(self) -> "I420Buffer": req = proto_ffi.FfiRequest() req.to_i420.buffer.CopyFrom(self._proto_info()) resp = ffi_client.request(req) return I420Buffer._from_owned_info(resp.to_i420.buffer) - def to_argb(self, dst: 'ArgbFrame') -> None: + def to_argb(self, dst: "ArgbFrame") -> None: req = proto_ffi.FfiRequest() req.to_argb.buffer.CopyFrom(self._proto_info()) req.to_argb.dst_ptr = get_address(memoryview(dst.data)) @@ -81,8 +85,9 @@ def to_argb(self, dst: 'ArgbFrame') -> None: ffi_client.request(req) @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ - -> 'VideoFrameBuffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "VideoFrameBuffer": """ Create the right class instance from the VideoFrameBufferInfo """ @@ -103,7 +108,7 @@ def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ elif info.buffer_type == VideoFrameBufferType.NV12: return NV12Buffer._from_owned_info(owned_info) else: - raise Exception('Unsupported VideoFrameBufferType') + raise Exception("Unsupported VideoFrameBufferType") # TODO(theomonnom): Ability to get GPU texture directly @@ -111,38 +116,45 @@ class NativeVideoBuffer(VideoFrameBuffer): def __init__(self, owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> None: self._info = owned_info.info self._ffi_handle = FfiHandle(owned_info.handle.id) - super().__init__(bytearray(), self._info.width, - self._info.height, VideoFrameBufferType.NATIVE) + super().__init__( + bytearray(), + self._info.width, + self._info.height, + VideoFrameBufferType.NATIVE, + ) def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: return self._info @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) \ - -> 'NativeVideoBuffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "NativeVideoBuffer": return NativeVideoBuffer(owned_info) - def to_i420(self) -> 'I420Buffer': + def to_i420(self) -> "I420Buffer": req = proto_ffi.FfiRequest() req.to_i420.handle = self._ffi_handle.handle resp = ffi_client.request(req) return I420Buffer._from_owned_info(resp.to_i420.buffer) - def to_argb(self, dst: 'ArgbFrame') -> None: + def to_argb(self, dst: "ArgbFrame") -> None: self.to_i420().to_argb(dst) class PlanarYuvBuffer(VideoFrameBuffer, ABC): - def __init__(self, - data: bytearray, - width: int, - height: int, - buffer_type: VideoFrameBufferType.ValueType, - stride_y: int, - stride_u: int, - stride_v: int, - chroma_width: int, - chroma_height: int) -> None: + def __init__( + self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_u: int, + stride_v: int, + chroma_width: int, + chroma_height: int, + ) -> None: super().__init__(data, width, height, buffer_type) self._stride_y = stride_y self._stride_u = stride_u @@ -184,18 +196,29 @@ def stride_v(self) -> int: class PlanarYuv8Buffer(PlanarYuvBuffer, ABC): - def __init__(self, - data: bytearray, - width: int, - height: int, - buffer_type: VideoFrameBufferType.ValueType, - stride_y: int, - stride_u: int, - stride_v: int, - chroma_width: int, - chroma_height: int) -> None: - super().__init__(data, width, height, buffer_type, stride_y, - stride_u, stride_v, chroma_width, chroma_height) + def __init__( + self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_u: int, + stride_v: int, + chroma_width: int, + chroma_height: int, + ) -> None: + super().__init__( + data, + width, + height, + buffer_type, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: info = super()._proto_info() @@ -206,36 +229,49 @@ def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: @property def data_y(self) -> memoryview: - return memoryview(self._data)[0:self._stride_y * self._height] + return memoryview(self._data)[0 : self._stride_y * self._height] @property def data_u(self) -> memoryview: - return memoryview(self._data)[self._stride_y * self._height: - self._stride_y * self._height + - self._stride_u * self._chroma_height] + return memoryview(self._data)[ + self._stride_y * self._height : self._stride_y * self._height + + self._stride_u * self._chroma_height + ] @property def data_v(self) -> memoryview: - return memoryview(self._data)[self._stride_y * self._height + - self._stride_u * self._chroma_height: - self._stride_y * self._height + - self._stride_u * self._chroma_height + - self._stride_v * self._chroma_height] + return memoryview(self._data)[ + self._stride_y * self._height + + self._stride_u * self._chroma_height : self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height + ] class PlanarYuv16Buffer(PlanarYuvBuffer, ABC): - def __init__(self, - data: bytearray, - width: int, - height: int, - buffer_type: VideoFrameBufferType.ValueType, - stride_y: int, - stride_u: int, - stride_v: int, - chroma_width: int, - chroma_height: int) -> None: - super().__init__(data, width, height, buffer_type, stride_y, - stride_u, stride_v, chroma_width, chroma_height) + def __init__( + self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_u: int, + stride_v: int, + chroma_width: int, + chroma_height: int, + ) -> None: + super().__init__( + data, + width, + height, + buffer_type, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: info = super()._proto_info() @@ -246,33 +282,37 @@ def _proto_info(self) -> proto_video_frame.VideoFrameBufferInfo: @property def data_y(self) -> memoryview: - return memoryview(self._data)[0:self._stride_y * self._height].cast('H') + return memoryview(self._data)[0 : self._stride_y * self._height].cast("H") @property def data_u(self) -> memoryview: - return memoryview(self._data)[self._stride_y * self._height: - self._stride_y * self._height + - self._stride_u * self._chroma_height].cast('H') + return memoryview(self._data)[ + self._stride_y * self._height : self._stride_y * self._height + + self._stride_u * self._chroma_height + ].cast("H") @property def data_v(self) -> memoryview: - return memoryview(self._data)[self._stride_y * self._height + - self._stride_u * self._chroma_height: - self._stride_y * self._height + - self._stride_u * self._chroma_height + - self._stride_v * self._chroma_height].cast('H') + return memoryview(self._data)[ + self._stride_y * self._height + + self._stride_u * self._chroma_height : self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height + ].cast("H") class BiplanaraYuv8Buffer(VideoFrameBuffer, ABC): - def __init__(self, - data: bytearray, - width: int, - height: int, - buffer_type: VideoFrameBufferType.ValueType, - stride_y: int, - stride_uv: int, - chroma_width: int, - chroma_height: int) -> None: + def __init__( + self, + data: bytearray, + width: int, + height: int, + buffer_type: VideoFrameBufferType.ValueType, + stride_y: int, + stride_uv: int, + chroma_width: int, + chroma_height: int, + ) -> None: super().__init__(data, width, height, buffer_type) self._stride_y = stride_y self._stride_uv = stride_uv @@ -310,42 +350,57 @@ def stride_uv(self) -> int: @property def data_y(self) -> memoryview: - return memoryview(self._data)[0:self._stride_y * self._height] + return memoryview(self._data)[0 : self._stride_y * self._height] @property def data_uv(self) -> memoryview: - return memoryview(self._data)[self._stride_y * self._height: - self._stride_y * self._height + - self._stride_uv * self._chroma_height] + return memoryview(self._data)[ + self._stride_y * self._height : self._stride_y * self._height + + self._stride_uv * self._chroma_height + ] class I420Buffer(PlanarYuv8Buffer): - def __init__(self, - data: bytearray, - width: int, - height: int, - stride_y: int, - stride_u: int, - stride_v: int) -> None: - + def __init__( + self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int, + ) -> None: if len(data) < I420Buffer.calc_data_size(height, stride_y, stride_u, stride_v): raise ValueError( - 'buffer too small for I420 data. Expected {} bytes, got {}.'.format( - I420Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + "buffer too small for I420 data. Expected {} bytes, got {}.".format( + I420Buffer.calc_data_size(height, stride_y, stride_u, stride_v), + len(data), + ) + ) chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - super().__init__(data, width, height, - VideoFrameBufferType.I420, stride_y, stride_u, stride_v, chroma_width, chroma_height) + super().__init__( + data, + width, + height, + VideoFrameBufferType.I420, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I420Buffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "I420Buffer": info = owned_info.info stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v - nbytes = I420Buffer.calc_data_size( - info.height, stride_y, stride_u, stride_v) + nbytes = I420Buffer.calc_data_size(info.height, stride_y, stride_u, stride_v) cdata = (ctypes.c_uint8 * nbytes).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) @@ -356,53 +411,81 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> return stride_y * height + (stride_u + stride_v) * ((height + 1) // 2) @staticmethod - def create(width: int, height: int) -> 'I420Buffer': + def create(width: int, height: int) -> "I420Buffer": stride_y = width stride_u = (width + 1) // 2 stride_v = (width + 1) // 2 - data_size = I420Buffer.calc_data_size( - height, stride_y, stride_u, stride_v) + data_size = I420Buffer.calc_data_size(height, stride_y, stride_u, stride_v) data = bytearray(data_size) return I420Buffer(data, width, height, stride_y, stride_u, stride_v) class I420ABuffer(PlanarYuv8Buffer): - def __init__(self, - data: bytearray, - width: int, - height: int, - stride_y: int, - stride_u: int, - stride_v: int, - stride_a: int) -> None: - - if len(data) < I420ABuffer.calc_data_size(height, stride_y, stride_u, stride_v, stride_a): + def __init__( + self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int, + stride_a: int, + ) -> None: + if len(data) < I420ABuffer.calc_data_size( + height, stride_y, stride_u, stride_v, stride_a + ): raise ValueError( - 'buffer too small for I420A data. Expected {} bytes, got {}.'.format( - I420ABuffer.calc_data_size(height, stride_y, stride_u, stride_v, stride_a), len(data))) + "buffer too small for I420A data. Expected {} bytes, got {}.".format( + I420ABuffer.calc_data_size( + height, stride_y, stride_u, stride_v, stride_a + ), + len(data), + ) + ) chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - super().__init__(data, width, height, VideoFrameBufferType.I420A, - stride_y, stride_u, stride_v, chroma_width, chroma_height) + super().__init__( + data, + width, + height, + VideoFrameBufferType.I420A, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) self._stride_a = stride_a @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I420ABuffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "I420ABuffer": info = owned_info.info stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v stride_a = info.yuv.stride_a - cdata = (ctypes.c_uint8 * I420ABuffer.calc_data_size(info.height, - stride_y, stride_u, stride_v, stride_a)).from_address(info.yuv.data_y_ptr) + cdata = ( + ctypes.c_uint8 + * I420ABuffer.calc_data_size( + info.height, stride_y, stride_u, stride_v, stride_a + ) + ).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) - return I420ABuffer(data, info.width, info.height, stride_y, stride_u, stride_v, stride_a) + return I420ABuffer( + data, info.width, info.height, stride_y, stride_u, stride_v, stride_a + ) @staticmethod - def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int, stride_a: int) -> int: - return (stride_y + stride_a) * height + (stride_u + stride_v) * ((height + 1) // 2) + def calc_data_size( + height: int, stride_y: int, stride_u: int, stride_v: int, stride_a: int + ) -> int: + return (stride_y + stride_a) * height + (stride_u + stride_v) * ( + (height + 1) // 2 + ) @property def stride_a(self) -> int: @@ -410,42 +493,60 @@ def stride_a(self) -> int: @property def data_a(self) -> memoryview: - return memoryview(self._data)[self._stride_y * self._height + - self._stride_u * self._chroma_height + - self._stride_v * self._chroma_height: - self._stride_y * self._height + - self._stride_u * self._chroma_height + - self._stride_v * self._chroma_height + - self._stride_a * self._height] + return memoryview(self._data)[ + self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height : self._stride_y * self._height + + self._stride_u * self._chroma_height + + self._stride_v * self._chroma_height + + self._stride_a * self._height + ] class I422Buffer(PlanarYuv8Buffer): - def __init__(self, - data: bytearray, - width: int, - height: int, - stride_y: int, - stride_u: int, - stride_v: int) -> None: - + def __init__( + self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int, + ) -> None: if len(data) < I422Buffer.calc_data_size(height, stride_y, stride_u, stride_v): raise ValueError( - 'buffer too small for I422 data. Expected {} bytes, got {}.'.format( - I422Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + "buffer too small for I422 data. Expected {} bytes, got {}.".format( + I422Buffer.calc_data_size(height, stride_y, stride_u, stride_v), + len(data), + ) + ) chroma_width = (width + 1) // 2 chroma_height = height - super().__init__(data, width, height, VideoFrameBufferType.I422, - stride_y, stride_u, stride_v, chroma_width, chroma_height) + super().__init__( + data, + width, + height, + VideoFrameBufferType.I422, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I422Buffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "I422Buffer": info = owned_info.info stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v - cdata = (ctypes.c_uint8 * I422Buffer.calc_data_size(info.height, - stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + cdata = ( + ctypes.c_uint8 + * I422Buffer.calc_data_size(info.height, stride_y, stride_u, stride_v) + ).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) return I422Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) @@ -456,32 +557,49 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> class I444Buffer(PlanarYuv8Buffer): - def __init__(self, - data: bytearray, - width: int, - height: int, - stride_y: int, - stride_u: int, - stride_v: int) -> None: - + def __init__( + self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int, + ) -> None: if len(data) < I444Buffer.calc_data_size(height, stride_y, stride_u, stride_v): raise ValueError( - 'buffer too small for I444 data. Expected {} bytes, got {}.'.format( - I444Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + "buffer too small for I444 data. Expected {} bytes, got {}.".format( + I444Buffer.calc_data_size(height, stride_y, stride_u, stride_v), + len(data), + ) + ) chroma_width = width chroma_height = height - super().__init__(data, width, height, VideoFrameBufferType.I444, - stride_y, stride_u, stride_v, chroma_width, chroma_height) + super().__init__( + data, + width, + height, + VideoFrameBufferType.I444, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I444Buffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "I444Buffer": info = owned_info.info stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v - cdata = (ctypes.c_uint8 * I444Buffer.calc_data_size(info.height, - stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + cdata = ( + ctypes.c_uint8 + * I444Buffer.calc_data_size(info.height, stride_y, stride_u, stride_v) + ).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) return I444Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) @@ -492,64 +610,96 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> class I010Buffer(PlanarYuv16Buffer): - def __init__(self, data: bytearray, - width: int, - height: int, - stride_y: int, - stride_u: int, - stride_v: int) -> None: - + def __init__( + self, + data: bytearray, + width: int, + height: int, + stride_y: int, + stride_u: int, + stride_v: int, + ) -> None: if len(data) < I010Buffer.calc_data_size(height, stride_y, stride_u, stride_v): raise ValueError( - 'buffer too small for I010 data. Expected {} bytes, got {}.'.format( - I010Buffer.calc_data_size(height, stride_y, stride_u, stride_v), len(data))) + "buffer too small for I010 data. Expected {} bytes, got {}.".format( + I010Buffer.calc_data_size(height, stride_y, stride_u, stride_v), + len(data), + ) + ) chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - super().__init__(data, width, height, VideoFrameBufferType.I010, - stride_y, stride_u, stride_v, chroma_width, chroma_height) + super().__init__( + data, + width, + height, + VideoFrameBufferType.I010, + stride_y, + stride_u, + stride_v, + chroma_width, + chroma_height, + ) @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'I010Buffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "I010Buffer": info = owned_info.info stride_y = info.yuv.stride_y stride_u = info.yuv.stride_u stride_v = info.yuv.stride_v - cdata = (ctypes.c_uint8 * I010Buffer.calc_data_size(info.height, - stride_y, stride_u, stride_v)).from_address(info.yuv.data_y_ptr) + cdata = ( + ctypes.c_uint8 + * I010Buffer.calc_data_size(info.height, stride_y, stride_u, stride_v) + ).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) return I010Buffer(data, info.width, info.height, stride_y, stride_u, stride_v) @staticmethod def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> int: - return stride_y * height * 2 + stride_u * ((height + 1) // 2) * 2 + stride_v * ((height + 1) // 2) * 2 + return ( + stride_y * height * 2 + + stride_u * ((height + 1) // 2) * 2 + + stride_v * ((height + 1) // 2) * 2 + ) class NV12Buffer(BiplanaraYuv8Buffer): - def __init__(self, data: bytearray, - width: int, - height: int, - stride_y: int, - stride_uv: int) -> None: - + def __init__( + self, data: bytearray, width: int, height: int, stride_y: int, stride_uv: int + ) -> None: if len(data) < NV12Buffer.calc_data_size(height, stride_y, stride_uv): raise ValueError( - 'buffer too small for NV12 data. Expected {} bytes, got {}.'.format( - NV12Buffer.calc_data_size(height, stride_y, stride_uv), len(data))) + "buffer too small for NV12 data. Expected {} bytes, got {}.".format( + NV12Buffer.calc_data_size(height, stride_y, stride_uv), len(data) + ) + ) chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - super().__init__(data, width, height, VideoFrameBufferType.NV12, - stride_y, stride_uv, chroma_width, chroma_height) + super().__init__( + data, + width, + height, + VideoFrameBufferType.NV12, + stride_y, + stride_uv, + chroma_width, + chroma_height, + ) @staticmethod - def _from_owned_info(owned_info: proto_video_frame.OwnedVideoFrameBuffer) -> 'NV12Buffer': + def _from_owned_info( + owned_info: proto_video_frame.OwnedVideoFrameBuffer + ) -> "NV12Buffer": info = owned_info.info stride_y = info.bi_yuv.stride_y stride_uv = info.bi_yuv.stride_uv - cdata = (ctypes.c_uint8 * NV12Buffer.calc_data_size(info.height, - stride_y, stride_uv)).from_address(info.yuv.data_y_ptr) + cdata = ( + ctypes.c_uint8 * NV12Buffer.calc_data_size(info.height, stride_y, stride_uv) + ).from_address(info.yuv.data_y_ptr) data = bytearray(cdata) FfiHandle(owned_info.handle.id) return NV12Buffer(data, info.width, info.height, stride_y, stride_uv) @@ -560,13 +710,14 @@ def calc_data_size(height: int, stride_y: int, stride_uv: int) -> int: class ArgbFrame: - def __init__(self, - data: Union[bytes, bytearray, memoryview], - format: VideoFormatType.ValueType, - width: int, - height: int, - stride: int = 0) -> None: - + def __init__( + self, + data: Union[bytes, bytearray, memoryview], + format: VideoFormatType.ValueType, + width: int, + height: int, + stride: int = 0, + ) -> None: if stride == 0: stride = width * ctypes.sizeof(ctypes.c_uint32) @@ -580,7 +731,9 @@ def __init__(self, self._stride = stride @staticmethod - def create(format: VideoFormatType.ValueType, width: int, height: int) -> 'ArgbFrame': + def create( + format: VideoFormatType.ValueType, width: int, height: int + ) -> "ArgbFrame": data = bytearray(width * height * ctypes.sizeof(ctypes.c_uint32)) return ArgbFrame(data, format, width, height) diff --git a/livekit-rtc/livekit/rtc/video_source.py b/livekit-rtc/livekit/rtc/video_source.py index d1364585..81d6b7fa 100644 --- a/livekit-rtc/livekit/rtc/video_source.py +++ b/livekit-rtc/livekit/rtc/video_source.py @@ -21,8 +21,9 @@ class VideoSource: def __init__(self) -> None: req = proto_ffi.FfiRequest() - req.new_video_source.type = \ + req.new_video_source.type = ( proto_video_frame.VideoSourceType.VIDEO_SOURCE_NATIVE + ) resp = ffi_client.request(req) self._info = resp.new_video_source.source diff --git a/livekit-rtc/livekit/rtc/video_stream.py b/livekit-rtc/livekit/rtc/video_stream.py index efd975d2..e445e0e7 100644 --- a/livekit-rtc/livekit/rtc/video_stream.py +++ b/livekit-rtc/livekit/rtc/video_stream.py @@ -24,9 +24,12 @@ class VideoStream: - def __init__(self, track: Track, - loop: Optional[asyncio.AbstractEventLoop] = None, - capacity: int = 0) -> None: + def __init__( + self, + track: Track, + loop: Optional[asyncio.AbstractEventLoop] = None, + capacity: int = 0, + ) -> None: self._track = track self._loop = loop or asyncio.get_event_loop() self._ffi_queue = ffi_client.queue.subscribe(self._loop) @@ -51,14 +54,17 @@ async def _run(self): event = await self._ffi_queue.wait_for(self._is_event) video_event = event.video_stream_event - if video_event.HasField('frame_received'): + if video_event.HasField("frame_received"): frame_info = video_event.frame_received.frame owned_buffer_info = video_event.frame_received.buffer - frame = VideoFrame(frame_info.timestamp_us, frame_info.rotation, - VideoFrameBuffer._from_owned_info(owned_buffer_info)) + frame = VideoFrame( + frame_info.timestamp_us, + frame_info.rotation, + VideoFrameBuffer._from_owned_info(owned_buffer_info), + ) self._queue.put(frame) - elif video_event.HasField('eos'): + elif video_event.HasField("eos"): break ffi_client.queue.unsubscribe(self._ffi_queue) diff --git a/livekit-rtc/setup.py b/livekit-rtc/setup.py index 58ae221c..d2c705aa 100644 --- a/livekit-rtc/setup.py +++ b/livekit-rtc/setup.py @@ -23,7 +23,7 @@ here = pathlib.Path(__file__).parent.resolve() about = {} -with open(os.path.join(here, 'livekit', 'rtc', 'version.py'), 'r') as f: +with open(os.path.join(here, "livekit", "rtc", "version.py"), "r") as f: exec(f.read(), about) @@ -34,20 +34,24 @@ def finalize_options(self): class BuildPyCommand(setuptools.command.build_py.build_py): - """ Download a prebuilt version of livekit_ffi """ + """Download a prebuilt version of livekit_ffi""" def run(self): - - download_script = here / 'rust-sdks' / 'download_ffi.py' - output = here / 'livekit' / 'rtc' / 'resources' - cmd = ['python3', str(download_script.absolute()), - '--output', str(output.absolute())] + download_script = here / "rust-sdks" / "download_ffi.py" + output = here / "livekit" / "rtc" / "resources" + cmd = [ + "python3", + str(download_script.absolute()), + "--output", + str(output.absolute()), + ] # cibuildwheel is crosscompiling to arm64 on macos, make sure we download the # right binary (kind of a hack here...) - if os.environ.get("CIBUILDWHEEL") == "1" \ - and "arm64" in os.environ.get("ARCHFLAGS", ""): - cmd += ['--arch', 'arm64'] + if os.environ.get("CIBUILDWHEEL") == "1" and "arm64" in os.environ.get( + "ARCHFLAGS", "" + ): + cmd += ["--arch", "arm64"] subprocess.run(cmd, check=True) setuptools.command.build_py.build_py.run(self) @@ -55,14 +59,14 @@ def run(self): setuptools.setup( name="livekit", - version=about['__version__'], - description="LiveKit Python Client SDK for LiveKit", + version=about["__version__"], + description="Python Client SDK for LiveKit", long_description=(here / "README.md").read_text(encoding="utf-8"), long_description_content_type="text/markdown", url="https://github.com/livekit/client-sdk-python", cmdclass={ - 'bdist_wheel': bdist_wheel, - 'build_py': BuildPyCommand, + "bdist_wheel": bdist_wheel, + "build_py": BuildPyCommand, }, classifiers=[ "Intended Audience :: Developers", @@ -77,16 +81,15 @@ def run(self): ], keywords=["webrtc", "realtime", "audio", "video", "livekit"], license="Apache-2.0", - packages=setuptools.find_namespace_packages(include=['livekit.*']), + packages=setuptools.find_namespace_packages(include=["livekit.*"]), python_requires=">=3.9.0", - install_requires=["protobuf>=3.1.0", - "types-protobuf>=3.1.0"], + install_requires=["protobuf>=3.1.0", "types-protobuf>=3.1.0"], package_data={ - "livekit.rtc": ['resources/*', '_proto/*.py'], + "livekit.rtc": ["resources/*", "_proto/*.py", "py.typed", "*.pyi", "**/*.pyi"], }, project_urls={ "Documentation": "https://docs.livekit.io", "Website": "https://livekit.io/", - "Source": "https://github.com/livekit/client-sdk-python/", + "Source": "https://github.com/livekit/python-sdks/", }, ) diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..97ebaa6a --- /dev/null +++ b/ruff.toml @@ -0,0 +1,5 @@ + + +exclude = [ + "_proto" +] From 308917c5a45a73209a959c59034a065936fc7d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:01:53 -0700 Subject: [PATCH 20/26] update ruff ci --- .github/workflows/ruff.yml | 7 ++++--- ruff.toml | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 74d987d8..20498f14 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -17,7 +17,8 @@ jobs: pip install ruff - name: Ruff livekit-api - run: ruff check --output-format=github livekit-api/ + run: ruff check --output-format=github . + + - name: Check format + run: ruff format --check . - - name: Ruff livekit-rtc - run: ruff check --output-format=github livekit-rtc/ diff --git a/ruff.toml b/ruff.toml index 97ebaa6a..79bba3d5 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,5 +1,3 @@ - - exclude = [ "_proto" ] From 0d6610ddd8536281a034079de3212be965c0a662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:03:11 -0700 Subject: [PATCH 21/26] fixes --- .github/workflows/ruff.yml | 4 +--- livekit-rtc/livekit/rtc/room.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 20498f14..06514b44 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -1,7 +1,5 @@ name: Ruff - Checks - -on: [push, pull_request] - +on: push jobs: build: runs-on: ubuntu-latest diff --git a/livekit-rtc/livekit/rtc/room.py b/livekit-rtc/livekit/rtc/room.py index 0fa532af..2087e015 100644 --- a/livekit-rtc/livekit/rtc/room.py +++ b/livekit-rtc/livekit/rtc/room.py @@ -29,7 +29,6 @@ from .participant import LocalParticipant, Participant, RemoteParticipant from .track import RemoteAudioTrack, RemoteVideoTrack from .track_publication import RemoteTrackPublication -from .participant import RemoteParticipant, Participant EventTypes = Literal[ "participant_connected", From f1a200a4ffcfcde807a5e187561385c80233fa8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:04:27 -0700 Subject: [PATCH 22/26] Update ruff.toml --- ruff.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ruff.toml b/ruff.toml index 79bba3d5..260091dc 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,3 +1,8 @@ exclude = [ "_proto" ] + +line-length = 88 +indent-width = 4 + +target-version = "py39" From f67784a8c47b13ae6afc77eaa927a7c20adafb23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:11:03 -0700 Subject: [PATCH 23/26] Update __init__.py --- livekit-rtc/livekit/rtc/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/livekit-rtc/livekit/rtc/__init__.py b/livekit-rtc/livekit/rtc/__init__.py index 5925fa9c..13a57d60 100644 --- a/livekit-rtc/livekit/rtc/__init__.py +++ b/livekit-rtc/livekit/rtc/__init__.py @@ -39,6 +39,10 @@ RemoteAudioTrack, RemoteVideoTrack, Track, + LocalTrack, + RemoteTrack, + AudioTrack, + VideoTrack ) from .e2ee import ( E2EEManager, From cd0634a53292d2ba9c959455df10b323c49b80e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:13:50 -0700 Subject: [PATCH 24/26] Update __init__.py --- livekit-api/livekit/api/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/livekit-api/livekit/api/__init__.py b/livekit-api/livekit/api/__init__.py index 7c9c3673..f6e784a5 100644 --- a/livekit-api/livekit/api/__init__.py +++ b/livekit-api/livekit/api/__init__.py @@ -16,7 +16,10 @@ """ # flake8: noqa -from ._proto import livekit_room_pb2 as proto_room +from ._proto.livekit_egress_pb2 import * +from ._proto.livekit_models_pb2 import * +from ._proto.livekit_room_pb2 import * +from ._proto.livekit_ingress_pb2 import * from .version import __version__ from .access_token import VideoGrants, AccessToken from .room_service import RoomService From a788af391973e803fb03e7e613c38b79b6974223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:16:22 -0700 Subject: [PATCH 25/26] nit --- livekit-rtc/livekit/rtc/__init__.py | 2 +- livekit-rtc/livekit/rtc/video_frame.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/livekit-rtc/livekit/rtc/__init__.py b/livekit-rtc/livekit/rtc/__init__.py index 13a57d60..ca5eabba 100644 --- a/livekit-rtc/livekit/rtc/__init__.py +++ b/livekit-rtc/livekit/rtc/__init__.py @@ -42,7 +42,7 @@ LocalTrack, RemoteTrack, AudioTrack, - VideoTrack + VideoTrack, ) from .e2ee import ( E2EEManager, diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index b1acea00..d82187a1 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -718,6 +718,17 @@ def __init__( height: int, stride: int = 0, ) -> None: + """ + Create a new ArgbFrame. + + Args: + data: The data for the frame. Must be at least width * height * sizeof(uint32) bytes. + format: The format of the data. + width: The width of the frame. + height: The height of the frame. + stride: The stride of the frame. If 0, the stride will be set to width * sizeof(uint32). + """ + if stride == 0: stride = width * ctypes.sizeof(ctypes.c_uint32) From 26ea0020ac3e680172f9e7d5eb8d74b6def4dbd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 27 Oct 2023 16:22:26 -0700 Subject: [PATCH 26/26] accept more buffer types --- livekit-rtc/livekit/rtc/audio_frame.py | 5 ++-- livekit-rtc/livekit/rtc/room.py | 4 +-- livekit-rtc/livekit/rtc/video_frame.py | 41 +++++++++++++++++--------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/livekit-rtc/livekit/rtc/audio_frame.py b/livekit-rtc/livekit/rtc/audio_frame.py index 7a31fbb5..ca53e44d 100644 --- a/livekit-rtc/livekit/rtc/audio_frame.py +++ b/livekit-rtc/livekit/rtc/audio_frame.py @@ -17,12 +17,13 @@ from ._proto import audio_frame_pb2 as proto_audio from ._proto import ffi_pb2 as proto_ffi from ._utils import get_address +from typing import Union class AudioFrame: def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], sample_rate: int, num_channels: int, samples_per_channel: int, @@ -34,10 +35,10 @@ def __init__( "data length must be >= num_channels * samples_per_channel * sizeof(int16)" ) + self._data = bytearray(data) self._sample_rate = sample_rate self._num_channels = num_channels self._samples_per_channel = samples_per_channel - self._data = data @staticmethod def create( diff --git a/livekit-rtc/livekit/rtc/room.py b/livekit-rtc/livekit/rtc/room.py index 2087e015..f76edf9c 100644 --- a/livekit-rtc/livekit/rtc/room.py +++ b/livekit-rtc/livekit/rtc/room.py @@ -133,8 +133,8 @@ async def connect( if options.e2ee: req.connect.options.e2ee.encryption_type = options.e2ee.encryption_type req.connect.options.e2ee.key_provider_options.shared_key = ( - options.e2ee.key_provider_options.shared_key - ) # type: ignore + options.e2ee.key_provider_options.shared_key # type: ignore + ) req.connect.options.e2ee.key_provider_options.ratchet_salt = ( options.e2ee.key_provider_options.ratchet_salt ) diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index d82187a1..776b7cfb 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -38,12 +38,16 @@ def __init__( class VideoFrameBuffer(ABC): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, buffer_type: VideoFrameBufferType.ValueType, ) -> None: - self._data = data + view = memoryview(data) + if not view.c_contiguous: + raise ValueError("data must be contiguous") + + self._data = bytearray(data) self._width = width self._height = height self._buffer_type = buffer_type @@ -145,7 +149,7 @@ def to_argb(self, dst: "ArgbFrame") -> None: class PlanarYuvBuffer(VideoFrameBuffer, ABC): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, buffer_type: VideoFrameBufferType.ValueType, @@ -198,7 +202,7 @@ def stride_v(self) -> int: class PlanarYuv8Buffer(PlanarYuvBuffer, ABC): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, buffer_type: VideoFrameBufferType.ValueType, @@ -251,7 +255,7 @@ def data_v(self) -> memoryview: class PlanarYuv16Buffer(PlanarYuvBuffer, ABC): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, buffer_type: VideoFrameBufferType.ValueType, @@ -304,7 +308,7 @@ def data_v(self) -> memoryview: class BiplanaraYuv8Buffer(VideoFrameBuffer, ABC): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, buffer_type: VideoFrameBufferType.ValueType, @@ -363,7 +367,7 @@ def data_uv(self) -> memoryview: class I420Buffer(PlanarYuv8Buffer): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, stride_y: int, @@ -423,7 +427,7 @@ def create(width: int, height: int) -> "I420Buffer": class I420ABuffer(PlanarYuv8Buffer): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, stride_y: int, @@ -506,7 +510,7 @@ def data_a(self) -> memoryview: class I422Buffer(PlanarYuv8Buffer): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, stride_y: int, @@ -521,6 +525,10 @@ def __init__( ) ) + view = memoryview(data) + if not view.c_contiguous: + raise ValueError("data must be contiguous") + chroma_width = (width + 1) // 2 chroma_height = height super().__init__( @@ -559,7 +567,7 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> class I444Buffer(PlanarYuv8Buffer): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, stride_y: int, @@ -612,7 +620,7 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> class I010Buffer(PlanarYuv16Buffer): def __init__( self, - data: bytearray, + data: Union[bytes, bytearray, memoryview], width: int, height: int, stride_y: int, @@ -668,7 +676,12 @@ def calc_data_size(height: int, stride_y: int, stride_u: int, stride_v: int) -> class NV12Buffer(BiplanaraYuv8Buffer): def __init__( - self, data: bytearray, width: int, height: int, stride_y: int, stride_uv: int + self, + data: Union[bytes, bytearray, memoryview], + width: int, + height: int, + stride_y: int, + stride_uv: int, ) -> None: if len(data) < NV12Buffer.calc_data_size(height, stride_y, stride_uv): raise ValueError( @@ -759,8 +772,8 @@ def to_i420(self) -> I420Buffer: return I420Buffer._from_owned_info(res.to_i420.buffer) @property - def data(self) -> bytearray: - return self._data + def data(self) -> memoryview: + return memoryview(self._data) @property def width(self) -> int: