diff --git a/spec/test-utils/webrtc.ts b/spec/test-utils/webrtc.ts index 735555a6c80..1cbf0e598cd 100644 --- a/spec/test-utils/webrtc.ts +++ b/spec/test-utils/webrtc.ts @@ -266,6 +266,9 @@ export class MockRTCRtpSender { public replaceTrack(track: MockMediaStreamTrack) { this.track = track; } + + public getParameters() {} + public setParameters() {} } export class MockRTCRtpReceiver { @@ -292,7 +295,7 @@ export class MockMediaStreamTrack { public listeners: [string, (...args: any[]) => any][] = []; public isStopped = false; - public settings?: MediaTrackSettings; + public settings: MediaTrackSettings = {}; public getSettings(): MediaTrackSettings { return this.settings!; @@ -592,6 +595,7 @@ export class MockCallFeed { export function installWebRTCMocks() { global.navigator = { mediaDevices: new MockMediaDevices().typed(), + userAgent: "This is definitely a user agent string", } as unknown as Navigator; global.window = { diff --git a/spec/unit/webrtc/call.spec.ts b/spec/unit/webrtc/call.spec.ts index 9e0a3116c79..ae6d3eb7c00 100644 --- a/spec/unit/webrtc/call.spec.ts +++ b/spec/unit/webrtc/call.spec.ts @@ -29,7 +29,6 @@ import { import { MCallAnswer, MCallHangupReject, - SDPStreamMetadata, SDPStreamMetadataKey, SDPStreamMetadataPurpose, } from "../../../src/webrtc/callEventTypes"; @@ -83,6 +82,7 @@ const fakeIncomingCall = async (client: TestClient, call: MatrixCall, version: s client: client.client, userId: "remote_user_id", deviceId: undefined, + feedId: "remote_stream_id", stream: new MockMediaStream("remote_stream_id", [ new MockMediaStreamTrack("remote_tack_id", "audio"), ]) as unknown as MediaStream, @@ -316,13 +316,13 @@ describe("Call", function () { }), ); - (call as any).pushRemoteFeed( + (call as any).pushRemoteStream( new MockMediaStream("remote_stream", [ new MockMediaStreamTrack("remote_audio_track", "audio"), new MockMediaStreamTrack("remote_video_track", "video"), ]), ); - const feed = call.getFeeds().find((feed) => feed.stream.id === "remote_stream"); + const feed = call.getFeeds().find((feed) => feed.stream?.id === "remote_stream"); expect(feed?.purpose).toBe(SDPStreamMetadataPurpose.Usermedia); expect(feed?.isAudioMuted()).toBeTruthy(); expect(feed?.isVideoMuted()).not.toBeTruthy(); @@ -439,8 +439,8 @@ describe("Call", function () { video_muted: false, }, }); - (call as any).pushRemoteFeed(new MockMediaStream("remote_stream", [])); - const feed = call.getFeeds().find((feed) => feed.stream.id === "remote_stream"); + (call as any).pushRemoteStream(new MockMediaStream("remote_stream", [])); + const feed = call.getFeeds().find((feed) => feed.stream?.id === "remote_stream"); call.onSDPStreamMetadataChangedReceived( makeMockEvent("@test:foo", { @@ -520,14 +520,14 @@ describe("Call", function () { it("if no video", async () => { call.getOpponentMember = jest.fn().mockReturnValue({ userId: "@bob:bar.uk" }); - (call as any).pushRemoteFeed(new MockMediaStream("remote_stream1", [])); + (call as any).pushRemoteStream(new MockMediaStream("remote_stream1", [])); expect(call.type).toBe(CallType.Voice); }); it("if remote video", async () => { call.getOpponentMember = jest.fn().mockReturnValue({ userId: "@bob:bar.uk" }); - (call as any).pushRemoteFeed( + (call as any).pushRemoteStream( new MockMediaStream("remote_stream1", [new MockMediaStreamTrack("track_id", "video")]), ); expect(call.type).toBe(CallType.Video); @@ -555,6 +555,7 @@ describe("Call", function () { roomId: call.roomId, userId: client.getUserId(), deviceId: undefined, + feedId: "local_stream1", purpose: 
SDPStreamMetadataPurpose.Usermedia, audioMuted: false, videoMuted: false, @@ -597,6 +598,7 @@ describe("Call", function () { client: client.client, userId: client.getUserId(), deviceId: undefined, + feedId: localUsermediaStream.id, stream: localUsermediaStream as unknown as MediaStream, purpose: SDPStreamMetadataPurpose.Usermedia, audioMuted: false, @@ -606,6 +608,7 @@ describe("Call", function () { client: client.client, userId: client.getUserId(), deviceId: undefined, + feedId: localScreensharingStream.id, stream: localScreensharingStream as unknown as MediaStream, purpose: SDPStreamMetadataPurpose.Screenshare, audioMuted: false, @@ -629,8 +632,8 @@ describe("Call", function () { video_muted: false, }, }); - (call as any).pushRemoteFeed(remoteUsermediaStream); - (call as any).pushRemoteFeed(remoteScreensharingStream); + (call as any).pushRemoteStream(remoteUsermediaStream); + (call as any).pushRemoteStream(remoteScreensharingStream); expect(call.localUsermediaFeed!.stream).toBe(localUsermediaStream); expect(call.localUsermediaStream).toBe(localUsermediaStream); @@ -762,7 +765,7 @@ describe("Call", function () { call.off(CallEvent.FeedsChanged, FEEDS_CHANGED_CALLBACK); }); - it("should ignore stream passed to pushRemoteFeed()", async () => { + it("should ignore stream passed to pushRemoteStream()", async () => { await call.onAnswerReceived( makeMockEvent("@test:foo", { version: 1, @@ -779,16 +782,16 @@ describe("Call", function () { }), ); - (call as any).pushRemoteFeed(new MockMediaStream(STREAM_ID)); - (call as any).pushRemoteFeed(new MockMediaStream(STREAM_ID)); + (call as any).pushRemoteStream(new MockMediaStream(STREAM_ID)); + (call as any).pushRemoteStream(new MockMediaStream(STREAM_ID)); expect(call.getRemoteFeeds().length).toBe(1); expect(FEEDS_CHANGED_CALLBACK).toHaveBeenCalledTimes(1); }); - it("should ignore stream passed to pushRemoteFeedWithoutMetadata()", async () => { - (call as any).pushRemoteFeedWithoutMetadata(new MockMediaStream(STREAM_ID)); - (call as any).pushRemoteFeedWithoutMetadata(new MockMediaStream(STREAM_ID)); + it("should ignore stream passed to pushRemoteStreamWithoutMetadata()", async () => { + (call as any).pushRemoteStreamWithoutMetadata(new MockMediaStream(STREAM_ID)); + (call as any).pushRemoteStreamWithoutMetadata(new MockMediaStream(STREAM_ID)); expect(call.getRemoteFeeds().length).toBe(1); expect(FEEDS_CHANGED_CALLBACK).toHaveBeenCalledTimes(1); @@ -858,18 +861,8 @@ describe("Call", function () { }); describe("receiving sdp_stream_metadata_changed events", () => { - const setupCall = (audio: boolean, video: boolean): SDPStreamMetadata => { - const metadata = { - stream: { - user_id: "user", - device_id: "device", - purpose: SDPStreamMetadataPurpose.Usermedia, - audio_muted: audio, - video_muted: video, - tracks: {}, - }, - }; - (call as any).pushRemoteFeed( + const setupCall = (audio: boolean, video: boolean): void => { + (call as any).pushRemoteStream( new MockMediaStream("stream", [ new MockMediaStreamTrack("track1", "audio"), new MockMediaStreamTrack("track1", "video"), @@ -877,22 +870,30 @@ describe("Call", function () { ); call.onSDPStreamMetadataChangedReceived({ getContent: () => ({ - [SDPStreamMetadataKey]: metadata, + [SDPStreamMetadataKey]: { + stream: { + user_id: "user", + device_id: "device", + purpose: SDPStreamMetadataPurpose.Usermedia, + audio_muted: audio, + video_muted: video, + tracks: {}, + }, + }, }), } as MatrixEvent); - return metadata; }; it("should handle incoming sdp_stream_metadata_changed with audio muted", async () => { - 
const metadata = setupCall(true, false); - expect((call as any).remoteSDPStreamMetadata).toStrictEqual(metadata); + setupCall(true, false); + expect(call.opponentSupportsSDPStreamMetadata()).toBe(true); expect(call.getRemoteFeeds()[0].isAudioMuted()).toBe(true); expect(call.getRemoteFeeds()[0].isVideoMuted()).toBe(false); }); it("should handle incoming sdp_stream_metadata_changed with video muted", async () => { - const metadata = setupCall(false, true); - expect((call as any).remoteSDPStreamMetadata).toStrictEqual(metadata); + setupCall(false, true); + expect(call.opponentSupportsSDPStreamMetadata()).toBe(true); expect(call.getRemoteFeeds()[0].isAudioMuted()).toBe(false); expect(call.getRemoteFeeds()[0].isVideoMuted()).toBe(true); }); @@ -1394,8 +1395,8 @@ describe("Call", function () { describe("onTrack", () => { it("ignores streamless track", async () => { - // @ts-ignore Mock pushRemoteFeed() is private - jest.spyOn(call, "pushRemoteFeed"); + // @ts-ignore Mock pushRemoteStream() is private + jest.spyOn(call, "pushRemoteStream"); await call.placeVoiceCall(); @@ -1404,13 +1405,13 @@ describe("Call", function () { track: new MockMediaStreamTrack("track_ev", "audio"), } as unknown as RTCTrackEvent); - // @ts-ignore Mock pushRemoteFeed() is private - expect(call.pushRemoteFeed).not.toHaveBeenCalled(); + // @ts-ignore Mock pushRemoteStream() is private + expect(call.pushRemoteStream).not.toHaveBeenCalled(); }); it("correctly pushes", async () => { - // @ts-ignore Mock pushRemoteFeed() is private - jest.spyOn(call, "pushRemoteFeed"); + // @ts-ignore Mock pushRemoteStream() is private + jest.spyOn(call, "pushRemoteStream"); await call.placeVoiceCall(); await call.onAnswerReceived( @@ -1430,9 +1431,9 @@ describe("Call", function () { track: stream.getAudioTracks()[0], } as unknown as RTCTrackEvent); - // @ts-ignore Mock pushRemoteFeed() is private - expect(call.pushRemoteFeed).toHaveBeenCalledWith(stream); - // @ts-ignore Mock pushRemoteFeed() is private + // @ts-ignore Mock pushRemoteStream() is private + expect(call.pushRemoteStream).toHaveBeenCalledWith(stream); + // @ts-ignore Mock pushRemoteStream() is private expect(call.removeTrackListeners.has(stream)).toBe(true); }); }); diff --git a/spec/unit/webrtc/callFeed.spec.ts b/spec/unit/webrtc/callFeed.spec.ts index e14a1a0c56b..c648f7aa066 100644 --- a/spec/unit/webrtc/callFeed.spec.ts +++ b/spec/unit/webrtc/callFeed.spec.ts @@ -102,6 +102,8 @@ describe("CallFeed", () => { [CallState.Connected, true], [CallState.Connecting, false], ])("should react to call state, when !isLocal()", (state: CallState, expected: Boolean) => { + feed.stream?.addTrack(new MockMediaStreamTrack("track1", "video").typed()); + call.state = state; call.emit(CallEvent.State, state); expect(feed.connected).toBe(expected); diff --git a/spec/unit/webrtc/groupCall.spec.ts b/spec/unit/webrtc/groupCall.spec.ts index 2cd1f4cdabe..8f271a24109 100644 --- a/spec/unit/webrtc/groupCall.spec.ts +++ b/spec/unit/webrtc/groupCall.spec.ts @@ -807,7 +807,7 @@ describe("Group Call", function () { await groupCall.setMicrophoneMuted(true); - groupCall.localCallFeed!.stream.getAudioTracks().forEach((track) => expect(track.enabled).toBe(false)); + groupCall.localCallFeed!.stream!.getAudioTracks().forEach((track) => expect(track.enabled).toBe(false)); expect(groupCall.localCallFeed!.setAudioVideoMuted).toHaveBeenCalledWith(true, null); setAVMutedArray.forEach((f) => expect(f).toHaveBeenCalledWith(true, null)); tracksArray.forEach((track) => expect(track.enabled).toBe(false)); @@ -835,9 +835,8 
@@ describe("Group Call", function () { await groupCall.setLocalVideoMuted(true); - groupCall.localCallFeed!.stream.getVideoTracks().forEach((track) => expect(track.enabled).toBe(false)); - expect(mockClient.getMediaHandler().getUserMediaStream).toHaveBeenCalledWith(true, false); - expect(groupCall.updateLocalUsermediaStream).toHaveBeenCalled(); + groupCall.localCallFeed!.stream!.getVideoTracks().forEach((track) => expect(track.enabled).toBe(false)); + expect(groupCall.localCallFeed!.setAudioVideoMuted).toHaveBeenCalledWith(null, true); setAVMutedArray.forEach((f) => expect(f).toHaveBeenCalledWith(null, true)); tracksArray.forEach((track) => expect(track.enabled).toBe(false)); sendMetadataUpdateArray.forEach((f) => expect(f).toHaveBeenCalled()); @@ -872,7 +871,7 @@ describe("Group Call", function () { call.getOpponentMember = () => ({ userId: call.invitee } as RoomMember); call.onSDPStreamMetadataChangedReceived(metadataEvent); // @ts-ignore Mock - call.pushRemoteFeed( + call.pushRemoteStream( // @ts-ignore Mock new MockMediaStream("stream", [ new MockMediaStreamTrack("audio_track", "audio"), @@ -899,7 +898,7 @@ describe("Group Call", function () { call.getOpponentMember = () => ({ userId: call.invitee } as RoomMember); call.onSDPStreamMetadataChangedReceived(metadataEvent); // @ts-ignore Mock - call.pushRemoteFeed( + call.pushRemoteStream( // @ts-ignore Mock new MockMediaStream("stream", [ new MockMediaStreamTrack("audio_track", "audio"), @@ -1157,7 +1156,7 @@ describe("Group Call", function () { }), } as MatrixEvent); // @ts-ignore Mock - call.pushRemoteFeed( + call.pushRemoteStream( // @ts-ignore Mock new MockMediaStream("screensharing_stream", [new MockMediaStreamTrack("video_track", "video")]), ); @@ -1211,6 +1210,7 @@ describe("Group Call", function () { roomId: FAKE_ROOM_ID, userId: FAKE_USER_ID_2, deviceId: FAKE_DEVICE_ID_1, + feedId: "foo", stream: new MockMediaStream("foo", []).typed(), purpose: SDPStreamMetadataPurpose.Usermedia, audioMuted: false, @@ -1223,6 +1223,7 @@ describe("Group Call", function () { roomId: FAKE_ROOM_ID, userId: FAKE_USER_ID_3, deviceId: FAKE_DEVICE_ID_1, + feedId: "foo", stream: new MockMediaStream("foo", []).typed(), purpose: SDPStreamMetadataPurpose.Usermedia, audioMuted: false, diff --git a/src/webrtc/call.ts b/src/webrtc/call.ts index edcbebe3866..6da3b36d8c9 100644 --- a/src/webrtc/call.ts +++ b/src/webrtc/call.ts @@ -53,7 +53,7 @@ import { SDPStreamMetadataKeyStable, FocusEventBaseContent, } from "./callEventTypes"; -import { CallFeed } from "./callFeed"; +import { CallFeed, CallFeedEvent } from "./callFeed"; import { MatrixClient } from "../client"; import { EventEmitterEvents, TypedEventEmitter } from "../models/typed-event-emitter"; import { DeviceInfo } from "../crypto/deviceinfo"; @@ -93,6 +93,12 @@ interface AssertedIdentity { displayName: string; } +export enum SimulcastResolution { + Full = "f", + Half = "h", + Quarter = "q", +} + enum MediaType { AUDIO = "audio", VIDEO = "video", @@ -267,6 +273,31 @@ const CALL_TIMEOUT_MS = 60 * 1000; // ms const CALL_LENGTH_INTERVAL = 1000; // ms /** The time after which we end the call, if ICE got disconnected */ const ICE_DISCONNECTED_TIMEOUT = 30 * 1000; // ms +/** + * The time we wait for call feed size and visibility changing before we send a + * new m.call.track_subscription + */ +const SUBSCRIBE_TO_FOCUS_TIMEOUT = 2 * 1000; + +const SIMULCAST_ENCODINGS = [ + // Order is important here: some browsers (e.g. 
+ // Chrome) will only send some of the encodings, if + // the track has a resolution to low for it to send + // all, in that case the encoding higher in the list + // has priority and therefore we put full as first + // as we always want to send the full resolution + { + rid: SimulcastResolution.Full, + }, + { + rid: SimulcastResolution.Half, + scaleResolutionDownBy: 2.0, + }, + { + rid: SimulcastResolution.Quarter, + scaleResolutionDownBy: 4.0, + }, +]; export class CallError extends Error { public readonly code: string; @@ -296,6 +327,10 @@ function getCodecParamMods(isPtt: boolean): CodecParamsMod[] { return mods; } +function isFirefox(): boolean { + return navigator.userAgent.indexOf("Firefox") !== -1; +} + export type CallEventHandlerMap = { [CallEvent.DataChannel]: (channel: RTCDataChannel) => void; [CallEvent.FeedsChanged]: (feeds: CallFeed[]) => void; @@ -352,7 +387,6 @@ export class MatrixCall extends TypedEventEmitter(); - private subscribedTracks: FocusTrackDescription[] = []; private inviteOrAnswerSent = false; private waitForLocalAVStream = false; @@ -388,7 +422,6 @@ export class MatrixCall extends TypedEventEmitter(); private remoteAssertedIdentity?: AssertedIdentity; - private remoteSDPStreamMetadata?: SDPStreamMetadata; private callLengthInterval?: ReturnType; private callStartTime?: number; @@ -400,6 +433,10 @@ export class MatrixCall extends TypedEventEmitter; + + private _opponentSupportsSDPStreamMetadata = false; + /** * Construct a new Matrix Call. * @param opts - Config options. @@ -549,8 +586,8 @@ export class MatrixCall extends TypedEventEmitter feed.stream.id === streamId); + private getFeedById(streamId: string): CallFeed | undefined { + return this.getFeeds().find((feed) => feed.feedId === streamId); } /** @@ -605,19 +642,16 @@ export class MatrixCall extends TypedEventEmitter { + if (!transceiver.sender.track) return tracks; if ( ![ getTransceiverKey(localFeed.purpose, "audio"), @@ -633,7 +667,11 @@ export class MatrixCall extends TypedEventEmitter !feed.isLocal()); } - private pushRemoteFeed(stream: MediaStream): void { + private pushRemoteStream(stream: MediaStream): void { // Fallback to old behavior if the other side doesn't support SDPStreamMetadata - const metadata = this.remoteSDPStreamMetadata?.[stream.id]; - if (!this.opponentSupportsSDPStreamMetadata() || !metadata) { - this.pushRemoteFeedWithoutMetadata(stream); + if (!this.opponentSupportsSDPStreamMetadata()) { + this.pushRemoteStreamWithoutMetadata(stream); return; } - // If we're calling with a focus we trust its metadata, otherwise we - // only trust ourselves to avoid impersonation - const userId = this.isFocus ? metadata.user_id : this.getOpponentMember()!.userId; - const deviceId = this.isFocus ? 
metadata.device_id : this.getOpponentDeviceId()!; - const purpose = metadata.purpose; - const audioMuted = metadata.audio_muted; - const videoMuted = metadata.video_muted; - - if (!purpose) { - logger.warn( - `Call ${this.callId} Ignoring stream with id ${stream.id} because we didn't get any metadata about it`, - ); + const feed = this.getFeedById(stream.id); + if (!feed) { + logger.warn(`Ignoring stream with id ${stream.id} because we don't have a feed for it`); return; } - if (this.getFeedByStreamId(stream.id)) { - logger.warn(`Ignoring stream with id ${stream.id} because we already have a feed for it`); - return; - } - - this.feeds.push( - new CallFeed({ - client: this.client, - call: this, - roomId: this.roomId, - userId, - deviceId, - stream, - purpose, - audioMuted, - videoMuted, - }), - ); - - this.emit(CallEvent.FeedsChanged, this.feeds); + feed.setNewStream(stream); logger.info( `Call ${this.callId} Pushed remote stream (` + - `id="${stream.id}" ` + + `id="${feed.feedId}" ` + `active="${stream.active}" ` + - `purpose=${purpose} ` + - `userId=${userId}` + - `deviceId=${deviceId}` + + `purpose=${feed.purpose} ` + + `userId=${feed.userId}` + + `deviceId=${feed.deviceId}` + `)`, ); } @@ -721,7 +730,7 @@ export class MatrixCall extends TypedEventEmitter callFeed.stream.id === feed.stream.id)) { - logger.info(`Ignoring duplicate local stream ${callFeed.stream.id} in call ${this.callId}`); + if (!callFeed.stream) { + logger.warn(`Ignoring stream-less local feed ${callFeed.feedId} in call ${this.callId}`); + return; + } + + if (this.feeds.some((feed) => callFeed.feedId === feed.feedId)) { + logger.info(`Ignoring duplicate local stream ${callFeed.feedId} in call ${this.callId}`); return; } this.feeds.push(callFeed); if (addToPeerConnection) { - for (const track of callFeed.stream.getTracks()) { + for (const track of callFeed.stream!.getTracks()) { logger.info( `Call ${this.callId} ` + `Adding track (` + `id="${track.id}", ` + `kind="${track.kind}", ` + - `streamId="${callFeed.stream.id}", ` + + `streamId="${callFeed.feedId}", ` + `streamPurpose="${callFeed.purpose}", ` + `enabled=${track.enabled}` + `) to peer connection`, ); + const encodings = track.kind === "video" ? SIMULCAST_ENCODINGS : undefined; + const tKey = getTransceiverKey(callFeed.purpose, track.kind); if (this.transceivers.has(tKey)) { // we already have a sender, so we re-use it. We try to re-use transceivers as much @@ -826,28 +844,39 @@ export class MatrixCall extends TypedEventEmitter t.sender === newSender); - if (newTransciever) { - this.transceivers.set(tKey, newTransciever); - } else { - logger.warn("Didn't find a matching transceiver after adding track!"); + // create a new one + const transceiver = this.peerConn!.addTransceiver(track, { + streams: [callFeed.stream!], + sendEncodings: this.isFocus && isFirefox() ? undefined : encodings, + }); + + if (this.isFocus && isFirefox()) { + const parameters = transceiver.sender.getParameters(); + transceiver.sender.setParameters({ + ...transceiver.sender.getParameters(), + encodings: encodings ?? parameters.encodings, + }); } + + this.transceivers.set(tKey, transceiver); } } } @@ -855,8 +884,8 @@ export class MatrixCall extends TypedEventEmitter t.sender === newSender); - if (newTransciever) { - this.transceivers.set(tKey, newTransciever); - } else { - logger.warn("Couldn't find matching transceiver for newly added track!"); + const newTransceiver = this.peerConn!.addTransceiver(track, { + streams: [this.localUsermediaStream!], + sendEncodings: this.isFocus && isFirefox() ? 
undefined : encodings, + }); + + if (this.isFocus && isFirefox()) { + const parameters = newTransceiver.sender.getParameters(); + newTransceiver.sender.setParameters({ + ...newTransceiver.sender.getParameters(), + encodings: encodings ?? parameters.encodings, + }); } + + this.transceivers.set(tKey, newTransceiver); } } } @@ -1602,7 +1672,7 @@ export class MatrixCall extends TypedEventEmitter Boolean(info.tracks)) // Skip trackless feeds - .reduce( - (a: FocusTrackDescription[], [s, i]) => [ - ...a, - ...Object.keys(i.tracks).map((t) => ({ stream_id: s, track_id: t })), - ], - [], - ) // Get array of tracks from feeds - .filter((track) => !this.subscribedTracks.find((subscribed) => utils.deepCompare(track, subscribed))); // Filter out already subscribed tracks + clearTimeout(this.subscribeToFocusTimeout); + this.subscribeToFocusTimeout = setTimeout(() => { + this.sendSubscriptionFocusEvent(); + }, SUBSCRIBE_TO_FOCUS_TIMEOUT); + } - if (tracks.length === 0) { - logger.info("Failed to find any new streams to subscribe to"); - return; - } else { - this.subscribedTracks.push(...tracks); + /** + * This method should only ever be called by MatrixCall::subscribeToFocus()! + */ + private sendSubscriptionFocusEvent(): void { + const subscribe: FocusTrackDescription[] = []; + const unsubscribe: FocusTrackDescription[] = []; + for (const { feedId, tracksMetadata, isVisible, width, height } of this.getRemoteFeeds()) { + for (const [trackId, trackMetadata] of Object.entries(tracksMetadata)) { + const trackDescription: FocusTrackDescription = { + track_id: trackId, + stream_id: feedId, + }; + + if (trackMetadata.kind === "audio") { + // We want audio from everyone + subscribe.push(trackDescription); + } else if (isVisible && width !== 0 && height !== 0) { + // Subscribe to visible videos + trackDescription.width = width; + trackDescription.height = height; + + subscribe.push(trackDescription); + } else { + // Unsubscribe from invisible videos + unsubscribe.push(trackDescription); + } + } } + // Return, if there is nothing to do + if (subscribe.length === 0 && unsubscribe.length === 0) return; + + // TODO: Is it ok to keep re-requesting tracks this.sendFocusEvent(EventType.CallTrackSubscription, { - subscribe: tracks, - unsubscribe: [], + subscribe, + unsubscribe, } as FocusTrackSubscriptionEvent); } + private onCallFeedSizeChanged = async (): Promise => { + this.subscribeToFocus(); + }; + public updateRemoteSDPStreamMetadata(metadata: SDPStreamMetadata): void { - if (!metadata) return; - this.remoteSDPStreamMetadata = utils.recursivelyAssign(this.remoteSDPStreamMetadata || {}, metadata, true); + this._opponentSupportsSDPStreamMetadata = true; + + let feedsChanged = false; + + // Add new feeds and update existing ones + for (const [streamId, streamMetadata] of Object.entries(metadata)) { + let feed = this.getRemoteFeeds().find((f) => f.feedId === streamId); + if (feed) { + feed.purpose = streamMetadata.purpose; + feed.tracksMetadata = streamMetadata.tracks; + } else { + feed = new CallFeed({ + client: this.client, + call: this, + roomId: this.roomId, + userId: this.isFocus ? streamMetadata.user_id : this.getOpponentMember()!.userId, + deviceId: this.isFocus ? 
streamMetadata.device_id : this.getOpponentDeviceId()!, + feedId: streamId, + purpose: streamMetadata.purpose, + audioMuted: streamMetadata.audio_muted, + videoMuted: streamMetadata.video_muted, + tracksMetadata: streamMetadata.tracks, + }); + this.addRemoteFeed(feed, false); + feedsChanged = true; + } + feed.setAudioVideoMuted(streamMetadata.audio_muted, streamMetadata.video_muted); + } + + // Remove old feeds for (const feed of this.getRemoteFeeds()) { - const streamId = feed.stream.id; - const metadata = this.remoteSDPStreamMetadata![streamId]; + if (!Object.keys(metadata).includes(feed.feedId)) { + this.deleteFeed(feed, false); + feedsChanged = true; + } + } - feed.setAudioVideoMuted(metadata?.audio_muted, metadata?.video_muted); - feed.purpose = this.remoteSDPStreamMetadata![streamId]?.purpose; - feed.userId = this.remoteSDPStreamMetadata![streamId]?.user_id; + if (feedsChanged) { + this.emit(CallEvent.FeedsChanged, this.feeds); } if (this.isFocus) { this.subscribeToFocus(); @@ -2082,9 +2219,10 @@ export class MatrixCall extends TypedEventEmitter(); - const metadata = content[SDPStreamMetadataKey]; - this.updateRemoteSDPStreamMetadata(metadata); + const metadata = event.getContent()?.[SDPStreamMetadataKey]; + if (metadata) { + this.updateRemoteSDPStreamMetadata(metadata); + } } public async onAssertedIdentityReceived(event: MatrixEvent): Promise { @@ -2197,7 +2335,7 @@ export class MatrixCall extends TypedEventEmitter { if (stream.getTracks().length === 0) { - // FIXME: We should really be doing this per-track. Šimon - // leaves this for when we switch to mids for signalling - const getIndex = (): number => this.subscribedTracks.findIndex((t) => t.stream_id === stream.id); - let indexOfTrackToRemove = getIndex(); - while (indexOfTrackToRemove !== -1) { - this.subscribedTracks.splice(indexOfTrackToRemove, 1); - indexOfTrackToRemove = getIndex(); - } - logger.info(`Call ${this.callId} removing track streamId: ${stream.id}`); - this.deleteFeedByStream(stream); stream.removeEventListener("removetrack", onRemoveTrack); this.removeTrackListeners.delete(stream); } @@ -2681,6 +2809,10 @@ export class MatrixCall extends TypedEventEmitter +Copyright 2021 - 2022 Šimon Brandner +Copyright 2021 - 2023 The Matrix.org Foundation C.I.C. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -import { SDPStreamMetadataPurpose } from "./callEventTypes"; +import { SDPStreamMetadataPurpose, SDPStreamMetadataTracks } from "./callEventTypes"; import { acquireContext, releaseContext } from "./audioContext"; import { MatrixClient } from "../client"; import { RoomMember } from "../models/room-member"; @@ -31,8 +32,14 @@ export interface ICallFeedOpts { roomId?: string; userId: string; deviceId: string | undefined; - stream: MediaStream; + /** + * Now, this should be the same as streamId but in the future we might want + * to use something different + */ + feedId: string; + stream?: MediaStream; purpose: SDPStreamMetadataPurpose; + tracksMetadata?: SDPStreamMetadataTracks; /** * Whether or not the remote SDPStreamMetadata says audio is muted */ @@ -53,28 +60,31 @@ export enum CallFeedEvent { LocalVolumeChanged = "local_volume_changed", VolumeChanged = "volume_changed", ConnectedChanged = "connected_changed", + SizeChanged = "size_changed", Speaking = "speaking", Disposed = "disposed", } type EventHandlerMap = { - [CallFeedEvent.NewStream]: (stream: MediaStream) => void; + [CallFeedEvent.NewStream]: (stream?: MediaStream) => void; [CallFeedEvent.MuteStateChanged]: (audioMuted: boolean, videoMuted: boolean) => void; [CallFeedEvent.LocalVolumeChanged]: (localVolume: number) => void; [CallFeedEvent.VolumeChanged]: (volume: number) => void; [CallFeedEvent.ConnectedChanged]: (connected: boolean) => void; + [CallFeedEvent.SizeChanged]: () => void; [CallFeedEvent.Speaking]: (speaking: boolean) => void; [CallFeedEvent.Disposed]: () => void; }; export class CallFeed extends TypedEventEmitter { - public stream: MediaStream; - public sdpMetadataStreamId: string; - public userId: string; + public feedId: string; + public readonly userId: string; public readonly deviceId: string | undefined; public purpose: SDPStreamMetadataPurpose; public speakingVolumeSamples: number[]; + public tracksMetadata: SDPStreamMetadataTracks = {}; + private _stream?: MediaStream; private client: MatrixClient; private call?: MatrixCall; private roomId?: string; @@ -91,6 +101,11 @@ export class CallFeed extends TypedEventEmitter private _disposed = false; private _connected = false; + private _width = 0; + private _height = 0; + + private _isVisible = false; + public constructor(opts: ICallFeedOpts) { super(); @@ -103,10 +118,9 @@ export class CallFeed extends TypedEventEmitter this.audioMuted = opts.audioMuted; this.videoMuted = opts.videoMuted; this.speakingVolumeSamples = new Array(SPEAKING_SAMPLE_COUNT).fill(-Infinity); - this.sdpMetadataStreamId = opts.stream.id; + this.feedId = opts.feedId; - this.updateStream(null, opts.stream); - this.stream = opts.stream; // updateStream does this, but this makes TS happier + this.updateStream(undefined, opts.stream); if (this.hasAudioTrack) { this.initVolumeMeasuring(); @@ -114,8 +128,12 @@ export class CallFeed extends TypedEventEmitter if (opts.call) { opts.call.addListener(CallEvent.State, this.onCallState); - this.onCallState(opts.call.state); } + this.updateConnected(); + } + + public get stream(): MediaStream | undefined { + return this._stream; } public get connected(): boolean { @@ -128,31 +146,44 @@ export class CallFeed extends TypedEventEmitter this.emit(CallFeedEvent.ConnectedChanged, this.connected); } + public get isVisible(): boolean { + return this._isVisible; + } + + public get width(): number | undefined { + return this._width; + } + + public get height(): number | undefined { + return this._height; + } + private get hasAudioTrack(): boolean { - return 
this.stream.getAudioTracks().length > 0; + return this.stream ? this.stream.getAudioTracks().length > 0 : false; } - private updateStream(oldStream: MediaStream | null, newStream: MediaStream): void { + private updateStream(oldStream?: MediaStream, newStream?: MediaStream): void { if (newStream === oldStream) return; if (oldStream) { oldStream.removeEventListener("addtrack", this.onAddTrack); - this.measureVolumeActivity(false); + oldStream.removeEventListener("removetrack", this.onRemoveTrack); + clearTimeout(this.volumeLooperTimeout); } - this.stream = newStream; - newStream.addEventListener("addtrack", this.onAddTrack); + this._stream = newStream; + newStream?.addEventListener("addtrack", this.onAddTrack); + newStream?.addEventListener("removetrack", this.onRemoveTrack); - if (this.hasAudioTrack) { - this.initVolumeMeasuring(); - } else { - this.measureVolumeActivity(false); - } + this.updateConnected(); + this.initVolumeMeasuring(); + this.volumeLooper(); this.emit(CallFeedEvent.NewStream, this.stream); } private initVolumeMeasuring(): void { + if (!this.stream) return; if (!this.hasAudioTrack) return; if (!this.audioContext) this.audioContext = acquireContext(); @@ -167,16 +198,30 @@ export class CallFeed extends TypedEventEmitter } private onAddTrack = (): void => { + this.updateConnected(); this.emit(CallFeedEvent.NewStream, this.stream); }; - private onCallState = (state: CallState): void => { - if (state === CallState.Connected) { - this.connected = true; - } else if (state === CallState.Connecting) { + private onRemoveTrack = (): void => { + this.updateConnected(); + this.emit(CallFeedEvent.NewStream, this.stream); + }; + + private onCallState = (): void => { + this.updateConnected(); + }; + + private updateConnected(): void { + if (this.call?.state === CallState.Connecting) { + this.connected = false; + } else if (!this.stream) { this.connected = false; + } else if (this.stream.getTracks().length === 0) { + this.connected = false; + } else if (this.call?.state === CallState.Connected) { + this.connected = true; } - }; + } /** * Returns callRoom member @@ -204,7 +249,7 @@ export class CallFeed extends TypedEventEmitter * @returns is audio muted? 
*/ public isAudioMuted(): boolean { - return this.stream.getAudioTracks().length === 0 || this.audioMuted; + return !this.stream || this.stream.getAudioTracks().length === 0 || this.audioMuted; } /** @@ -214,7 +259,7 @@ export class CallFeed extends TypedEventEmitter */ public isVideoMuted(): boolean { // We assume only one video track - return this.stream.getVideoTracks().length === 0 || this.videoMuted; + return !this.stream || this.stream.getVideoTracks().length === 0 || this.videoMuted; } public isSpeaking(): boolean { @@ -255,8 +300,7 @@ export class CallFeed extends TypedEventEmitter */ public measureVolumeActivity(enabled: boolean): void { if (enabled) { - if (!this.analyser || !this.frequencyBinCount || !this.hasAudioTrack) return; - + clearTimeout(this.volumeLooperTimeout); this.measuringVolumeActivity = true; this.volumeLooper(); } else { @@ -272,7 +316,8 @@ export class CallFeed extends TypedEventEmitter private volumeLooper = (): void => { if (!this.analyser) return; - + if (!this.hasAudioTrack) return; + if (!this.frequencyBinCount) return; if (!this.measuringVolumeActivity) return; this.analyser.getFloatFrequencyData(this.frequencyBinCount!); @@ -308,13 +353,17 @@ export class CallFeed extends TypedEventEmitter public clone(): CallFeed { const mediaHandler = this.client.getMediaHandler(); - const stream = this.stream.clone(); - logger.log(`callFeed cloning stream ${this.stream.id} newStream ${stream.id}`); - if (this.purpose === SDPStreamMetadataPurpose.Usermedia) { - mediaHandler.userMediaStreams.push(stream); - } else { - mediaHandler.screensharingStreams.push(stream); + let stream: MediaStream | undefined; + if (this.stream) { + stream = this.stream.clone(); + logger.log(`callFeed cloning stream ${this.stream.id} newStream ${stream.id}`); + + if (this.purpose === SDPStreamMetadataPurpose.Usermedia) { + mediaHandler.userMediaStreams.push(stream); + } else { + mediaHandler.screensharingStreams.push(stream); + } } return new CallFeed({ @@ -322,6 +371,7 @@ export class CallFeed extends TypedEventEmitter roomId: this.roomId, userId: this.userId, deviceId: this.deviceId, + feedId: this.feedId, stream, purpose: this.purpose, audioMuted: this.audioMuted, @@ -332,6 +382,7 @@ export class CallFeed extends TypedEventEmitter public dispose(): void { clearTimeout(this.volumeLooperTimeout); this.stream?.removeEventListener("addtrack", this.onAddTrack); + this.stream?.removeEventListener("removetrack", this.onRemoveTrack); this.call?.removeListener(CallEvent.State, this.onCallState); if (this.audioContext) { this.audioContext = undefined; @@ -358,4 +409,17 @@ export class CallFeed extends TypedEventEmitter this.localVolume = localVolume; this.emit(CallFeedEvent.LocalVolumeChanged, localVolume); } + + public setResolution(width: number, height: number): void { + this._width = Math.round(width); + this._height = Math.round(height); + + this.emit(CallFeedEvent.SizeChanged); + } + + public setIsVisible(isVisible: boolean): void { + this._isVisible = isVisible; + + this.emit(CallFeedEvent.SizeChanged); + } } diff --git a/src/webrtc/groupCall.ts b/src/webrtc/groupCall.ts index 541d090945f..bc752979f6b 100644 --- a/src/webrtc/groupCall.ts +++ b/src/webrtc/groupCall.ts @@ -396,6 +396,7 @@ export class GroupCall extends TypedEventEmitter< roomId: this.room.roomId, userId: this.client.getUserId()!, deviceId: this.client.getDeviceId()!, + feedId: stream.id, stream, purpose: SDPStreamMetadataPurpose.Usermedia, audioMuted: this.initWithAudioMuted || stream.getAudioTracks().length === 0 || this.isPtt, 
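Note on the new CallFeed API above: setResolution() and setIsVisible() emit CallFeedEvent.SizeChanged, which MatrixCall debounces (SUBSCRIBE_TO_FOCUS_TIMEOUT) into a fresh track subscription when talking to a focus. A minimal sketch of how a consuming UI might drive this; the watchFeedElement helper, the <video> element and the ResizeObserver/IntersectionObserver wiring are assumptions about the application, not part of this SDK change.

    import { CallFeed } from "./callFeed";

    // Sketch only: report the rendered size and visibility of a feed's video
    // element so that (for calls through a focus) only the video layers that
    // are actually shown get subscribed to.
    export function watchFeedElement(feed: CallFeed, element: HTMLVideoElement): () => void {
        const resizeObserver = new ResizeObserver(([entry]) => {
            // MatrixCall debounces these into a single track-subscription update.
            feed.setResolution(entry.contentRect.width, entry.contentRect.height);
        });
        const intersectionObserver = new IntersectionObserver(([entry]) => {
            feed.setIsVisible(entry.isIntersecting);
        });

        resizeObserver.observe(element);
        intersectionObserver.observe(element);

        // Cleanup when the tile is unmounted
        return () => {
            resizeObserver.disconnect();
            intersectionObserver.disconnect();
        };
    }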
@@ -419,12 +420,15 @@ export class GroupCall extends TypedEventEmitter< this.localCallFeed.setNewStream(stream); const micShouldBeMuted = this.localCallFeed.isAudioMuted(); const vidShouldBeMuted = this.localCallFeed.isVideoMuted(); - logger.log( - `groupCall ${this.groupCallId} updateLocalUsermediaStream oldStream ${oldStream.id} newStream ${stream.id} micShouldBeMuted ${micShouldBeMuted} vidShouldBeMuted ${vidShouldBeMuted}`, - ); setTracksEnabled(stream.getAudioTracks(), !micShouldBeMuted); setTracksEnabled(stream.getVideoTracks(), !vidShouldBeMuted); - this.client.getMediaHandler().stopUserMediaStream(oldStream); + + if (oldStream) { + this.client.getMediaHandler().stopUserMediaStream(oldStream); + logger.log( + `groupCall ${this.groupCallId} updateLocalUsermediaStream oldStream ${oldStream.id} newStream ${stream.id} micShouldBeMuted ${micShouldBeMuted} vidShouldBeMuted ${vidShouldBeMuted}`, + ); + } } } @@ -523,7 +527,9 @@ export class GroupCall extends TypedEventEmitter< } if (this.localScreenshareFeed) { - this.client.getMediaHandler().stopScreensharingStream(this.localScreenshareFeed.stream); + if (this.localScreenshareFeed.stream) { + this.client.getMediaHandler().stopScreensharingStream(this.localScreenshareFeed.stream); + } this.removeScreenshareFeed(this.localScreenshareFeed); this.localScreenshareFeed = undefined; this.localDesktopCapturerSourceId = undefined; @@ -652,20 +658,26 @@ export class GroupCall extends TypedEventEmitter< if (this.localCallFeed) { logger.log( - `groupCall ${this.groupCallId} setMicrophoneMuted stream ${this.localCallFeed.stream.id} muted ${muted}`, + `groupCall ${this.groupCallId} setMicrophoneMuted stream ${this.localCallFeed.feedId} muted ${muted}`, ); this.localCallFeed.setAudioVideoMuted(muted, null); // I don't believe its actually necessary to enable these tracks: they // are the one on the groupcall's own CallFeed and are cloned before being // given to any of the actual calls, so these tracks don't actually go // anywhere. Let's do it anyway to avoid confusion. 
- setTracksEnabled(this.localCallFeed.stream.getAudioTracks(), !muted); + if (this.localCallFeed.stream) { + setTracksEnabled(this.localCallFeed.stream.getAudioTracks(), !muted); + } } else { logger.log(`groupCall ${this.groupCallId} setMicrophoneMuted no stream muted ${muted}`); this.initWithAudioMuted = muted; } - this.forEachCall((call) => setTracksEnabled(call.localUsermediaFeed!.stream.getAudioTracks(), !muted)); + this.forEachCall((call) => { + if (call.localUsermediaStream) { + setTracksEnabled(call.localUsermediaStream.getAudioTracks(), !muted); + } + }); this.emit(GroupCallEvent.LocalMuteStateChanged, muted, this.isLocalVideoMuted()); if (!sendUpdatesBefore) await sendUpdates(); @@ -688,13 +700,15 @@ export class GroupCall extends TypedEventEmitter< if (this.localCallFeed) { logger.log( - `groupCall ${this.groupCallId} setLocalVideoMuted stream ${this.localCallFeed.stream.id} muted ${muted}`, + `groupCall ${this.groupCallId} setLocalVideoMuted stream ${this.localCallFeed.feedId} muted ${muted}`, ); const stream = await this.client.getMediaHandler().getUserMediaStream(true, !muted); await this.updateLocalUsermediaStream(stream); this.localCallFeed.setAudioVideoMuted(null, muted); - setTracksEnabled(this.localCallFeed.stream.getVideoTracks(), !muted); + if (this.localCallFeed.stream) { + setTracksEnabled(this.localCallFeed.stream.getVideoTracks(), !muted); + } } else { logger.log(`groupCall ${this.groupCallId} setLocalVideoMuted no stream muted ${muted}`); this.initWithVideoMuted = muted; @@ -736,6 +750,7 @@ export class GroupCall extends TypedEventEmitter< roomId: this.room.roomId, userId: this.client.getUserId()!, deviceId: this.client.getDeviceId()!, + feedId: stream.id, stream, purpose: SDPStreamMetadataPurpose.Screenshare, audioMuted: false, @@ -771,7 +786,9 @@ export class GroupCall extends TypedEventEmitter< this.forEachCall((call) => { if (call.localScreensharingFeed) call.removeLocalFeed(call.localScreensharingFeed); }); - this.client.getMediaHandler().stopScreensharingStream(this.localScreenshareFeed!.stream); + if (this.localScreenshareFeed?.stream) { + this.client.getMediaHandler().stopScreensharingStream(this.localScreenshareFeed.stream); + } // We have to remove the feed manually as MatrixCall has its clone, // so it won't be removed automatically this.removeScreenshareFeed(this.localScreenshareFeed!); @@ -1103,7 +1120,7 @@ export class GroupCall extends TypedEventEmitter< if (state === CallState.Connected) { if (call.isFocus) { - call.subscribeToFocus(); + call.subscribeToFocus(true); } const opponentUserId = call.getOpponentMember()?.userId; diff --git a/src/webrtc/mediaHandler.ts b/src/webrtc/mediaHandler.ts index 338701d7189..c41e609fd3e 100644 --- a/src/webrtc/mediaHandler.ts +++ b/src/webrtc/mediaHandler.ts @@ -402,8 +402,8 @@ export class MediaHandler extends TypedEventEmitter< instead XXX: Is this still true? */ - width: isWebkit ? { exact: 640 } : { ideal: 640 }, - height: isWebkit ? { exact: 360 } : { ideal: 360 }, + width: isWebkit ? { exact: 1280 } : { ideal: 1280 }, + height: isWebkit ? { exact: 720 } : { ideal: 720 }, } : false, };
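The capture bump to 1280x720 above is what makes the SIMULCAST_ENCODINGS declared in call.ts meaningful: with scaleResolutionDownBy of 2 and 4, the half and quarter layers land at roughly 640x360 and 320x180. Below is a minimal sketch of that relationship, under the assumption that a focus (or a consumer) would pick the smallest layer covering the width a subscriber reported via CallFeed.setResolution(); the pickSimulcastLayer helper and its selection rule are hypothetical and not part of this change, and the import path assumes the sketch sits next to call.ts.

    import { SimulcastResolution } from "./call";

    // Capture width requested from getUserMedia() above, and the scale factors
    // declared in SIMULCAST_ENCODINGS.
    const CAPTURE_WIDTH = 1280;
    const LAYER_SCALE: Record<SimulcastResolution, number> = {
        [SimulcastResolution.Full]: 1, // 1280x720
        [SimulcastResolution.Half]: 2, // ~640x360
        [SimulcastResolution.Quarter]: 4, // ~320x180
    };

    // Hypothetical helper: choose the smallest simulcast layer that still
    // covers the width a subscriber asked for.
    export function pickSimulcastLayer(requestedWidth: number): SimulcastResolution {
        const layers = [SimulcastResolution.Quarter, SimulcastResolution.Half, SimulcastResolution.Full];
        for (const rid of layers) {
            if (CAPTURE_WIDTH / LAYER_SCALE[rid] >= requestedWidth) return rid;
        }
        return SimulcastResolution.Full;
    }

    // pickSimulcastLayer(320) === SimulcastResolution.Quarter
    // pickSimulcastLayer(640) === SimulcastResolution.Half
    // pickSimulcastLayer(960) === SimulcastResolution.Full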