Skip to content

Commit

Permalink
Add H265RtpDepacketizer
Browse files Browse the repository at this point in the history
This commit adds an H265 depacketizer which takes incoming H265 RTP packets and emits H265 access units. It is closely based on the `H264RtpDepacketizer` added by @Sean-Der in paullouisageneau#1082.

I originally started with a version of this commit that was closer to the `H264RtpDepacketizer` and which emitted individual H265 NALUs in `H265RtpDepacketizer::buildFrames()`. This resulted in my `Track::onFrame()` callback being called once per NALU. That did not work well with the decoder that I'm using, because it wants to see the VPS/SPS/PPS NALUs as a unit before the decoder can be initialized (https://intel.github.io/libvpl/v2.10/API_ref/VPL_func_vid_decode.html#mfxvideodecode-decodeheader).

So for the `H265RtpDepacketizer` I've tried to make it emit access units rather than NALUs. An "access unit" is (RFC 7798):

> A set of NAL units that are associated with each other according to a specified classification rule, that are consecutive in decoding order, *and that contain exactly one coded picture.*

"Exactly one coded picture" seems to correspond with what a caller might expect an "onFrame" callback to do. Maybe the `H264RtpDepacketizer` should be revised to similarly emit H264 access units rather than NALUs, too. At least, I could not find a way to receive individual NALUs from the depacketizer and run the VPL decoder without needing to do my own buffering/copying of the NALUs.

With this commit I can now do the following:

* Generate encoded bitstream output from the Intel VPL encoder.
* Pass the output of the encoder one frame at a time to libdatachannel's `Track::send()` on a track with an `H265RtpPacketizer` media handler.
* Transport the video track over a WebRTC connection to a libdatachannel peer.
* Depacketize it with the `H265RtpDepacketizer` media handler in this commit.
* Pass the depacketized output via my `Track::onFrame()` callback to the Intel VPL decoder in "complete frame" mode (https://intel.github.io/libvpl/v2.10/API_ref/VPL_enums.html#_CPPv428MFX_BITSTREAM_COMPLETE_FRAME). Each "onFrame" callback corresponds to a single call to the decoder API to decode a frame.
  • Loading branch information
edmonds committed Oct 28, 2024
1 parent 3c33ea0 commit d535037
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ set(LIBDATACHANNEL_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/src/h264rtpdepacketizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/nalunit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/h265rtppacketizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/h265rtpdepacketizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/h265nalunit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/av1rtppacketizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/rtcpnackresponder.cpp
Expand Down Expand Up @@ -120,6 +121,7 @@ set(LIBDATACHANNEL_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h264rtpdepacketizer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/nalunit.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265rtppacketizer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265rtpdepacketizer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265nalunit.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/av1rtppacketizer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/rtcpnackresponder.hpp
Expand Down
44 changes: 44 additions & 0 deletions include/rtc/h265rtpdepacketizer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/**
* Copyright (c) 2020 Staz Modrzynski
* Copyright (c) 2020-2024 Paul-Louis Ageneau
* Copyright (c) 2024 Robert Edmonds
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#ifndef RTC_H265_RTP_DEPACKETIZER_H
#define RTC_H265_RTP_DEPACKETIZER_H

#if RTC_ENABLE_MEDIA

#include "common.hpp"
#include "mediahandler.hpp"
#include "message.hpp"
#include "rtp.hpp"

#include <iterator>

namespace rtc {

/// RTP depacketization for H265
///
/// Media handler that removes incoming RTP packets from the message stream,
/// buffers them, and emits one binary message per group of consecutive packets
/// sharing the same RTP timestamp.
class RTC_CPP_EXPORT H265RtpDepacketizer : public MediaHandler {
public:
	H265RtpDepacketizer() = default;
	virtual ~H265RtpDepacketizer() = default;

	/// Consumes the non-control messages in `messages` as RTP packets and appends
	/// the frames that could be assembled. Control messages are left in place.
	void incoming(message_vector &messages, const message_callback &send) override;

private:
	/// RTP packets received so far that have not been turned into a frame yet
	std::vector<message_ptr> mRtpBuffer;

	/// Assembles the packets in the half-open range [begin, end) into frame
	/// messages carrying `timestamp` in their frame info.
	message_vector buildFrames(message_vector::iterator begin, message_vector::iterator end,
	                           uint32_t timestamp);
};

} // namespace rtc

#endif // RTC_ENABLE_MEDIA

#endif // RTC_H265_RTP_DEPACKETIZER_H
1 change: 1 addition & 0 deletions include/rtc/rtc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "h264rtppacketizer.hpp"
#include "h264rtpdepacketizer.hpp"
#include "h265rtppacketizer.hpp"
#include "h265rtpdepacketizer.hpp"
#include "mediahandler.hpp"
#include "plihandler.hpp"
#include "rembhandler.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/h265nalunit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ H265NalUnitFragment::fragmentsFrom(shared_ptr<H265NalUnit> nalu, uint16_t maxFra
auto fragments_count = ceil(double(nalu->size()) / maxFragmentSize);
maxFragmentSize = uint16_t(int(ceil(nalu->size() / fragments_count)));

// 3 bytes for FU indicator and FU header
// 3 bytes for NALU header and FU header
maxFragmentSize -= (H265_NAL_HEADER_SIZE + H265_FU_HEADER_SIZE);
auto f = nalu->forbiddenBit();
uint8_t nuhLayerId = nalu->nuhLayerId() & 0x3F; // 6 bits
Expand Down
143 changes: 143 additions & 0 deletions src/h265rtpdepacketizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
* Copyright (c) 2023-2024 Paul-Louis Ageneau
* Copyright (c) 2024 Robert Edmonds
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#if RTC_ENABLE_MEDIA

#include "h265rtpdepacketizer.hpp"
#include "h265nalunit.hpp"

#include "impl/internals.hpp"

namespace rtc {

// 4-byte start code (00 00 00 01) written in front of every NAL unit that is
// appended to an outgoing access unit
const binary naluStartCode{byte{0}, byte{0}, byte{0}, byte{1}};

// RTP payload structure types (RFC 7798)
constexpr uint8_t naluTypeAP = 48; // aggregation packet
constexpr uint8_t naluTypeFU = 49; // fragmentation unit

/// Builds one access unit from the RTP packets in the half-open range [begin, end).
///
/// The payload of each packet is handled according to its NAL unit type:
///  - fragmentation units (type 49) are reassembled into the original NAL unit,
///  - aggregation packets (type 48) are split into the NAL units they contain,
///  - NAL units of type 0 to 47 are copied unchanged,
///  - any other type is dropped.
/// Every NAL unit written to the access unit is preceded by naluStartCode.
///
/// @param begin     first packet of the frame
/// @param end       one past the last packet of the frame
/// @param timestamp RTP timestamp stored in the FrameInfo of the emitted message
/// @return a vector holding a single binary message with the access unit, or an
///         empty vector if no NAL unit data was extracted
/// @throws std::runtime_error if an aggregation packet declares a NAL unit size
///         that exceeds the packet
message_vector H265RtpDepacketizer::buildFrames(message_vector::iterator begin,
                                                message_vector::iterator end, uint32_t timestamp) {
	// S (start) and E (end) bits of the one-byte FU header, see RFC 7798 section 4.4.3
	constexpr uint8_t fuStartBit = 0x80;
	constexpr uint8_t fuEndBit = 0x40;

	message_vector out = {};
	auto accessUnit = binary{};
	auto frameInfo = std::make_shared<FrameInfo>(timestamp);

	// True while the fragments of one NAL unit are being appended. It is tracked per
	// NAL unit (not per frame) so that each fragmented NAL unit of the access unit
	// gets its own start code and reconstructed NAL unit header.
	bool fuInProgress = false;

	for (auto it = begin; it != end; ++it) {
		auto pkt = it->get();
		auto pktParsed = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
		auto rtpHeaderSize = pktParsed->getSize() + pktParsed->getExtensionHeaderSize();
		// NOTE(review): at() is expected to throw on packets too short to hold the
		// payload header — confirm against the message container type.
		auto nalUnitHeader =
		    H265NalUnitHeader{std::to_integer<uint8_t>(pkt->at(rtpHeaderSize)),
		                      std::to_integer<uint8_t>(pkt->at(rtpHeaderSize + 1))};

		if (nalUnitHeader.unitType() != naluTypeFU) {
			// Any non-FU packet terminates a fragmented NAL unit, even if its end
			// fragment was never seen
			fuInProgress = false;
		}

		if (nalUnitHeader.unitType() == naluTypeFU) {
			const auto fuHeaderByte =
			    std::to_integer<uint8_t>(pkt->at(rtpHeaderSize + sizeof(H265NalUnitHeader)));
			auto nalUnitFragmentHeader = H265NalUnitFragmentHeader{fuHeaderByte};

			// Open a new NAL unit on a start fragment. Also do so when no fragmented
			// NAL unit is open (start fragment missing), so that the data is never
			// glued onto the end of the previous NAL unit.
			if ((fuHeaderByte & fuStartBit) != 0 || !fuInProgress) {
				accessUnit.insert(accessUnit.end(), naluStartCode.begin(), naluStartCode.end());

				// Rebuild the original NAL unit header: same layer/TID fields as the
				// payload header, with the type taken from the FU header
				nalUnitHeader.setUnitType(nalUnitFragmentHeader.unitType());
				accessUnit.emplace_back(byte(nalUnitHeader._first));
				accessUnit.emplace_back(byte(nalUnitHeader._second));
			}

			fuInProgress = (fuHeaderByte & fuEndBit) == 0;

			accessUnit.insert(accessUnit.end(),
			                  pkt->begin() + rtpHeaderSize + sizeof(H265NalUnitHeader) +
			                      sizeof(H265NalUnitFragmentHeader),
			                  pkt->end());
		} else if (nalUnitHeader.unitType() == naluTypeAP) {
			// NOTE(review): assumes the aggregation units carry no DONL/DOND fields
			// (i.e. sprop-max-don-diff is 0) — confirm.
			auto currOffset = rtpHeaderSize + sizeof(H265NalUnitHeader);

			while (currOffset + sizeof(uint16_t) < pkt->size()) {
				// 16-bit big-endian size of the next aggregated NAL unit
				auto naluSize = std::to_integer<uint16_t>(pkt->at(currOffset)) << 8 |
				                std::to_integer<uint16_t>(pkt->at(currOffset + 1));

				currOffset += sizeof(uint16_t);

				if (pkt->size() < currOffset + naluSize) {
					throw std::runtime_error("H265 AP declared size is larger than buffer");
				}

				accessUnit.insert(accessUnit.end(), naluStartCode.begin(), naluStartCode.end());

				accessUnit.insert(accessUnit.end(), pkt->begin() + currOffset,
				                  pkt->begin() + currOffset + naluSize);

				currOffset += naluSize;
			}
		} else if (nalUnitHeader.unitType() < naluTypeAP) {
			// "NAL units with NAL unit type values in the range of 0 to 47, inclusive, may be
			// passed to the decoder."
			accessUnit.insert(accessUnit.end(), naluStartCode.begin(), naluStartCode.end());
			accessUnit.insert(accessUnit.end(), pkt->begin() + rtpHeaderSize, pkt->end());
		} else {
			// "NAL-unit-like structures with NAL unit type values in the range of 48 to 63,
			// inclusive, MUST NOT be passed to the decoder."
		}
	}

	if (!accessUnit.empty()) {
		out.emplace_back(make_message(accessUnit.begin(), accessUnit.end(), Message::Binary, 0,
		                              nullptr, frameInfo));
	}

	return out;
}

/// Moves the RTP packets found in `messages` into the internal buffer and appends
/// to `messages` the frames that are known to be complete.
///
/// Control messages are left in `messages`. Non-control messages smaller than an
/// RTP header are dropped. A group of buffered packets sharing one RTP timestamp
/// is only turned into a frame once a packet with a different timestamp follows
/// it, so the most recent group always stays buffered until the next call.
///
/// @param messages in: received messages; out: control messages plus built frames
void H265RtpDepacketizer::incoming(message_vector &messages, const message_callback &) {
	messages.erase(std::remove_if(messages.begin(), messages.end(),
	                              [&](message_ptr message) {
		                              if (message->type == Message::Control) {
			                              return false;
		                              }

		                              if (message->size() < sizeof(RtpHeader)) {
			                              PLOG_VERBOSE << "RTP packet is too small, size="
			                                           << message->size();
			                              return true;
		                              }

		                              mRtpBuffer.push_back(std::move(message));
		                              return true;
	                              }),
	               messages.end());

	while (!mRtpBuffer.empty()) {
		// Take the timestamp from the first buffered packet instead of using 0 as an
		// "unset" marker: 0 is a valid RTP timestamp.
		const auto firstHeader =
		    reinterpret_cast<const rtc::RtpHeader *>(mRtpBuffer.front()->data());
		const uint32_t current_timestamp = firstHeader->timestamp();

		// Count the leading packets that belong to this timestamp
		size_t packets_in_timestamp = 0;
		for (const auto &pkt : mRtpBuffer) {
			auto p = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());

			if (p->timestamp() != current_timestamp) {
				break;
			}

			packets_in_timestamp++;
		}

		// No packet with another timestamp has arrived yet, so more packets of this
		// frame may still follow
		if (packets_in_timestamp == mRtpBuffer.size()) {
			break;
		}

		// Take the packets out of the buffer before building the frame, so that a
		// malformed packet making buildFrames() throw is discarded instead of
		// being parsed again on every later call.
		const auto frameEnd = mRtpBuffer.begin() + packets_in_timestamp;
		message_vector framePackets(std::make_move_iterator(mRtpBuffer.begin()),
		                            std::make_move_iterator(frameEnd));
		mRtpBuffer.erase(mRtpBuffer.begin(), frameEnd);

		auto frames = buildFrames(framePackets.begin(), framePackets.end(), current_timestamp);
		messages.insert(messages.end(), frames.begin(), frames.end());
	}
}

} // namespace rtc

#endif // RTC_ENABLE_MEDIA

0 comments on commit d535037

Please sign in to comment.