Skip to content

Commit

Permalink
Also unify variable format index for audio
Browse files Browse the repository at this point in the history
  • Loading branch information
myrsloik committed Nov 5, 2024
1 parent 4f6ef06 commit 2817e14
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 79 deletions.
193 changes: 134 additions & 59 deletions src/audiosource.cpp

Large diffs are not rendered by default.

50 changes: 42 additions & 8 deletions src/audiosource.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,24 @@ struct BSAudioFormat {
void Set(int Format, int BitsPerRawSample);
};

struct BSAudioProperties {
// int format, uint64_t ChannelLayout, int samplerate

// Low-level audio properties. This is the type LWAudioDecoder::GetAudioProperties()
// takes by reference, and it is the base of BSAudioProperties.
struct LWAudioProperties {
// NOTE(review): presumably the time base that stream timestamps are expressed in — confirm in audiosource.cpp
BSRational TimeBase;
// NOTE(review): units are not visible here; presumably expressed in TimeBase units — confirm
int64_t Duration;

int64_t NumSamples; /* estimated by decoder, may be wrong */
};

struct BSAudioProperties : public LWAudioProperties {
BSAudioFormat AF;
int Format;
int SampleRate;
int Channels;
uint64_t ChannelLayout;
int64_t NumFrames; // can be -1 to signal that the number of frames is completely unknown
int64_t NumSamples; /* estimated by decoder, may be wrong */

int64_t NumFrames;

double StartTime; /* in seconds */
};

Expand All @@ -67,20 +78,20 @@ class LWAudioDecoder {
AVPacket *Packet = nullptr;
bool Seeked = false;

void OpenFile(const std::filesystem::path &SourceFile, int Track, bool VariableFormat, int Threads, const std::map<std::string, std::string> &LAVFOpts, double DrcScale);
void OpenFile(const std::filesystem::path &SourceFile, int Track, int Threads, const std::map<std::string, std::string> &LAVFOpts, double DrcScale);
bool ReadPacket();
bool DecodeNextFrame(bool SkipOutput = false);
void Free();
public:
LWAudioDecoder(const std::filesystem::path &SourceFile, int Track, bool VariableFormat, int Threads, const std::map<std::string, std::string> &LAVFOpts, double DrcScale); // Positive track numbers are absolute. Negative track numbers mean nth audio track to simplify things.
LWAudioDecoder(const std::filesystem::path &SourceFile, int Track, int Threads, const std::map<std::string, std::string> &LAVFOpts, double DrcScale); // Positive track numbers are absolute. Negative track numbers mean nth audio track to simplify things.
~LWAudioDecoder();
[[nodiscard]] int64_t GetSourceSize() const;
[[nodiscard]] int64_t GetSourcePostion() const;
[[nodiscard]] int GetTrack() const; // Useful when opening nth audio track to get the actual number
[[nodiscard]] int64_t GetFrameNumber() const; // The frame you will get when calling GetNextFrame()
[[nodiscard]] int64_t GetSamplePos() const; // The sample position of the frame you will get when calling GetNextFrame()
void SetFrameNumber(int64_t N, int64_t SampleNumber); // Use after seeking to update internal frame number
void GetAudioProperties(BSAudioProperties &VP); // Decodes one frame and advances the position to retrieve the full properties, only call directly after creation
void GetAudioProperties(LWAudioProperties &VP); // Decodes one frame and advances the position to retrieve the full properties, only call directly after creation
[[nodiscard]] AVFrame *GetNextFrame();
bool SkipFrames(int64_t Count);
[[nodiscard]] bool HasMoreFrames() const;
Expand All @@ -105,10 +116,27 @@ class BestAudioFrame {

class BestAudioSource {
public:
// Describes one distinct audio format (sample format, sample rate and channel
// configuration) together with the frame/sample counts associated with it.
// Every member carries a default initializer so that a default-initialized
// FormatSet (for example a class member that is assigned later) never exposes
// indeterminate values. The zero defaults match what aggregate initialization
// already produces for members omitted from a braced initializer list.
struct FormatSet {
    BSAudioFormat AF = {};
    int Format = 0;
    int SampleRate = 0;
    int Channels = 0;
    uint64_t ChannelLayout = 0;

    double StartTime = 0;

    int64_t NumFrames = 0; // can be -1 to signal that the number of frames is completely unknown
    int64_t NumSamples = 0;
};

// Per-frame record holding timing, format and hash information.
// NOTE(review): presumably one entry per decoded audio frame in the track index — confirm in audiosource.cpp
struct FrameInfo {
int64_t PTS;
// NOTE(review): presumably the position of the frame's first sample within the track — confirm
int64_t Start;
// NOTE(review): presumably the number of samples in the frame — confirm
int64_t Length;
// The four fields below mirror the identically named fields of FormatSet.
int Format;
int SampleRate;
int Channels;
uint64_t ChannelLayout;
// NOTE(review): presumably a hash of the decoded frame data — confirm
std::array<uint8_t, HashSize> Hash;
};
private:
Expand Down Expand Up @@ -144,13 +172,16 @@ class BestAudioSource {
AudioTrackIndex TrackIndex;
Cache FrameCache;

std::vector<FormatSet> FormatSets;
FormatSet DefaultFormatSet;

static constexpr int MaxVideoSources = 4;
std::map<std::string, std::string> LAVFOptions;
double DrcScale;
BSAudioProperties AP = {};
std::filesystem::path Source;
int AudioTrack;
bool VariableFormat;
int VariableFormat = -1;
int Threads;
bool LinearMode = false;
uint64_t DecoderSequenceNum = 0;
Expand All @@ -167,6 +198,7 @@ class BestAudioSource {
[[nodiscard]] BestAudioFrame *GetFrameInternal(int64_t N);
[[nodiscard]] BestAudioFrame *GetFrameLinearInternal(int64_t N, int64_t SeekFrame = -1, size_t Depth = 0, bool ForceUnseeked = false);
[[nodiscard]] bool IndexTrack(const ProgressFunction &Progress = nullptr);
void InitializeFormatSets();
void ZeroFillStartPacked(uint8_t *&Data, int64_t &Start, int64_t &Count);
void ZeroFillEndPacked(uint8_t *Data, int64_t Start, int64_t &Count);
bool FillInFramePacked(const BestAudioFrame *Frame, int64_t FrameStartSample, uint8_t *&Data, int64_t &Start, int64_t &Count);
Expand All @@ -180,12 +212,14 @@ class BestAudioSource {
int64_t FirstSamplePos;
};

BestAudioSource(const std::filesystem::path &SourceFile, int Track, int AjustDelay, bool VariableFormat, int Threads, int CacheMode, const std::filesystem::path &CachePath, const std::map<std::string, std::string> *LAVFOpts, double DrcScale, const ProgressFunction &Progress = nullptr);
BestAudioSource(const std::filesystem::path &SourceFile, int Track, int AjustDelay, int Threads, int CacheMode, const std::filesystem::path &CachePath, const std::map<std::string, std::string> *LAVFOpts, double DrcScale, const ProgressFunction &Progress = nullptr);
[[nodiscard]] int GetTrack() const; // Useful when opening nth audio track to get the actual number
void SetMaxCacheSize(size_t Bytes); /* default max size is 1GB */
void SetSeekPreRoll(int64_t Frames); /* the number of frames to cache before the position being fast forwarded to */
double GetRelativeStartTime(int Track) const;
[[nodiscard]] const BSAudioProperties &GetAudioProperties() const;
[[nodiscard]] const std::vector<FormatSet> &GetFormatSets() const; /* Get a listing of all the format sets */
void SelectFormatSet(int Index); /* Sets the output format to the specified format set, passing -1 means the default variable format will be used */
[[nodiscard]] BestAudioFrame *GetFrame(int64_t N, bool Linear = false);
[[nodiscard]] FrameRange GetFrameRangeBySamples(int64_t Start, int64_t Count) const;
void GetPackedAudio(uint8_t *Data, int64_t Start, int64_t Count);
Expand Down
4 changes: 3 additions & 1 deletion src/avisynth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,9 @@ class AvisynthAudioSource : public IClip {
Opts["use_absolute_path"] = "1";

try {
A.reset(new BestAudioSource(CreateProbablyUTF8Path(Source), Track, AdjustDelay, false, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale));
A.reset(new BestAudioSource(CreateProbablyUTF8Path(Source), Track, AdjustDelay, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale));

A->SelectFormatSet(0);

const BSAudioProperties &AP = A->GetAudioProperties();
if (AP.AF.Float && AP.AF.Bits == 32) {
Expand Down
6 changes: 4 additions & 2 deletions src/vapoursynth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ static void VS_CC CreateBestAudioSource(const VSMap *In, VSMap *Out, void *, VSC
if (ShowProgress) {
auto NextUpdate = std::chrono::high_resolution_clock::now();
int LastValue = -1;
D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, false, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale,
D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale,
[vsapi, Core, &NextUpdate, &LastValue](int Track, int64_t Cur, int64_t Total) {
if (NextUpdate < std::chrono::high_resolution_clock::now()) {
if (Total == INT64_MAX && Cur == Total) {
Expand All @@ -337,9 +337,11 @@ static void VS_CC CreateBestAudioSource(const VSMap *In, VSMap *Out, void *, VSC
}));

} else {
D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, false, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale));
D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale));
}

D->A->SelectFormatSet(0);

const BSAudioProperties &AP = D->A->GetAudioProperties();
D->Is8Bit = (AP.AF.Bits <= 8);
if (!vsapi->queryAudioFormat(&D->AI.format, AP.AF.Float, D->Is8Bit ? 16 : AP.AF.Bits, AP.ChannelLayout, Core))
Expand Down
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#ifndef VERSION_H
#define VERSION_H

#define BEST_SOURCE_VERSION_MAJOR 6
#define BEST_SOURCE_VERSION_MAJOR 9
#define BEST_SOURCE_VERSION_MINOR 0

#endif
21 changes: 14 additions & 7 deletions src/videosource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1398,10 +1398,10 @@ bool BestVideoSource::InitializeRFF() {
}

void BestVideoSource::InitializeFormatSets() {
std::map<std::tuple<int, int, int>, std::tuple<int64_t, int64_t, int64_t, const FrameInfo *>> SeenSets;
std::map<std::tuple<int, int, int>, std::tuple<int64_t, int64_t, int64_t, bool>> SeenSets;
for (const auto &Iter : TrackIndex.Frames) {
auto V = std::make_tuple(Iter.Format, Iter.Width, Iter.Height);
if (SeenSets.insert(std::make_pair(V, std::make_tuple(0, 0, 0, &Iter))).second)
if (SeenSets.insert(std::make_pair(V, std::make_tuple(0, 0, Iter.PTS, Iter.TFF))).second)
FormatSets.push_back(FormatSet{ {}, Iter.Format, Iter.Width, Iter.Height });
std::get<0>(SeenSets[V])++;
std::get<1>(SeenSets[V]) += Iter.RepeatPict + 2;
Expand All @@ -1410,24 +1410,31 @@ void BestVideoSource::InitializeFormatSets() {
for (auto &Iter : FormatSets) {
auto V = std::make_tuple(Iter.Format, Iter.Width, Iter.Height);
Iter.NumFrames = std::get<0>(SeenSets[V]);
Iter.NumRFFFrames = (std::get<1>(SeenSets[V]) + 1) / 2;
Iter.TFF = std::get<3>(SeenSets[V])->TFF;
if (std::get<3>(SeenSets[V])->PTS != AV_NOPTS_VALUE)
Iter.StartTime = (static_cast<double>(VP.TimeBase.Num) * std::get<3>(SeenSets[V])->PTS) / VP.TimeBase.Den;
Iter.NumRFFFrames = std::get<1>(SeenSets[V]);
Iter.TFF = std::get<3>(SeenSets[V]);
if (std::get<2>(SeenSets[V]) != AV_NOPTS_VALUE)
Iter.StartTime = (static_cast<double>(VP.TimeBase.Num) * std::get<2>(SeenSets[V])) / VP.TimeBase.Den;
Iter.VF.Set(av_pix_fmt_desc_get(static_cast<AVPixelFormat>(Iter.Format)));
}

DefaultFormatSet = FormatSets[0];
DefaultFormatSet.NumFrames = TrackIndex.Frames.size();
DefaultFormatSet.NumRFFFrames = 0;
for (const auto &Iter : FormatSets) {

for (auto &Iter : FormatSets) {
DefaultFormatSet.NumRFFFrames += Iter.NumRFFFrames;
Iter.NumRFFFrames = (Iter.NumRFFFrames + 1) / 2; // Can't round before adding it together

if (DefaultFormatSet.Format != Iter.Format)
DefaultFormatSet.Format = AV_PIX_FMT_NONE;
if (DefaultFormatSet.Width != Iter.Width || DefaultFormatSet.Height != Iter.Height) {
DefaultFormatSet.Width = 0;
DefaultFormatSet.Height = 0;
}
}

DefaultFormatSet.NumRFFFrames = (DefaultFormatSet.NumRFFFrames + 1) / 2;

if (DefaultFormatSet.Format != AV_PIX_FMT_NONE)
DefaultFormatSet.VF.Set(av_pix_fmt_desc_get(static_cast<AVPixelFormat>(DefaultFormatSet.Format)));
else
Expand Down
2 changes: 1 addition & 1 deletion src/videosource.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct LWVideoProperties {

struct BSVideoProperties : public LWVideoProperties {
BSVideoFormat VF;
int Format; // fixme, needed?
int Format;

int Width;
int Height;
Expand Down

0 comments on commit 2817e14

Please sign in to comment.