Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio unit test #345

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 76 additions & 7 deletions visualdl/logic/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,14 @@ PYBIND11_MODULE(core, m) {
auto tablet = self.tablet(tag);
return vs::components::ImageReader(self.mode(), tablet);
})
.def("get_text", [](vs::LogReader& self, const std::string& tag) {
.def("get_text",
[](vs::LogReader& self, const std::string& tag) {
auto tablet = self.tablet(tag);
return vs::components::TextReader(tablet);
})
.def("get_audio", [](vs::LogReader& self, const std::string& tag) {
auto tablet = self.tablet(tag);
return vs::components::TextReader(tablet);
return vs::components::AudioReader(self.mode(), tablet);
});

// clang-format on
Expand Down Expand Up @@ -119,10 +124,19 @@ PYBIND11_MODULE(core, m) {
auto tablet = self.AddTablet(tag);
return vs::components::Image(tablet, num_samples, step_cycle);
})
.def("new_text", [](vs::LogWriter& self, const std::string& tag) {
auto tablet = self.AddTablet(tag);
return vs::components::Text(tablet);
});
.def("new_text",
[](vs::LogWriter& self, const std::string& tag) {
auto tablet = self.AddTablet(tag);
return vs::components::Text(tablet);
})
.def("new_audio",
[](vs::LogWriter& self,
const std::string& tag,
int num_samples,
int step_cycle) {
auto tablet = self.AddTablet(tag);
return vs::components::Audio(tablet, num_samples, step_cycle);
});

//------------------- components --------------------
#define ADD_SCALAR_READER(T) \
Expand Down Expand Up @@ -161,7 +175,7 @@ PYBIND11_MODULE(core, m) {
.def("start_sampling", &cp::Image::StartSampling, R"pbdoc(
Start a sampling period, this interface will start a new reservoir sampling phase.
)pbdoc")
.def("is_sample_taken", &cp::Image::IsSampleTaken, R"pbdoc(
.def("is_sample_taken", &cp::Image::IndexOfSampleTaken, R"pbdoc(
Will this sample be taken, this interface is introduced to reduce the cost
of copy image data, by testing whether this image will be sampled, and only
copy data when it should be sampled. In that way, most of un-sampled image
Expand Down Expand Up @@ -219,6 +233,61 @@ PYBIND11_MODULE(core, m) {
.def("total_records", &cp::TextReader::total_records)
.def("size", &cp::TextReader::size);

// Python binding for the audio writer component (cp::Audio), exposed as
// "AudioWriter". The Python method "is_sample_taken" keeps its historical name
// but forwards to IndexOfSampleTaken and therefore returns an index.
py::class_<cp::Audio>(m, "AudioWriter", R"pbdoc(
PyBind class. Must instantiate through the LogWriter.
)pbdoc")
.def("set_caption", &cp::Audio::SetCaption, R"pbdoc(
Set the caption of this audio component, replacing the default caption (the tag).
)pbdoc")
.def("start_sampling", &cp::Audio::StartSampling, R"pbdoc(
Start a sampling period, this interface will start a new reservoir sampling phase.
)pbdoc")
.def("is_sample_taken", &cp::Audio::IndexOfSampleTaken, R"pbdoc(
Will this sample be taken, this interface is introduced to reduce the cost
of copy audio data, by testing whether this audio will be sampled, and only
copy data when it should be sampled. In that way, most of un-sampled audio
data need not be copied or processed at all.

:return: Index
:rtype: integer
)pbdoc")
.def("finish_sampling", &cp::Audio::FinishSampling, R"pbdoc(
End a sampling period, it will clear all states for reservoir sampling.
)pbdoc")
.def("set_sample", &cp::Audio::SetSample, R"pbdoc(
Store the flatten audio data with sample rate specified.

:param index:
:type index: integer
:param sample_rate: Sample rate of audio
:type sample_rate: integer
:param audio_data: Flatten audio data
:type audio_data: list
)pbdoc")
.def("add_sample", &cp::Audio::AddSample, R"pbdoc(
A combined interface for is_sample_taken and set_sample, simpler but is less efficient.

:param sample_rate: Sample rate of audio
:type sample_rate: integer
:param audio_data: Flatten audio data
:type audio_data: list
)pbdoc");

// Python binding for one audio sample read back from a log, exposed as
// "AudioRecord". Only getters are bound; each lambda returns its field by
// value, so `data` is copied on every call (see the TODO below).
py::class_<cp::AudioReader::AudioRecord>(m, "AudioRecord")
// TODO(Nicky) make these copyless.
.def("data", [](cp::AudioReader::AudioRecord& self) { return self.data; })
.def("sample_rate",
[](cp::AudioReader::AudioRecord& self) { return self.sample_rate; })
.def("step_id",
[](cp::AudioReader::AudioRecord& self) { return self.step_id; });

// Python binding for cp::AudioReader, exposed as "AudioReader". Every Python
// method is bound directly to the C++ member function of the same name.
py::class_<cp::AudioReader>(m, "AudioReader")
.def("caption", &cp::AudioReader::caption)
.def("num_records", &cp::AudioReader::num_records)
.def("num_samples", &cp::AudioReader::num_samples)
.def("record", &cp::AudioReader::record)
.def("timestamp", &cp::AudioReader::timestamp);

#define ADD_HISTOGRAM_WRITER(T) \
py::class_<cp::Histogram<T>>(m, "HistogramWriter__" #T, \
R"pbdoc(PyBind class. Must instantiate through the LogWriter.)pbdoc") \
Expand Down
108 changes: 101 additions & 7 deletions visualdl/logic/sdk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void Image::StartSampling() {
num_records_ = 0;
}

int Image::IsSampleTaken() {
int Image::IndexOfSampleTaken() {
if (!ToSampleThisStep()) return -1;
num_records_++;
if (num_records_ <= num_samples_) {
Expand Down Expand Up @@ -195,7 +195,7 @@ struct is_same_type<T, T> {

void Image::AddSample(const std::vector<shape_t>& shape,
const std::vector<value_t>& data) {
auto idx = IsSampleTaken();
auto idx = IndexOfSampleTaken();
if (idx >= 0) {
SetSample(idx, shape, data);
}
Expand All @@ -222,11 +222,6 @@ void Image::SetSample(int index,
CHECK_LT(index, num_samples_);
CHECK_LE(index, num_records_);

// trick to store int8 to protobuf
std::vector<byte_t> data_str(data.size());
for (int i = 0; i < data.size(); i++) {
data_str[i] = data[i];
}
Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]);
NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]);

Expand Down Expand Up @@ -352,6 +347,105 @@ std::string TextReader::caption() const {

size_t TextReader::size() const { return reader_.total_records(); }

// Open a new sampling period for the current step. Nothing happens unless the
// current step is one that should be sampled (see ToSampleThisStep).
void Audio::StartSampling() {
  if (ToSampleThisStep()) {
    // Begin a fresh record tagged with the current step id and the wall-clock
    // time at which sampling started.
    step_ = writer_.AddRecord();
    step_.SetId(step_id_);
    time_t now = std::time(nullptr);
    step_.SetTimeStamp(now);

    // Add one data entry to the record for every sample slot.
    int slot = 0;
    while (slot < num_samples_) {
      step_.AddData();
      ++slot;
    }

    // No audio has been offered during this period yet.
    num_records_ = 0;
  }
}

// Decide whether the next offered audio clip is kept and, if so, in which slot.
//
// Returns the slot index to write to, or -1 when the clip is not kept (either
// because this step is not sampled or because the random draw rejected it).
// Every call on a sampled step counts as one offered clip.
int Audio::IndexOfSampleTaken() {
  if (!ToSampleThisStep()) {
    return -1;
  }

  ++num_records_;

  // While free slots remain, clips fill them in arrival order.
  if (num_records_ <= num_samples_) {
    return num_records_ - 1;
  }

  // All slots are full: keep the clip with probability
  // num_samples_ / num_records_ and overwrite a randomly chosen slot.
  const float keep_probability =
      static_cast<float>(num_samples_) / num_records_;
  const float draw = static_cast<float>(rand()) / RAND_MAX;
  return draw < keep_probability ? rand() % num_samples_ : -1;
}

// Close the current sampling period: advance to the next step id and, when
// that new step id is one that gets sampled, ask the writer's parent to
// persist to disk.
void Audio::FinishSampling() {
  ++step_id_;
  if (!ToSampleThisStep()) {
    return;
  }
  writer_.parent()->PersistToDisk();
}

// Convenience wrapper combining IndexOfSampleTaken and SetSample: asks whether
// this clip should be kept and stores it only when a slot index (>= 0) is
// returned.
void Audio::AddSample(int sample_rate, const std::vector<value_t>& data) {
  const auto slot = IndexOfSampleTaken();
  if (slot < 0) {
    return;  // clip was not selected
  }
  SetSample(slot, sample_rate, data);
}

// Store one audio clip into slot `index` of the current step's record.
//
// The clip is written as a binary record under
// GenBinaryRecordDir(step_.parent()->dir()) and the slot's entry is set to the
// name of that file. A file previously referenced by the slot is deleted.
//
// index:       slot to overwrite; must satisfy 0 <= index < num_samples_ and
//              index <= num_records_.
// sample_rate: must be positive. NOTE(review): it is only validated here and
//              is not written anywhere in this function -- confirm whether it
//              is meant to be persisted alongside the data.
// data:        flattened audio values; each value is converted to a single
//              char when the payload string is built.
void Audio::SetSample(int index,
                      int sample_rate,
                      const std::vector<value_t>& data) {
  CHECK_GT(sample_rate, 0)
      << "sample rate should be something like 6000, 8000 or 44100";
  // This function is bound for Python as set_sample and can be called without
  // going through AddSample's `idx >= 0` guard, so reject negative slots here.
  CHECK_LE(0, index) << "index should be non-negative";
  CHECK_LT(index, num_samples_)
      << "index should be less than number of samples";
  CHECK_LE(index, num_records_)
      << "index should be less than or equal to number of records";

  BinaryRecord brcd(GenBinaryRecordDir(step_.parent()->dir()),
                    std::string(data.begin(), data.end()));
  brcd.tofile();

  auto entry = step_.MutableData<std::vector<byte_t>>(index);
  // update record
  auto old_hash = entry.reader().GetRaw();
  // Only delete the previous file when it is a different file. If the slot
  // already referenced a file with the same name as the one just written,
  // removing it would leave the entry pointing at a missing file.
  // (assumes BinaryRecord writes to <dir>/<filename()> -- TODO confirm)
  if (!old_hash.empty() && old_hash != brcd.filename()) {
    std::string old_path =
        GenBinaryRecordDir(step_.parent()->dir()) + "/" + old_hash;
    CHECK_EQ(std::remove(old_path.c_str()), 0) << "delete old binary record "
                                               << old_path << " failed";
  }
  entry.SetRaw(brcd.filename());
}

// Return the caption of this audio tablet. Exactly one caption is expected.
//
// When the stored caption matches the reader's mode, the readable form
// produced by LogReader::GenReadableTag is returned; otherwise the caption is
// passed through string::TagDecode and returned.
std::string AudioReader::caption() {
  CHECK_EQ(reader_.captions().size(), 1);
  auto stored = reader_.captions().front();
  if (!LogReader::TagMatchMode(stored, mode_)) {
    // presumably TagDecode rewrites its argument in place -- its result, if
    // any, is not used here
    string::TagDecode(stored);
    return stored;
  }
  return LogReader::GenReadableTag(mode_, stored);
}

// Load one stored audio clip.
//
// offset: offset of the step to read.
// index:  index of the sample within that step.
//
// The entry at (offset, index) holds the name of a binary record file under
// GenBinaryRecordDir(g_log_dir); its bytes are returned as int8_t values
// together with the step's id. `sample_rate` of the result is not assigned
// here.
AudioReader::AudioRecord AudioReader::record(int offset, int index) {
  auto step = reader_.record(offset);
  auto entry = step.data(index);
  auto filename = entry.GetRaw();

  CHECK(!g_log_dir.empty())
      << "g_log_dir should be set in LogReader construction";
  BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), filename);

  AudioRecord clip;
  // Reinterpret every stored byte as a signed 8-bit value.
  for (byte_t raw : brcd.data) {
    clip.data.push_back(static_cast<int8_t>(raw));
  }
  clip.step_id = step.id();
  return clip;
}

} // namespace components

} // namespace visualdl
116 changes: 113 additions & 3 deletions visualdl/logic/sdk.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ struct Image {
void FinishSampling();

/*
* A combined interface for IsSampleTaken and SetSample, simpler but might be
* low effience.
* A combined interface for IndexOfSampleTaken and SetSample; simpler, but it
* might be less efficient.
*/
void AddSample(const std::vector<shape_t>& shape,
const std::vector<value_t>& data);
Expand All @@ -182,7 +183,7 @@ struct Image {
* copy data when it should be sampled. In that way, most of unsampled image
* data need not be copied or processed at all.
*/
int IsSampleTaken();
int IndexOfSampleTaken();
/*
* Just store a tensor with nothing to do with image format.
*/
Expand Down Expand Up @@ -326,6 +327,115 @@ struct TextReader {
TabletReader reader_;
};

/*
 * Audio component writer.
 */
struct Audio {
  using value_t = float;

  /*
   * tablet:      tablet the audio records are written to.
   * num_samples: number of sample slots kept per step; must be > 0.
   * step_cycle:  a step is recorded only when its id is a multiple of
   *              `step_cycle`; must be > 0.
   */
  Audio(Tablet tablet, int num_samples, int step_cycle)
      : writer_(tablet), num_samples_(num_samples), step_cycle_(step_cycle) {
    CHECK_GT(step_cycle, 0);
    CHECK_GT(num_samples, 0);

    writer_.SetType(Tablet::Type::kAudio);
    writer_.SetNumSamples(num_samples);
    // Until SetCaption is called explicitly, the tablet's tag is the caption.
    SetCaption(tablet.reader().tag());
  }

  /*
   * Replace the tablet's captions with the single caption `c`.
   */
  void SetCaption(const std::string& c) {
    writer_.SetCaptions(std::vector<std::string>(1, c));
  }

  /*
   * Begin a sampling period; this starts a new reservoir sampling phase.
   */
  void StartSampling();

  /*
   * Finish a sampling period and move on to the next step.
   */
  void FinishSampling();

  /*
   * Convenience call that combines IndexOfSampleTaken and SetSample. It is
   * simpler to use, but the audio data has to be prepared even when the
   * sample ends up not being taken.
   */
  void AddSample(int sample_rate, const std::vector<value_t>& data);

  /*
   * Ask whether the next audio clip will be sampled. Returns the slot index
   * to store it at, or -1 when it is not taken. Checking first lets callers
   * avoid copying or processing audio data that will not be sampled.
   */
  int IndexOfSampleTaken();

  /*
   * Store audio data, with its sample rate, into the slot `index`.
   */
  void SetSample(int index, int sample_rate, const std::vector<value_t>& data);

protected:
  // True when the current step id is a multiple of the step cycle.
  bool ToSampleThisStep() { return (step_id_ % step_cycle_) == 0; }

private:
  Tablet writer_;        // tablet receiving the records
  Record step_;          // record of the step currently being sampled
  int num_records_ = 0;  // clips offered so far in the current period
  int num_samples_ = 0;  // sample slots per step
  int step_id_ = 0;      // id of the current step
  int step_cycle_;       // sampling interval, in steps
};

/*
 * Audio reader.
 */
struct AudioReader {
  using value_t = typename Audio::value_t;

  /*
   * One audio clip read back from the log.
   *
   * The integer fields are zero-initialized so that a default-constructed
   * record never exposes indeterminate values. In particular, the record()
   * implementation in sdk.cc assigns `data` and `step_id` but not
   * `sample_rate`, so without an initializer reading `sample_rate` would be
   * undefined behaviour.
   */
  struct AudioRecord {
    int step_id{0};
    int sample_rate{0};
    std::vector<int8_t> data;
  };

  /*
   * mode:   mode used when resolving the readable caption.
   * tablet: tablet reader the audio records are read from.
   */
  AudioReader(const std::string& mode, TabletReader tablet)
      : reader_(tablet), mode_{mode} {}

  /*
   * Caption of this audio tablet.
   */
  std::string caption();

  // number of steps.
  int num_records() { return reader_.total_records(); }

  // number of samples, as reported by the tablet reader.
  int num_samples() { return reader_.num_samples(); }

  // timestamp of the record at offset `step`.
  int64_t timestamp(int step) { return reader_.record(step).timestamp(); }

  /*
   * offset: offset of a step.
   * index: index of a sample.
   */
  AudioRecord record(int offset, int index);

  /*
   * offset: offset of a step.
   * index: index of a sample.
   *
   * NOTE(review): no definition of this function is visible in the
   * accompanying sdk.cc changes -- confirm it is implemented before use.
   */
  std::vector<value_t> data(int offset, int index);

  /*
   * NOTE(review): no definition of this function is visible in the
   * accompanying sdk.cc changes -- confirm it is implemented before use.
   */
  int stepid(int offset, int index);

private:
  TabletReader reader_;
  std::string mode_;
};

} // namespace components
} // namespace visualdl

Expand Down
Loading