PaddlePaddle · nickyfantasy · Mar 28, 2018 · Mar 28, 2018 · Mar 28, 2018 · Mar 29, 2018
diff --git a/visualdl/logic/pybind.cc b/visualdl/logic/pybind.cc
@@ -79,9 +79,14 @@ PYBIND11_MODULE(core, m) {
              auto tablet = self.tablet(tag);
              return vs::components::ImageReader(self.mode(), tablet);
            })
-      .def("get_text", [](vs::LogReader& self, const std::string& tag) {
+      .def("get_text",
+           [](vs::LogReader& self, const std::string& tag) {
+             auto tablet = self.tablet(tag);
+             return vs::components::TextReader(tablet);
+           })
+      .def("get_audio", [](vs::LogReader& self, const std::string& tag) {
         auto tablet = self.tablet(tag);
-        return vs::components::TextReader(tablet);
+        return vs::components::AudioReader(self.mode(), tablet);
       });
 
   // clang-format on
@@ -119,10 +124,19 @@ PYBIND11_MODULE(core, m) {
              auto tablet = self.AddTablet(tag);
              return vs::components::Image(tablet, num_samples, step_cycle);
            })
-      .def("new_text", [](vs::LogWriter& self, const std::string& tag) {
-        auto tablet = self.AddTablet(tag);
-        return vs::components::Text(tablet);
-      });
+      .def("new_text",
+           [](vs::LogWriter& self, const std::string& tag) {
+             auto tablet = self.AddTablet(tag);
+             return vs::components::Text(tablet);
+           })
+      .def("new_audio",
+           [](vs::LogWriter& self,
+              const std::string& tag,
+              int num_samples,
+              int step_cycle) {
+             auto tablet = self.AddTablet(tag);
+             return vs::components::Audio(tablet, num_samples, step_cycle);
+           });
 
 //------------------- components --------------------
 #define ADD_SCALAR_READER(T)                               \
@@ -161,7 +175,7 @@ PYBIND11_MODULE(core, m) {
       .def("start_sampling", &cp::Image::StartSampling, R"pbdoc(
         Start a sampling period, this interface will start a new reservoir sampling phase.
       )pbdoc")
-      .def("is_sample_taken", &cp::Image::IsSampleTaken, R"pbdoc(
+      .def("is_sample_taken", &cp::Image::IndexOfSampleTaken, R"pbdoc(
         Will this sample be taken, this interface is introduced to reduce the cost
         of copy image data, by testing whether this image will be sampled, and only
         copy data when it should be sampled. In that way, most of un-sampled image
@@ -219,6 +233,61 @@ PYBIND11_MODULE(core, m) {
       .def("total_records", &cp::TextReader::total_records)
       .def("size", &cp::TextReader::size);
 
+  py::class_<cp::Audio>(m, "AudioWriter", R"pbdoc(
+            PyBind class. Must instantiate through the LogWriter.
+          )pbdoc")
+      .def("set_caption", &cp::Audio::SetCaption, R"pbdoc(
+            Set the caption of this audio component.
+          )pbdoc")
+      .def("start_sampling", &cp::Audio::StartSampling, R"pbdoc(
+            Start a sampling period, this interface will start a new reservoir sampling phase.
+          )pbdoc")
+      .def("is_sample_taken", &cp::Audio::IndexOfSampleTaken, R"pbdoc(
+            Will this sample be taken, this interface is introduced to reduce the cost
+            of copy audio data, by testing whether this audio will be sampled, and only
+            copy data when it should be sampled. In that way, most of un-sampled audio
+            data need not be copied or processed at all.
+
+            :return: Index to pass to set_sample, or -1 if the sample is not taken
+            :rtype: integer
+                  )pbdoc")
+      .def("finish_sampling", &cp::Audio::FinishSampling, R"pbdoc(
+            End a sampling period, it will clear all states for reservoir sampling.
+          )pbdoc")
+      .def("set_sample", &cp::Audio::SetSample, R"pbdoc(
+            Store the flatten audio data with sample rate specified.
+
+            :param index: Index returned by is_sample_taken
+            :type index: integer
+            :param sample_rate: Sample rate of audio
+            :type sample_rate: integer
+            :param audio_data: Flatten audio data
+            :type audio_data: list
+                  )pbdoc")
+      .def("add_sample", &cp::Audio::AddSample, R"pbdoc(
+            A combined interface for is_sample_taken and set_sample, simpler but is less efficient.
+
+            :param sample_rate: Sample rate of audio
+            :type sample_rate: integer
+            :param audio_data: Flatten audio data
+            :type audio_data: list
+                  )pbdoc");
+
+  py::class_<cp::AudioReader::AudioRecord>(m, "AudioRecord")
+      // Each accessor returns its field by value. TODO(Nicky) make these copyless.
+      .def("data", [](cp::AudioReader::AudioRecord& rec) { return rec.data; })
+      .def("sample_rate",
+           [](cp::AudioReader::AudioRecord& rec) { return rec.sample_rate; })
+      .def("step_id",
+           [](cp::AudioReader::AudioRecord& rec) { return rec.step_id; });
+
+  py::class_<cp::AudioReader>(m, "AudioReader")
+      .def("caption", &cp::AudioReader::caption)
+      .def("timestamp", &cp::AudioReader::timestamp)
+      .def("record", &cp::AudioReader::record)
+      .def("num_records", &cp::AudioReader::num_records)  // number of steps
+      .def("num_samples", &cp::AudioReader::num_samples);
+
 #define ADD_HISTOGRAM_WRITER(T)                                          \
   py::class_<cp::Histogram<T>>(m, "HistogramWriter__" #T, \ 
    R"pbdoc(PyBind class. Must instantiate through the LogWriter.)pbdoc") \

diff --git a/visualdl/logic/sdk.cc b/visualdl/logic/sdk.cc
@@ -160,7 +160,7 @@ void Image::StartSampling() {
   num_records_ = 0;
 }
 
-int Image::IsSampleTaken() {
+int Image::IndexOfSampleTaken() {
   if (!ToSampleThisStep()) return -1;
   num_records_++;
   if (num_records_ <= num_samples_) {
@@ -195,7 +195,7 @@ struct is_same_type<T, T> {
 
 void Image::AddSample(const std::vector<shape_t>& shape,
                       const std::vector<value_t>& data) {
-  auto idx = IsSampleTaken();
+  auto idx = IndexOfSampleTaken();
   if (idx >= 0) {
     SetSample(idx, shape, data);
   }
@@ -222,11 +222,6 @@ void Image::SetSample(int index,
   CHECK_LT(index, num_samples_);
   CHECK_LE(index, num_records_);
 
-  // trick to store int8 to protobuf
-  std::vector<byte_t> data_str(data.size());
-  for (int i = 0; i < data.size(); i++) {
-    data_str[i] = data[i];
-  }
   Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]);
   NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]);
 
@@ -352,6 +347,105 @@ std::string TextReader::caption() const {
 
 size_t TextReader::size() const { return reader_.total_records(); }
 
+// Starts a sampling period: on sampled steps, opens a record with one data
+// slot per sample and resets the sample counter; otherwise does nothing.
+void Audio::StartSampling() {
+  if (!ToSampleThisStep()) return;
+
+  step_ = writer_.AddRecord();
+  step_.SetId(step_id_);
+  step_.SetTimeStamp(std::time(nullptr));  // current time from std::time
+
+  // Pre-size the record: one data entry per sample.
+  for (int remaining = num_samples_; remaining > 0; --remaining) {
+    step_.AddData();
+  }
+  num_records_ = 0;
+}
+
+// Reservoir sampling: returns the slot index for the next sample, or -1 when
+// the sample is dropped or the current step is not a sampled step.
+int Audio::IndexOfSampleTaken() {
+  if (!ToSampleThisStep()) return -1;
+  ++num_records_;
+  // The first num_samples_ candidates fill the slots in order.
+  if (num_records_ <= num_samples_) return num_records_ - 1;
+
+  // Later candidates replace a random slot with probability
+  // num_samples_ / num_records_.
+  const float keep_prob = static_cast<float>(num_samples_) / num_records_;
+  const float draw = static_cast<float>(rand()) / RAND_MAX;
+  if (!(draw < keep_prob)) return -1;
+  return rand() % num_samples_;
+}
+
+void Audio::FinishSampling() {
+  // Advance first: the persist check below is evaluated on the new step id.
+  ++step_id_;
+  if (!ToSampleThisStep()) return;
+  writer_.parent()->PersistToDisk();
+}
+
+void Audio::AddSample(int sample_rate, const std::vector<value_t>& data) {
+  // A negative index means IndexOfSampleTaken() did not take this sample.
+  const auto slot = IndexOfSampleTaken();
+  if (slot < 0) return;
+  SetSample(slot, sample_rate, data);
+}
+
+// Writes `data` to a binary record file and points sample slot `index` at it.
+// NOTE(review): sample_rate is validated here but not stored - confirm intent.
+void Audio::SetSample(int index,
+                      int sample_rate,
+                      const std::vector<value_t>& data) {
+  CHECK_GT(sample_rate, 0)
+      << "sample rate should be something like 6000, 8000 or 44100";
+  CHECK_GE(index, 0) << "index should not be negative";
+  CHECK_LT(index, num_samples_)
+      << "index should be less than number of samples";
+  CHECK_LE(index, num_records_)
+      << "index should be less than or equal to number of records";
+  const std::string dir = GenBinaryRecordDir(step_.parent()->dir());
+  BinaryRecord brcd(dir, std::string(data.begin(), data.end()));
+  brcd.tofile();
+  auto entry = step_.MutableData<std::vector<byte_t>>(index);
+  // Remove the slot's previous file, unless it is the file just written.
+  const auto old_hash = entry.reader().GetRaw();
+  if (!old_hash.empty() && old_hash != brcd.filename()) {
+    const std::string old_path = dir + "/" + old_hash;
+    CHECK_EQ(std::remove(old_path.c_str()), 0) << "delete old binary record "
+                                               << old_path << " failed";
+  }
+  entry.SetRaw(brcd.filename());
+}
+
+std::string AudioReader::caption() {
+  CHECK_EQ(reader_.captions().size(), 1);
+  // Exactly one caption is expected (checked above); work on a copy of it.
+  auto text = reader_.captions().front();
+  if (LogReader::TagMatchMode(text, mode_))
+    return LogReader::GenReadableTag(mode_, text);
+  string::TagDecode(text);  // presumably rewrites `text` in place - confirm
+  return text;
+}
+
+// Loads sample `index` of the record at `offset` from its binary record file.
+AudioReader::AudioRecord AudioReader::record(int offset, int index) {
+  // Value-initialize: sample_rate is not filled in below, so without this it
+  // would be indeterminate when read. NOTE(review): it is always 0 for now.
+  AudioRecord res{};
+  auto record = reader_.record(offset);
+  auto filename = record.data(index).GetRaw();
+  CHECK(!g_log_dir.empty())
+      << "g_log_dir should be set in LogReader construction";
+  BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), filename);
+
+  // Convert each stored byte to int8_t while copying it into the result.
+  res.data.assign(brcd.data.begin(), brcd.data.end());
+  res.step_id = record.id();
+  return res;
+}
+
 }  // namespace components
 
 }  // namespace visualdl
diff --git a/visualdl/logic/sdk.h b/visualdl/logic/sdk.h
@@ -170,8 +170,9 @@ struct Image {
   void FinishSampling();
 
   /*
-   * A combined interface for IsSampleTaken and SetSample, simpler but might be
-   * low effience.
+   * A combined interface for IndexOfSampleTaken and SetSample. It is simpler
+   * to use, but might be less efficient than calling the two interfaces
+   * separately.
    */
   void AddSample(const std::vector<shape_t>& shape,
                  const std::vector<value_t>& data);
@@ -182,7 +183,7 @@ struct Image {
    * copy data when it should be sampled. In that way, most of unsampled image
    * data need not be copied or processed at all.
    */
-  int IsSampleTaken();
+  int IndexOfSampleTaken();
   /*
    * Just store a tensor with nothing to do with image format.
    */
@@ -326,6 +327,115 @@ struct TextReader {
   TabletReader reader_;
 };
 
+/*
+ * Audio component writer.
+ */
+struct Audio {
+  using value_t = float;
+
+  /*
+   * step_cycle: only every `step_cycle`-th step is sampled; must be positive.
+   * num_samples: how many samples to take in a step; must be positive.
+   */
+  Audio(Tablet tablet, int num_samples, int step_cycle)
+      : writer_(tablet), num_samples_(num_samples), step_cycle_(step_cycle) {
+    CHECK_GT(step_cycle, 0);
+    CHECK_GT(num_samples, 0);
+
+    writer_.SetType(Tablet::Type::kAudio);
+    writer_.SetNumSamples(num_samples);
+    // make audio's tag as the default caption.
+    SetCaption(tablet.reader().tag());
+  }
+
+  void SetCaption(const std::string& c) {
+    writer_.SetCaptions(std::vector<std::string>({c}));
+  }
+
+  /*
+   * Start a sampling period, this interface will start a new reservoir sampling
+   * phase.
+   */
+  void StartSampling();
+  /*
+   * End a sampling period: advances the step counter and may persist to disk.
+   */
+  void FinishSampling();
+
+  /*
+   * A combined interface for IndexOfSampleTaken and SetSample: simpler to
+   * call, but the audio data is always passed in, even when the sample is
+   * not taken.
+   */
+  void AddSample(int sample_rate, const std::vector<value_t>& data);
+
+  /*
+   * Decide whether the next sample is taken, without needing its data, so
+   * that unsampled audio need not be copied or processed at all.
+   * Returns the slot index to pass to SetSample, or -1 if the sample is
+   * skipped.
+   */
+  int IndexOfSampleTaken();
+  /*
+   * Store audio data in sample slot `index`; sample_rate must be positive.
+   */
+  void SetSample(int index, int sample_rate, const std::vector<value_t>& data);
+
+protected:
+  bool ToSampleThisStep() const { return step_id_ % step_cycle_ == 0; }
+
+private:
+  Tablet writer_;
+  Record step_;
+  int num_records_{0};
+  int num_samples_{0};
+  int step_id_{0};
+  int step_cycle_;
+};
+
+/*
+* Audio reader.
+*/
+struct AudioReader {
+  using value_t = typename Audio::value_t;
+
+  struct AudioRecord {
+    int step_id{0};
+    int sample_rate{0};
+    std::vector<int8_t> data;
+  };
+
+  AudioReader(const std::string& mode, TabletReader tablet)
+      : reader_(tablet), mode_{mode} {}
+
+  std::string caption();
+
+  // number of steps.
+  int num_records() { return reader_.total_records(); }
+  // num_samples value reported by the underlying tablet reader.
+  int num_samples() { return reader_.num_samples(); }
+  // timestamp of the record at `step`.
+  int64_t timestamp(int step) { return reader_.record(step).timestamp(); }
+
+  /*
+   * offset: offset of a step.
+   * index: index of a sample.
+   */
+  AudioRecord record(int offset, int index);
+
+  /*
+   * NOTE(review): presumably the raw data of the sample at (offset, index);
+   * no definition is visible next to record() - confirm one exists.
+   */
+  std::vector<value_t> data(int offset, int index);
+  // NOTE(review): no definition visible next to record() either - confirm.
+  int stepid(int offset, int index);
+
+private:
+  TabletReader reader_;
+  std::string mode_;
+};
+
 }  // namespace components
 }  // namespace visualdl