diff --git a/CMakeLists.txt b/CMakeLists.txt index 590abdd68..23e83ab88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,13 +23,15 @@ include(external/zlib) # download, build, install zlib include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog include(external/gtest) # download, build, install gtest +include(external/eigen) # download eigen include(external/pybind11) # download pybind11 include(external/protobuf) # download, build, install protobuf include(external/python) # find python and set path include_directories(${PROJECT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) -include_directories(${PROJECT_SOURCE_DIR}/thirdparty/local/include) +# TODO(ChunweiYan) debug, remote latter +#include_directories(/home/superjom/project/VisualDL/build/third_party/eigen3/src/extern_eigen3) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/visualdl/storage) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/visualdl/logic) @@ -37,13 +39,14 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/visualdl/python) add_executable(vl_test ${PROJECT_SOURCE_DIR}/visualdl/test.cc - ${PROJECT_SOURCE_DIR}/visualdl/storage/storage_test.cc ${PROJECT_SOURCE_DIR}/visualdl/logic/sdk_test.cc + ${PROJECT_SOURCE_DIR}/visualdl/storage/storage_test.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/test_concurrency.cc + ${PROJECT_SOURCE_DIR}/visualdl/utils/test_image.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/concurrency.h ${PROJECT_SOURCE_DIR}/visualdl/utils/filesystem.h ) -target_link_libraries(vl_test sdk storage entry im gtest glog protobuf gflags pthread) +target_link_libraries(vl_test sdk storage entry tablet im gtest glog protobuf gflags pthread) enable_testing () diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake new file mode 100644 index 000000000..f7483f6be --- /dev/null +++ b/cmake/external/eigen.cmake @@ -0,0 +1,30 @@ +INCLUDE(ExternalProject) + +SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) + 
+INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3) + +ExternalProject_Add( + extern_eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" + GIT_TAG "master" + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_eigen3 = \"${dummyfile}\";") + add_library(eigen3 STATIC ${dummyfile}) +else() + add_library(eigen3 INTERFACE) +endif() + +add_dependencies(eigen3 extern_eigen3) + +LIST(APPEND external_project_dependencies eigen3) diff --git a/server/visualdl/lib.py b/server/visualdl/lib.py new file mode 100644 index 000000000..4f5c49b17 --- /dev/null +++ b/server/visualdl/lib.py @@ -0,0 +1,124 @@ +import pprint +import re +import urllib +from tempfile import NamedTemporaryFile + +import numpy as np +from PIL import Image + +import storage + + +def get_modes(storage): + return storage.modes() + + +def get_scalar_tags(storage, mode): + result = {} + for mode in storage.modes(): + reader = storage.as_mode(mode) + tags = reader.tags('scalar') + if tags: + result[mode] = {} + for tag in tags: + result[mode][tag] = { + 'displayName': reader.scalar(tag).caption(), + 'description': "", + } + return result + + +def get_scalar(storage, mode, tag): + reader = storage.as_mode(mode) + scalar = reader.scalar(tag) + + records = scalar.records() + ids = scalar.ids() + timestamps = scalar.timestamps() + + result = zip(timestamps, ids, records) + return result + + +def get_image_tags(storage): + result = {} + + for mode in storage.modes(): + reader = storage.as_mode(mode) + tags = reader.tags('image') + if tags: + result[mode] = {} + for tag in tags: + image = reader.image(tag) + for i in xrange(max(1, image.num_samples())): + caption = tag if image.num_samples() <= 1 else '%s/%d'%(tag, i) + result[mode][caption] 
= { + 'displayName': caption, + 'description': "", + 'samples': 1, + } + return result + + +def get_image_tag_steps(storage, mode, tag): + print 'image_tag_steps,mode,tag:', mode, tag + # remove suffix '/x' + res = re.search(r".*/([0-9]+$)", tag) + sample_index = 0 + origin_tag = tag + if res: + tag = tag[:tag.rfind('/')] + sample_index = int(res.groups()[0]) + + reader = storage.as_mode(mode) + image = reader.image(tag) + res = [] + + for step_index in range(image.num_records()): + record = image.record(step_index, sample_index) + shape = record.shape() + assert shape, "%s,%s" % (mode, tag) + query = urllib.urlencode({ + 'sample': 0, + 'index': step_index, + 'tag': origin_tag, + 'run': mode, + }) + res.append({ + 'height': shape[0], + 'width': shape[1], + 'step': record.step_id(), + 'wall_time': image.timestamp(step_index), + 'query': query, + }) + return res + + +def get_invididual_image(storage, mode, tag, step_index): + reader = storage.as_mode(mode) + res = re.search(r".*/([0-9]+$)", tag) + # remove suffix '/x' + if res: + offset = int(res.groups()[0]) + tag = tag[:tag.rfind('/')] + + image = reader.image(tag) + record = image.record(step_index, offset) + + data = np.array(record.data(), dtype='uint8').reshape(record.shape()) + tempfile = NamedTemporaryFile(mode='w+b', suffix='.png') + with Image.fromarray(data) as im: + im.save(tempfile) + tempfile.seek(0, 0) + return tempfile + + +if __name__ == '__main__': + reader = storage.StorageReader('./tmp/mock') + tags = get_image_tags(reader) + + tags = get_image_tag_steps(reader, 'train', 'layer1/layer2/image0/0') + pprint.pprint(tags) + + image = get_invididual_image(reader, "train", 'layer1/layer2/image0/0', 2) + print image diff --git a/server/visualdl/lib_test.py b/server/visualdl/lib_test.py new file mode 100644 index 000000000..117cc2d89 --- /dev/null +++ b/server/visualdl/lib_test.py @@ -0,0 +1,54 @@ +import lib +import unittest +import storage +import pprint +from storage_mock import add_scalar, add_image + 
+ +class LibTest(unittest.TestCase): + def setUp(self): + dir = "./tmp/mock" + writer = storage.StorageWriter(dir, sync_cycle=20) + + add_scalar(writer, "train", "layer/scalar0/min", 1000, 1) + add_scalar(writer, "test", "layer/scalar0/min", 1000, 10) + add_scalar(writer, "valid", "layer/scalar0/min", 1000, 10) + + add_scalar(writer, "train", "layer/scalar0/max", 1000, 1) + add_scalar(writer, "test", "layer/scalar0/max", 1000, 10) + add_scalar(writer, "valid", "layer/scalar0/max", 1000, 10) + + add_image(writer, "train", "layer/image0", 7, 10, 1) + add_image(writer, "test", "layer/image0", 7, 10, 3) + + add_image(writer, "train", "layer/image1", 7, 10, 1, shape=[30,30,2]) + add_image(writer, "test", "layer/image1", 7, 10, 1, shape=[30,30,2]) + + self.reader = storage.StorageReader(dir) + + def test_modes(self): + modes = lib.get_modes(self.reader) + self.assertEqual(sorted(modes), sorted(["train", "test", "valid"])) + + def test_scalar(self): + + for mode in "train test valid".split(): + tags = lib.get_scalar_tags(self.reader, mode) + print 'scalar tags:' + pprint.pprint(tags) + self.assertEqual(len(tags), 3) + self.assertEqual(sorted(tags.keys()), sorted("train test valid".split())) + + def test_image(self): + tags = lib.get_image_tags(self.reader) + self.assertEqual(len(tags), 2) + + tags = lib.get_image_tag_steps(self.reader, 'train', 'layer/image0/0') + pprint.pprint(tags) + + image = lib.get_invididual_image(self.reader, "train", 'layer/image0/0', 2) + print image + + +if __name__ == '__main__': + unittest.main() diff --git a/server/visualdl/mock.sh b/server/visualdl/mock.sh new file mode 100644 index 000000000..966fa6899 --- /dev/null +++ b/server/visualdl/mock.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -ex + +export PYTHONPATH="/home/superjom/project/VisualDL/build/visualdl/logic:/home/superjom/project/VisualDL/visualdl/python" + +python lib_test.py diff --git a/server/visualdl/run.sh b/server/visualdl/run.sh new file mode 100644 index 000000000..5f62f0eb2 --- 
/dev/null +++ b/server/visualdl/run.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ex + +export PYTHONPATH="$(pwd)/..:/home/superjom/project/VisualDL/build/visualdl/logic:/home/superjom/project/VisualDL/visualdl/python" +export FLASK_APP=visual_dl.py +export FLASK_DEBUG=1 + +python visual_dl.py --logdir ./tmp/mock --host 172.23.233.68 --port 8043 diff --git a/server/visualdl/storage_mock.py b/server/visualdl/storage_mock.py new file mode 100644 index 000000000..5ba26b15f --- /dev/null +++ b/server/visualdl/storage_mock.py @@ -0,0 +1,49 @@ +import random +import time +import unittest + +import numpy as np + + +def add_scalar(writer, mode, tag, num_steps, skip): + my_writer = writer.as_mode(mode) + scalar = my_writer.scalar(tag) + for i in range(num_steps): + if i % skip == 0: + scalar.add_record(i, random.random()) + + +def add_image(writer, + mode, + tag, + num_samples, + num_passes, + step_cycle, + shape=[50, 50, 3]): + writer_ = writer.as_mode(mode) + image_writer = writer_.image(tag, num_samples, step_cycle) + + for pass_ in xrange(num_passes): + image_writer.start_sampling() + for ins in xrange(2 * num_samples): + index = image_writer.is_sample_taken() + if index != -1: + data = np.random.random(shape) * 256 + data = np.ndarray.flatten(data) + assert shape + assert len(data) > 0 + image_writer.set_sample(index, shape, list(data)) + image_writer.finish_sampling() + + +if __name__ == '__main__': + add_scalar("train", "layer/scalar0/min", 1000, 1) + add_scalar("test", "layer/scalar0/min", 1000, 10) + add_scalar("valid", "layer/scalar0/min", 1000, 10) + + add_scalar("train", "layer/scalar0/max", 1000, 1) + add_scalar("test", "layer/scalar0/max", 1000, 10) + add_scalar("valid", "layer/scalar0/max", 1000, 10) + + add_image("train", "layer/image0", 7, 10, 1) + add_image("test", "layer/image0", 7, 10, 3) diff --git a/server/visualdl/visual_dl.py b/server/visualdl/visual_dl.py index 242081b0f..e90807a68 100644 --- a/server/visualdl/visual_dl.py +++ b/server/visualdl/visual_dl.py 
@@ -1,27 +1,27 @@ -""" entry point of visual_dl -""" import json import os +import re import sys from optparse import OptionParser -from flask import Flask, redirect -from flask import request -from flask import send_from_directory -from flask import Response +from flask import (Flask, Response, redirect, request, send_file, + send_from_directory) -from visualdl.log import logger +import lib +import storage import visualdl.mock.data as mock_data import visualdl.mock.tags as mock_tags +from visualdl.log import logger import storage import graph app = Flask(__name__, static_url_path="") +# set static expires in a short time to reduce browser's memory usage. +app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 30 def option_parser(): """ - :return: """ parser = OptionParser(usage="usage: visual_dl visual_dl.py "\ @@ -88,29 +88,29 @@ def logdir(): @app.route('/data/runs') def runs(): - is_debug = bool(request.args.get('debug')) - result = gen_result(0, "", ["train", "test"]) + modes = storage.modes() + result = gen_result(0, "", lib.get_modes()) return Response(json.dumps(result), mimetype='application/json') @app.route("/data/plugin/scalars/tags") -def tags(): +def scalar_tags(): + mode = request.args.get('mode') is_debug = bool(request.args.get('debug')) - tag = request.args.get('tag') if is_debug: result = mock_tags.data() else: - result = {} - print 'modes', storage.modes() - for mode in storage.modes(): - result[mode] = {} - reader = storage.as_mode(mode) - for tag in reader.tags("scalar"): - result[mode][tag] = { - 'displayName': reader.scalar(tag).caption(), - 'description': "" - } - print 'tags', result + result = lib.get_scalar_tags(storage, mode) + print 'scalar tags (mode: %s)' % mode, result + result = gen_result(0, "", result) + return Response(json.dumps(result), mimetype='application/json') + + +@app.route("/data/plugin/images/tags") +def image_tags(): + mode = request.args.get('run') + result = lib.get_image_tags(storage) + print 'image tags (mode: %s)'%mode, 
result result = gen_result(0, "", result) return Response(json.dumps(result), mimetype='application/json') @@ -121,21 +121,40 @@ def scalars(): tag = request.args.get('tag') is_debug = bool(request.args.get('debug')) if is_debug: - result = gen_result(0, "", mock_data.sequence_data()) + result = mock_data.sequence_data() else: - reader = storage.as_mode(run) - scalar = reader.scalar(tag) + result = lib.get_scalar(storage, run, tag) - records = scalar.records() - ids = scalar.ids() - timestamps = scalar.timestamps() + result = gen_result(0, "", result) + return Response(json.dumps(result), mimetype='application/json') - result = zip(timestamps, ids, records) - result = gen_result(0, "", result) + +@app.route('/data/plugin/images/images') +def images(): + mode = request.args.get('run') + # TODO(ChunweiYan) update this when frontend fix the field name + #tag = request.args.get('tag') + tag = request.args.get('displayName') + + result = lib.get_image_tag_steps(storage, mode, tag) + result = gen_result(0, "", result) return Response(json.dumps(result), mimetype='application/json') +@app.route('/data/plugin/images/individualImage') +def individual_image(): + mode = request.args.get('run') + tag = request.args.get('tag') # include a index + step_index = int(request.args.get('index')) # index of step + offset = 0 + + imagefile = lib.get_invididual_image(storage, mode, tag, step_index) + response = send_file( + imagefile, as_attachment=True, attachment_filename='img.png') + return response + + @app.route('/data/plugin/graphs/graph') def graph(): model_json = graph.load_model("") @@ -144,4 +163,4 @@ def graph(): if __name__ == '__main__': logger.info(" port=" + str(options.port)) - app.run(debug=False, host=options.host, port=options.port) + app.run(debug=True, host=options.host, port=options.port) diff --git a/tests.sh b/tests.sh index 961e2f592..f731e65bd 100644 --- a/tests.sh +++ b/tests.sh @@ -2,11 +2,16 @@ set -ex mode=$1 -cur=$(pwd) +readonly cur=$(pwd) +readonly 
core_path=$cur/build/visualdl/logic +readonly python_path=$cur/visualdl/python + +export PYTHONPATH="${core_path}:${python_path}" backend_test() { cd $cur sudo pip install numpy + sudo pip install Pillow mkdir -p build cd build cmake .. @@ -21,6 +26,13 @@ frontend_test() { npm run build } +server_test() { + cd $cur/server + bash build.sh + cd $cur/server/visualdl + python lib_test.py +} + echo "mode" $mode if [ $mode = "backend" ]; then @@ -28,6 +40,7 @@ if [ $mode = "backend" ]; then elif [ $mode = "all" ]; then frontend_test backend_test + server_test else frontend_test fi diff --git a/visualdl/logic/CMakeLists.txt b/visualdl/logic/CMakeLists.txt index f7d825444..44c6579ec 100644 --- a/visualdl/logic/CMakeLists.txt +++ b/visualdl/logic/CMakeLists.txt @@ -1,11 +1,10 @@ -#add_library(sdk ${PROJECT_SOURCE_DIR}/visualdl/logic/sdk.cc) add_library(im ${PROJECT_SOURCE_DIR}/visualdl/logic/im.cc) -add_library(sdk ${PROJECT_SOURCE_DIR}/visualdl/logic/sdk.cc) +add_library(sdk ${PROJECT_SOURCE_DIR}/visualdl/logic/sdk.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/image.h) add_dependencies(im storage_proto) add_dependencies(sdk entry storage storage_proto) ## pybind add_library(core SHARED ${PROJECT_SOURCE_DIR}/visualdl/logic/pybind.cc) -add_dependencies(core pybind python im entry storage sdk protobuf glog) -target_link_libraries(core PRIVATE pybind entry python im storage sdk protobuf glog) +add_dependencies(core pybind python im entry tablet storage sdk protobuf glog eigen3) +target_link_libraries(core PRIVATE pybind entry python im tablet storage sdk protobuf glog) set_target_properties(core PROPERTIES PREFIX "" SUFFIX ".so") diff --git a/visualdl/logic/im.cc b/visualdl/logic/im.cc index a2ec1f22e..ff6da2478 100644 --- a/visualdl/logic/im.cc +++ b/visualdl/logic/im.cc @@ -37,6 +37,7 @@ template class SimpleWriteSyncGuard>; template class SimpleWriteSyncGuard>; template class SimpleWriteSyncGuard>; template class SimpleWriteSyncGuard>; +template class SimpleWriteSyncGuard>>; 
template class SimpleWriteSyncGuard>; } // namespace visualdl diff --git a/visualdl/logic/pybind.cc b/visualdl/logic/pybind.cc index 26369794f..3fbe53eb6 100644 --- a/visualdl/logic/pybind.cc +++ b/visualdl/logic/pybind.cc @@ -11,28 +11,7 @@ namespace cp = visualdl::components; PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of VisualDL"); -#define ADD_SCALAR(T) \ - py::class_>(m, "ScalarReader__" #T) \ - .def("records", &cp::ScalarReader::records) \ - .def("timestamps", &cp::ScalarReader::timestamps) \ - .def("ids", &cp::ScalarReader::ids) \ - .def("caption", &cp::ScalarReader::caption); - ADD_SCALAR(int); - ADD_SCALAR(float); - ADD_SCALAR(double); - ADD_SCALAR(int64_t); -#undef ADD_SCALAR - -#define ADD_SCALAR_WRITER(T) \ - py::class_>(m, "ScalarWriter__" #T) \ - .def("set_caption", &cp::Scalar::SetCaption) \ - .def("add_record", &cp::Scalar::AddRecord); - ADD_SCALAR_WRITER(int); - ADD_SCALAR_WRITER(float); - ADD_SCALAR_WRITER(double); -#undef ADD_SCALAR_WRITER - -#define ADD_SCALAR(T) \ +#define READER_ADD_SCALAR(T) \ .def("get_scalar_" #T, [](vs::Reader& self, const std::string& tag) { \ auto tablet = self.tablet(tag); \ return vs::components::ScalarReader(std::move(tablet)); \ @@ -46,13 +25,17 @@ PYBIND11_PLUGIN(core) { .def("modes", [](vs::Reader& self) { return self.storage().modes(); }) .def("tags", &vs::Reader::tags) // clang-format off - ADD_SCALAR(float) - ADD_SCALAR(double) - ADD_SCALAR(int); -// clang-format on -#undef ADD_SCALAR + READER_ADD_SCALAR(float) + READER_ADD_SCALAR(double) + READER_ADD_SCALAR(int) + // clang-format on + .def("get_image", [](vs::Reader& self, const std::string& tag) { + auto tablet = self.tablet(tag); + return vs::components::ImageReader(self.mode(), tablet); + }); +#undef READER_ADD_SCALAR -#define ADD_SCALAR(T) \ +#define WRITER_ADD_SCALAR(T) \ .def("new_scalar_" #T, [](vs::Writer& self, const std::string& tag) { \ auto tablet = self.AddTablet(tag); \ return cp::Scalar(tablet); \ @@ -65,10 +48,68 @@ 
PYBIND11_PLUGIN(core) { }) .def("as_mode", &vs::Writer::AsMode) // clang-format off - ADD_SCALAR(float) - ADD_SCALAR(double) - ADD_SCALAR(int); -// clang-format on -#undef ADD_SCALAR + WRITER_ADD_SCALAR(float) + WRITER_ADD_SCALAR(double) + WRITER_ADD_SCALAR(int) + // clang-format on + .def("new_image", + [](vs::Writer& self, + const std::string& tag, + int num_samples, + int step_cycle) { + auto tablet = self.AddTablet(tag); + return vs::components::Image(tablet, num_samples, step_cycle); + }); + +//------------------- components -------------------- +#define ADD_SCALAR_READER(T) \ + py::class_>(m, "ScalarReader__" #T) \ + .def("records", &cp::ScalarReader::records) \ + .def("timestamps", &cp::ScalarReader::timestamps) \ + .def("ids", &cp::ScalarReader::ids) \ + .def("caption", &cp::ScalarReader::caption); + ADD_SCALAR_READER(int); + ADD_SCALAR_READER(float); + ADD_SCALAR_READER(double); + ADD_SCALAR_READER(int64_t); +#undef ADD_SCALAR_READER + +#define ADD_SCALAR_WRITER(T) \ + py::class_>(m, "ScalarWriter__" #T) \ + .def("set_caption", &cp::Scalar::SetCaption) \ + .def("add_record", &cp::Scalar::AddRecord); + ADD_SCALAR_WRITER(int); + ADD_SCALAR_WRITER(float); + ADD_SCALAR_WRITER(double); +#undef ADD_SCALAR_WRITER + + // clang-format on + py::class_(m, "ImageWriter") + .def("set_caption", &cp::Image::SetCaption) + .def("start_sampling", &cp::Image::StartSampling) + .def("is_sample_taken", &cp::Image::IsSampleTaken) + .def("finish_sampling", &cp::Image::FinishSampling) + .def("set_sample", &cp::Image::SetSample); + + py::class_(m, "ImageRecord") + // TODO(ChunweiYan) make these copyless. 
+ .def("data", + [](cp::ImageReader::ImageRecord& self) { + return self.data; + }) + .def("shape", + [](cp::ImageReader::ImageRecord& self) { return self.shape; }) + .def("step_id", + [](cp::ImageReader::ImageRecord& self) { return self.step_id; }); + + py::class_(m, "ImageReader") + .def("caption", &cp::ImageReader::caption) + .def("num_records", &cp::ImageReader::num_records) + .def("num_samples", &cp::ImageReader::num_samples) + .def("record", &cp::ImageReader::record) + .def("timestamp", &cp::ImageReader::timestamp); + + // .def("data", &cp::ImageReader::data) + // .def("shape", &cp::ImageReader::shape); } // end pybind diff --git a/visualdl/logic/sdk.cc b/visualdl/logic/sdk.cc index 62c46ea6d..7b0c74950 100644 --- a/visualdl/logic/sdk.cc +++ b/visualdl/logic/sdk.cc @@ -1,5 +1,7 @@ #include "visualdl/logic/sdk.h" +#include "visualdl/utils/image.h" + namespace visualdl { namespace components { @@ -47,6 +49,118 @@ template class ScalarReader; template class ScalarReader; template class ScalarReader; +void Image::StartSampling() { + if (!ToSampleThisStep()) return; + + step_ = writer_.AddRecord(); + step_.SetId(step_id_); + + time_t time = std::time(nullptr); + step_.SetTimeStamp(time); + + // resize record + for (int i = 0; i < num_samples_; i++) { + step_.AddData(); + } + num_records_ = 0; +} + +int Image::IsSampleTaken() { + if (!ToSampleThisStep()) return -1; + num_records_++; + if (num_records_ <= num_samples_) { + return num_records_ - 1; + } + float prob = float(num_samples_) / num_records_; + float randv = (float)rand() / RAND_MAX; + if (randv < prob) { + // take this sample + int index = rand() % num_samples_; + return index; + } + return -1; +} + +void Image::FinishSampling() { + step_id_++; + if (ToSampleThisStep()) { + // TODO(ChunweiYan) much optimizement here. 
+ writer_.parent()->PersistToDisk(); + } +} + +template +struct is_same_type { + static const bool value = false; +}; +template +struct is_same_type { + static const bool value = true; +}; + +void Image::SetSample(int index, + const std::vector& shape, + const std::vector& data) { + // production + int size = std::accumulate( + shape.begin(), shape.end(), 1., [](int a, int b) { return a * b; }); + CHECK_GT(size, 0); + CHECK_EQ(shape.size(), 3) + << "shape should be something like (width, height, num_channel)"; + CHECK_LE(shape.back(), 3); + CHECK_GE(shape.back(), 2); + CHECK_EQ(size, data.size()) << "image's shape not match data"; + CHECK_LT(index, num_samples_); + CHECK_LE(index, num_records_); + + auto entry = step_.MutableData>(index); + // trick to store int8 to protobuf + std::vector data_str(data.size()); + for (int i = 0; i < data.size(); i++) { + data_str[i] = data[i]; + } + Uint8Image image(shape[2], shape[0] * shape[1]); + NormalizeImage(&image, &data[0], shape[0] * shape[1], shape[2]); + // entry.SetRaw(std::string(data_str.begin(), data_str.end())); + entry.SetRaw( + std::string(image.data(), image.data() + image.rows() * image.cols())); + + static_assert( + !is_same_type::value, + "value_t should not use int64_t field, this type is used to store shape"); + + // set meta with hack + Entry meta; + meta.set_parent(entry.parent()); + meta.entry = entry.entry; + meta.SetMulti(shape); +} + +std::string ImageReader::caption() { + CHECK_EQ(reader_.captions().size(), 1); + auto caption = reader_.captions().front(); + if (Reader::TagMatchMode(caption, mode_)) { + return Reader::GenReadableTag(mode_, caption); + } + string::TagDecode(caption); + return caption; +} + +ImageReader::ImageRecord ImageReader::record(int offset, int index) { + ImageRecord res; + auto record = reader_.record(offset); + auto data_entry = record.data>(index); + auto shape_entry = record.data(index); + auto data_str = data_entry.GetRaw(); + std::transform(data_str.begin(), + 
data_str.end(), + std::back_inserter(res.data), + [](byte_t i) { return (int)(i); }); + res.shape = shape_entry.GetMulti(); + res.step_id = record.id(); + return res; +} + } // namespace components } // namespace visualdl diff --git a/visualdl/logic/sdk.h b/visualdl/logic/sdk.h index 66135d23d..bf3ccda64 100644 --- a/visualdl/logic/sdk.h +++ b/visualdl/logic/sdk.h @@ -32,6 +32,7 @@ class Writer { string::TagEncode(tmp); auto res = storage_.AddTablet(tmp); res.SetCaptions(std::vector({mode_})); + res.SetTag(mode_, tag); return res; } @@ -52,6 +53,8 @@ class Reader { return tmp; } + const std::string& mode() { return mode_; } + TabletReader tablet(const std::string& tag) { auto tmp = mode_ + "/" + tag; string::TagEncode(tmp); @@ -62,7 +65,7 @@ class Reader { auto tags = reader_.all_tags(); auto it = std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) { - return !TagMatchMode(tag); + return !TagMatchMode(tag, mode_); }); tags.erase(it + 1); return tags; @@ -74,8 +77,8 @@ class Reader { CHECK(!tags.empty()); std::vector res; for (const auto& tag : tags) { - if (TagMatchMode(tag)) { - res.push_back(GenReadableTag(tag)); + if (TagMatchMode(tag, mode_)) { + res.push_back(GenReadableTag(mode_, tag)); } } return res; @@ -83,17 +86,19 @@ class Reader { StorageReader& storage() { return reader_; } -protected: - bool TagMatchMode(const std::string& tag) { - if (tag.size() <= mode_.size()) return false; - return tag.substr(0, mode_.size()) == mode_; - } - std::string GenReadableTag(const std::string& tag) { + static std::string GenReadableTag(const std::string& mode, + const std::string& tag) { auto tmp = tag; string::TagDecode(tmp); - return tmp.substr(mode_.size() + 1); // including `/` + return tmp.substr(mode.size() + 1); // including `/` + } + + static bool TagMatchMode(const std::string& tag, const std::string& mode) { + if (tag.size() <= mode.size()) return false; + return tag.substr(0, mode.size()) == mode; } +protected: private: StorageReader reader_; 
std::string mode_{kDefaultMode}; @@ -140,6 +145,112 @@ struct ScalarReader { TabletReader reader_; }; +/* + * Image component writer. + */ +struct Image { + using value_t = float; + using shape_t = int64_t; + + /* + * step_cycle: store every `step_cycle` as a record. + * num_samples: how many samples to take in a step. + */ + Image(Tablet tablet, int num_samples, int step_cycle) + : writer_(tablet), num_samples_(num_samples), step_cycle_(step_cycle) { + CHECK_GT(step_cycle, 0); + CHECK_GT(num_samples, 0); + + writer_.SetType(Tablet::Type::kImage); + // make image's tag as the default caption. + writer_.SetNumSamples(num_samples); + SetCaption(tablet.reader().tag()); + } + void SetCaption(const std::string& c) { + writer_.SetCaptions(std::vector({c})); + } + /* + * Start a sample period. + */ + void StartSampling(); + /* + * Will this sample will be taken. + */ + int IsSampleTaken(); + /* + * End a sample period. + */ + void FinishSampling(); + + /* + * Just store a tensor with nothing to do with image format. + */ + void SetSample(int index, + const std::vector& shape, + const std::vector& data); + +protected: + bool ToSampleThisStep() { return step_id_ % step_cycle_ == 0; } + +private: + Tablet writer_; + Record step_; + int num_records_{0}; + int num_samples_{0}; + int step_id_{0}; + int step_cycle_; +}; + +/* + * Image reader. + */ +struct ImageReader { + using value_t = typename Image::value_t; + using shape_t = typename Image::shape_t; + + struct ImageRecord { + int step_id; + std::vector data; + std::vector shape; + }; + + ImageReader(const std::string& mode, TabletReader tablet) + : reader_(tablet), mode_{mode} {} + + std::string caption(); + + // number of steps. + int num_records() { return reader_.total_records(); } + + int num_samples() { return reader_.num_samples(); } + + int64_t timestamp(int step) { return reader_.record(step).timestamp(); } + + /* + * offset: offset of a step. + * index: index of a sample. 
+ */ + ImageRecord record(int offset, int index); + + /* + * offset: offset of a step. + * index: index of a sample. + */ + std::vector data(int offset, int index); + + /* + * offset: offset of a step. + * index: index of a sample. + */ + std::vector shape(int offset, int index); + + int stepid(int offset, int index); + +private: + TabletReader reader_; + std::string mode_; +}; + } // namespace components } // namespace visualdl diff --git a/visualdl/logic/sdk_test.cc b/visualdl/logic/sdk_test.cc index 3a2b2f5ef..b82e25437 100644 --- a/visualdl/logic/sdk_test.cc +++ b/visualdl/logic/sdk_test.cc @@ -2,6 +2,8 @@ #include +using namespace std; + namespace visualdl { TEST(Scalar, write) { @@ -40,4 +42,41 @@ TEST(Scalar, write) { ASSERT_EQ(scalar_reader1.caption(), "customized caption"); } +TEST(Image, test) { + const auto dir = "./tmp/sdk_test.image"; + Writer writer__(dir, 4); + auto writer = writer__.AsMode("train"); + + auto tablet = writer.AddTablet("image0"); + components::Image image(tablet, 3, 1); + const int num_steps = 10; + + LOG(INFO) << "write images"; + image.SetCaption("this is an image"); + for (int step = 0; step < num_steps; step++) { + image.StartSampling(); + for (int i = 0; i < 7; i++) { + vector shape({5, 5, 3}); + vector data; + for (int j = 0; j < 3 * 5 * 5; j++) { + data.push_back(float(rand()) / RAND_MAX); + } + int index = image.IsSampleTaken(); + if (index != -1) { + image.SetSample(index, shape, data); + } + } + image.FinishSampling(); + } + + LOG(INFO) << "read images"; + // read it + Reader reader__(dir); + auto reader = reader__.AsMode("train"); + auto tablet2read = reader.tablet("image0"); + components::ImageReader image2read("train", tablet2read); + CHECK_EQ(image2read.caption(), "this is an image"); + CHECK_EQ(image2read.num_records(), num_steps); +} + } // namespace visualdl diff --git a/visualdl/python/dog.jpg b/visualdl/python/dog.jpg new file mode 100644 index 000000000..c1df6ba3b Binary files /dev/null and 
b/visualdl/python/dog.jpg differ diff --git a/visualdl/python/storage.py b/visualdl/python/storage.py index 694539a0b..4b6bdd4c6 100644 --- a/visualdl/python/storage.py +++ b/visualdl/python/storage.py @@ -31,6 +31,9 @@ def scalar(self, tag, type='float'): } return type2scalar[type](tag) + def image(self, tag): + return self.reader.get_image(tag) + class StorageWriter(object): @@ -50,3 +53,6 @@ def scalar(self, tag, type='float'): 'int': self.writer.new_scalar_int, } return type2scalar[type](tag) + + def image(self, tag, num_samples, step_cycle): + return self.writer.new_image(tag, num_samples, step_cycle) diff --git a/visualdl/python/test_storage.py b/visualdl/python/test_storage.py index 328ee74ef..078a95871 100644 --- a/visualdl/python/test_storage.py +++ b/visualdl/python/test_storage.py @@ -1,18 +1,21 @@ -import storage -import numpy as np -import unittest import random import time +import unittest +from PIL import Image + +import numpy as np + +import storage class StorageTest(unittest.TestCase): def setUp(self): self.dir = "./tmp/storage_test" - - def test_read(self): - print 'test write' self.writer = storage.StorageWriter( self.dir, sync_cycle=1).as_mode("train") + + def test_scalar(self): + print 'test write' scalar = self.writer.scalar("model/scalar/min") # scalar.set_caption("model/scalar/min") for i in range(10): @@ -29,6 +32,75 @@ def test_read(self): print 'records', records print 'ids', ids + def test_image(self): + tag = "layer1/layer2/image0" + image_writer = self.writer.image(tag, 10, 1) + num_passes = 10 + num_samples = 100 + shape = [10, 10, 3] + + for pass_ in xrange(num_passes): + image_writer.start_sampling() + for ins in xrange(num_samples): + index = image_writer.is_sample_taken() + if index != -1: + data = np.random.random(shape) * 256 + data = np.ndarray.flatten(data) + image_writer.set_sample(index, shape, list(data)) + image_writer.finish_sampling() + + self.reader = storage.StorageReader(self.dir).as_mode("train") + image_reader = 
self.reader.image(tag) + self.assertEqual(image_reader.caption(), tag) + self.assertEqual(image_reader.num_records(), num_passes) + + image_record = image_reader.record(0, 1) + self.assertTrue(np.equal(image_record.shape(), shape).all()) + data = image_record.data() + self.assertEqual(len(data), np.prod(shape)) + + image_tags = self.reader.tags("image") + self.assertTrue(image_tags) + self.assertEqual(len(image_tags), 1) + + def test_check_image(self): + ''' + check whether the storage will keep image data consistent + ''' + print 'check image' + tag = "layer1/check/image1" + image_writer = self.writer.image(tag, 10, 1) + + image = Image.open("./dog.jpg") + shape = [image.size[1], image.size[0], 3] + origin_data = np.array(image.getdata()).flatten() + + self.reader = storage.StorageReader(self.dir).as_mode("train") + + image_writer.start_sampling() + index = image_writer.is_sample_taken() + image_writer.set_sample(index, shape, list(origin_data)) + image_writer.finish_sampling() + + # read and check whether the original image will be displayed + + image_reader = self.reader.image(tag) + image_record = image_reader.record(0, 0) + data = image_record.data() + shape = image_record.shape() + + PIL_image_shape = (shape[0] * shape[1], shape[2]) + data = np.array(data, dtype='uint8').reshape(PIL_image_shape) + print 'origin', origin_data.flatten() + print 'data', data.flatten() + image = Image.fromarray(data.reshape(shape)) + # manully check the image and found that nothing wrong with the image storage. + # image.show() + + # after scale, elements are changed. 
+ # self.assertTrue( + # np.equal(origin_data.reshape(PIL_image_shape), data).all()) + if __name__ == '__main__': unittest.main() diff --git a/visualdl/storage/CMakeLists.txt b/visualdl/storage/CMakeLists.txt index ec9dfbbf8..51a0b415d 100644 --- a/visualdl/storage/CMakeLists.txt +++ b/visualdl/storage/CMakeLists.txt @@ -12,4 +12,4 @@ add_library(storage storage.cc storage.h ${PROTO_SRCS} ${PROTO_HDRS}) add_dependencies(entry storage_proto im) add_dependencies(record storage_proto entry) add_dependencies(tablet storage_proto) -add_dependencies(storage storage_proto) +add_dependencies(storage storage_proto record tablet entry) diff --git a/visualdl/storage/entry.cc b/visualdl/storage/entry.cc index 0d4fc8dd3..f2f2d0c64 100644 --- a/visualdl/storage/entry.cc +++ b/visualdl/storage/entry.cc @@ -10,7 +10,33 @@ namespace visualdl { WRITE_GUARD \ } +#define IMPL_ENTRY_SETMUL(ctype__, dtype__, field__) \ + template <> \ + void Entry::SetMulti(const std::vector& vs) { \ + entry->set_dtype(storage::DataType::dtype__); \ + entry->clear_##field__(); \ + for (auto v : vs) { \ + entry->add_##field__(v); \ + } \ + WRITE_GUARD \ + } + +template <> +void Entry>::Set(std::vector v) { + entry->set_dtype(storage::DataType::kBytes); + entry->set_y(std::string(v.begin(), v.end())); + WRITE_GUARD +} + +template <> +void Entry>::Add(std::vector v) { + entry->set_dtype(storage::DataType::kBytess); + *entry->add_ys() = std::string(v.begin(), v.end()); + WRITE_GUARD +} + IMPL_ENTRY_SET_OR_ADD(Set, int, kInt32, set_i32); +IMPL_ENTRY_SET_OR_ADD(Set, std::string, kString, set_s); IMPL_ENTRY_SET_OR_ADD(Set, int64_t, kInt64, set_i64); IMPL_ENTRY_SET_OR_ADD(Set, bool, kBool, set_b); IMPL_ENTRY_SET_OR_ADD(Set, float, kFloat, set_f); @@ -22,10 +48,16 @@ IMPL_ENTRY_SET_OR_ADD(Add, double, kDoubles, add_ds); IMPL_ENTRY_SET_OR_ADD(Add, std::string, kStrings, add_ss); IMPL_ENTRY_SET_OR_ADD(Add, bool, kBools, add_bs); +IMPL_ENTRY_SETMUL(int, kInt32, i32s); +IMPL_ENTRY_SETMUL(int64_t, kInt64, i64s); 
+IMPL_ENTRY_SETMUL(float, kFloat, fs); +IMPL_ENTRY_SETMUL(double, kDouble, ds); +IMPL_ENTRY_SETMUL(bool, kBool, bs); + #define IMPL_ENTRY_GET(T, fieldname__) \ template <> \ T EntryReader::Get() const { \ - data_.fieldname__(); \ + return data_.fieldname__(); \ } IMPL_ENTRY_GET(int, i32); @@ -35,6 +67,12 @@ IMPL_ENTRY_GET(double, d); IMPL_ENTRY_GET(std::string, s); IMPL_ENTRY_GET(bool, b); +template <> +std::vector EntryReader>::Get() const { + const auto& y = data_.y(); + return std::vector(y.begin(), y.end()); +} + #define IMPL_ENTRY_GET_MULTI(T, fieldname__) \ template <> \ std::vector EntryReader::GetMulti() const { \ @@ -43,6 +81,7 @@ IMPL_ENTRY_GET(bool, b); } IMPL_ENTRY_GET_MULTI(int, i32s); +IMPL_ENTRY_GET_MULTI(int64_t, i64s); IMPL_ENTRY_GET_MULTI(float, fs); IMPL_ENTRY_GET_MULTI(double, ds); IMPL_ENTRY_GET_MULTI(std::string, ss); @@ -52,10 +91,12 @@ template class Entry; template class Entry; template class Entry; template class Entry; +template class Entry>; template class EntryReader; template class EntryReader; template class EntryReader; template class EntryReader; +template class EntryReader>; } // namespace visualdl diff --git a/visualdl/storage/entry.h b/visualdl/storage/entry.h index 060b03827..343fa310d 100644 --- a/visualdl/storage/entry.h +++ b/visualdl/storage/entry.h @@ -9,6 +9,8 @@ namespace visualdl { struct Storage; +using byte_t = unsigned char; + /* * Utility helper for storage::Entry. */ @@ -19,8 +21,9 @@ struct Entry { storage::Entry* entry{nullptr}; Entry() {} - explicit Entry(storage::Entry* entry, Storage* parent) - : entry(entry), x_(parent) {} + Entry(storage::Entry* entry, Storage* parent) : entry(entry), x_(parent) {} + Entry(const Entry& other) : entry(other.entry), x_(other.x_) {} + void operator()(storage::Entry* entry, Storage* parent) { this->entry = entry; x_ = parent; @@ -29,10 +32,15 @@ struct Entry { // Set a single value. 
void Set(T v); + void SetRaw(const std::string& bytes) { entry->set_y(bytes); } + // Add a value to repeated message field. void Add(T v); + void SetMulti(const std::vector& v); + Storage* parent() { return x_; } + void set_parent(Storage* x) { x_ = x; } private: Storage* x_; @@ -46,6 +54,8 @@ struct EntryReader { // Get repeated field. std::vector GetMulti() const; + std::string GetRaw() { return data_.y(); } + private: storage::Entry data_; }; diff --git a/visualdl/storage/record.h b/visualdl/storage/record.h index 4e5fc7fd9..31fae1bb9 100644 --- a/visualdl/storage/record.h +++ b/visualdl/storage/record.h @@ -30,7 +30,9 @@ struct Record { DECL_GUARD(Record) + Record() {} Record(storage::Record* x, Storage* parent) : data_(x), x_(parent) {} + Record(const Record& other) : data_(other.data_), x_(other.x_) {} // write operations void SetTimeStamp(int64_t x) { @@ -59,6 +61,12 @@ struct Record { return Entry(data_->add_data(), parent()); } + template + Entry MutableData(int i) { + WRITE_GUARD + return Entry(data_->mutable_data(i), parent()); + } + Storage* parent() { return x_; } private: diff --git a/visualdl/storage/storage.proto b/visualdl/storage/storage.proto index 381ee9110..7ae68f29b 100644 --- a/visualdl/storage/storage.proto +++ b/visualdl/storage/storage.proto @@ -9,15 +9,16 @@ enum DataType { kDouble = 3; kString = 4; kBool = 5; + kBytes = 6; // entrys - kInt64s = 6; - kFloats = 7; - kDoubles = 8; - kStrings = 9; - kInt32s = 10; - kBools = 11; - - kUnknown = 12; + kInt64s = 7; + kFloats = 8; + kDoubles = 9; + kStrings = 10; + kInt32s = 11; + kBools = 12; + kBytess = 13; + kUnknown = 14; } // A data array, which type is `type`. 
@@ -29,16 +30,18 @@ message Entry { int32 i32 = 2; int64 i64 = 3; string s = 4; - float f = 5; - double d = 6; - bool b = 7; + bytes y = 5; + float f = 6; + double d = 7; + bool b = 8; // array - repeated int64 i64s = 8; - repeated float fs = 9; - repeated double ds = 10; - repeated int32 i32s = 11; - repeated string ss = 12; - repeated bool bs = 13; + repeated int64 i64s = 9; + repeated float fs = 10; + repeated double ds = 11; + repeated int32 i32s = 12; + repeated string ss = 13; + repeated bool bs = 14; + repeated bytes ys = 15; } /* diff --git a/visualdl/storage/tablet.cc b/visualdl/storage/tablet.cc index e69de29bb..c64273f2a 100644 --- a/visualdl/storage/tablet.cc +++ b/visualdl/storage/tablet.cc @@ -0,0 +1,7 @@ +#include "visualdl/storage/tablet.h" + +namespace visualdl { + +TabletReader Tablet::reader() { return TabletReader(*data_); } + +} // namespace visualdl diff --git a/visualdl/storage/tablet.h b/visualdl/storage/tablet.h index 1c0c9136e..ba413be20 100644 --- a/visualdl/storage/tablet.h +++ b/visualdl/storage/tablet.h @@ -10,6 +10,8 @@ namespace visualdl { +struct TabletReader; + /* * Tablet is a helper for operations on storage::Tablet. */ @@ -80,6 +82,8 @@ struct Tablet { WRITE_GUARD } + TabletReader reader(); + Storage* parent() const { return x_; } private: diff --git a/visualdl/utils/image.h b/visualdl/utils/image.h new file mode 100644 index 000000000..77b44c37a --- /dev/null +++ b/visualdl/utils/image.h @@ -0,0 +1,95 @@ +#ifndef VISUALDL_UTILS_IMAGE_H +#define VISUALDL_UTILS_IMAGE_H + +#include +#include +#include + +namespace visualdl { + +using uint8_t = unsigned char; + +/* + * 2: height*width, channel + */ +template +using ImageDT = + Eigen::Matrix; +using Uint8Image = ImageDT; + +/* + * Linearly rescale the finite values of `buffer` into uint8 and write them to + * `image`, which must be sized depth x hw (see the CHECKs below). + * hw: height*width + * depth: number of channels + */ +static void NormalizeImage(Uint8Image* image, + const float* buffer, + int hw, + int depth) { + // Both image and buffer should be used in row major.
+ Eigen::Map> + values(buffer, depth, hw); + + CHECK_EQ(image->size(), hw * depth); + CHECK_EQ(image->row(0).size(), hw); + CHECK_EQ(image->col(0).size(), depth); + + std::vector infinite_pixels; + // compute min and max ignoring nonfinite pixels + float image_min = std::numeric_limits::infinity(); + float image_max = -image_min; + for (int i = 0; i < hw; i++) { + bool finite = true; + for (int j = 0; j < depth; j++) { + // if infinite, skip this pixel + if (!std::isfinite(values(j, i))) { + infinite_pixels.emplace_back(i); + finite = false; + break; + } + } + if (finite) { + for (int j = 0; j < depth; j++) { + float v = values(j, i); + image_min = std::min(image_min, v); + image_max = std::max(image_max, v); + } + } + } + + // Pick an affine transform into uint8 + // The ratio is only formed for a non-degenerate range, avoiding 0/0 = NaN. + const float kZeroThreshold = 1e-6; + float scale, offset; + if (image_min < 0) { + float max_val = std::max(std::abs(image_min), image_max); + scale = max_val < kZeroThreshold ? 0.0f : 127.0f / max_val; + // Center the symmetric range [-max_val, max_val] on 128, i.e. [1, 255]. + offset = 128.0f; + } else { + scale = image_max < kZeroThreshold ? 0.0f : 255.0f / image_max; + offset = 0.0f; + } + + LOG(INFO) << "scale " << scale; + + // Transform image, turning nonfinite values to bad_color + for (int i = 0; i < depth; i++) { + auto tmp = scale * values.row(i).array() + offset; + image->row(i) = tmp.cast(); + } + + for (int pixel : infinite_pixels) { + for (int i = 0; i < depth; i++) { + // TODO(ChunweiYan) use some highlight color to represent infinite pixels. + // image is depth x hw: the row is the channel, the column is the pixel. + (*image)(i, pixel) = (uint8_t)0; + } + } +} + +} // namespace visualdl + +#endif diff --git a/visualdl/utils/test_image.cc b/visualdl/utils/test_image.cc new file mode 100644 index 000000000..c2a6558dd --- /dev/null +++ b/visualdl/utils/test_image.cc @@ -0,0 +1,19 @@ +#include "visualdl/utils/image.h" + +#include + +using namespace visualdl; + +TEST(image, NormalizeImage) { + Uint8Image image(128, 3); + const int size = 128 * 3; + float arr[size]; + + for (int i = 0; i < size; i++) { + // set a strange scale (cast first: int rand() / RAND_MAX truncates to 0) + arr[i] = 234.
* (static_cast<float>(rand()) / RAND_MAX - 0.5); + } + + NormalizeImage(&image, arr, 3, 128); +} +