diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d68f38dd..f356b9288 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ add_executable(vl_test ${PROJECT_SOURCE_DIR}/visualdl/logic/sdk_test.cc ${PROJECT_SOURCE_DIR}/visualdl/logic/histogram_test.cc ${PROJECT_SOURCE_DIR}/visualdl/storage/storage_test.cc + ${PROJECT_SOURCE_DIR}/visualdl/storage/test_binary_record.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/test_concurrency.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/test_image.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/concurrency.h diff --git a/visualdl/logic/sdk.cc b/visualdl/logic/sdk.cc index d252ae3d0..338c29fba 100644 --- a/visualdl/logic/sdk.cc +++ b/visualdl/logic/sdk.cc @@ -1,11 +1,83 @@ #include "visualdl/logic/sdk.h" #include "visualdl/logic/histogram.h" +#include "visualdl/storage/binary_record.h" #include "visualdl/utils/image.h" #include "visualdl/utils/macro.h" namespace visualdl { +// Global log dir: a hack to share the log directory across all the components. +// One process of the VDL backend can only process a single logdir, so this +// is OK. It is assigned in the LogReader constructor. +std::string g_log_dir; + +LogWriter LogWriter::AsMode(const std::string& mode) { + LogWriter writer = *this; // work on a copy so this writer keeps its own mode_ + storage_.AddMode(mode); + writer.mode_ = mode; + return writer; +} + +Tablet LogWriter::AddTablet(const std::string& tag) { + // TODO(ChunweiYan) add string check here.
+ auto tmp = mode_ + "/" + tag; + string::TagEncode(tmp); + auto res = storage_.AddTablet(tmp); + res.SetCaptions(std::vector({mode_})); + res.SetTag(mode_, tag); + return res; +} + +LogReader::LogReader(const std::string& dir) : reader_(dir) { g_log_dir = dir; } + +LogReader LogReader::AsMode(const std::string& mode) { + auto tmp = *this; + tmp.mode_ = mode; + return tmp; +} + +TabletReader LogReader::tablet(const std::string& tag) { + auto tmp = mode_ + "/" + tag; + string::TagEncode(tmp); + return reader_.tablet(tmp); +} + +std::vector LogReader::all_tags() { + auto tags = reader_.all_tags(); + auto it = + std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) { + return !TagMatchMode(tag, mode_); + }); + tags.erase(it, tags.end()); // erase the whole tail remove_if left behind, not one element + return tags; +} + +std::vector LogReader::tags(const std::string& component) { + auto type = Tablet::type(component); + auto tags = reader_.tags(type); + CHECK(!tags.empty()) << "component " << component << " has no taged records"; + std::vector res; + for (const auto& tag : tags) { + if (TagMatchMode(tag, mode_)) { + res.push_back(GenReadableTag(mode_, tag)); + } + } + return res; +} + +std::string LogReader::GenReadableTag(const std::string& mode, + const std::string& tag) { + auto tmp = tag; + string::TagDecode(tmp); + return tmp.substr(mode.size() + 1); // including `/` +} + +bool LogReader::TagMatchMode(const std::string& tag, const std::string& mode) { + if (tag.size() <= mode.size()) return false; + return tag.substr(0, mode.size()) == mode; +} + namespace components { template @@ -103,8 +175,10 @@ void Image::SetSample(int index, new_shape.emplace_back(1); } // production - int size = std::accumulate( - new_shape.begin(), new_shape.end(), 1., [](int a, int b) { return a * b; }); + int size = + std::accumulate(new_shape.begin(), new_shape.end(), 1., [](int a, int b) { + return a * b; + }); CHECK_GT(size, 0); CHECK_LE(new_shape.size(), 3) << "shape should be something like (width, height, num_channel)"; @@ -114,7 +188,6
@@ void Image::SetSample(int index, CHECK_LT(index, num_samples_); CHECK_LE(index, num_records_); - auto entry = step_.MutableData>(index); // trick to store int8 to protobuf std::vector data_str(data.size()); for (int i = 0; i < data.size(); i++) { @@ -122,9 +195,14 @@ void Image::SetSample(int index, } Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]); NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]); - // entry.SetRaw(std::string(data_str.begin(), data_str.end())); - entry.SetRaw( + + BinaryRecord brcd( + GenBinaryRecordDir(step_.parent()->dir()), std::string(image.data(), image.data() + image.rows() * image.cols())); + brcd.tofile(); + + auto entry = step_.MutableData>(index); + entry.SetRaw(brcd.hash()); static_assert( !is_same_type::value, @@ -132,12 +210,6 @@ void Image::SetSample(int index, // set meta. entry.SetMulti(new_shape); - - // // set meta with hack - // Entry meta; - // meta.set_parent(entry.parent()); - // meta.entry = entry.entry; - // meta.SetMulti(shape); } std::string ImageReader::caption() { @@ -154,9 +226,13 @@ ImageReader::ImageRecord ImageReader::record(int offset, int index) { ImageRecord res; auto record = reader_.record(offset); auto entry = record.data(index); - auto data_str = entry.GetRaw(); - std::transform(data_str.begin(), - data_str.end(), + auto data_hash = entry.GetRaw(); + CHECK(!g_log_dir.empty()) + << "g_log_dir should be set in LogReader construction"; + BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), data_hash); + + std::transform(brcd.data.begin(), + brcd.data.end(), std::back_inserter(res.data), [](byte_t i) { return (int)(i); }); res.shape = entry.GetMulti(); diff --git a/visualdl/logic/sdk.h b/visualdl/logic/sdk.h index 88b959904..37c5e1849 100644 --- a/visualdl/logic/sdk.h +++ b/visualdl/logic/sdk.h @@ -31,25 +31,9 @@ class LogWriter { storage_.AddMode(mode); } - LogWriter AsMode(const std::string& mode) { - LogWriter writer = *this; - storage_.AddMode(mode); - writer.mode_ 
= mode; - return writer; - } + LogWriter AsMode(const std::string& mode); - /** - * create a new tablet - */ - Tablet AddTablet(const std::string& tag) { - // TODO(ChunweiYan) add string check here. - auto tmp = mode_ + "/" + tag; - string::TagEncode(tmp); - auto res = storage_.AddTablet(tmp); - res.SetCaptions(std::vector({mode_})); - res.SetTag(mode_, tag); - return res; - } + Tablet AddTablet(const std::string& tag); Storage& storage() { return storage_; } @@ -64,61 +48,26 @@ class LogWriter { */ class LogReader { public: - LogReader(const std::string& dir) : reader_(dir) {} + LogReader(const std::string& dir); void SetMode(const std::string& mode) { mode_ = mode; } - LogReader AsMode(const std::string& mode) { - auto tmp = *this; - tmp.mode_ = mode; - return tmp; - } + LogReader AsMode(const std::string& mode); const std::string& mode() { return mode_; } - TabletReader tablet(const std::string& tag) { - auto tmp = mode_ + "/" + tag; - string::TagEncode(tmp); - return reader_.tablet(tmp); - } + TabletReader tablet(const std::string& tag); - std::vector all_tags() { - auto tags = reader_.all_tags(); - auto it = - std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) { - return !TagMatchMode(tag, mode_); - }); - tags.erase(it + 1); - return tags; - } + std::vector all_tags(); - std::vector tags(const std::string& component) { - auto type = Tablet::type(component); - auto tags = reader_.tags(type); - CHECK(!tags.empty()) << "component " << component - << " has no taged records"; - std::vector res; - for (const auto& tag : tags) { - if (TagMatchMode(tag, mode_)) { - res.push_back(GenReadableTag(mode_, tag)); - } - } - return res; - } + std::vector tags(const std::string& component); StorageReader& storage() { return reader_; } static std::string GenReadableTag(const std::string& mode, - const std::string& tag) { - auto tmp = tag; - string::TagDecode(tmp); - return tmp.substr(mode.size() + 1); // including `/` - } + const std::string& tag); - static 
bool TagMatchMode(const std::string& tag, const std::string& mode) { - if (tag.size() <= mode.size()) return false; - return tag.substr(0, mode.size()) == mode; - } + static bool TagMatchMode(const std::string& tag, const std::string& mode); protected: private: diff --git a/visualdl/server/lib.py b/visualdl/server/lib.py index 23fa7b959..3d890badb 100644 --- a/visualdl/server/lib.py +++ b/visualdl/server/lib.py @@ -137,11 +137,6 @@ def get_invididual_image(storage, mode, tag, step_index, max_size=80): data = np.array(record.data(), dtype='uint8').reshape(shape) tempfile = NamedTemporaryFile(mode='w+b', suffix='.png') with Image.fromarray(data) as im: - size = max(shape[0], shape[1]) - if size > max_size: - scale = max_size * 1. / size - scaled_shape = (int(shape[0] * scale), int(shape[1] * scale)) - im = im.resize(scaled_shape) im.save(tempfile) tempfile.seek(0, 0) return tempfile diff --git a/visualdl/storage/binary_record.h b/visualdl/storage/binary_record.h new file mode 100644 index 000000000..8eeab7810 --- /dev/null +++ b/visualdl/storage/binary_record.h @@ -0,0 +1,80 @@ +#ifndef VISUALDL_STORAGE_BINARY_RECORD_H +#define VISUALDL_STORAGE_BINARY_RECORD_H + +#include +#include + +#include "visualdl/utils/filesystem.h" + +namespace visualdl { + +static std::string GenBinaryRecordDir(const std::string& dir) { // returns the "binary_records" sub-directory path under `dir` + return dir + "/binary_records"; +} + +// A storage helper for large payloads (currently just the Image component). +// Protobuf messages are limited in size, and LogWriter keeps every message +// in memory, so storing images directly in protobuf is a bad fit. A simple +// binary storage is used instead to serialize the images +// to disk.
+struct BinaryRecord { // stores one payload as the file dir/hash-of-payload + std::hash hasher; + + BinaryRecord(const std::string& dir, std::string&& data) : dir_(dir) { + data_.swap(data); // take over the payload instead of copying it + hash_ = std::to_string(hasher(data_)); + path_ = dir_ + "/" + hash_; + } + + const std::string& path() const { return path_; } + + void tofile() { // on-disk layout: raw size_t byte count, then the bytes + fs::TryRecurMkdir(dir_); + std::fstream file(path_, file.binary | file.out); + CHECK(file.is_open()) << "open " << path_ << " failed"; + + size_t size = data_.size(); + file.write(reinterpret_cast(&size), sizeof(size)); + file.write(data_.data(), data_.size()); + } + + const std::string& hash() const { return hash_; } + +private: + std::string dir_; + std::string path_; + std::string data_; + std::string hash_; +}; + +struct BinaryRecordReader { // loads dir/hash on construction and re-checks the content hash + std::string data; + std::hash hasher; + + BinaryRecordReader(const std::string& dir, const std::string& hash) + : dir_(dir), hash_(hash) { + fromfile(); + } + std::string hash() const { return std::to_string(hasher(data)); } + +protected: + void fromfile() { + std::string path = dir_ + "/" + hash_; + std::ifstream file(path, file.binary); + CHECK(file.is_open()) << " failed to open file " << path; + + size_t size = 0; // never left uninitialised when the header read falls short + file.read(reinterpret_cast(&size), sizeof(size_t)); + data.resize(size); + file.read(&data[0], size); + + CHECK_EQ(hash(), hash_) << "data broken: " << path; + } + +private: + std::string dir_; + std::string hash_; +}; + +} // namespace visualdl +#endif diff --git a/visualdl/storage/test_binary_record.cc b/visualdl/storage/test_binary_record.cc new file mode 100644 index 000000000..c995f1df7 --- /dev/null +++ b/visualdl/storage/test_binary_record.cc @@ -0,0 +1,15 @@ +#include "visualdl/storage/binary_record.h" + +#include + +using namespace visualdl; + +TEST(BinaryRecord, init) { + std::string message = "hello world"; + BinaryRecord rcd("./", std::move(message)); + rcd.tofile(); + + BinaryRecordReader reader("./", rcd.hash()); + LOG(INFO) << reader.data; + ASSERT_EQ(reader.data, "hello world"); +} diff --git a/visualdl/utils/filesystem.h
b/visualdl/utils/filesystem.h index 5569922bd..b993e86d5 100644 --- a/visualdl/utils/filesystem.h +++ b/visualdl/utils/filesystem.h @@ -7,6 +7,8 @@ #include #include +#include "visualdl/utils/logging.h" + namespace visualdl { namespace fs { @@ -44,7 +46,6 @@ bool DeSerializeFromFile(T* proto, const std::string& path) { } static void TryMkdir(const std::string& dir) { -// VLOG(1) << "try to mkdir " << dir; struct stat st = {0}; if (stat(dir.c_str(), &st) == -1) { ::mkdir(dir.c_str(), 0700); @@ -67,7 +68,6 @@ static void TryRecurMkdir(const std::string& path) { inline void Write(const std::string& path, const std::string& buffer, std::ios::openmode open_mode = std::ios::binary) { - VLOG(1) << "write to path " << path; std::ofstream file(path, open_mode); CHECK(file.is_open()) << "failed to open " << path; file.write(buffer.c_str(), buffer.size()); @@ -76,7 +76,6 @@ inline void Write(const std::string& path, inline std::string Read(const std::string& path, std::ios::openmode open_mode = std::ios::binary) { - VLOG(1) << "read from path " << path; std::string buffer; std::ifstream file(path, open_mode | std::ios::ate); CHECK(file.is_open()) << "failed to open " << path;