Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/enhance image storage #120

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ add_executable(vl_test
${PROJECT_SOURCE_DIR}/visualdl/logic/sdk_test.cc
${PROJECT_SOURCE_DIR}/visualdl/logic/histogram_test.cc
${PROJECT_SOURCE_DIR}/visualdl/storage/storage_test.cc
${PROJECT_SOURCE_DIR}/visualdl/storage/test_binary_record.cc
${PROJECT_SOURCE_DIR}/visualdl/utils/test_concurrency.cc
${PROJECT_SOURCE_DIR}/visualdl/utils/test_image.cc
${PROJECT_SOURCE_DIR}/visualdl/utils/concurrency.h
Expand Down
104 changes: 90 additions & 14 deletions visualdl/logic/sdk.cc
Original file line number Diff line number Diff line change
@@ -1,11 +1,83 @@
#include "visualdl/logic/sdk.h"

#include "visualdl/logic/histogram.h"
#include "visualdl/storage/binary_record.h"
#include "visualdl/utils/image.h"
#include "visualdl/utils/macro.h"

namespace visualdl {

// Global log dir: a hack to pass the log directory across all the components.
// One process of the VDL backend can only process a single logdir, so this
// is OK.
std::string g_log_dir;

// Returns a copy of this writer whose mode_ is `mode`.
// The mode is also added to this writer's storage_ via AddMode().
LogWriter LogWriter::AsMode(const std::string& mode) {
  LogWriter derived(*this);
  storage_.AddMode(mode);
  derived.mode_ = mode;
  return derived;
}

// Creates a new tablet for `tag` under the current mode.
// The storage key is "<mode_>/<tag>" passed through string::TagEncode; the
// tablet is captioned with the mode and tagged with (mode_, tag).
Tablet LogWriter::AddTablet(const std::string& tag) {
  // TODO(ChunweiYan) add string check here.
  auto encoded_tag = mode_ + "/" + tag;
  string::TagEncode(encoded_tag);

  auto tablet = storage_.AddTablet(encoded_tag);
  tablet.SetCaptions(std::vector<std::string>{mode_});
  tablet.SetTag(mode_, tag);
  return tablet;
}

// Builds the underlying reader from `dir` and records `dir` in the global
// g_log_dir.
LogReader::LogReader(const std::string& dir) : reader_(dir) {
  g_log_dir = dir;
}

// Returns a copy of this reader with its mode_ replaced by `mode`; this
// reader itself is left unchanged.
LogReader LogReader::AsMode(const std::string& mode) {
  LogReader scoped(*this);
  scoped.mode_ = mode;
  return scoped;
}

// Looks up the tablet stored under "<mode_>/<tag>" after the key has been
// passed through string::TagEncode.
TabletReader LogReader::tablet(const std::string& tag) {
  auto encoded_tag = mode_ + "/" + tag;
  string::TagEncode(encoded_tag);
  return reader_.tablet(encoded_tag);
}

std::vector<std::string> LogReader::all_tags() {
auto tags = reader_.all_tags();
auto it =
std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) {
return !TagMatchMode(tag, mode_);
});
tags.erase(it + 1);
return tags;
}

// Returns the readable tags (see GenReadableTag) of all records of the given
// component that match the current mode_. CHECK-fails if the component has
// no tags at all.
std::vector<std::string> LogReader::tags(const std::string& component) {
  auto component_type = Tablet::type(component);
  auto stored_tags = reader_.tags(component_type);
  CHECK(!stored_tags.empty()) << "component " << component << " has no taged records";

  std::vector<std::string> readable;
  for (const auto& stored : stored_tags) {
    if (!TagMatchMode(stored, mode_)) continue;
    readable.push_back(GenReadableTag(mode_, stored));
  }
  return readable;
}

// Decodes `tag` with string::TagDecode and strips the leading "<mode>/"
// prefix, returning the remainder.
std::string LogReader::GenReadableTag(const std::string& mode,
                                      const std::string& tag) {
  std::string decoded(tag);
  string::TagDecode(decoded);
  const auto prefix_len = mode.size() + 1;  // mode plus the `/` separator
  return decoded.substr(prefix_len);
}

// True when `tag` is strictly longer than `mode` and begins with `mode`.
bool LogReader::TagMatchMode(const std::string& tag, const std::string& mode) {
  const bool longer_than_mode = tag.size() > mode.size();
  return longer_than_mode && tag.compare(0, mode.size(), mode) == 0;
}

namespace components {

template <typename T>
Expand Down Expand Up @@ -103,8 +175,10 @@ void Image::SetSample(int index,
new_shape.emplace_back(1);
}
// production
int size = std::accumulate(
new_shape.begin(), new_shape.end(), 1., [](int a, int b) { return a * b; });
int size =
std::accumulate(new_shape.begin(), new_shape.end(), 1., [](int a, int b) {
return a * b;
});
CHECK_GT(size, 0);
CHECK_LE(new_shape.size(), 3)
<< "shape should be something like (width, height, num_channel)";
Expand All @@ -114,30 +188,28 @@ void Image::SetSample(int index,
CHECK_LT(index, num_samples_);
CHECK_LE(index, num_records_);

auto entry = step_.MutableData<std::vector<byte_t>>(index);
// trick to store int8 to protobuf
std::vector<byte_t> data_str(data.size());
for (int i = 0; i < data.size(); i++) {
data_str[i] = data[i];
}
Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]);
NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]);
// entry.SetRaw(std::string(data_str.begin(), data_str.end()));
entry.SetRaw(

BinaryRecord brcd(
GenBinaryRecordDir(step_.parent()->dir()),
std::string(image.data(), image.data() + image.rows() * image.cols()));
brcd.tofile();

auto entry = step_.MutableData<std::vector<byte_t>>(index);
entry.SetRaw(brcd.hash());

static_assert(
!is_same_type<value_t, shape_t>::value,
"value_t should not use int64_t field, this type is used to store shape");

// set meta.
entry.SetMulti(new_shape);

// // set meta with hack
// Entry<shape_t> meta;
// meta.set_parent(entry.parent());
// meta.entry = entry.entry;
// meta.SetMulti(shape);
}

std::string ImageReader::caption() {
Expand All @@ -154,9 +226,13 @@ ImageReader::ImageRecord ImageReader::record(int offset, int index) {
ImageRecord res;
auto record = reader_.record(offset);
auto entry = record.data(index);
auto data_str = entry.GetRaw();
std::transform(data_str.begin(),
data_str.end(),
auto data_hash = entry.GetRaw();
CHECK(!g_log_dir.empty())
<< "g_log_dir should be set in LogReader construction";
BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), data_hash);

std::transform(brcd.data.begin(),
brcd.data.end(),
std::back_inserter(res.data),
[](byte_t i) { return (int)(i); });
res.shape = entry.GetMulti<shape_t>();
Expand Down
69 changes: 9 additions & 60 deletions visualdl/logic/sdk.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,9 @@ class LogWriter {
storage_.AddMode(mode);
}

LogWriter AsMode(const std::string& mode) {
LogWriter writer = *this;
storage_.AddMode(mode);
writer.mode_ = mode;
return writer;
}
LogWriter AsMode(const std::string& mode);

/**
* create a new tablet
*/
Tablet AddTablet(const std::string& tag) {
// TODO(ChunweiYan) add string check here.
auto tmp = mode_ + "/" + tag;
string::TagEncode(tmp);
auto res = storage_.AddTablet(tmp);
res.SetCaptions(std::vector<std::string>({mode_}));
res.SetTag(mode_, tag);
return res;
}
Tablet AddTablet(const std::string& tag);

Storage& storage() { return storage_; }

Expand All @@ -64,61 +48,26 @@ class LogWriter {
*/
class LogReader {
public:
LogReader(const std::string& dir) : reader_(dir) {}
LogReader(const std::string& dir);

void SetMode(const std::string& mode) { mode_ = mode; }

LogReader AsMode(const std::string& mode) {
auto tmp = *this;
tmp.mode_ = mode;
return tmp;
}
LogReader AsMode(const std::string& mode);

const std::string& mode() { return mode_; }

TabletReader tablet(const std::string& tag) {
auto tmp = mode_ + "/" + tag;
string::TagEncode(tmp);
return reader_.tablet(tmp);
}
TabletReader tablet(const std::string& tag);

std::vector<std::string> all_tags() {
auto tags = reader_.all_tags();
auto it =
std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) {
return !TagMatchMode(tag, mode_);
});
tags.erase(it + 1);
return tags;
}
std::vector<std::string> all_tags();

std::vector<std::string> tags(const std::string& component) {
auto type = Tablet::type(component);
auto tags = reader_.tags(type);
CHECK(!tags.empty()) << "component " << component
<< " has no taged records";
std::vector<std::string> res;
for (const auto& tag : tags) {
if (TagMatchMode(tag, mode_)) {
res.push_back(GenReadableTag(mode_, tag));
}
}
return res;
}
std::vector<std::string> tags(const std::string& component);

StorageReader& storage() { return reader_; }

static std::string GenReadableTag(const std::string& mode,
const std::string& tag) {
auto tmp = tag;
string::TagDecode(tmp);
return tmp.substr(mode.size() + 1); // including `/`
}
const std::string& tag);

static bool TagMatchMode(const std::string& tag, const std::string& mode) {
if (tag.size() <= mode.size()) return false;
return tag.substr(0, mode.size()) == mode;
}
static bool TagMatchMode(const std::string& tag, const std::string& mode);

protected:
private:
Expand Down
5 changes: 0 additions & 5 deletions visualdl/server/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,6 @@ def get_invididual_image(storage, mode, tag, step_index, max_size=80):
data = np.array(record.data(), dtype='uint8').reshape(shape)
tempfile = NamedTemporaryFile(mode='w+b', suffix='.png')
with Image.fromarray(data) as im:
size = max(shape[0], shape[1])
if size > max_size:
scale = max_size * 1. / size
scaled_shape = (int(shape[0] * scale), int(shape[1] * scale))
im = im.resize(scaled_shape)
im.save(tempfile)
tempfile.seek(0, 0)
return tempfile
Expand Down
80 changes: 80 additions & 0 deletions visualdl/storage/binary_record.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#ifndef VISUALDL_STORAGE_BINARY_RECORD_H
#define VISUALDL_STORAGE_BINARY_RECORD_H

#include <fstream>
#include <functional>
#include <string>
#include <utility>

#include "visualdl/utils/filesystem.h"

namespace visualdl {

// Returns the binary-record directory path for `dir`, i.e. `dir` with
// "/binary_records" appended.
static std::string GenBinaryRecordDir(const std::string& dir) {
  std::string record_dir(dir);
  record_dir += "/binary_records";
  return record_dir;
}

// A storage helper to save large files (currently just for the Image
// component). The protobuf message has some limitation on message size, and
// LogWriter will maintain a memory of all the messages, so it is bad to store
// images directly in protobuf. A simple binary storage is used to serialize
// images to disk instead.
//
// File layout written by tofile(): the payload length as a native size_t,
// followed by the payload bytes. The file name is the decimal std::hash of
// the payload.
struct BinaryRecord {
  std::hash<std::string> hasher;

  // Takes over `data` (moved, not copied) and derives the record's hash and
  // path (`dir`/<hash>) from it.
  BinaryRecord(const std::string dir, std::string&& data)
      : dir_(dir), data_(std::move(data)) {
    // `data` has been moved from; hash the stored copy.
    hash_ = std::to_string(hasher(data_));
    path_ = dir_ + "/" + hash_;
  }

  // Full path of the file tofile() writes to.
  const std::string& path() const { return path_; }

  // Creates the directory if needed and writes the record to path().
  // CHECK-fails if the file cannot be opened or the write does not succeed.
  void tofile() {
    fs::TryRecurMkdir(dir_);
    std::fstream file(path_, file.binary | file.out);
    CHECK(file.is_open()) << "open " << path_ << " failed";

    const size_t size = data_.size();
    file.write(reinterpret_cast<const char*>(&size), sizeof(size));
    file.write(data_.data(), data_.size());
    // Flush so that a failed write is visible in the stream state here
    // instead of being silently dropped when the stream is destroyed.
    file.flush();
    CHECK(file.good()) << "write " << path_ << " failed";
  }

  // Decimal string of std::hash over the payload; also the file name.
  const std::string& hash() const { return hash_; }

 private:
  std::string dir_;
  std::string path_;
  std::string data_;
  std::string hash_;
};

// Loads a record from `dir`/<hash> on construction. The file is expected to
// hold a native size_t payload length followed by that many payload bytes;
// the payload ends up in `data`.
struct BinaryRecordReader {
  std::string data;
  std::hash<std::string> hasher;

  // Reads the record immediately; CHECK-fails if the file cannot be opened,
  // is truncated, or its content does not hash to `hash`.
  BinaryRecordReader(const std::string& dir, const std::string& hash)
      : dir_(dir), hash_(hash) {
    fromfile();
  }

  // Decimal string of std::hash over the currently loaded `data`.
  std::string hash() const { return std::to_string(hasher(data)); }

 protected:
  void fromfile() {
    const std::string path = dir_ + "/" + hash_;
    std::ifstream file(path, file.binary);
    CHECK(file.is_open()) << " failed to open file " << path;

    // Zero-initialised so a failed header read can never feed an
    // indeterminate length into resize().
    size_t size = 0;
    file.read(reinterpret_cast<char*>(&size), sizeof(size));
    CHECK(file.good()) << "failed to read size header: " << path;

    data.resize(size);
    if (size > 0) {
      file.read(&data[0], size);
      CHECK(file.good()) << "data truncated: " << path;
    }

    CHECK_EQ(hash(), hash_) << "data broken: " << path;
  }

 private:
  std::string dir_;
  std::string hash_;
};

} // namespace visualdl
#endif
15 changes: 15 additions & 0 deletions visualdl/storage/test_binary_record.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "visualdl/storage/binary_record.h"

#include <gtest/gtest.h>

using namespace visualdl;

// Round trip: write a record into the current directory, read it back by its
// hash, and expect the original payload.
TEST(BinaryRecord, init) {
  std::string payload = "hello world";
  BinaryRecord record("./", std::move(payload));
  record.tofile();

  BinaryRecordReader loaded("./", record.hash());
  LOG(INFO) << loaded.data;
  ASSERT_EQ(loaded.data, "hello world");
}
5 changes: 2 additions & 3 deletions visualdl/utils/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <unistd.h>
#include <fstream>

#include "visualdl/utils/logging.h"

namespace visualdl {

namespace fs {
Expand Down Expand Up @@ -44,7 +46,6 @@ bool DeSerializeFromFile(T* proto, const std::string& path) {
}

static void TryMkdir(const std::string& dir) {
// VLOG(1) << "try to mkdir " << dir;
struct stat st = {0};
if (stat(dir.c_str(), &st) == -1) {
::mkdir(dir.c_str(), 0700);
Expand All @@ -67,7 +68,6 @@ static void TryRecurMkdir(const std::string& path) {
inline void Write(const std::string& path,
const std::string& buffer,
std::ios::openmode open_mode = std::ios::binary) {
VLOG(1) << "write to path " << path;
std::ofstream file(path, open_mode);
CHECK(file.is_open()) << "failed to open " << path;
file.write(buffer.c_str(), buffer.size());
Expand All @@ -76,7 +76,6 @@ inline void Write(const std::string& path,

inline std::string Read(const std::string& path,
std::ios::openmode open_mode = std::ios::binary) {
VLOG(1) << "read from path " << path;
std::string buffer;
std::ifstream file(path, open_mode | std::ios::ate);
CHECK(file.is_open()) << "failed to open " << path;
Expand Down