From 73264c76d8fa9715c5a1b1b6a805509e1a3ff442 Mon Sep 17 00:00:00 2001 From: Pierre Dejoue Date: Thu, 10 Aug 2023 15:26:50 +0200 Subject: [PATCH] Use stdutils::io for file parsing --- src/picross/src/picross_io.cpp | 63 ++--- src/stdutils/CMakeLists.txt | 1 + src/stdutils/include/stdutils/io.h | 256 ++++++++++++++++++++ src/stdutils/src/io.cpp | 37 +++ src/tests/stdutils/CMakeLists.txt | 1 + src/tests/stdutils/src/test_io.cpp | 372 +++++++++++++++++++++++++++++ 6 files changed, 688 insertions(+), 42 deletions(-) create mode 100644 src/stdutils/include/stdutils/io.h create mode 100644 src/stdutils/src/io.cpp create mode 100644 src/tests/stdutils/src/test_io.cpp diff --git a/src/picross/src/picross_io.cpp b/src/picross/src/picross_io.cpp index 81eca78..98353e6 100644 --- a/src/picross/src/picross_io.cpp +++ b/src/picross/src/picross_io.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include +#include #include #include @@ -110,7 +111,7 @@ class FileParser std::istringstream iss(line_to_parse); std::string token; - // Copy the first word in 'token' + // Copy the first word in 'token' (trailing whitespaces are skipped) iss >> token; if (token == "GRID") @@ -167,14 +168,11 @@ class FileParser } else if (token == "#") { - // Comment line are ignored - } - else if (token.empty()) - { - // Empty lines are ignored + // Comment lines are ignored } else { + assert(!token.empty()); // Blank lines are already filtered out error_decorator(error_handler, "Invalid token " + token); } } @@ -240,9 +238,7 @@ class FileParser { UNUSED(error_handler); - // Ignore whiteline - if (line_to_parse.empty() || std::all_of(line_to_parse.cbegin(), line_to_parse.cend(), [](char c) { return c == '\t' || c == ' '; } )) - return; + // Blank lines are already filtered out // Ignore comments if (line_to_parse[0] == '#') @@ -390,12 +386,9 @@ class FileParser { // Copy the first word in 'token' iss >> token; + assert(!token.empty()); // Blank lines are already filtered out - if (token.empty()) - { - // An empty line outside of the rows or columns sections can just be ignored - } - else if (token == "title") + if (token == "title") { std::stringbuf remaining; iss >> &remaining; @@ -525,8 +518,7 @@ class FileParser if (pos0 != std::string::npos && pos1 != std::string::npos) { // Extract text in quotes - assert(pos1 > pos0); - return str.substr(pos0 + 1, pos1 - pos0 - 1); + return (pos0 + 1 < pos1) ? str.substr(pos0 + 1, pos1 - pos0 - 1) : ""; } else { @@ -600,32 +592,19 @@ std::vector parse_input_file_generic(std::string_view filepath, const Er std::ifstream inputstream(filepath.data()); if (inputstream.is_open()) { + // Start line by line parsing FileParser parser; - // Line buffer and stream - char line_buf[BUF_SZ]; - std::stringstream line_ss; - unsigned int line_nb = 0; - // Start parsing - while (inputstream.good()) + auto line_stream = stdutils::io::SkipLineStream(inputstream).skip_blank_lines(); + std::string line; + while (line_stream.getline(line)) { - const bool fail = inputstream.getline(line_buf, BUF_SZ).fail(); - line_ss << line_buf; - if (!fail) - { - line_nb++; - std::string line = line_ss.str(); - parser.parse_line(line, grids, [line_nb, &line, &error_handler](std::string_view msg) - { - std::ostringstream oss; - oss << "[" << msg << "] on line " << line_nb << ": " << line; - error_handler(ErrorCode::PARSING_ERROR, oss.str()); - }); - line_ss.str(""); - } - else if (!inputstream.eof()) - { - inputstream.clear(); // Line is longer than BUF_SZ - } + const auto line_nb = line_stream.line_nb(); + parser.parse_line(line, grids, [line_nb, &line, &error_handler](std::string_view msg) + { + std::ostringstream oss; + oss << "[" << msg << "] on line " << line_nb << ": " << line; + error_handler(ErrorCode::PARSING_ERROR, oss.str()); + }); } std::for_each(grids.begin(), grids.end(), [&result](GridComponents& grid_comps) { auto& new_grid = result.emplace_back( @@ -704,11 +683,11 @@ void write_metadata_non_format(std::ostream& out, const std::map +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace stdutils +{ +namespace io +{ + +/** + * IO error handling + * + * Severity code: + * Negative: Non-recoverable, output should be ignored. + * Positive: Output is usable despite the errors. + * + * The user is free to expand or override the list of severity codes proposed below + */ +using SeverityCode = int; +struct Severity +{ + static constexpr SeverityCode FATAL = -2; + static constexpr SeverityCode EXCPT = -1; + static constexpr SeverityCode ERR = 1; + static constexpr SeverityCode WARN = 2; +}; + +std::string_view str_severity_code(SeverityCode code); + +using ErrorMessage = std::string_view; + +using ErrorHandler = std::function; + +/** + * Pass a file to a parser of std::basic_istream + */ +template +using StreamParser = std::function>&, const stdutils::io::ErrorHandler&)>; + +template +Ret open_and_parse_file(const std::filesystem::path& filepath, const StreamParser& stream_parser, const stdutils::io::ErrorHandler& err_handler) noexcept; + +/** + * LineStream: A wrapper around std::getline to count line nb + */ +template +class Basic_LineStream +{ +public: + using stream_type = std::basic_istream>; + + Basic_LineStream(stream_type& source); + + const stream_type& stream() const { return m_stream; }; + const std::size_t line_nb() const { return m_line_nb; }; + + // Return the last value of static_cast(std::getline(...)) + // When false is returned, neither the content of out_str nor the line_nb() should be trusted. + // Note: When EOF is first encountered, true is returned. false will be returned on the next call. + bool getline(std::basic_string& out_str); + +private: + stream_type& m_stream; + std::size_t m_line_nb; +}; + +using LineStream = Basic_LineStream; + +/** + * SkipLineStream: A LineStream with a set of conditions to skip lines + */ +template +class Basic_SkipLineStream +{ +public: + Basic_SkipLineStream(typename Basic_LineStream::stream_type& source); + Basic_SkipLineStream(const Basic_LineStream& linestream); // Implicit conversion from Basic_LineStream + + const typename Basic_LineStream::stream_type& stream() const { return m_linestream.stream(); }; + const std::size_t line_nb() const { return m_linestream.line_nb(); }; + + // Skip lines + Basic_SkipLineStream& skip_empty_lines(); + Basic_SkipLineStream& skip_blank_lines(); + Basic_SkipLineStream& skip_comment_lines(std::string_view comment_token); + + // See notes on Basic_LineStream::getline() + bool getline(std::basic_string& out_str); + +private: + bool skip_line(const std::string& line) const; + + Basic_LineStream m_linestream; + std::vector m_skip_tokens; + bool m_skip_empty_lines; + bool m_skip_blank_lines; +}; + +using SkipLineStream = Basic_SkipLineStream; + +/** + * countlines() + * + * The number of lines is the same whether that the last one ends with '\n' or not. + */ +template +std::size_t countlines(std::basic_istream>& istream); + +// +// +// IMPLEMENTATION +// +// + +template +Ret open_and_parse_file(const std::filesystem::path& filepath, const StreamParser& stream_parser, const stdutils::io::ErrorHandler& err_handler) noexcept +{ + static_assert(std::is_nothrow_default_constructible_v); + try + { + std::basic_ifstream inputstream(filepath); + if (inputstream.is_open()) + { + return stream_parser(inputstream, err_handler); + } + else + { + std::stringstream oss; + oss << "Cannot open file " << filepath; + err_handler(stdutils::io::Severity::FATAL, oss.str()); + } + } + catch(const std::exception& e) + { + std::stringstream oss; + oss << "Exception: " << e.what(); + err_handler(stdutils::io::Severity::EXCPT, oss.str()); + } + return Ret(); +} + +template +Basic_LineStream::Basic_LineStream(stream_type& source) + : m_stream(source) + , m_line_nb(0) +{ +} + +template +bool Basic_LineStream::getline(std::basic_string& out_str) +{ + bool no_fail = !m_stream.fail(); + if (no_fail) + { + m_line_nb++; + no_fail = static_cast(std::getline(m_stream, out_str)); + } + return no_fail; +} + + +template +Basic_SkipLineStream::Basic_SkipLineStream(typename Basic_LineStream::stream_type& source) + : m_linestream(source) + , m_skip_tokens() + , m_skip_empty_lines(false) + , m_skip_blank_lines(false) +{} + +template +Basic_SkipLineStream::Basic_SkipLineStream(const Basic_LineStream& linestream) + : m_linestream(linestream) + , m_skip_tokens() + , m_skip_empty_lines(false) + , m_skip_blank_lines(false) +{} + +template +Basic_SkipLineStream& Basic_SkipLineStream::skip_empty_lines() +{ + m_skip_empty_lines = true; + return *this; +} + +template +Basic_SkipLineStream& Basic_SkipLineStream::skip_blank_lines() +{ + m_skip_empty_lines = true; + m_skip_blank_lines = true; + return *this; +} + +template +Basic_SkipLineStream& Basic_SkipLineStream::skip_comment_lines(std::string_view comment_token) +{ + m_skip_tokens.emplace_back(comment_token); + return *this; +} + +template +bool Basic_SkipLineStream::skip_line(const std::string& line) const +{ + if (m_skip_empty_lines && line.empty()) + { + return true; + } + if (m_skip_blank_lines || !m_skip_tokens.empty()) + { + std::istringstream iss(line); + std::string token; + iss >> token; // leading whitespaces are skipped + if (m_skip_blank_lines && token.empty()) + { + return true; + } + return std::any_of(m_skip_tokens.cbegin(), m_skip_tokens.cend(), [&token](const auto& comment_token) { + return token.substr(0, comment_token.size()) == comment_token; + }); + } + return false; +} + +template +bool Basic_SkipLineStream::getline(std::basic_string& out_str) +{ + bool no_fail = false; + bool skip = false; + do + { + no_fail = m_linestream.getline(out_str); + skip = no_fail && skip_line(out_str); + } while (skip); + return no_fail; +} + +template +std::size_t countlines(std::basic_istream>& istream) +{ + LineStream line_stream(istream); + std::size_t result = 0; + std::string line; + while (line_stream.getline(line)) { result++; } + return result; +} + +} // namespace io +} // namespace stdutils diff --git a/src/stdutils/src/io.cpp b/src/stdutils/src/io.cpp new file mode 100644 index 0000000..e7e57f1 --- /dev/null +++ b/src/stdutils/src/io.cpp @@ -0,0 +1,37 @@ + +#include + +#include + + +namespace stdutils +{ +namespace io +{ + +std::string_view str_severity_code(SeverityCode code) +{ + if (code == Severity::FATAL) + { + return "FATAL"; + } + else if (code == Severity::EXCPT) + { + return "EXCPT"; + } + else if (code == Severity::WARN) + { + return "WARNING"; + } + else if (code == Severity::ERR) + { + return "ERROR"; + } + else + { + return "UNKNOWN"; + } +} + +} +} diff --git a/src/tests/stdutils/CMakeLists.txt b/src/tests/stdutils/CMakeLists.txt index 527642d..c067904 100644 --- a/src/tests/stdutils/CMakeLists.txt +++ b/src/tests/stdutils/CMakeLists.txt @@ -4,6 +4,7 @@ include(catch2) set(UTESTS_SOURCES + src/test_io.cpp src/test_span.cpp ) diff --git a/src/tests/stdutils/src/test_io.cpp b/src/tests/stdutils/src/test_io.cpp new file mode 100644 index 0000000..c8a116e --- /dev/null +++ b/src/tests/stdutils/src/test_io.cpp @@ -0,0 +1,372 @@ +#include + +#include + +#include +#include + + +// Not really a test, just going step by step while the user is reading from a stream with std::getline +TEST_CASE("Basics of std::basic_istream", "[stdutils::io]") +{ + static const char* example_txt = +R"(a +b + +c)"; + + std::istringstream sstream(example_txt); + REQUIRE(sstream.good() == true); + bool no_failure = false; + std::string line; + + no_failure = static_cast(std::getline(sstream, line)); + REQUIRE(no_failure == true); + CHECK(line == "a"); + CHECK(sstream.good() == true); + CHECK(sstream.eof() == false); + CHECK(sstream.fail() == false); + CHECK(sstream.bad() == false); + + no_failure = static_cast(std::getline(sstream, line)); + REQUIRE(no_failure == true); + CHECK(line == "b"); + + no_failure = static_cast(std::getline(sstream, line)); + REQUIRE(no_failure == true); + CHECK(line == ""); + + no_failure = static_cast(std::getline(sstream, line)); // Last line + REQUIRE(no_failure == true); + CHECK(line == "c"); + CHECK(sstream.good() == false); + CHECK(sstream.eof() == true); + CHECK(sstream.fail() == false); + CHECK(sstream.bad() == false); + + no_failure = static_cast(std::getline(sstream, line)); // Past last line + REQUIRE(no_failure == false); + CHECK(line == "c"); // The output line is untampered with + CHECK(sstream.good() == false); + CHECK(sstream.eof() == true); + CHECK(sstream.fail() == true); + CHECK(sstream.bad() == false); +} + +TEST_CASE("LineStream on a one-liner", "[stdutils::io]") +{ + static const char* oneliner = "They threw the Warden of Heaven out of the airlock!"; + + std::istringstream sstream(oneliner); + stdutils::io::LineStream line_stream(sstream); + REQUIRE(line_stream.stream().good() == true); + + std::string line; + bool no_failure = line_stream.getline(line); + REQUIRE(no_failure == true); + CHECK(line.empty() == false); + CHECK(line_stream.line_nb() == 1); + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == false); + + no_failure = line_stream.getline(line); // read past EOF + REQUIRE(no_failure == false); + // Once getline returns false, the content of the line string and the line_nb are irrelevant + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == true); + + no_failure = line_stream.getline(line); // still past EOF + REQUIRE(no_failure == false); + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == true); +} + +TEST_CASE("LineStream on a multiline text", "[stdutils::io]") +{ + // 6 non-empty lines + 1 empty line (the first one) + static const char* example_txt = R"( +I've seen things you people wouldn't believe. +Attack ships on fire off the shoulder of Orion. +I watched C-beams glitter in the dark near the Tannhauser gate. +All those moments will be lost in time... +like tears in rain... +Time to die.)"; + + std::istringstream sstream(example_txt); + stdutils::io::LineStream line_stream(sstream); + REQUIRE(line_stream.stream().good() == true); + + // Parse all lines (including the empty ones) + std::string line; + std::size_t count_parsed_lines = 0; + while(line_stream.getline(line)) + { + count_parsed_lines++; + CHECK(line.empty() == (count_parsed_lines == 1)); + CHECK(line_stream.line_nb() == count_parsed_lines); + CHECK(line_stream.stream().fail() == false); // But, good() might be false and eof() might be true + } + CHECK(count_parsed_lines == 7); + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == true); + CHECK(line_stream.stream().bad() == false); + + // Read past failure + const bool no_failure = line_stream.getline(line); + REQUIRE(no_failure == false); + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == true); + CHECK(line_stream.stream().bad() == false); +} + +TEST_CASE("countlines on empty string", "[stdutils::io]") +{ + static const char* test = ""; + std::istringstream istream(test); + CHECK(stdutils::io::countlines(istream) == 0); +} + +TEST_CASE("countlines on one-liner", "[stdutils::io]") +{ + static const char* test = "hello" ; + std::istringstream istream(test); + CHECK(stdutils::io::countlines(istream) == 1); +} + +TEST_CASE("countlines on multiline no eof", "[stdutils::io]") +{ + static const char* test = +R"(hello + +world)"; + std::istringstream istream(test); + CHECK(stdutils::io::countlines(istream) == 3); +} + +TEST_CASE("countlines on multiline with eof", "[stdutils::io]") +{ + static const char* test = +R"(hello + +world +)"; + std::istringstream istream(test); + CHECK(stdutils::io::countlines(istream) == 3); +} + +TEST_CASE("SkipLineStream to skip empty lines", "[stdutils::io]") +{ + // 6 non-empty lines + static const char* example_txt = R"( +I've seen things you people wouldn't believe. +Attack ships on fire off the shoulder of Orion. +I watched C-beams glitter in the dark near the Tannhauser gate. + +All those moments will be lost in time... + +like tears in rain... + +Time to die. + + +)"; + + std::istringstream sstream(example_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_empty_lines(); + REQUIRE(line_stream.stream().good() == true); + + // Read all lines, skipping the empty ones + std::string line; + std::size_t count_non_empty_lines = 0; + while(line_stream.getline(line)) + { + count_non_empty_lines++; + CHECK(line.empty() == false); + CHECK(line_stream.line_nb() >= count_non_empty_lines); + } + CHECK(count_non_empty_lines == 6); + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == true); + CHECK(line_stream.stream().bad() == false); + + // Read past failure + const bool no_failure = line_stream.getline(line); + REQUIRE(no_failure == false); + CHECK(line_stream.stream().good() == false); + CHECK(line_stream.stream().eof() == true); + CHECK(line_stream.stream().fail() == true); + CHECK(line_stream.stream().bad() == false); +} + +TEST_CASE("SkipLineStream to skip blank lines", "[stdutils::io]") +{ + static const char* example_txt = "a\n\n \n\t\nb\n\n"; + + std::istringstream sstream(example_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_blank_lines(); + + std::string line; + bool no_failure = line_stream.getline(line); + REQUIRE(no_failure == true); + CHECK(line == "a"); + CHECK(line_stream.line_nb() == 1); + + no_failure = line_stream.getline(line); // Last non-blank line + REQUIRE(no_failure == true); + CHECK(line == "b"); + CHECK(line_stream.line_nb() == 5); + + no_failure = line_stream.getline(line); // Past the last line + CHECK(no_failure == false); + // Once getline returns false, the content of the line string and the line_nb are irrelevant +} + +TEST_CASE("SkipLineStream to skip empty lines and comments", "[stdutils::io]") +{ + + // 10 lines of code + 5 empty lines + lots of comments + static const char* python_txt = R"( +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import itertools + +# Puzzle. The goal is to draw a path from A to S with some constraints +# +# A +# B C +# D E F +# G H +# I J K +# L M +# N O P +# Q R +# S +# + +def dfs(graph, start, exit_condition): + for path in dfs_recurse(graph, [ start ], exit_condition): + yield path + +# +# Main() +# +def main(): + total_count = 0 + credits_ok_count = 0 + count_by_length = { } + +if __name__ == "__main__": + main() +)"; + const std::size_t expected_lines_of_code = 10; + const std::size_t expected_empty_lines = 5; + + std::size_t count_lines_of_code = 0; + std::size_t count_lines_non_empty = 0; + std::size_t count_lines_of_code_or_empty = 0; + std::size_t count_lines_total = 0; + std::string line; + + { + std::istringstream sstream(python_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_empty_lines().skip_comment_lines("#"); + while(line_stream.getline(line)) + { + count_lines_of_code++; + } + CHECK(count_lines_of_code == expected_lines_of_code); + } + { + std::istringstream sstream(python_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_empty_lines(); + while(line_stream.getline(line)) + { + count_lines_non_empty++; + } + CHECK(count_lines_non_empty > expected_lines_of_code); + } + { + std::istringstream sstream(python_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_comment_lines("#"); + while(line_stream.getline(line)) + { + count_lines_of_code_or_empty++; + } + CHECK(count_lines_of_code_or_empty == expected_lines_of_code + expected_empty_lines); + } + { + std::istringstream sstream(python_txt); + count_lines_total = stdutils::io::countlines(sstream); + } + CHECK(count_lines_non_empty + expected_empty_lines == count_lines_total); +} + +TEST_CASE("SkipLineStream to skip comments", "[stdutils::io]") +{ + + static const char* commented_txt = +R"(/ a +// b +/// c +// +/ +d +e + / a + // b + /// c + // + /)"; + + std::string line; + { + std::istringstream sstream(commented_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_comment_lines("/"); + std::size_t count_lines_without_comment = 0; + while(line_stream.getline(line)) + { + count_lines_without_comment++; + } + CHECK(count_lines_without_comment == 2); + } + { + std::istringstream sstream(commented_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_comment_lines("//"); + std::size_t count_lines_without_comment = 0; + while(line_stream.getline(line)) + { + count_lines_without_comment++; + } + CHECK(count_lines_without_comment == 6); + } + { + std::istringstream sstream(commented_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_comment_lines("///"); + std::size_t count_lines_without_comment = 0; + while(line_stream.getline(line)) + { + count_lines_without_comment++; + } + CHECK(count_lines_without_comment == 10); + } + { + std::istringstream sstream(commented_txt); + auto line_stream = stdutils::io::SkipLineStream(sstream).skip_comment_lines("////"); + std::size_t count_lines_without_comment = 0; + while(line_stream.getline(line)) + { + count_lines_without_comment++; + } + CHECK(count_lines_without_comment == 12); + } + { + std::istringstream sstream(commented_txt); + CHECK(stdutils::io::countlines(sstream) == 12); + } +}