Skip to content

Commit

Permalink
addresses #120 (URL percent decode / sanitize)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielweck committed Nov 10, 2014
1 parent cb174c1 commit bc86c60
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 22 deletions.
28 changes: 28 additions & 0 deletions ePub3/ePub/package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
#include <ePub3/xml/document.h>
#include <ePub3/xml/element.h>

//#include "iri.h"
#include <google-url/url_canon.h>
#include <google-url/url_util.h>

EPUB3_BEGIN_NAMESPACE

#define _XML_OVERRIDE_SWITCHES (EPUB_USE(LIBXML2) && PROMISCUOUS_LIBXML_OVERRIDES == 0)
Expand Down Expand Up @@ -174,6 +178,30 @@ ConstManifestItemPtr PackageBase::ManifestItemAtRelativePath(const string& path)
if (item.second->AbsolutePath() == absPath)
return item.second;
}

// Edge case...
// before giving up, let's check for lower/upper-case percent encoding mismatch (e.g. %2B vs. %2b)

//if ( path.find("%") != std::string::npos ) SOMETIMES OPF MANIFEST ITEM HREF IS PERCENT-ESCAPED, BUT NOT HTML SRC !!

url_canon::RawCanonOutputW<256> output;
url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.size()), &output);
string path_(output.data(), output.length());

string absPath_ = _pathBase + (path_[0] == '/' ? path_.substr(1) : path_);
for (auto& item : _manifest)
{
string absolute = item.second->AbsolutePath();

url_canon::RawCanonOutputW<256> output_;
url_util::DecodeURLEscapeSequences(absolute.c_str(), static_cast<int>(absolute.size()), &output_);
string absolute_(output_.data(), output_.length());

if (absolute_ == absPath_)
return item.second;
}

printf("MISSING ManifestItemAtRelativePath %s (%s)\n", path.c_str(), absPath.c_str());
return nullptr;
}
shared_ptr<NavigationTable> PackageBase::NavigationTable(const string &title) const
Expand Down
6 changes: 0 additions & 6 deletions ePub3/ePub/zip_archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,6 @@ ArchiveItemInfo ZipArchive::InfoAtPath(const string & path) const
throw std::runtime_error(std::string("zip_stat("+path.stl_str()+") - " + zip_strerror(_zip)));
return ZipItemInfo(sbuf);
}
/// Returns a copy of @a path with its leading '/' removed.
/// A path that does not begin with '/' is returned unchanged.
string ZipArchive::Sanitized(const string& path) const
{
    const bool hasLeadingSlash = (path.find('/') == 0);
    return hasLeadingSlash ? path.substr(1) : path;
}

void ZipWriter::DataBlob::Append(const void *data, size_t len)
{
Expand Down
5 changes: 1 addition & 4 deletions ePub3/ePub/zip_archive.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,7 @@ class ZipArchive : public Archive

typedef std::list<zip_source*> ZipSourceList;
ZipSourceList _liveSources; ///< A list of live zip sources, which must be cleaned up upon closing.

///
/// Sanitizes a path string, since `libzip` can be finicky about them.
string Sanitized(const string& path) const;

};

EPUB3_END_NAMESPACE
Expand Down
7 changes: 1 addition & 6 deletions ePub3/utilities/byte_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,12 +763,7 @@ bool ZipFileByteStream::IsOpen() const _NOEXCEPT
{
return _file != nullptr;
}
/// Strips a single leading '/' from @a path, if present.
/// @param path The path to sanitize.
/// @result The path without its leading slash, or an unmodified copy when
///         the first '/' is not at position 0.
string ZipFileByteStream::Sanitized(const string& path) const
{
    // Guard clause: nothing to strip unless the very first character is '/'.
    if ( path.find('/') != 0 )
        return path;

    return path.substr(1);
}

bool ZipFileByteStream::Open(struct zip *archive, const string &path, int flags)
{
if ( _file != nullptr )
Expand Down
31 changes: 25 additions & 6 deletions ePub3/utilities/byte_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,31 @@
struct zip;
struct zip_file;

//#include "iri.h"
#include <google-url/url_canon.h>
#include <google-url/url_util.h>

EPUB3_BEGIN_NAMESPACE

/**
static string Sanitized(const string& path)
{
if ( path.find("%") != std::string::npos )
{
url_canon::RawCanonOutputW<256> output;
url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.size()), &output);
string path_(output.data(), output.length());

if ( path_.find('/') == 0 )
return path_.substr(1);
return path_;
}

if ( path.find('/') == 0 )
return path.substr(1);
return path;
}

/**
The abstract base class for all stream and pipe objects used by the Readium SDK.
This class declares the standard interface for a stream-- that is, an object to
Expand All @@ -57,7 +79,7 @@ class ByteStream
///
/// A value to be returned when a real count is not possible.
static const size_type UnknownSize = 0;

public:
ByteStream() : _eof(false), _err(0) {}
virtual ~ByteStream() {}
Expand Down Expand Up @@ -606,10 +628,7 @@ class ZipFileByteStream : public SeekableByteStream
protected:
struct zip_file* _file; ///< The underlying Zip file stream.
std::ios::openmode _mode; ///< The mode used to open the file (used by Clone()).

///
/// Sanitizes a path string by removing a leading '/'.
string Sanitized(const string& path) const;

};

#ifdef SUPPORT_ASYNC
Expand Down

0 comments on commit bc86c60

Please sign in to comment.