Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std::string vs. ePub3::string .size() methods #142

Merged
merged 1 commit into from
Dec 12, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions ePub3/ePub/package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,19 @@ ConstManifestItemPtr PackageBase::ManifestItemAtRelativePath(const string& path)

// Edge case...
// before giving up, let's check for lower/upper-case percent encoding mismatch (e.g. %2B vs. %2b)
// (well, we're normalising to un-escaped paths)

//if ( path.find("%") != std::string::npos ) SOMETIMES OPF MANIFEST ITEM HREF IS PERCENT-ESCAPED, BUT NOT HTML SRC !!

url_canon::RawCanonOutputW<256> output;
url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.size()), &output);

// SEE BELOW FOR A DEBUGGING BREAKPOINT / CHECK
// note that std::string .size() is the same as
// ePub3::string .utf8_size() defined in utfstring.h (equivalent to strlen(str.c_str())),
// but not the same as ePub3::string .size() !!
// WATCH OUT!
url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.utf8_size()), &output);

string path_(output.data(), output.length());

string absPath_ = _pathBase + (path_[0] == '/' ? path_.substr(1) : path_);
Expand All @@ -194,7 +202,24 @@ ConstManifestItemPtr PackageBase::ManifestItemAtRelativePath(const string& path)
string absolute = item.second->AbsolutePath();

url_canon::RawCanonOutputW<256> output_;
url_util::DecodeURLEscapeSequences(absolute.c_str(), static_cast<int>(absolute.size()), &output_);

// THIS IS FOR DEBUGGING, SEE COMMENT BELOW ...
const char * absChars = absolute.c_str();
int absLength_STRLEN = strlen(absChars);
int absLength_SIZE = static_cast<int>(absolute.size());
int absLength_UTF8SIZE = static_cast<int>(absolute.utf8_size());
if (absLength_STRLEN != absLength_SIZE || absLength_STRLEN != absLength_UTF8SIZE || absLength_SIZE != absLength_UTF8SIZE)
{
// Place breakpoint here
//printf("String length DIFF absLength_STRLEN:%d - absLength_SIZE:%d - absLength_UTF8SIZE:%d\n", absLength_STRLEN, absLength_SIZE, absLength_UTF8SIZE);
}

// note that std::string .size() is the same as
// ePub3::string .utf8_size() defined in utfstring.h (equivalent to strlen(str.c_str())),
// but not the same as ePub3::string .size() !!
// WATCH OUT!
url_util::DecodeURLEscapeSequences(absolute.c_str(), static_cast<int>(absolute.utf8_size()), &output_);

string absolute_(output_.data(), output_.length());

if (absolute_ == absPath_)
Expand Down Expand Up @@ -1233,7 +1258,9 @@ shared_ptr<ManifestItem> Package::ManifestItemForCFI(ePub3::CFI &cfi, CFI* pRema

// Returns the result of _archive->ByteStreamAtPath() for a path built from
// _pathBase and the supplied package-relative path.
// NOTE(review): this listing comes from a diff rendering whose +/- markers were
// lost. The first return below looks like the pre-change line and the statements
// after it like its replacement; as written here they are unreachable.
// Confirm against the actual file before relying on either form.
unique_ptr<ByteStream> Package::ReadStreamForRelativePath(const string &path) const
{
// Presumably the pre-change form: builds the archive path with _Str(...) -- TODO confirm.
return _archive->ByteStreamAtPath(_Str(_pathBase, path.stl_str()));
// Drops a single leading '/' from path (if present) before appending it to _pathBase.
string absPath = _pathBase + (path[0] == '/' ? path.substr(1) : path);
//_Str(_pathBase, path.stl_str())
return _archive->ByteStreamAtPath(absPath);
}

#ifdef SUPPORT_ASYNC
Expand Down
8 changes: 7 additions & 1 deletion ePub3/utilities/byte_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ static string Sanitized(const string& path)
if ( path.find("%") != std::string::npos )
{
url_canon::RawCanonOutputW<256> output;
url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.size()), &output);

// note that std::string .size() is the same as
// ePub3::string .utf8_size() defined in utfstring.h (equivalent to strlen(str.c_str())),
// but not the same as ePub3::string .size() !!
// WATCH OUT!
url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.utf8_size()), &output);

string path_(output.data(), output.length());

if ( path_.find('/') == 0 )
Expand Down
9 changes: 8 additions & 1 deletion ePub3/utilities/iri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,14 @@ const string IRI::Path(bool urlEncoded) const
return encodedPath;

url_canon::RawCanonOutputW<256> output;
url_util::DecodeURLEscapeSequences(encodedPath.c_str(), static_cast<int>(encodedPath.size()), &output);

// note that .size() is on std::string here (equivalent to strlen(str.c_str())),
// which is in fact the same as ePub3::string .utf8_size() defined in utfstring.h,
// but not the same as ePub3::string .size() !!
// WATCH OUT!
int length = static_cast<int>(encodedPath.size());

url_util::DecodeURLEscapeSequences(encodedPath.c_str(), length, &output);
return string(output.data(), output.length());
}
const CFI IRI::ContentFragmentIdentifier() const
Expand Down
2 changes: 2 additions & 0 deletions ePub3/utilities/utfstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ string::string(const xmlChar * pos, const xmlChar * end) : _base(reinterpret_cas
}
string::size_type string::size() const _NOEXCEPT
{
    // ePub3::string::utf8_size() reports _base.size() (the underlying std::string
    // size) unchanged, whereas size() passes that value through to_utf32_size(),
    // so the two results are not necessarily equal.
    const auto utf8Length = _base.size();
    return to_utf32_size(utf8Length);
}
void string::resize(size_type n, value_type c)
Expand Down