readium · danielweck · Dec 12, 2014 · Dec 12, 2014
diff --git a/ePub3/ePub/package.cpp b/ePub3/ePub/package.cpp
@@ -181,11 +181,19 @@ ConstManifestItemPtr PackageBase::ManifestItemAtRelativePath(const string& path)
 
     // Edge case...
     // before giving up, let's check for lower/upper-case percent encoding mismatch (e.g. %2B vs. %2b)
+    // (well, we're normalising to un-escaped paths)
 
     //if ( path.find("%") != std::string::npos ) SOMETIMES OPF MANIFEST ITEM HREF IS PERCENT-ESCAPED, BUT NOT HTML SRC !!
 
     url_canon::RawCanonOutputW<256> output;
-    url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.size()), &output);
+
+    // SEE BELOW FOR A DEBUGGING BREAKPOINT / CHECK
+    // Note: std::string::size() is the same as ePub3::string::utf8_size()
+    // (defined in utfstring.h; equivalent to strlen(str.c_str())),
+    // but it is NOT the same as ePub3::string::size()!
+    // 'path' is an ePub3::string, so pass utf8_size() to match the c_str() buffer.
+    url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.utf8_size()), &output);
+
     string path_(output.data(), output.length());
 
     string absPath_ = _pathBase + (path_[0] == '/' ? path_.substr(1) : path_);
@@ -194,7 +202,24 @@ ConstManifestItemPtr PackageBase::ManifestItemAtRelativePath(const string& path)
         string absolute = item.second->AbsolutePath();
 
         url_canon::RawCanonOutputW<256> output_;
-        url_util::DecodeURLEscapeSequences(absolute.c_str(), static_cast<int>(absolute.size()), &output_);
+
+//        THIS IS FOR DEBUGGING, SEE COMMENT BELOW ...
+        const char * absChars = absolute.c_str();
+        int absLength_STRLEN = strlen(absChars);
+        int absLength_SIZE = static_cast<int>(absolute.size());
+        int absLength_UTF8SIZE = static_cast<int>(absolute.utf8_size());
+        if (absLength_STRLEN != absLength_SIZE || absLength_STRLEN != absLength_UTF8SIZE || absLength_SIZE != absLength_UTF8SIZE)
+        {
+            // Place breakpoint here
+            //printf("String length DIFF absLength_STRLEN:%d - absLength_SIZE:%d - absLength_UTF8SIZE:%d\n", absLength_STRLEN, absLength_SIZE, absLength_UTF8SIZE);
+        }
+
+        // Note: std::string::size() is the same as ePub3::string::utf8_size()
+        // (defined in utfstring.h; equivalent to strlen(str.c_str())),
+        // but it is NOT the same as ePub3::string::size()!
+        // 'absolute' is an ePub3::string, so pass utf8_size() to match the c_str() buffer.
+        url_util::DecodeURLEscapeSequences(absolute.c_str(), static_cast<int>(absolute.utf8_size()), &output_);
+
         string absolute_(output_.data(), output_.length());
 
         if (absolute_ == absPath_)
@@ -1233,7 +1258,9 @@ shared_ptr<ManifestItem> Package::ManifestItemForCFI(ePub3::CFI &cfi, CFI* pRema
 
 unique_ptr<ByteStream> Package::ReadStreamForRelativePath(const string &path) const
 {
-    return _archive->ByteStreamAtPath(_Str(_pathBase, path.stl_str()));
+    string absPath = _pathBase + (path[0] == '/' ? path.substr(1) : path);
+    // Previous form, kept for reference: _Str(_pathBase, path.stl_str()). It joined _pathBase and path as-is; the line above first drops a leading '/' from path.
+    return _archive->ByteStreamAtPath(absPath);
 }
 
 #ifdef SUPPORT_ASYNC

diff --git a/ePub3/utilities/byte_stream.h b/ePub3/utilities/byte_stream.h
@@ -43,7 +43,13 @@ static string Sanitized(const string& path)
     if ( path.find("%") != std::string::npos )
     {
         url_canon::RawCanonOutputW<256> output;
-        url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.size()), &output);
+
+        // Note: std::string::size() is the same as ePub3::string::utf8_size()
+        // (defined in utfstring.h; equivalent to strlen(str.c_str())),
+        // but it is NOT the same as ePub3::string::size()!
+        // 'path' is an ePub3::string, so pass utf8_size() to match the c_str() buffer.
+        url_util::DecodeURLEscapeSequences(path.c_str(), static_cast<int>(path.utf8_size()), &output);
+
         string path_(output.data(), output.length());
 
         if ( path_.find('/') == 0 )

diff --git a/ePub3/utilities/iri.cpp b/ePub3/utilities/iri.cpp
@@ -179,7 +179,14 @@ const string IRI::Path(bool urlEncoded) const
         return encodedPath;
 
     url_canon::RawCanonOutputW<256> output;
-    url_util::DecodeURLEscapeSequences(encodedPath.c_str(), static_cast<int>(encodedPath.size()), &output);
+
+    // Note: encodedPath is a std::string here, so .size() is equivalent to
+    // strlen(str.c_str()). That is the same value ePub3::string::utf8_size()
+    // (defined in utfstring.h) would give, but NOT the same as ePub3::string::size()!
+    // Keep this in mind if the type of encodedPath ever changes.
+    int length = static_cast<int>(encodedPath.size());
+
+    url_util::DecodeURLEscapeSequences(encodedPath.c_str(), length, &output);
     return string(output.data(), output.length());
 }
 const CFI IRI::ContentFragmentIdentifier() const

diff --git a/ePub3/utilities/utfstring.cpp b/ePub3/utilities/utfstring.cpp
@@ -126,6 +126,8 @@ string::string(const xmlChar * pos, const xmlChar * end) : _base(reinterpret_cas
 }
 string::size_type string::size() const _NOEXCEPT
 {
+    // NOTE: ePub3::string::utf8_size() returns _base.size() (the underlying std::string's size),
+    // whereas this size() returns to_utf32_size(_base.size()), which is not necessarily the same value as std::string::size()!
     return to_utf32_size(_base.size());
 }
 void string::resize(size_type n, value_type c)