Skip to content

Commit

Permalink
src: remove icu usage from node_string.cc
Browse files Browse the repository at this point in the history
PR-URL: #46548
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Michael Dawson <midawson@redhat.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
anonrig authored and MylesBorins committed Feb 20, 2023
1 parent 20cb13b commit bdba600
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 47 deletions.
81 changes: 42 additions & 39 deletions src/inspector/node_string.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#include "node_string.h"
#include "node/inspector/protocol/Protocol.h"
#include "node_util.h"
#include "simdutf.h"

#include <unicode/unistr.h>

namespace node {
namespace inspector {
namespace protocol {
Expand All @@ -12,27 +11,34 @@ namespace StringUtil {
size_t kNotFound = std::string::npos;

// Appends `string` (UTF-8) to `builder` as a double-quoted string.
// The text is transcoded to UTF-16 and passed to escapeWideStringForJSON().
// If `string` is empty or is not valid UTF-8, nothing is written between the
// two quote characters.
// NOLINTNEXTLINE(runtime/references) V8 API requirement
void builderAppendQuotedString(StringBuilder& builder,
                               const std::string_view string) {
  builder.put('"');
  if (!string.empty()) {
    const size_t expected_utf16_length =
        simdutf::utf16_length_from_utf8(string.data(), string.length());
    MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
    // simdutf::convert_utf8_to_utf16 returns zero in case of error, i.e. when
    // the input is not valid UTF-8.
    const size_t utf16_length = simdutf::convert_utf8_to_utf16(
        string.data(), string.length(), buffer.out());
    if (utf16_length != 0) {
      // For valid input the precomputed length must match what was written.
      CHECK_EQ(expected_utf16_length, utf16_length);
      escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
                              utf16_length,
                              &builder);
    }  // Otherwise the input was invalid UTF-8: emit an empty quoted string
       // instead of aborting the process.
  }
  builder.put('"');
}

std::unique_ptr<Value> parseJSON(const String& string) {
std::unique_ptr<Value> parseJSON(const std::string_view string) {
if (string.empty())
return nullptr;

icu::UnicodeString utf16 =
icu::UnicodeString::fromUTF8(icu::StringPiece(string.data(),
string.length()));
return parseJSONCharacters(
reinterpret_cast<const uint16_t*>(utf16.getBuffer()), utf16.length());
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length);
}

std::unique_ptr<Value> parseJSON(v8_inspector::StringView string) {
Expand All @@ -50,24 +56,15 @@ String StringViewToUtf8(v8_inspector::StringView view) {
return std::string(reinterpret_cast<const char*>(view.characters8()),
view.length());
}
const uint16_t* source = view.characters16();
const UChar* unicodeSource = reinterpret_cast<const UChar*>(source);
static_assert(sizeof(*source) == sizeof(*unicodeSource),
"sizeof(*source) == sizeof(*unicodeSource)");

size_t result_length = view.length() * sizeof(*source);
std::string result(result_length, '\0');
icu::UnicodeString utf16(unicodeSource, view.length());
// ICU components for std::string compatibility are not enabled in build...
bool done = false;
while (!done) {
icu::CheckedArrayByteSink sink(&result[0], result_length);
utf16.toUTF8(sink);
result_length = sink.NumberOfBytesAppended();
result.resize(result_length);
done = !sink.Overflowed();
}
return result;
const char16_t* source =
reinterpret_cast<const char16_t*>(view.characters16());
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(source, view.length());
MaybeStackBuffer<char> buffer(expected_utf8_length);
size_t utf8_length =
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
}

String fromDouble(double d) {
Expand All @@ -86,7 +83,8 @@ double toDouble(const char* buffer, size_t length, bool* ok) {
return d;
}

std::unique_ptr<Value> parseMessage(const std::string& message, bool binary) {
std::unique_ptr<Value> parseMessage(const std::string_view message,
bool binary) {
if (binary) {
return Value::parseBinary(
reinterpret_cast<const uint8_t*>(message.data()),
Expand All @@ -109,16 +107,21 @@ String fromUTF8(const uint8_t* data, size_t length) {
}

// Converts `length` UTF-16 code units starting at `data` into a UTF-8 String.
// Returns an empty String when the input is empty or is not valid UTF-16
// (for example, when it contains an unpaired surrogate).
String fromUTF16(const uint16_t* data, size_t length) {
  const auto* casted_data = reinterpret_cast<const char16_t*>(data);
  const size_t expected_utf8_length =
      simdutf::utf8_length_from_utf16(casted_data, length);
  MaybeStackBuffer<char> buffer(expected_utf8_length);
  // simdutf::convert_utf16_to_utf8 returns zero in case of error.
  const size_t utf8_length =
      simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
  // utf8_length equals expected_utf8_length for valid UTF-16 input; for
  // invalid input it is zero. Anything else indicates a logic error.
  CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
  // Invalid UTF-16 input therefore yields the empty string.
  return String(buffer.out(), utf8_length);
}

// Returns a pointer to the first byte of `s`, reinterpreted as uint8_t.
// The pointer refers to the storage viewed by `s`; no copy is made, so it is
// only usable while that storage is alive.
const uint8_t* CharactersUTF8(const std::string_view s) {
  return reinterpret_cast<const uint8_t*>(s.data());
}

size_t CharacterCount(const String& s) {
size_t CharacterCount(const std::string_view s) {
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
return simdutf::utf32_length_from_utf8(s.data(), s.length());
}
Expand Down
21 changes: 13 additions & 8 deletions src/inspector/node_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,26 @@ double toDouble(const char* buffer, size_t length, bool* ok);
String StringViewToUtf8(v8_inspector::StringView view);

// NOLINTNEXTLINE(runtime/references)
void builderAppendQuotedString(StringBuilder& builder, const String&);
std::unique_ptr<Value> parseJSON(const String&);
void builderAppendQuotedString(StringBuilder& builder, const std::string_view);
std::unique_ptr<Value> parseJSON(const std::string_view);
std::unique_ptr<Value> parseJSON(v8_inspector::StringView view);

std::unique_ptr<Value> parseMessage(const std::string& message, bool binary);
std::unique_ptr<Value> parseMessage(const std::string_view message,
bool binary);
ProtocolMessage jsonToMessage(String message);
ProtocolMessage binaryToMessage(std::vector<uint8_t> message);
String fromUTF8(const uint8_t* data, size_t length);
String fromUTF16(const uint16_t* data, size_t length);
const uint8_t* CharactersUTF8(const String& s);
size_t CharacterCount(const String& s);
const uint8_t* CharactersUTF8(const std::string_view s);
size_t CharacterCount(const std::string_view s);

// Unimplemented. The generated code will fall back to CharactersUTF8().
// Both functions ignore their argument and always return nullptr.
inline uint8_t* CharactersLatin1(const std::string_view s) {
  return nullptr;
}
inline const uint16_t* CharactersUTF16(const std::string_view s) {
  return nullptr;
}

extern size_t kNotFound;
} // namespace StringUtil
Expand All @@ -92,7 +97,7 @@ class Binary {
const uint8_t* data() const { UNREACHABLE(); }
size_t size() const { UNREACHABLE(); }
String toBase64() const { UNREACHABLE(); }
static Binary fromBase64(const String& base64, bool* success) {
static Binary fromBase64(const std::string_view base64, bool* success) {
UNREACHABLE();
}
static Binary fromSpan(const uint8_t* data, size_t size) { UNREACHABLE(); }
Expand Down

0 comments on commit bdba600

Please sign in to comment.