Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sha256 Presto function #1000

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions velox/docs/functions/binary.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Binary Functions

Computes the md5 hash of ``binary``.

.. function:: sha256(binary) -> varbinary

Computes the SHA-256 hash of ``binary``.

.. function:: to_base64(binary) -> varchar

Encodes ``binary`` into a base64 string representation.
Expand Down
11 changes: 11 additions & 0 deletions velox/functions/lib/string/StringImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <vector>
#include "folly/CPortability.h"
#include "folly/Likely.h"
#include "folly/ssl/OpenSSLHash.h"
#include "velox/common/base/Exceptions.h"
#include "velox/common/encode/Base64.h"
#include "velox/external/md5/md5.h"
Expand Down Expand Up @@ -275,6 +276,16 @@ FOLLY_ALWAYS_INLINE bool md5_radix(
return true;
}

/// Computes the SHA-256 hash of `input` and stores the digest bytes in
/// `output`.
///
/// @param output Destination buffer. It is resized to 32 bytes and then
/// overwritten with the digest.
/// @param input Bytes to hash; only its data() and size() are read.
/// @return Always true.
template <typename TOutString, typename TInString>
FOLLY_ALWAYS_INLINE bool sha256(TOutString& output, const TInString& input) {
  // A SHA-256 digest is 256 bits wide, i.e. 32 bytes.
  constexpr int kSha256DigestBytes = 32;
  output.resize(kSha256DigestBytes);

  // View both buffers as unsigned bytes, which is what the folly hash API
  // operates on.
  const folly::ByteRange source(
      reinterpret_cast<const uint8_t*>(input.data()), input.size());
  const folly::MutableByteRange digest(
      reinterpret_cast<uint8_t*>(output.data()), output.size());

  folly::ssl::OpenSSLHash::sha256(digest, source);
  return true;
}

template <typename TOutString, typename TInString>
FOLLY_ALWAYS_INLINE bool toHex(TOutString& output, const TInString& input) {
static const char* const kHexTable =
Expand Down
11 changes: 11 additions & 0 deletions velox/functions/prestosql/StringFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ struct Md5Function {
}
};

/// Scalar function sha256(varbinary) -> varbinary.
/// Thin adapter that delegates the hashing to stringImpl::sha256.
template <typename T>
struct Sha256Function {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Hashes `input` and writes the digest into `result`.
  /// @return The value reported by stringImpl::sha256.
  template <typename TOutput, typename TInput>
  FOLLY_ALWAYS_INLINE bool call(TOutput& result, const TInput& input) {
    const bool succeeded = stringImpl::sha256(result, input);
    return succeeded;
  }
};

template <typename T>
struct ToHexFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ void registerSimpleFunctions() {
// Register hash functions.
registerFunction<XxHash64Function, Varbinary, Varbinary>({"xxhash64"});
registerFunction<Md5Function, Varbinary, Varbinary>({"md5"});
registerFunction<Sha256Function, Varbinary, Varbinary>({"sha256"});

registerFunction<ToHexFunction, Varchar, Varbinary>({"to_hex"});
registerFunction<FromHexFunction, Varbinary, Varchar>({"from_hex"});
Expand Down
37 changes: 30 additions & 7 deletions velox/functions/prestosql/tests/StringFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,15 @@ int expectedLength(int i) {
}

/// Decodes a hexadecimal string into the raw bytes it encodes: every pair of
/// hex digits in `str` yields one output byte, with the first digit of the
/// pair as the high nibble.
///
/// Works for any even-length input, so it can build expected values for
/// hashes of different digest sizes.
///
/// @param str Hex text; its length must be even (enforced by VELOX_CHECK_EQ).
/// @return A string of str.size() / 2 bytes.
std::string hexToDec(const std::string& str) {
  VELOX_CHECK_EQ(str.size() % 2, 0);
  std::string out;
  out.resize(str.size() / 2);
  // Index with size_t so the comparison against out.size() does not mix
  // signed and unsigned operands.
  for (size_t i = 0; i < out.size(); ++i) {
    int high = facebook::velox::functions::stringImpl::fromHex(str[2 * i]);
    int low = facebook::velox::functions::stringImpl::fromHex(str[2 * i + 1]);
    out[i] = (high << 4) | (low & 0xf);
  }
  return out;
}
} // namespace

Expand Down Expand Up @@ -1029,6 +1030,28 @@ TEST_F(StringFunctionsTest, md5) {
EXPECT_EQ(std::nullopt, md5(std::nullopt));
}

// Verifies sha256() against known digests (including the empty input) and
// checks that a null input produces a null result.
TEST_F(StringFunctionsTest, sha256) {
  // Evaluates sha256 over a single optional VARBINARY argument.
  const auto computeDigest = [&](std::optional<std::string> value) {
    return evaluateOnce<std::string, std::string>(
        "sha256(c0)", {value}, {VARBINARY()});
  };

  // Pairs of {input, expected digest written as hex text}.
  const std::vector<std::pair<std::string, std::string>> knownDigests = {
      {"hashme",
       "02208b9403a87df9f4ed6b2ee2657efaa589026b4cce9accc8e8a5bf3d693c86"},
      {"Infinity",
       "d0067cad9a63e0813759a2bb841051ca73570c0da2e08e840a8eb45db6a7a010"},
      {"", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
  };

  for (const auto& [input, expectedHex] : knownDigests) {
    EXPECT_EQ(hexToDec(expectedHex), computeDigest(input));
  }

  EXPECT_EQ(std::nullopt, computeDigest(std::nullopt));
}

void StringFunctionsTest::testReplaceInPlace(
const std::vector<std::pair<std::string, std::string>>& tests,
const std::string& search,
Expand Down