Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize int128 division normalization #171

Merged
merged 2 commits into from
Jun 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 22 additions & 36 deletions include/intx/int128.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,55 +659,41 @@ inline div_result<uint128> udivrem(uint128 x, uint128 y) noexcept
{
INTX_REQUIRE(y.lo != 0); // Division by 0.

uint64_t xn_ex, xn_hi, xn_lo, yn;

auto lsh = clz(y.lo);
if (lsh != 0)
{
auto rsh = 64 - lsh;
xn_ex = x.hi >> rsh;
xn_hi = (x.lo >> rsh) | (x.hi << lsh);
xn_lo = x.lo << lsh;
yn = y.lo << lsh;
}
else
{
xn_ex = 0;
xn_hi = x.hi;
xn_lo = x.lo;
yn = y.lo;
}

auto v = reciprocal_2by1(yn);

auto res = udivrem_2by1({xn_ex, xn_hi}, yn, v);
auto q1 = res.quot;

res = udivrem_2by1({res.rem, xn_lo}, yn, v);

return {{q1, res.quot}, res.rem >> lsh};
const auto lsh = clz(y.lo);
const auto rsh = (64 - lsh) % 64;
const auto rsh_mask = uint64_t{lsh == 0} - 1;

const auto yn = y.lo << lsh;
const auto xn_lo = x.lo << lsh;
const auto xn_hi = (x.hi << lsh) | ((x.lo >> rsh) & rsh_mask);
const auto xn_ex = (x.hi >> rsh) & rsh_mask;

const auto v = reciprocal_2by1(yn);
const auto res1 = udivrem_2by1({xn_ex, xn_hi}, yn, v);
const auto res2 = udivrem_2by1({res1.rem, xn_lo}, yn, v);
return {{res1.quot, res2.quot}, res2.rem >> lsh};
}

if (y.hi > x.hi)
return {0, x};

auto lsh = clz(y.hi);
const auto lsh = clz(y.hi);
if (lsh == 0)
{
const auto q = unsigned{y.hi < x.hi} | unsigned{y.lo <= x.lo};
return {q, x - (q ? y : 0)};
}

auto rsh = 64 - lsh;
const auto rsh = 64 - lsh;

auto yn_lo = y.lo << lsh;
auto yn_hi = (y.lo >> rsh) | (y.hi << lsh);
auto xn_ex = x.hi >> rsh;
auto xn_hi = (x.lo >> rsh) | (x.hi << lsh);
auto xn_lo = x.lo << lsh;
const auto yn_lo = y.lo << lsh;
const auto yn_hi = (y.hi << lsh) | (y.lo >> rsh);
const auto xn_lo = x.lo << lsh;
const auto xn_hi = (x.hi << lsh) | (x.lo >> rsh);
const auto xn_ex = x.hi >> rsh;

auto v = reciprocal_3by2({yn_hi, yn_lo});
auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v);
const auto v = reciprocal_3by2({yn_hi, yn_lo});
const auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v);

return {res.quot, res.rem >> lsh};
}
Expand Down
1 change: 1 addition & 0 deletions test/benchmarks/bench_int128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ static void udiv128(benchmark::State& state)
{
const uint128 inputs[][2] = {
{0x537e3fbc5318dbc0e7e47d96b32ef2d5_u128, 0x395df916dfd1b5e38ae7c47ce8a620f_u128},
{0x837e3fbc5318dbc0e7e47d96b32ef2d5_u128, 0x895df916dfd1b5e38ae7c47ce8a620f_u128},
{0xee657725ff64cd48b8fe188a09dc4f78_u128, 3}, // worst shift
{0x0e657725ff64cd48b8fe188a09dc4f78_u128, 0xe7e47d96b32ef2d5}, // single long normalized
{0x0e657725ff64cd48b8fe188a09dc4f78_u128, 0x77e47d96b32ef2d5}, // single long
Expand Down