From 68cc5ce410b036a05d455030ed8fd87a19aa66bc Mon Sep 17 00:00:00 2001 From: Daniel Mendler Date: Tue, 29 Oct 2019 08:51:09 +0100 Subject: [PATCH] apply a series of simplifications * This is in preparation for the size_t change/a potential representation change to use full width as in tfm, if a (partial?) merge with tfm is desired. These changes have their own merits, however. * Remove obfuscating tmpx digit pointers (fewer variables, it is more obvious what is being manipulated) * Reduce scope of variables where possible * Stricter error handling/checking (for example handling in karatsuba was broken) * In some cases the result was written even in the case of an error (e.g. s_mp_prime_is_divisible). Doing so hides bugs, since the user should check the return value (enforced by MP_WUR). Furthermore, if the user accesses the uninitialized result, valgrind will complain for example. Global static analysis like Coverity will also detect the issue. Therefore this improves the status quo. * Introduce generic, private MP_EXCH macro which can be used to swap values. * Some control flow simplifications, e.g., loops instead of goto * Some renamings of variables/labels for consistency * I didn't read through some very complex functions. They are so complex, I am too afraid and lazy to touch them. Maybe someone responsible wants to simplify them if possible. Hint... Hint... 
- mp_prime_strong_lucas_selfridge.c - s_mp_exptmod.c - s_mp_exptmod_fast.c --- libtommath_VS2008.vcproj | 4 -- makefile | 32 +++++----- makefile.mingw | 32 +++++----- makefile.msvc | 32 +++++----- makefile.shared | 32 +++++----- makefile.unix | 32 +++++----- mp_add_d.c | 39 ++++-------- mp_div_2.c | 19 ++---- mp_div_2d.c | 34 ++++------ mp_div_d.c | 9 +-- mp_dr_reduce.c | 56 +++++++--------- mp_mod_d.c | 10 --- mp_montgomery_reduce.c | 59 +++++++---------- mp_mul.c | 16 ++--- mp_mul_2.c | 61 ++++++++---------- mp_mul_2d.c | 27 ++++---- mp_mul_d.c | 27 +++----- mp_prime_fermat.c | 12 +--- mp_prime_frobenius_underwood.c | 66 +++++++++---------- mp_prime_is_prime.c | 14 ++-- mp_prime_miller_rabin.c | 28 ++++---- mp_prime_next_prime.c | 27 ++++---- mp_prime_strong_lucas_selfridge.c | 2 +- mp_radix_size.c | 5 +- mp_reduce_2k.c | 35 +++++----- mp_reduce_2k_l.c | 33 +++++----- mp_reduce_2k_setup.c | 14 ++-- mp_root.c | 4 +- mp_sub_d.c | 27 +++----- s_mp_add.c | 85 ++++++++++--------------- s_mp_balance_mul.c | 32 ++++------ s_mp_get_bit.c | 6 +- s_mp_invmod_fast.c | 74 +++++++++++----------- s_mp_invmod_slow.c | 90 +++++++++++++------------- s_mp_karatsuba_mul.c | 81 ++++++++++-------------- s_mp_karatsuba_sqr.c | 60 +++++++----------- s_mp_log.c | 11 ++-- s_mp_log_d.c | 15 ++--- s_mp_montgomery_reduce_fast.c | 102 ++++++++++-------------------- s_mp_mul_digs.c | 29 +++------ s_mp_mul_digs_fast.c | 32 ++++------ s_mp_mul_high_digs.c | 28 +++----- s_mp_mul_high_digs_fast.c | 29 +++------ s_mp_prime_is_divisible.c | 18 +++--- s_mp_sqr.c | 28 ++++---- s_mp_sqr_fast.c | 32 ++++------ s_mp_sub.c | 75 +++++++++------------- s_mp_toom_sqr.c | 12 +--- tommath.def | 1 - tommath.h | 2 +- tommath_class.h | 14 ++-- tommath_private.h | 2 +- 52 files changed, 682 insertions(+), 964 deletions(-) delete mode 100644 mp_mod_d.c diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index 6d473db0e..fe5aa34db 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ 
-576,10 +576,6 @@ RelativePath="mp_mod_2d.c" > - - diff --git a/makefile b/makefile index fd0f4c2ae..06be6595b 100644 --- a/makefile +++ b/makefile @@ -34,22 +34,22 @@ mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \ mp_init_i32.o mp_init_i64.o mp_init_l.o mp_init_ll.o mp_init_multi.o mp_init_set.o mp_init_size.o \ mp_init_u32.o mp_init_u64.o mp_init_ul.o mp_init_ull.o mp_invmod.o mp_is_square.o mp_kronecker.o mp_lcm.o \ -mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_mod_d.o mp_montgomery_calc_normalization.o \ -mp_montgomery_reduce.o mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o \ -mp_neg.o mp_or.o mp_pack.o mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o \ -mp_prime_is_prime.o mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o \ -mp_prime_rand.o mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o \ -mp_rand.o mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o \ -mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o \ -mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o \ -mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o \ -mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o \ -s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o \ -s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o \ -s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ -s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o 
s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o \ -s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o +mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_montgomery_calc_normalization.o mp_montgomery_reduce.o \ +mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o mp_neg.o mp_or.o mp_pack.o \ +mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o mp_prime_is_prime.o \ +mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o mp_prime_rand.o \ +mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o mp_rand.o \ +mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o \ +mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o mp_sbin_size.o mp_set.o \ +mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o \ +mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o \ +mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o \ +s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o \ +s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ +s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_fast.o \ +s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o \ +s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ +s_mp_toom_mul.o s_mp_toom_sqr.o #END_INS diff --git a/makefile.mingw b/makefile.mingw index 03090e741..36487479a 100644 --- a/makefile.mingw +++ b/makefile.mingw @@ -37,22 +37,22 @@ mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \ 
mp_init_i32.o mp_init_i64.o mp_init_l.o mp_init_ll.o mp_init_multi.o mp_init_set.o mp_init_size.o \ mp_init_u32.o mp_init_u64.o mp_init_ul.o mp_init_ull.o mp_invmod.o mp_is_square.o mp_kronecker.o mp_lcm.o \ -mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_mod_d.o mp_montgomery_calc_normalization.o \ -mp_montgomery_reduce.o mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o \ -mp_neg.o mp_or.o mp_pack.o mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o \ -mp_prime_is_prime.o mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o \ -mp_prime_rand.o mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o \ -mp_rand.o mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o \ -mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o \ -mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o \ -mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o \ -mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o \ -s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o \ -s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o \ -s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ -s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o \ -s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o +mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_montgomery_calc_normalization.o mp_montgomery_reduce.o \ +mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o mp_neg.o mp_or.o mp_pack.o \ +mp_pack_count.o 
mp_prime_fermat.o mp_prime_frobenius_underwood.o mp_prime_is_prime.o \ +mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o mp_prime_rand.o \ +mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o mp_rand.o \ +mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o \ +mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o mp_sbin_size.o mp_set.o \ +mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o \ +mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o \ +mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o \ +s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o \ +s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ +s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_fast.o \ +s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o \ +s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ +s_mp_toom_mul.o s_mp_toom_sqr.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.msvc b/makefile.msvc index b78df1ea7..607b705a1 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -29,22 +29,22 @@ mp_from_ubin.obj mp_fwrite.obj mp_gcd.obj mp_get_double.obj mp_get_i32.obj mp_ge mp_get_mag_u32.obj mp_get_mag_u64.obj mp_get_mag_ul.obj mp_get_mag_ull.obj mp_grow.obj mp_init.obj mp_init_copy.obj \ mp_init_i32.obj mp_init_i64.obj mp_init_l.obj mp_init_ll.obj mp_init_multi.obj mp_init_set.obj mp_init_size.obj \ mp_init_u32.obj mp_init_u64.obj mp_init_ul.obj mp_init_ull.obj mp_invmod.obj mp_is_square.obj mp_kronecker.obj 
mp_lcm.obj \ -mp_log.obj mp_lshd.obj mp_mod.obj mp_mod_2d.obj mp_mod_d.obj mp_montgomery_calc_normalization.obj \ -mp_montgomery_reduce.obj mp_montgomery_setup.obj mp_mul.obj mp_mul_2.obj mp_mul_2d.obj mp_mul_d.obj mp_mulmod.obj \ -mp_neg.obj mp_or.obj mp_pack.obj mp_pack_count.obj mp_prime_fermat.obj mp_prime_frobenius_underwood.obj \ -mp_prime_is_prime.obj mp_prime_miller_rabin.obj mp_prime_next_prime.obj mp_prime_rabin_miller_trials.obj \ -mp_prime_rand.obj mp_prime_strong_lucas_selfridge.obj mp_prime_tab.obj mp_radix_size.obj mp_radix_smap.obj \ -mp_rand.obj mp_read_radix.obj mp_reduce.obj mp_reduce_2k.obj mp_reduce_2k_l.obj mp_reduce_2k_setup.obj \ -mp_reduce_2k_setup_l.obj mp_reduce_is_2k.obj mp_reduce_is_2k_l.obj mp_reduce_setup.obj mp_root.obj mp_rshd.obj \ -mp_sbin_size.obj mp_set.obj mp_set_double.obj mp_set_i32.obj mp_set_i64.obj mp_set_l.obj mp_set_ll.obj mp_set_u32.obj \ -mp_set_u64.obj mp_set_ul.obj mp_set_ull.obj mp_shrink.obj mp_signed_rsh.obj mp_sqr.obj mp_sqrmod.obj mp_sqrt.obj \ -mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \ -mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_balance_mul.obj s_mp_div_recursive.obj s_mp_div_school.obj \ -s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_get_bit.obj s_mp_invmod_fast.obj s_mp_invmod_slow.obj \ -s_mp_karatsuba_mul.obj s_mp_karatsuba_sqr.obj s_mp_log.obj s_mp_log_d.obj s_mp_log_pow2.obj \ -s_mp_montgomery_reduce_fast.obj s_mp_mul_digs.obj s_mp_mul_digs_fast.obj s_mp_mul_high_digs.obj \ -s_mp_mul_high_digs_fast.obj s_mp_prime_is_divisible.obj s_mp_rand_jenkins.obj s_mp_rand_platform.obj s_mp_sqr.obj \ -s_mp_sqr_fast.obj s_mp_sub.obj s_mp_toom_mul.obj s_mp_toom_sqr.obj +mp_log.obj mp_lshd.obj mp_mod.obj mp_mod_2d.obj mp_montgomery_calc_normalization.obj mp_montgomery_reduce.obj \ +mp_montgomery_setup.obj mp_mul.obj mp_mul_2.obj mp_mul_2d.obj mp_mul_d.obj mp_mulmod.obj mp_neg.obj mp_or.obj mp_pack.obj \ 
+mp_pack_count.obj mp_prime_fermat.obj mp_prime_frobenius_underwood.obj mp_prime_is_prime.obj \ +mp_prime_miller_rabin.obj mp_prime_next_prime.obj mp_prime_rabin_miller_trials.obj mp_prime_rand.obj \ +mp_prime_strong_lucas_selfridge.obj mp_prime_tab.obj mp_radix_size.obj mp_radix_smap.obj mp_rand.obj \ +mp_read_radix.obj mp_reduce.obj mp_reduce_2k.obj mp_reduce_2k_l.obj mp_reduce_2k_setup.obj mp_reduce_2k_setup_l.obj \ +mp_reduce_is_2k.obj mp_reduce_is_2k_l.obj mp_reduce_setup.obj mp_root.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj \ +mp_set_double.obj mp_set_i32.obj mp_set_i64.obj mp_set_l.obj mp_set_ll.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj \ +mp_set_ull.obj mp_shrink.obj mp_signed_rsh.obj mp_sqr.obj mp_sqrmod.obj mp_sqrt.obj mp_sqrtmod_prime.obj mp_sub.obj \ +mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj mp_unpack.obj mp_xor.obj mp_zero.obj \ +s_mp_add.obj s_mp_balance_mul.obj s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj \ +s_mp_exptmod_fast.obj s_mp_get_bit.obj s_mp_invmod_fast.obj s_mp_invmod_slow.obj s_mp_karatsuba_mul.obj \ +s_mp_karatsuba_sqr.obj s_mp_log.obj s_mp_log_d.obj s_mp_log_pow2.obj s_mp_montgomery_reduce_fast.obj \ +s_mp_mul_digs.obj s_mp_mul_digs_fast.obj s_mp_mul_high_digs.obj s_mp_mul_high_digs_fast.obj \ +s_mp_prime_is_divisible.obj s_mp_rand_jenkins.obj s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_fast.obj s_mp_sub.obj \ +s_mp_toom_mul.obj s_mp_toom_sqr.obj HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.shared b/makefile.shared index 34471079d..df3f720d9 100644 --- a/makefile.shared +++ b/makefile.shared @@ -31,22 +31,22 @@ mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \ mp_init_i32.o mp_init_i64.o mp_init_l.o mp_init_ll.o 
mp_init_multi.o mp_init_set.o mp_init_size.o \ mp_init_u32.o mp_init_u64.o mp_init_ul.o mp_init_ull.o mp_invmod.o mp_is_square.o mp_kronecker.o mp_lcm.o \ -mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_mod_d.o mp_montgomery_calc_normalization.o \ -mp_montgomery_reduce.o mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o \ -mp_neg.o mp_or.o mp_pack.o mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o \ -mp_prime_is_prime.o mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o \ -mp_prime_rand.o mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o \ -mp_rand.o mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o \ -mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o \ -mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o \ -mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o \ -mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o \ -s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o \ -s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o \ -s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ -s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o \ -s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o +mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_montgomery_calc_normalization.o mp_montgomery_reduce.o \ +mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o mp_neg.o mp_or.o mp_pack.o \ +mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o mp_prime_is_prime.o 
\ +mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o mp_prime_rand.o \ +mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o mp_rand.o \ +mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o \ +mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o mp_sbin_size.o mp_set.o \ +mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o \ +mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o \ +mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o \ +s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o \ +s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ +s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_fast.o \ +s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o \ +s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ +s_mp_toom_mul.o s_mp_toom_sqr.o #END_INS diff --git a/makefile.unix b/makefile.unix index 8134e909c..cc07c0c70 100644 --- a/makefile.unix +++ b/makefile.unix @@ -38,22 +38,22 @@ mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \ mp_init_i32.o mp_init_i64.o mp_init_l.o mp_init_ll.o mp_init_multi.o mp_init_set.o mp_init_size.o \ mp_init_u32.o mp_init_u64.o mp_init_ul.o mp_init_ull.o mp_invmod.o mp_is_square.o mp_kronecker.o mp_lcm.o \ -mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_mod_d.o mp_montgomery_calc_normalization.o \ -mp_montgomery_reduce.o mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o \ -mp_neg.o mp_or.o 
mp_pack.o mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o \ -mp_prime_is_prime.o mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o \ -mp_prime_rand.o mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o \ -mp_rand.o mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o \ -mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o mp_root.o mp_rshd.o \ -mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o \ -mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o \ -mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \ -mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o \ -s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o \ -s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o \ -s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ -s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o \ -s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o +mp_log.o mp_lshd.o mp_mod.o mp_mod_2d.o mp_montgomery_calc_normalization.o mp_montgomery_reduce.o \ +mp_montgomery_setup.o mp_mul.o mp_mul_2.o mp_mul_2d.o mp_mul_d.o mp_mulmod.o mp_neg.o mp_or.o mp_pack.o \ +mp_pack_count.o mp_prime_fermat.o mp_prime_frobenius_underwood.o mp_prime_is_prime.o \ +mp_prime_miller_rabin.o mp_prime_next_prime.o mp_prime_rabin_miller_trials.o mp_prime_rand.o \ +mp_prime_strong_lucas_selfridge.o mp_prime_tab.o mp_radix_size.o mp_radix_smap.o mp_rand.o \ +mp_read_radix.o mp_reduce.o mp_reduce_2k.o mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o \ +mp_reduce_is_2k.o mp_reduce_is_2k_l.o mp_reduce_setup.o 
mp_root.o mp_rshd.o mp_sbin_size.o mp_set.o \ +mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o \ +mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o \ +mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o \ +s_mp_add.o s_mp_balance_mul.o s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o \ +s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ +s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_fast.o \ +s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o \ +s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ +s_mp_toom_mul.o s_mp_toom_sqr.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/mp_add_d.c b/mp_add_d.c index 4508cc87b..43d50e879 100644 --- a/mp_add_d.c +++ b/mp_add_d.c @@ -6,9 +6,7 @@ /* single digit addition */ mp_err mp_add_d(const mp_int *a, mp_digit b, mp_int *c) { - mp_err err; - int ix, oldused; - mp_digit *tmpa, *tmpc; + int oldused; /* fast path for a == c */ if (a == c) { @@ -27,6 +25,7 @@ mp_err mp_add_d(const mp_int *a, mp_digit b, mp_int *c) /* grow c as required */ if (c->alloc < (a->used + 1)) { + mp_err err; if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { return err; } @@ -34,6 +33,7 @@ mp_err mp_add_d(const mp_int *a, mp_digit b, mp_int *c) /* if a is negative and |a| >= b, call c = |a| - b */ if ((a->sign == MP_NEG) && ((a->used > 1) || (a->dp[0] >= b))) { + mp_err err; mp_int a_ = *a; /* temporarily fix sign of a */ a_.sign = MP_ZPOS; @@ -53,49 +53,34 @@ mp_err mp_add_d(const mp_int *a, mp_digit b, mp_int *c) /* old number of used digits in c */ oldused = c->used; - /* source alias */ - tmpa = a->dp; - - /* destination 
alias */ - tmpc = c->dp; - /* if a is positive */ if (a->sign == MP_ZPOS) { /* add digits, mu is carry */ + int i; mp_digit mu = b; - for (ix = 0; ix < a->used; ix++) { - *tmpc = *tmpa++ + mu; - mu = *tmpc >> MP_DIGIT_BIT; - *tmpc++ &= MP_MASK; + for (i = 0; i < a->used; i++) { + c->dp[i] = a->dp[i] + mu; + mu = c->dp[i] >> MP_DIGIT_BIT; + c->dp[i] &= MP_MASK; } /* set final carry */ - ix++; - *tmpc++ = mu; + c->dp[i] = mu; /* setup size */ c->used = a->used + 1; } else { /* a was negative and |a| < b */ - c->used = 1; + c->used = 1; /* the result is a single digit */ - if (a->used == 1) { - *tmpc++ = b - a->dp[0]; - } else { - *tmpc++ = b; - } - - /* setup count so the clearing of oldused - * can fall through correctly - */ - ix = 1; + c->dp[0] = (a->used == 1) ? b - a->dp[0] : b; } /* sign always positive */ c->sign = MP_ZPOS; /* now zero to oldused */ - MP_ZERO_DIGITS(tmpc, oldused - ix); + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); mp_clamp(c); return MP_OKAY; diff --git a/mp_div_2.c b/mp_div_2.c index 60bd63d6e..573570d9d 100644 --- a/mp_div_2.c +++ b/mp_div_2.c @@ -6,12 +6,11 @@ /* b = a/2 */ mp_err mp_div_2(const mp_int *a, mp_int *b) { - int x, oldused; - mp_digit r, rr, *tmpa, *tmpb; - mp_err err; + int x, oldused; + mp_digit r; - /* copy */ if (b->alloc < a->used) { + mp_err err; if ((err = mp_grow(b, a->used)) != MP_OKAY) { return err; } @@ -20,20 +19,14 @@ mp_err mp_div_2(const mp_int *a, mp_int *b) oldused = b->used; b->used = a->used; - /* source alias */ - tmpa = a->dp + b->used - 1; - - /* dest alias */ - tmpb = b->dp + b->used - 1; - /* carry */ r = 0; - for (x = b->used - 1; x >= 0; x--) { + for (x = b->used; x --> 0;) { /* get the carry for the next iteration */ - rr = *tmpa & 1u; + mp_digit rr = a->dp[x] & 1u; /* shift the current digit, add in carry and store */ - *tmpb-- = (*tmpa-- >> 1) | (r << (MP_DIGIT_BIT - 1)); + b->dp[x] = (a->dp[x] >> 1) | (r << (MP_DIGIT_BIT - 1)); /* forward carry to next iteration */ r = rr; diff --git 
a/mp_div_2d.c b/mp_div_2d.c index 9b396acdc..e523465af 100644 --- a/mp_div_2d.c +++ b/mp_div_2d.c @@ -6,23 +6,16 @@ /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ mp_err mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d) { - mp_digit D, r, rr; - int x; mp_err err; - /* if the shift count is <= 0 then we do no work */ - if (b <= 0) { - err = mp_copy(a, c); - if (d != NULL) { - mp_zero(d); - } - return err; + if (b < 0) { + return MP_VAL; } - /* copy */ if ((err = mp_copy(a, c)) != MP_OKAY) { return err; } + /* 'a' should not be used after here - it might be the same as d */ /* get the remainder */ @@ -38,28 +31,25 @@ mp_err mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d) } /* shift any bit count < MP_DIGIT_BIT */ - D = (mp_digit)(b % MP_DIGIT_BIT); - if (D != 0u) { - mp_digit *tmpc, mask, shift; + b %= MP_DIGIT_BIT; + if (b != 0u) { + int x; + mp_digit r, mask, shift; /* mask */ - mask = ((mp_digit)1 << D) - 1uL; + mask = ((mp_digit)1 << b) - 1uL; /* shift for lsb */ - shift = (mp_digit)MP_DIGIT_BIT - D; - - /* alias */ - tmpc = c->dp + (c->used - 1); + shift = (mp_digit)(MP_DIGIT_BIT - b); /* carry */ r = 0; - for (x = c->used - 1; x >= 0; x--) { + for (x = c->used; x --> 0;) { /* get the lower bits of this word in a temp */ - rr = *tmpc & mask; + mp_digit rr = c->dp[x] & mask; /* shift the current word and mix in the carry bits from the previous word */ - *tmpc = (*tmpc >> D) | (r << shift); - --tmpc; + c->dp[x] = (c->dp[x] >> b) | (r << shift); /* set the carry to the carry bits of the current word found above */ r = rr; diff --git a/mp_div_d.c b/mp_div_d.c index 98b6b248c..472ab2796 100644 --- a/mp_div_d.c +++ b/mp_div_d.c @@ -8,7 +8,6 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) { mp_int q; mp_word w; - mp_digit t; mp_err err; int ix; @@ -56,14 +55,12 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) q.used = a->used; q.sign = a->sign; w = 0; - for (ix = 
a->used - 1; ix >= 0; ix--) { + for (ix = a->used; ix --> 0;) { + mp_digit t = 0; w = (w << (mp_word)MP_DIGIT_BIT) | (mp_word)a->dp[ix]; - if (w >= b) { t = (mp_digit)(w / b); w -= (mp_word)t * (mp_word)b; - } else { - t = 0; } q.dp[ix] = t; } @@ -78,7 +75,7 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) } mp_clear(&q); - return err; + return MP_OKAY; } #endif diff --git a/mp_dr_reduce.c b/mp_dr_reduce.c index fba0e2110..d63024649 100644 --- a/mp_dr_reduce.c +++ b/mp_dr_reduce.c @@ -19,16 +19,12 @@ */ mp_err mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k) { - mp_err err; - int i, m; - mp_word r; - mp_digit mu, *tmpx1, *tmpx2; - /* m = digits in modulus */ - m = n->used; + int m = n->used; /* ensure that "x" has at least 2m digits */ if (x->alloc < (m + m)) { + mp_err err; if ((err = mp_grow(x, m + m)) != MP_OKAY) { return err; } @@ -37,41 +33,37 @@ mp_err mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k) /* top of loop, this is where the code resumes if * another reduction pass is required. 
*/ -top: - /* aliases for digits */ - /* alias for lower half of x */ - tmpx1 = x->dp; - - /* alias for upper half of x, or x/B**m */ - tmpx2 = x->dp + m; + for (;;) { + mp_err err; + int i; + mp_digit mu = 0; - /* set carry to zero */ - mu = 0; + /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ + for (i = 0; i < m; i++) { + mp_word r = ((mp_word)x->dp[i + m] * (mp_word)k) + x->dp[i] + mu; + x->dp[i] = (mp_digit)(r & MP_MASK); + mu = (mp_digit)(r >> ((mp_word)MP_DIGIT_BIT)); + } - /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ - for (i = 0; i < m; i++) { - r = ((mp_word)*tmpx2++ * (mp_word)k) + *tmpx1 + mu; - *tmpx1++ = (mp_digit)(r & MP_MASK); - mu = (mp_digit)(r >> ((mp_word)MP_DIGIT_BIT)); - } + /* set final carry */ + x->dp[i] = mu; - /* set final carry */ - *tmpx1++ = mu; + /* zero words above m */ + MP_ZERO_DIGITS(x->dp + m + 1, (x->used - m) - 1); - /* zero words above m */ - MP_ZERO_DIGITS(tmpx1, (x->used - m) - 1); + /* clamp, sub and return */ + mp_clamp(x); - /* clamp, sub and return */ - mp_clamp(x); + /* if x >= n then subtract and reduce again + * Each successive "recursion" makes the input smaller and smaller. + */ + if (mp_cmp_mag(x, n) == MP_LT) { + break; + } - /* if x >= n then subtract and reduce again - * Each successive "recursion" makes the input smaller and smaller. 
- */ - if (mp_cmp_mag(x, n) != MP_LT) { if ((err = s_mp_sub(x, n, x)) != MP_OKAY) { return err; } - goto top; } return MP_OKAY; } diff --git a/mp_mod_d.c b/mp_mod_d.c deleted file mode 100644 index 3f7e1917f..000000000 --- a/mp_mod_d.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "tommath_private.h" -#ifdef MP_MOD_D_C -/* LibTomMath, multiple-precision integer library -- Tom St Denis */ -/* SPDX-License-Identifier: Unlicense */ - -mp_err mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c) -{ - return mp_div_d(a, b, NULL, c); -} -#endif diff --git a/mp_montgomery_reduce.c b/mp_montgomery_reduce.c index a872aba6b..6a5be2668 100644 --- a/mp_montgomery_reduce.c +++ b/mp_montgomery_reduce.c @@ -6,9 +6,7 @@ /* computes xR**-1 == x (mod N) via Montgomery Reduction */ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) { - int ix, digs; - mp_err err; - mp_digit mu; + int ix, digs; /* can the fast reduction [comba] method be used? * @@ -25,6 +23,7 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) /* grow the input as required */ if (x->alloc < digs) { + mp_err err; if ((err = mp_grow(x, digs)) != MP_OKAY) { return err; } @@ -32,6 +31,9 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) x->used = digs; for (ix = 0; ix < n->used; ix++) { + int iy; + mp_digit u, mu; + /* mu = ai * rho mod b * * The value of rho must be precalculated via @@ -43,41 +45,28 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) mu = (mp_digit)(((mp_word)x->dp[ix] * (mp_word)rho) & MP_MASK); /* a = a + mu * m * b**i */ - { - int iy; - mp_digit *tmpn, *tmpx, u; - mp_word r; - - /* alias for digits of the modulus */ - tmpn = n->dp; - - /* alias for the digits of x [the input] */ - tmpx = x->dp + ix; - - /* set the carry to zero */ - u = 0; - /* Multiply and add in place */ - for (iy = 0; iy < n->used; iy++) { - /* compute product and sum */ - r = ((mp_word)mu * (mp_word)*tmpn++) + - (mp_word)u + (mp_word)*tmpx; + /* Multiply 
and add in place */ + u = 0; + for (iy = 0; iy < n->used; iy++) { + /* compute product and sum */ + mp_word r = ((mp_word)mu * (mp_word)n->dp[iy]) + + (mp_word)u + (mp_word)x->dp[ix + iy]; - /* get carry */ - u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); + /* get carry */ + u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); - /* fix digit */ - *tmpx++ = (mp_digit)(r & (mp_word)MP_MASK); - } - /* At this point the ix'th digit of x should be zero */ - - - /* propagate carries upwards as required*/ - while (u != 0u) { - *tmpx += u; - u = *tmpx >> MP_DIGIT_BIT; - *tmpx++ &= MP_MASK; - } + /* fix digit */ + x->dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); + } + /* At this point the ix'th digit of x should be zero */ + + /* propagate carries upwards as required*/ + while (u != 0u) { + x->dp[ix + iy] += u; + u = x->dp[ix + iy] >> MP_DIGIT_BIT; + x->dp[ix + iy] &= MP_MASK; + ++iy; } } diff --git a/mp_mul.c b/mp_mul.c index 9c8f8aeda..1a7091c23 100644 --- a/mp_mul.c +++ b/mp_mul.c @@ -7,8 +7,8 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) { mp_err err; - int min_len = MP_MIN(a->used, b->used), - max_len = MP_MAX(a->used, b->used), + int min = MP_MIN(a->used, b->used), + max = MP_MAX(a->used, b->used), digs = a->used + b->used + 1; mp_sign neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; @@ -20,16 +20,16 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) * Using it to cut the input into slices small enough for s_mp_mul_digs_fast * was actually slower on the author's machine, but YMMV. */ - (min_len >= MP_KARATSUBA_MUL_CUTOFF) && - ((max_len / 2) >= MP_KARATSUBA_MUL_CUTOFF) && + (min >= MP_KARATSUBA_MUL_CUTOFF) && + ((max / 2) >= MP_KARATSUBA_MUL_CUTOFF) && /* Not much effect was observed below a ratio of 1:2, but again: YMMV. 
*/ - (max_len >= (2 * min_len))) { + (max >= (2 * min))) { err = s_mp_balance_mul(a,b,c); } else if (MP_HAS(S_MP_TOOM_MUL) && - (min_len >= MP_TOOM_MUL_CUTOFF)) { + (min >= MP_TOOM_MUL_CUTOFF)) { err = s_mp_toom_mul(a, b, c); } else if (MP_HAS(S_MP_KARATSUBA_MUL) && - (min_len >= MP_KARATSUBA_MUL_CUTOFF)) { + (min >= MP_KARATSUBA_MUL_CUTOFF)) { err = s_mp_karatsuba_mul(a, b, c); } else if (MP_HAS(S_MP_MUL_DIGS_FAST) && /* can we use the fast multiplier? @@ -39,7 +39,7 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) * digits won't affect carry propagation */ (digs < MP_WARRAY) && - (min_len <= MP_MAXFAST)) { + (min <= MP_MAXFAST)) { err = s_mp_mul_digs_fast(a, b, c, digs); } else if (MP_HAS(S_MP_MUL_DIGS)) { err = s_mp_mul_digs(a, b, c, digs); diff --git a/mp_mul_2.c b/mp_mul_2.c index cd5589dd2..45b6f1cc5 100644 --- a/mp_mul_2.c +++ b/mp_mul_2.c @@ -6,11 +6,12 @@ /* b = a*2 */ mp_err mp_mul_2(const mp_int *a, mp_int *b) { - int x, oldused; - mp_err err; + int x, oldused; + mp_digit r; /* grow to accomodate result */ if (b->alloc < (a->used + 1)) { + mp_err err; if ((err = mp_grow(b, a->used + 1)) != MP_OKAY) { return err; } @@ -19,45 +20,35 @@ mp_err mp_mul_2(const mp_int *a, mp_int *b) oldused = b->used; b->used = a->used; - { - mp_digit r, rr, *tmpa, *tmpb; + /* carry */ + r = 0; + for (x = 0; x < a->used; x++) { - /* alias for source */ - tmpa = a->dp; - - /* alias for dest */ - tmpb = b->dp; - - /* carry */ - r = 0; - for (x = 0; x < a->used; x++) { + /* get what will be the *next* carry bit from the + * MSB of the current digit + */ + mp_digit rr = a->dp[x] >> (mp_digit)(MP_DIGIT_BIT - 1); - /* get what will be the *next* carry bit from the - * MSB of the current digit - */ - rr = *tmpa >> (mp_digit)(MP_DIGIT_BIT - 1); + /* now shift up this digit, add in the carry [from the previous] */ + b->dp[x] = ((a->dp[x] << 1uL) | r) & MP_MASK; - /* now shift up this digit, add in the carry [from the previous] */ - *tmpb++ = ((*tmpa++ << 1uL) | r) & 
MP_MASK; + /* copy the carry that would be from the source + * digit into the next iteration + */ + r = rr; + } - /* copy the carry that would be from the source - * digit into the next iteration - */ - r = rr; - } + /* new leading digit? */ + if (r != 0u) { + /* add a MSB which is always 1 at this point */ + b->dp[b->used++] = 1; + } - /* new leading digit? */ - if (r != 0u) { - /* add a MSB which is always 1 at this point */ - *tmpb = 1; - ++(b->used); - } + /* now zero any excess digits on the destination + * that we didn't write to + */ + MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used); - /* now zero any excess digits on the destination - * that we didn't write to - */ - MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used); - } b->sign = a->sign; return MP_OKAY; } diff --git a/mp_mul_2d.c b/mp_mul_2d.c index 1ba53a0fc..f1016ead5 100644 --- a/mp_mul_2d.c +++ b/mp_mul_2d.c @@ -6,17 +6,19 @@ /* shift left by a certain bit count */ mp_err mp_mul_2d(const mp_int *a, int b, mp_int *c) { - mp_digit d; - mp_err err; + if (b < 0) { + return MP_VAL; + } - /* copy */ if (a != c) { + mp_err err; if ((err = mp_copy(a, c)) != MP_OKAY) { return err; } } if (c->alloc < (c->used + (b / MP_DIGIT_BIT) + 1)) { + mp_err err; if ((err = mp_grow(c, c->used + (b / MP_DIGIT_BIT) + 1)) != MP_OKAY) { return err; } @@ -24,35 +26,32 @@ mp_err mp_mul_2d(const mp_int *a, int b, mp_int *c) /* shift by as many digits in the bit count */ if (b >= MP_DIGIT_BIT) { + mp_err err; if ((err = mp_lshd(c, b / MP_DIGIT_BIT)) != MP_OKAY) { return err; } } /* shift any bit count < MP_DIGIT_BIT */ - d = (mp_digit)(b % MP_DIGIT_BIT); - if (d != 0u) { - mp_digit *tmpc, shift, mask, r, rr; + b %= MP_DIGIT_BIT; + if (b != 0u) { + mp_digit shift, mask, r; int x; /* bitmask for carries */ - mask = ((mp_digit)1 << d) - (mp_digit)1; + mask = ((mp_digit)1 << b) - (mp_digit)1; /* shift for msbs */ - shift = (mp_digit)MP_DIGIT_BIT - d; - - /* alias */ - tmpc = c->dp; + shift = (mp_digit)(MP_DIGIT_BIT - b); /* carry 
*/ r = 0; for (x = 0; x < c->used; x++) { /* get the higher bits of the current word */ - rr = (*tmpc >> shift) & mask; + mp_digit rr = (c->dp[x] >> shift) & mask; /* shift the current word and OR in the carry */ - *tmpc = ((*tmpc << d) | r) & MP_MASK; - ++tmpc; + c->dp[x] = ((c->dp[x] << b) | r) & MP_MASK; /* set the carry to the carry bits of the current word */ r = rr; diff --git a/mp_mul_d.c b/mp_mul_d.c index 399dc7b47..3e5335f42 100644 --- a/mp_mul_d.c +++ b/mp_mul_d.c @@ -6,10 +6,9 @@ /* multiply by a digit */ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c) { - mp_digit u, *tmpa, *tmpc; - mp_word r; + mp_digit u; mp_err err; - int ix, olduse; + int ix, oldused; /* make sure c is big enough to hold a*b */ if (c->alloc < (a->used + 1)) { @@ -19,41 +18,35 @@ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c) } /* get the original destinations used count */ - olduse = c->used; + oldused = c->used; /* set the sign */ c->sign = a->sign; - /* alias for a->dp [source] */ - tmpa = a->dp; - - /* alias for c->dp [dest] */ - tmpc = c->dp; - /* zero carry */ u = 0; /* compute columns */ for (ix = 0; ix < a->used; ix++) { /* compute product and carry sum for this term */ - r = (mp_word)u + ((mp_word)*tmpa++ * (mp_word)b); + mp_word r = (mp_word)u + ((mp_word)a->dp[ix] * (mp_word)b); /* mask off higher bits to get a single digit */ - *tmpc++ = (mp_digit)(r & (mp_word)MP_MASK); + c->dp[ix] = (mp_digit)(r & (mp_word)MP_MASK); /* send carry into next iteration */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } /* store final carry [if any] and increment ix offset */ - *tmpc++ = u; - ++ix; - - /* now zero digits above the top */ - MP_ZERO_DIGITS(tmpc, olduse - ix); + c->dp[ix] = u; /* set used count */ c->used = a->used + 1; + + /* now zero digits above the top */ + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; diff --git a/mp_prime_fermat.c b/mp_prime_fermat.c index 50d2e5ea1..ac8116fef 100644 --- a/mp_prime_fermat.c +++ 
b/mp_prime_fermat.c @@ -16,9 +16,6 @@ mp_err mp_prime_fermat(const mp_int *a, const mp_int *b, bool *result) mp_int t; mp_err err; - /* default to composite */ - *result = false; - /* ensure b > 1 */ if (mp_cmp_d(b, 1uL) != MP_GT) { return MP_VAL; @@ -31,16 +28,13 @@ mp_err mp_prime_fermat(const mp_int *a, const mp_int *b, bool *result) /* compute t = b**a mod a */ if ((err = mp_exptmod(b, a, a, &t)) != MP_OKAY) { - goto LBL_T; + goto LBL_ERR; } /* is it equal to b? */ - if (mp_cmp(&t, b) == MP_EQ) { - *result = true; - } + *result = mp_cmp(&t, b) == MP_EQ; - err = MP_OKAY; -LBL_T: +LBL_ERR: mp_clear(&t); return err; } diff --git a/mp_prime_frobenius_underwood.c b/mp_prime_frobenius_underwood.c index 543b8b4a2..62d3476a9 100644 --- a/mp_prime_frobenius_underwood.c +++ b/mp_prime_frobenius_underwood.c @@ -23,17 +23,16 @@ mp_err mp_prime_frobenius_underwood(const mp_int *N, bool *result) { mp_int T1z, T2z, Np1z, sz, tz; - - int a, ap2, length, i, j; + int a, ap2, i; mp_err err; - *result = false; - if ((err = mp_init_multi(&T1z, &T2z, &Np1z, &sz, &tz, NULL)) != MP_OKAY) { return err; } for (a = 0; a < LTM_FROBENIUS_UNDERWOOD_A; a++) { + int j; + /* TODO: That's ugly! No, really, it is! 
*/ if ((a==2) || (a==4) || (a==7) || (a==8) || (a==10) || (a==14) || (a==18) || (a==23) || (a==26) || (a==28)) { @@ -42,7 +41,7 @@ mp_err mp_prime_frobenius_underwood(const mp_int *N, bool *result) mp_set_i32(&T1z, (int32_t)((a * a) - 4)); - if ((err = mp_kronecker(&T1z, N, &j)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_kronecker(&T1z, N, &j)) != MP_OKAY) goto LBL_END; if (j == -1) { break; @@ -50,73 +49,76 @@ mp_err mp_prime_frobenius_underwood(const mp_int *N, bool *result) if (j == 0) { /* composite */ - goto LBL_FU_ERR; + *result = false; + goto LBL_END; } } /* Tell it a composite and set return value accordingly */ if (a >= LTM_FROBENIUS_UNDERWOOD_A) { err = MP_ITER; - goto LBL_FU_ERR; + goto LBL_END; } /* Composite if N and (a+4)*(2*a+5) are not coprime */ mp_set_u32(&T1z, (uint32_t)((a+4)*((2*a)+5))); - if ((err = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) goto LBL_END; - if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) goto LBL_FU_ERR; + if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) { + /* composite */ + *result = false; + goto LBL_END; + } ap2 = a + 2; - if ((err = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) goto LBL_END; mp_set(&sz, 1uL); mp_set(&tz, 2uL); - length = mp_count_bits(&Np1z); - for (i = length - 2; i >= 0; i--) { + for (i = mp_count_bits(&Np1z) - 2; i >= 0; i--) { /* * temp = (sz*(a*sz+2*tz))%N; * tz = ((tz-sz)*(tz+sz))%N; * sz = temp; */ - if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_END; /* a = 0 at about 50% of the cases (non-square and odd input) */ if (a != 0) { - if ((err = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) goto LBL_END; + if ((err = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) goto LBL_END; } - if ((err = 
mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_add(&sz, &tz, &sz)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mod(&tz, N, &tz)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mod(&T1z, N, &sz)) != MP_OKAY) goto LBL_FU_ERR; - if (s_mp_get_bit(&Np1z, (unsigned int)i)) { + if ((err = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) goto LBL_END; + if ((err = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) goto LBL_END; + if ((err = mp_add(&sz, &tz, &sz)) != MP_OKAY) goto LBL_END; + if ((err = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) goto LBL_END; + if ((err = mp_mod(&tz, N, &tz)) != MP_OKAY) goto LBL_END; + if ((err = mp_mod(&T1z, N, &sz)) != MP_OKAY) goto LBL_END; + if (s_mp_get_bit(&Np1z, i)) { /* * temp = (a+2) * sz + tz * tz = 2 * tz - sz * sz = temp */ if (a == 0) { - if ((err = mp_mul_2(&sz, &T1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_2(&sz, &T1z)) != MP_OKAY) goto LBL_END; } else { - if ((err = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) goto LBL_END; } - if ((err = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) goto LBL_END; + if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_END; + if ((err = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) goto LBL_END; mp_exch(&sz, &T1z); } } mp_set_u32(&T1z, (uint32_t)((2 * a) + 5)); - if ((err = mp_mod(&T1z, N, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if (mp_iszero(&sz) && (mp_cmp(&tz, &T1z) == MP_EQ)) { - *result = true; - } + if ((err = mp_mod(&T1z, N, &T1z)) != MP_OKAY) goto LBL_END; + + *result = mp_iszero(&sz) && (mp_cmp(&tz, &T1z) == MP_EQ); -LBL_FU_ERR: +LBL_END: mp_clear_multi(&tz, &sz, &Np1z, &T2z, &T1z, NULL); return err; } 
diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c index d0eca2c28..7d73864c7 100644 --- a/mp_prime_is_prime.c +++ b/mp_prime_is_prime.c @@ -13,14 +13,12 @@ static unsigned int s_floor_ilog2(int value) return r; } - mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) { mp_int b; - int ix, p_max = 0, size_a, len; - bool res; + int ix; + bool res; mp_err err; - unsigned int fips_rand, mask; /* default to no */ *result = false; @@ -133,6 +131,8 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) TODO: can be made a bit finer grained but comparing is not free. */ if (t < 0) { + int p_max = 0; + /* Sorenson, Jonathan; Webster, Jonathan (2015). "Strong Pseudoprimes to Twelve Prime Bases". @@ -174,6 +174,9 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) See Fips 186.4 p. 126ff */ else if (t > 0) { + unsigned int mask; + int size_a; + /* * The mp_digit's have a defined bit-size but the size of the * array a.dp is a simple 'int' and this library can not assume full @@ -219,6 +222,9 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) need to be prime. 
*/ for (ix = 0; ix < t; ix++) { + unsigned int fips_rand; + int len; + /* mp_rand() guarantees the first digit to be non-zero */ if ((err = mp_rand(&b, 1)) != MP_OKAY) { goto LBL_B; diff --git a/mp_prime_miller_rabin.c b/mp_prime_miller_rabin.c index a3af8bc8b..4c23a9f28 100644 --- a/mp_prime_miller_rabin.c +++ b/mp_prime_miller_rabin.c @@ -16,9 +16,6 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) mp_err err; int s, j; - /* default */ - *result = false; - /* ensure b > 1 */ if (mp_cmp_d(b, 1uL) != MP_GT) { return MP_VAL; @@ -29,12 +26,12 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) return err; } if ((err = mp_sub_d(&n1, 1uL, &n1)) != MP_OKAY) { - goto LBL_N1; + goto LBL_ERR1; } /* set 2**s * r = n1 */ if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) { - goto LBL_N1; + goto LBL_ERR1; } /* count the number of least significant bits @@ -44,15 +41,15 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) /* now divide n - 1 by 2**s */ if ((err = mp_div_2d(&r, s, &r, NULL)) != MP_OKAY) { - goto LBL_R; + goto LBL_ERR2; } /* compute y = b**r mod a */ if ((err = mp_init(&y)) != MP_OKAY) { - goto LBL_R; + goto LBL_ERR2; } if ((err = mp_exptmod(b, &r, a, &y)) != MP_OKAY) { - goto LBL_Y; + goto LBL_END; } /* if y != 1 and y != n1 do */ @@ -61,12 +58,13 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) /* while j <= s-1 and y != n1 */ while ((j <= (s - 1)) && (mp_cmp(&y, &n1) != MP_EQ)) { if ((err = mp_sqrmod(&y, a, &y)) != MP_OKAY) { - goto LBL_Y; + goto LBL_END; } /* if y == 1 then composite */ if (mp_cmp_d(&y, 1uL) == MP_EQ) { - goto LBL_Y; + *result = false; + goto LBL_END; } ++j; @@ -74,17 +72,19 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) /* if y != n1 then composite */ if (mp_cmp(&y, &n1) != MP_EQ) { - goto LBL_Y; + *result = false; + goto LBL_END; } } /* probably prime now */ *result = true; -LBL_Y: + +LBL_END: 
mp_clear(&y); -LBL_R: +LBL_ERR2: mp_clear(&r); -LBL_N1: +LBL_ERR1: mp_clear(&n1); return err; } diff --git a/mp_prime_next_prime.c b/mp_prime_next_prime.c index 40c94a4cf..6faa08de7 100644 --- a/mp_prime_next_prime.c +++ b/mp_prime_next_prime.c @@ -10,11 +10,10 @@ */ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) { - int x, y; - mp_ord cmp; + int x; mp_err err; bool res = false; - mp_digit res_tab[MP_PRIME_TAB_SIZE], step, kstep; + mp_digit res_tab[MP_PRIME_TAB_SIZE], kstep; mp_int b; /* force positive */ @@ -24,7 +23,7 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) if (mp_cmp_d(a, s_mp_prime_tab[MP_PRIME_TAB_SIZE-1]) == MP_LT) { /* find which prime it is bigger than "a" */ for (x = 0; x < MP_PRIME_TAB_SIZE; x++) { - cmp = mp_cmp_d(a, s_mp_prime_tab[x]); + mp_ord cmp = mp_cmp_d(a, s_mp_prime_tab[x]); if (cmp == MP_EQ) { continue; } @@ -42,11 +41,7 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } /* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */ - if (bbs_style) { - kstep = 4; - } else { - kstep = 2; - } + kstep = bbs_style ? 4 : 2; /* at this point we will use a combination of a sieve and Miller-Rabin */ @@ -79,11 +74,12 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } for (;;) { + mp_digit step = 0; + bool y; /* skip to the next non-trivially divisible candidate */ - step = 0; do { - /* y == 1 if any residue was zero [e.g. cannot be prime] */ - y = 0; + /* y == true if any residue was zero [e.g. 
cannot be prime] */ + y = false; /* increase step to next candidate */ step += kstep; @@ -100,10 +96,10 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) /* set flag if zero */ if (res_tab[x] == 0u) { - y = 1; + y = true; } } - } while ((y == 1) && (step < (((mp_digit)1 << MP_DIGIT_BIT) - kstep))); + } while (y && (step < (((mp_digit)1 << MP_DIGIT_BIT) - kstep))); /* add the step */ if ((err = mp_add_d(a, step, a)) != MP_OKAY) { @@ -111,7 +107,7 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } /* if didn't pass sieve and step == MP_MAX then skip test */ - if ((y == 1) && (step >= (((mp_digit)1 << MP_DIGIT_BIT) - kstep))) { + if (y && (step >= (((mp_digit)1 << MP_DIGIT_BIT) - kstep))) { continue; } @@ -123,7 +119,6 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } } - err = MP_OKAY; LBL_ERR: mp_clear(&b); return err; diff --git a/mp_prime_strong_lucas_selfridge.c b/mp_prime_strong_lucas_selfridge.c index df5de9637..6262e0714 100644 --- a/mp_prime_strong_lucas_selfridge.c +++ b/mp_prime_strong_lucas_selfridge.c @@ -192,7 +192,7 @@ mp_err mp_prime_strong_lucas_selfridge(const mp_int *a, bool *result) if ((err = mp_mod(&Qmz, a, &Qmz)) != MP_OKAY) goto LBL_LS_ERR; if ((err = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) goto LBL_LS_ERR; - if (s_mp_get_bit(&Dz, (unsigned int)u)) { + if (s_mp_get_bit(&Dz, u)) { /* Formulas for addition of indices (carried out mod N); * * U_(m+n) = (U_m*V_n + U_n*V_m)/2 diff --git a/mp_radix_size.c b/mp_radix_size.c index 49860155f..678cc7c01 100644 --- a/mp_radix_size.c +++ b/mp_radix_size.c @@ -23,13 +23,12 @@ mp_err mp_radix_size(const mp_int *a, int radix, size_t *size) a_ = *a; a_.sign = MP_ZPOS; if ((err = mp_log(&a_, radix, &b)) != MP_OKAY) { - goto LBL_ERR; + return err; } /* mp_ilogb truncates to zero, hence we need one extra put on top and one for `\0`. */ *size = (size_t)(b + 2 + ((a->sign == MP_NEG) ? 
1 : 0)); -LBL_ERR: - return err; + return MP_OKAY; } #endif diff --git a/mp_reduce_2k.c b/mp_reduce_2k.c index 5d3c7f90c..e635f5b90 100644 --- a/mp_reduce_2k.c +++ b/mp_reduce_2k.c @@ -8,36 +8,37 @@ mp_err mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d) { mp_int q; mp_err err; - int p; + int p; if ((err = mp_init(&q)) != MP_OKAY) { return err; } p = mp_count_bits(n); -top: - /* q = a/2**p, a = a mod 2**p */ - if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } - - if (d != 1u) { - /* q = q * d */ - if ((err = mp_mul_d(&q, d, &q)) != MP_OKAY) { + for (;;) { + /* q = a/2**p, a = a mod 2**p */ + if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { goto LBL_ERR; } - } - /* a = a + q */ - if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } + if (d != 1u) { + /* q = q * d */ + if ((err = mp_mul_d(&q, d, &q)) != MP_OKAY) { + goto LBL_ERR; + } + } - if (mp_cmp_mag(a, n) != MP_LT) { + /* a = a + q */ + if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { + goto LBL_ERR; + } + + if (mp_cmp_mag(a, n) == MP_LT) { + break; + } if ((err = s_mp_sub(a, n, a)) != MP_OKAY) { goto LBL_ERR; } - goto top; } LBL_ERR: diff --git a/mp_reduce_2k_l.c b/mp_reduce_2k_l.c index 6328cbc7d..31d9a1882 100644 --- a/mp_reduce_2k_l.c +++ b/mp_reduce_2k_l.c @@ -18,27 +18,30 @@ mp_err mp_reduce_2k_l(mp_int *a, const mp_int *n, const mp_int *d) } p = mp_count_bits(n); -top: - /* q = a/2**p, a = a mod 2**p */ - if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } - /* q = q * d */ - if ((err = mp_mul(&q, d, &q)) != MP_OKAY) { - goto LBL_ERR; - } + for (;;) { + /* q = a/2**p, a = a mod 2**p */ + if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto LBL_ERR; + } - /* a = a + q */ - if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } + /* q = q * d */ + if ((err = mp_mul(&q, d, &q)) != MP_OKAY) { + goto LBL_ERR; + } + + /* a = a + q */ + if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { + goto LBL_ERR; + } - if (mp_cmp_mag(a, n) != MP_LT) { + if 
(mp_cmp_mag(a, n) == MP_LT) { + break; + } if ((err = s_mp_sub(a, n, a)) != MP_OKAY) { goto LBL_ERR; } - goto top; + } LBL_ERR: diff --git a/mp_reduce_2k_setup.c b/mp_reduce_2k_setup.c index 0f3fd291e..51f884134 100644 --- a/mp_reduce_2k_setup.c +++ b/mp_reduce_2k_setup.c @@ -8,25 +8,23 @@ mp_err mp_reduce_2k_setup(const mp_int *a, mp_digit *d) { mp_err err; mp_int tmp; - int p; if ((err = mp_init(&tmp)) != MP_OKAY) { return err; } - p = mp_count_bits(a); - if ((err = mp_2expt(&tmp, p)) != MP_OKAY) { - mp_clear(&tmp); - return err; + if ((err = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { + goto LBL_ERR; } if ((err = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { - mp_clear(&tmp); - return err; + goto LBL_ERR; } *d = tmp.dp[0]; + +LBL_ERR: mp_clear(&tmp); - return MP_OKAY; + return err; } #endif diff --git a/mp_root.c b/mp_root.c index 82c3aa257..d53180883 100644 --- a/mp_root.c +++ b/mp_root.c @@ -15,7 +15,6 @@ mp_err mp_root(const mp_int *a, int b, mp_int *c) { mp_int t1, t2, t3, a_; - mp_ord cmp; int ilog2; mp_err err; @@ -105,6 +104,7 @@ mp_err mp_root(const mp_int *a, int b, mp_int *c) /* result can be off by a few so check */ /* Loop beneath can overshoot by one if found root is smaller than actual root */ for (;;) { + mp_ord cmp; if ((err = mp_expt(&t1, b, &t2)) != MP_OKAY) goto LBL_ERR; cmp = mp_cmp(&t2, &a_); if (cmp == MP_EQ) { @@ -133,8 +133,6 @@ mp_err mp_root(const mp_int *a, int b, mp_int *c) /* set the sign of the result */ c->sign = a->sign; - err = MP_OKAY; - LBL_ERR: mp_clear_multi(&t1, &t2, &t3, NULL); return err; diff --git a/mp_sub_d.c b/mp_sub_d.c index 96a747cb6..c5cf7266b 100644 --- a/mp_sub_d.c +++ b/mp_sub_d.c @@ -6,9 +6,7 @@ /* single digit subtraction */ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) { - mp_digit *tmpa, *tmpc; - mp_err err; - int ix, oldused; + int oldused; /* fast path for a == c */ if (a == c) { @@ -26,6 +24,7 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) /* grow c as required */ if (c->alloc < 
(a->used + 1)) { + mp_err err; if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { return err; } @@ -35,6 +34,7 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) * addition [with fudged signs] */ if (a->sign == MP_NEG) { + mp_err err; mp_int a_ = *a; a_.sign = MP_ZPOS; err = mp_add_d(&a_, b, c); @@ -46,24 +46,17 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) return err; } - /* setup regs */ oldused = c->used; - tmpa = a->dp; - tmpc = c->dp; /* if a <= b simply fix the single digit */ if (((a->used == 1) && (a->dp[0] <= b)) || (a->used == 0)) { - if (a->used == 1) { - *tmpc++ = b - *tmpa; - } else { - *tmpc++ = b; - } - ix = 1; + c->dp[0] = (a->used == 1) ? b - a->dp[0] : b; /* negative/1digit */ c->sign = MP_NEG; c->used = 1; } else { + int i; mp_digit mu = b; /* positive/size */ @@ -71,15 +64,15 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) c->used = a->used; /* subtract digits, mu is carry */ - for (ix = 0; ix < a->used; ix++) { - *tmpc = *tmpa++ - mu; - mu = *tmpc >> (MP_SIZEOF_BITS(mp_digit) - 1u); - *tmpc++ &= MP_MASK; + for (i = 0; i < a->used; i++) { + c->dp[i] = a->dp[i] - mu; + mu = c->dp[i] >> (MP_SIZEOF_BITS(mp_digit) - 1u); + c->dp[i] &= MP_MASK; } } /* zero excess digits */ - MP_ZERO_DIGITS(tmpc, oldused - ix); + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); mp_clamp(c); return MP_OKAY; diff --git a/s_mp_add.c b/s_mp_add.c index 922071996..1dd09f8ee 100644 --- a/s_mp_add.c +++ b/s_mp_add.c @@ -6,84 +6,65 @@ /* low level addition, based on HAC pp.594, Algorithm 14.7 */ mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c) { - const mp_int *x; - mp_err err; - int olduse, min, max; + int oldused, min, max, i; + mp_digit u; /* find sizes, we let |a| <= |b| which means we have to sort * them. 
"x" will point to the input with the most digits */ - if (a->used > b->used) { - min = b->used; - max = a->used; - x = a; - } else { - min = a->used; - max = b->used; - x = b; + if (a->used < b->used) { + MP_EXCH(const mp_int *, a, b); } + min = b->used; + max = a->used; + /* init result */ if (c->alloc < (max + 1)) { + mp_err err; if ((err = mp_grow(c, max + 1)) != MP_OKAY) { return err; } } /* get old used digit count and set new one */ - olduse = c->used; + oldused = c->used; c->used = max + 1; - { - mp_digit u, *tmpa, *tmpb, *tmpc; - int i; - - /* alias for digit pointers */ - - /* first input */ - tmpa = a->dp; + /* zero the carry */ + u = 0; + for (i = 0; i < min; i++) { + /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ + c->dp[i] = a->dp[i] + b->dp[i] + u; - /* second input */ - tmpb = b->dp; + /* U = carry bit of T[i] */ + u = c->dp[i] >> (mp_digit)MP_DIGIT_BIT; - /* destination */ - tmpc = c->dp; + /* take away carry bit from T[i] */ + c->dp[i] &= MP_MASK; + } - /* zero the carry */ - u = 0; - for (i = 0; i < min; i++) { - /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ - *tmpc = *tmpa++ + *tmpb++ + u; + /* now copy higher words if any, that is in A+B + * if A or B has more digits add those in + */ + if (min != max) { + for (; i < max; i++) { + /* T[i] = A[i] + U */ + c->dp[i] = a->dp[i] + u; /* U = carry bit of T[i] */ - u = *tmpc >> (mp_digit)MP_DIGIT_BIT; + u = c->dp[i] >> (mp_digit)MP_DIGIT_BIT; /* take away carry bit from T[i] */ - *tmpc++ &= MP_MASK; - } - - /* now copy higher words if any, that is in A+B - * if A or B has more digits add those in - */ - if (min != max) { - for (; i < max; i++) { - /* T[i] = X[i] + U */ - *tmpc = x->dp[i] + u; - - /* U = carry bit of T[i] */ - u = *tmpc >> (mp_digit)MP_DIGIT_BIT; - - /* take away carry bit from T[i] */ - *tmpc++ &= MP_MASK; - } + c->dp[i] &= MP_MASK; } + } - /* add carry */ - *tmpc++ = u; + /* add carry */ + c->dp[i] = u; - /* clear digits above oldused */ - MP_ZERO_DIGITS(tmpc, 
olduse - c->used); - } + /* clear digits above oldused */ + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); mp_clamp(c); return MP_OKAY; diff --git a/s_mp_balance_mul.c b/s_mp_balance_mul.c index 410883020..77852a427 100644 --- a/s_mp_balance_mul.c +++ b/s_mp_balance_mul.c @@ -6,15 +6,11 @@ /* single-digit multiplication with the smaller number as the single-digit */ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) { - int count, len_a, len_b, nblocks, i, j, bsize; - mp_int a0, tmp, A, B, r; + mp_int a0, tmp, r; mp_err err; - - len_a = a->used; - len_b = b->used; - - nblocks = MP_MAX(a->used, b->used) / MP_MIN(a->used, b->used); - bsize = MP_MIN(a->used, b->used) ; + int i, j, count, + nblocks = MP_MAX(a->used, b->used) / MP_MIN(a->used, b->used), + bsize = MP_MIN(a->used, b->used); if ((err = mp_init_size(&a0, bsize + 2)) != MP_OKAY) { return err; @@ -25,24 +21,20 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) } /* Make sure that A is the larger one*/ - if (len_a < len_b) { - B = *a; - A = *b; - } else { - A = *a; - B = *b; + if (a->used < b->used) { + MP_EXCH(const mp_int *, a, b); } for (i = 0, j=0; i < nblocks; i++) { /* Cut a slice off of a */ a0.used = 0; for (count = 0; count < bsize; count++) { - a0.dp[count] = A.dp[ j++ ]; + a0.dp[count] = a->dp[ j++ ]; a0.used++; } mp_clamp(&a0); /* Multiply with b */ - if ((err = mp_mul(&a0, &B, &tmp)) != MP_OKAY) { + if ((err = mp_mul(&a0, b, &tmp)) != MP_OKAY) { goto LBL_ERR; } /* Shift tmp to the correct position */ @@ -55,14 +47,14 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) } } /* The left-overs; there are always left-overs */ - if (j < A.used) { + if (j < a->used) { a0.used = 0; - for (count = 0; j < A.used; count++) { - a0.dp[count] = A.dp[ j++ ]; + for (count = 0; j < a->used; count++) { + a0.dp[count] = a->dp[ j++ ]; a0.used++; } mp_clamp(&a0); - if ((err = mp_mul(&a0, &B, &tmp)) != MP_OKAY) { + if ((err = mp_mul(&a0, b, &tmp)) != 
MP_OKAY) { goto LBL_ERR; } if ((err = mp_lshd(&tmp, bsize * i)) != MP_OKAY) { diff --git a/s_mp_get_bit.c b/s_mp_get_bit.c index f077f613c..a509bcecb 100644 --- a/s_mp_get_bit.c +++ b/s_mp_get_bit.c @@ -5,12 +5,12 @@ /* SPDX-License-Identifier: Unlicense */ /* Get bit at position b and return true if the bit is 1, false if it is 0 */ -bool s_mp_get_bit(const mp_int *a, unsigned int b) +bool s_mp_get_bit(const mp_int *a, int b) { mp_digit bit; - int limb = (int)(b / MP_DIGIT_BIT); + int limb = b / MP_DIGIT_BIT; - if (limb >= a->used) { + if (limb < 0 || limb >= a->used) { return false; } diff --git a/s_mp_invmod_fast.c b/s_mp_invmod_fast.c index ed1fc4a7d..46cf0d664 100644 --- a/s_mp_invmod_fast.c +++ b/s_mp_invmod_fast.c @@ -42,51 +42,49 @@ mp_err s_mp_invmod_fast(const mp_int *a, const mp_int *b, mp_int *c) if ((err = mp_copy(&y, &v)) != MP_OKAY) goto LBL_ERR; mp_set(&D, 1uL); -top: - /* 4. while u is even do */ - while (mp_iseven(&u)) { - /* 4.1 u = u/2 */ - if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; - - /* 4.2 if B is odd then */ - if (mp_isodd(&B)) { - if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + do { + /* 4. while u is even do */ + while (mp_iseven(&u)) { + /* 4.1 u = u/2 */ + if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; + + /* 4.2 if B is odd then */ + if (mp_isodd(&B)) { + if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + } + /* B = B/2 */ + if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; } - /* B = B/2 */ - if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; - } - - /* 5. while v is even do */ - while (mp_iseven(&v)) { - /* 5.1 v = v/2 */ - if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; - /* 5.2 if D is odd then */ - if (mp_isodd(&D)) { - /* D = (D-x)/2 */ - if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + /* 5. 
while v is even do */ + while (mp_iseven(&v)) { + /* 5.1 v = v/2 */ + if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; + + /* 5.2 if D is odd then */ + if (mp_isodd(&D)) { + /* D = (D-x)/2 */ + if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + } + /* D = D/2 */ + if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; } - /* D = D/2 */ - if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; - } - /* 6. if u >= v then */ - if (mp_cmp(&u, &v) != MP_LT) { - /* u = u - v, B = B - D */ - if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; + /* 6. if u >= v then */ + if (mp_cmp(&u, &v) != MP_LT) { + /* u = u - v, B = B - D */ + if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; - } else { - /* v - v - u, D = D - B */ - if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; + } else { + /* v = v - u, D = D - B */ + if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; - } + if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; + } - /* if not zero goto step 4 */ - if (!mp_iszero(&u)) { - goto top; - } + /* if not zero goto step 4 */ + } while (!mp_iszero(&u)); /* now a = C, b = D, gcd == g*v */ diff --git a/s_mp_invmod_slow.c b/s_mp_invmod_slow.c index 28cd6cd88..7d07a141a 100644 --- a/s_mp_invmod_slow.c +++ b/s_mp_invmod_slow.c @@ -36,60 +36,58 @@ mp_err s_mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c) mp_set(&A, 1uL); mp_set(&D, 1uL); -top: - /* 4. while u is even do */ - while (mp_iseven(&u)) { - /* 4.1 u = u/2 */ - if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; - - /* 4.2 if A or B is odd then */ - if (mp_isodd(&A) || mp_isodd(&B)) { - /* A = (A+y)/2, B = (B-x)/2 */ - if ((err = mp_add(&A, &y, &A)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + do { + /* 4.
while u is even do */ + while (mp_iseven(&u)) { + /* 4.1 u = u/2 */ + if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; + + /* 4.2 if A or B is odd then */ + if (mp_isodd(&A) || mp_isodd(&B)) { + /* A = (A+y)/2, B = (B-x)/2 */ + if ((err = mp_add(&A, &y, &A)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + } + /* A = A/2, B = B/2 */ + if ((err = mp_div_2(&A, &A)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; } - /* A = A/2, B = B/2 */ - if ((err = mp_div_2(&A, &A)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; - } - - /* 5. while v is even do */ - while (mp_iseven(&v)) { - /* 5.1 v = v/2 */ - if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; - /* 5.2 if C or D is odd then */ - if (mp_isodd(&C) || mp_isodd(&D)) { - /* C = (C+y)/2, D = (D-x)/2 */ - if ((err = mp_add(&C, &y, &C)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + /* 5. while v is even do */ + while (mp_iseven(&v)) { + /* 5.1 v = v/2 */ + if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; + + /* 5.2 if C or D is odd then */ + if (mp_isodd(&C) || mp_isodd(&D)) { + /* C = (C+y)/2, D = (D-x)/2 */ + if ((err = mp_add(&C, &y, &C)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + } + /* C = C/2, D = D/2 */ + if ((err = mp_div_2(&C, &C)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; } - /* C = C/2, D = D/2 */ - if ((err = mp_div_2(&C, &C)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; - } - /* 6. if u >= v then */ - if (mp_cmp(&u, &v) != MP_LT) { - /* u = u - v, A = A - C, B = B - D */ - if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; + /* 6. 
if u >= v then */ + if (mp_cmp(&u, &v) != MP_LT) { + /* u = u - v, A = A - C, B = B - D */ + if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&A, &C, &A)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&A, &C, &A)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; - } else { - /* v - v - u, C = C - A, D = D - B */ - if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; + } else { + /* v = v - u, C = C - A, D = D - B */ + if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&C, &A, &C)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&C, &A, &C)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; - } + if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; + } - /* if not zero goto step 4 */ - if (!mp_iszero(&u)) { - goto top; - } + /* if not zero goto step 4 */ + } while (!mp_iszero(&u)); /* now a = C, b = D, gcd == g*v */ @@ -111,7 +109,7 @@ mp_err s_mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c) /* C is now the inverse */ mp_exch(&C, c); - err = MP_OKAY; + LBL_ERR: mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); return err; diff --git a/s_mp_karatsuba_mul.c b/s_mp_karatsuba_mul.c index df3daa7ee..762e5e21d 100644 --- a/s_mp_karatsuba_mul.c +++ b/s_mp_karatsuba_mul.c @@ -35,8 +35,8 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) { mp_int x0, x1, y0, y1, t1, x0y0, x1y1; - int B; - mp_err err = MP_MEM; /* default the return code to an error */ + int B, i; + mp_err err; /* min # of digits */ B = MP_MIN(a->used, b->used); @@ -45,27 +45,27 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) B = B >> 1; /* init copy all the temps */ - if (mp_init_size(&x0, B) != MP_OKAY) { + if ((err = mp_init_size(&x0, B)) != MP_OKAY) { goto LBL_ERR; } - if (mp_init_size(&x1, a->used - B) != MP_OKAY) { + if ((err = mp_init_size(&x1, a->used 
- B)) != MP_OKAY) { goto X0; } - if (mp_init_size(&y0, B) != MP_OKAY) { + if ((err = mp_init_size(&y0, B)) != MP_OKAY) { goto X1; } - if (mp_init_size(&y1, b->used - B) != MP_OKAY) { + if ((err = mp_init_size(&y1, b->used - B)) != MP_OKAY) { goto Y0; } /* init temps */ - if (mp_init_size(&t1, B * 2) != MP_OKAY) { + if ((err = mp_init_size(&t1, B * 2)) != MP_OKAY) { goto Y1; } - if (mp_init_size(&x0y0, B * 2) != MP_OKAY) { + if ((err = mp_init_size(&x0y0, B * 2)) != MP_OKAY) { goto T1; } - if (mp_init_size(&x1y1, B * 2) != MP_OKAY) { + if ((err = mp_init_size(&x1y1, B * 2)) != MP_OKAY) { goto X0Y0; } @@ -74,32 +74,18 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) x1.used = a->used - B; y1.used = b->used - B; - { - int x; - mp_digit *tmpa, *tmpb, *tmpx, *tmpy; - - /* we copy the digits directly instead of using higher level functions - * since we also need to shift the digits - */ - tmpa = a->dp; - tmpb = b->dp; - - tmpx = x0.dp; - tmpy = y0.dp; - for (x = 0; x < B; x++) { - *tmpx++ = *tmpa++; - *tmpy++ = *tmpb++; - } - - tmpx = x1.dp; - for (x = B; x < a->used; x++) { - *tmpx++ = *tmpa++; - } - - tmpy = y1.dp; - for (x = B; x < b->used; x++) { - *tmpy++ = *tmpb++; - } + /* we copy the digits directly instead of using higher level functions + * since we also need to shift the digits + */ + for (i = 0; i < B; i++) { + x0.dp[i] = a->dp[i]; + y0.dp[i] = b->dp[i]; + } + for (i = B; i < a->used; i++) { + x1.dp[i - B] = a->dp[i]; + } + for (i = B; i < b->used; i++) { + y1.dp[i - B] = b->dp[i]; } /* only need to clamp the lower words since by definition the @@ -110,50 +96,47 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) /* now calc the products x0y0 and x1y1 */ /* after this x0 is no longer required, free temp [x0==t2]! 
*/ - if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY) { + if ((err = mp_mul(&x0, &y0, &x0y0)) != MP_OKAY) { goto X1Y1; /* x0y0 = x0*y0 */ } - if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY) { + if ((err = mp_mul(&x1, &y1, &x1y1)) != MP_OKAY) { goto X1Y1; /* x1y1 = x1*y1 */ } /* now calc x1+x0 and y1+y0 */ - if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) { + if ((err = s_mp_add(&x1, &x0, &t1)) != MP_OKAY) { goto X1Y1; /* t1 = x1 - x0 */ } - if (s_mp_add(&y1, &y0, &x0) != MP_OKAY) { + if ((err = s_mp_add(&y1, &y0, &x0)) != MP_OKAY) { goto X1Y1; /* t2 = y1 - y0 */ } - if (mp_mul(&t1, &x0, &t1) != MP_OKAY) { + if ((err = mp_mul(&t1, &x0, &t1)) != MP_OKAY) { goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ } /* add x0y0 */ - if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY) { + if ((err = mp_add(&x0y0, &x1y1, &x0)) != MP_OKAY) { goto X1Y1; /* t2 = x0y0 + x1y1 */ } - if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY) { + if ((err = s_mp_sub(&t1, &x0, &t1)) != MP_OKAY) { goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ } /* shift by B */ - if (mp_lshd(&t1, B) != MP_OKAY) { + if ((err = mp_lshd(&t1, B)) != MP_OKAY) { goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<used; @@ -23,37 +23,27 @@ mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b) B = B >> 1; /* init copy all the temps */ - if (mp_init_size(&x0, B) != MP_OKAY) + if ((err = mp_init_size(&x0, B)) != MP_OKAY) goto LBL_ERR; - if (mp_init_size(&x1, a->used - B) != MP_OKAY) + if ((err = mp_init_size(&x1, a->used - B)) != MP_OKAY) goto X0; /* init temps */ - if (mp_init_size(&t1, a->used * 2) != MP_OKAY) + if ((err = mp_init_size(&t1, a->used * 2)) != MP_OKAY) goto X1; - if (mp_init_size(&t2, a->used * 2) != MP_OKAY) + if ((err = mp_init_size(&t2, a->used * 2)) != MP_OKAY) goto T1; - if (mp_init_size(&x0x0, B * 2) != MP_OKAY) + if ((err = mp_init_size(&x0x0, B * 2)) != MP_OKAY) goto T2; - if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) + if ((err = mp_init_size(&x1x1, (a->used - B) * 2)) != MP_OKAY) goto X0X0; - { - int x; - mp_digit *dst, *src; - - 
src = a->dp; - - /* now shift the digits */ - dst = x0.dp; - for (x = 0; x < B; x++) { - *dst++ = *src++; - } - - dst = x1.dp; - for (x = B; x < a->used; x++) { - *dst++ = *src++; - } + /* now shift the digits */ + for (x = 0; x < B; x++) { + x0.dp[x] = a->dp[x]; + } + for (x = B; x < a->used; x++) { + x1.dp[x - B] = a->dp[x]; } x0.used = B; @@ -62,36 +52,34 @@ mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b) mp_clamp(&x0); /* now calc the products x0*x0 and x1*x1 */ - if (mp_sqr(&x0, &x0x0) != MP_OKAY) + if ((err = mp_sqr(&x0, &x0x0)) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */ - if (mp_sqr(&x1, &x1x1) != MP_OKAY) + if ((err = mp_sqr(&x1, &x1x1)) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ /* now calc (x1+x0)**2 */ - if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) + if ((err = s_mp_add(&x1, &x0, &t1)) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ - if (mp_sqr(&t1, &t1) != MP_OKAY) + if ((err = mp_sqr(&t1, &t1)) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ /* add x0y0 */ - if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) + if ((err = s_mp_add(&x0x0, &x1x1, &t2)) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ - if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY) + if ((err = s_mp_sub(&t1, &t2, &t1)) != MP_OKAY) goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ - if (mp_lshd(&t1, B) != MP_OKAY) + if ((err = mp_lshd(&t1, B)) != MP_OKAY) goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))< 1) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; if ((err = mp_expt(&bi_base, mid - low, &t)) != MP_OKAY) { goto LBL_END; diff --git a/s_mp_log_d.c b/s_mp_log_d.c index 46c9c0afd..62b154818 100644 --- a/s_mp_log_d.c +++ b/s_mp_log_d.c @@ -19,19 +19,16 @@ static mp_word s_pow(mp_word base, mp_word exponent) int s_mp_log_d(mp_digit base, mp_digit n) { - mp_word bracket_low = 1uLL, bracket_mid, bracket_high, N; - int ret, high = 1uL, low = 0uL, mid; + mp_word bracket_low = 1uLL, bracket_high = base, N = n; + int ret, high = 1, low = 0; if (n < base) { - return 0uL; + return 
0; } if (n == base) { - return 1uL; + return 1; } - bracket_high = (mp_word) base ; - N = (mp_word) n; - while (bracket_high < N) { low = high; bracket_low = bracket_high; @@ -40,8 +37,8 @@ int s_mp_log_d(mp_digit base, mp_digit n) } while (((mp_digit)(high - low)) > 1uL) { - mid = (low + high) >> 1; - bracket_mid = bracket_low * s_pow(base, (mp_word)(mid - low)); + int mid = (low + high) >> 1; + mp_word bracket_mid = bracket_low * s_pow(base, (mp_word)(mid - low)); if (N < bracket_mid) { high = mid ; diff --git a/s_mp_montgomery_reduce_fast.c b/s_mp_montgomery_reduce_fast.c index 083e7a4f4..a78c537ee 100644 --- a/s_mp_montgomery_reduce_fast.c +++ b/s_mp_montgomery_reduce_fast.c @@ -13,7 +13,7 @@ */ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) { - int ix, olduse; + int ix, oldused; mp_err err; mp_word W[MP_WARRAY]; @@ -22,7 +22,7 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) } /* get old used count */ - olduse = x->used; + oldused = x->used; /* grow a as required */ if (x->alloc < (n->used + 1)) { @@ -34,38 +34,30 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) /* first we have to get the digits of the input into * an array of double precision words W[...] 
*/ - { - mp_word *_W; - mp_digit *tmpx; - /* alias for the W[] array */ - _W = W; - - /* alias for the digits of x*/ - tmpx = x->dp; - - /* copy the digits of a into W[0..a->used-1] */ - for (ix = 0; ix < x->used; ix++) { - *_W++ = *tmpx++; - } + /* copy the digits of a into W[0..a->used-1] */ + for (ix = 0; ix < x->used; ix++) { + W[ix] = x->dp[ix]; + } - /* zero the high words of W[a->used..m->used*2] */ - if (ix < ((n->used * 2) + 1)) { - MP_ZERO_BUFFER(_W, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix)); - } + /* zero the high words of W[a->used..m->used*2] */ + if (ix < ((n->used * 2) + 1)) { + MP_ZERO_BUFFER(W + x->used, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix)); } /* now we proceed to zero successive digits * from the least significant upwards */ for (ix = 0; ix < n->used; ix++) { + int iy; + mp_digit mu; + /* mu = ai * m' mod b * * We avoid a double precision multiplication (which isn't required) * by casting the value down to a mp_digit. Note this requires * that W[ix-1] have the carry cleared (see after the inner loop) */ - mp_digit mu; mu = ((W[ix] & MP_MASK) * rho) & MP_MASK; /* a = a + mu * m * b**i @@ -82,21 +74,8 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) * carry fixups are done in order so after these loops the * first m->used words of W[] have the carries fixed */ - { - int iy; - mp_digit *tmpn; - mp_word *_W; - - /* alias for the digits of the modulus */ - tmpn = n->dp; - - /* Alias for the columns set by an offset of ix */ - _W = W + ix; - - /* inner loop */ - for (iy = 0; iy < n->used; iy++) { - *_W++ += (mp_word)mu * (mp_word)*tmpn++; - } + for (iy = 0; iy < n->used; iy++) { + W[ix + iy] += (mp_word)mu * (mp_word)n->dp[iy]; } /* now fix carry for next digit, W[ix+1] */ @@ -107,47 +86,30 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) * shift the words downward [all those least * significant digits we zeroed]. 
*/ - { - mp_digit *tmpx; - mp_word *_W, *_W1; - - /* nox fix rest of carries */ - - /* alias for current word */ - _W1 = W + ix; - - /* alias for next word, where the carry goes */ - _W = W + ++ix; - for (; ix < ((n->used * 2) + 1); ix++) { - *_W++ += *_W1++ >> (mp_word)MP_DIGIT_BIT; - } - - /* copy out, A = A/b**n - * - * The result is A/b**n but instead of converting from an - * array of mp_word to mp_digit than calling mp_rshd - * we just copy them in the right order - */ - - /* alias for destination word */ - tmpx = x->dp; - - /* alias for shifted double precision result */ - _W = W + n->used; + for (; ix < (n->used * 2); ix++) { + W[ix + 1] += W[ix] >> (mp_word)MP_DIGIT_BIT; + } - for (ix = 0; ix < (n->used + 1); ix++) { - *tmpx++ = *_W++ & (mp_word)MP_MASK; - } + /* copy out, A = A/b**n + * + * The result is A/b**n but instead of converting from an + * array of mp_word to mp_digit than calling mp_rshd + * we just copy them in the right order + */ - /* zero oldused digits, if the input a was larger than - * m->used+1 we'll have to clear the digits - */ - MP_ZERO_DIGITS(tmpx, olduse - ix); + for (ix = 0; ix < (n->used + 1); ix++) { + x->dp[ix] = W[n->used + ix] & (mp_word)MP_MASK; } - /* set the max used and clamp */ + /* set the max used */ x->used = n->used + 1; + + /* zero oldused digits, if the input a was larger than + * m->used+1 we'll have to clear the digits + */ + MP_ZERO_DIGITS(x->dp + x->used, oldused - x->used); + mp_clamp(x); /* if A >= m then A = A - m */ diff --git a/s_mp_mul_digs.c b/s_mp_mul_digs.c index ea0985b87..27e51f834 100644 --- a/s_mp_mul_digs.c +++ b/s_mp_mul_digs.c @@ -11,10 +11,7 @@ mp_err s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) { mp_int t; mp_err err; - int pa, pb, ix, iy; - mp_digit u; - mp_word r; - mp_digit tmpx, *tmpt, *tmpy; + int pa, ix; /* can we use the fast multiplier? 
*/ if ((digs < MP_WARRAY) && @@ -30,38 +27,28 @@ mp_err s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) /* compute the digits of the product directly */ pa = a->used; for (ix = 0; ix < pa; ix++) { - /* set the carry to zero */ - u = 0; + int iy, pb; + mp_digit u = 0; /* limit ourselves to making digs digits of output */ pb = MP_MIN(b->used, digs - ix); - /* setup some aliases */ - /* copy of the digit from a used within the nested loop */ - tmpx = a->dp[ix]; - - /* an alias for the destination shifted ix places */ - tmpt = t.dp + ix; - - /* an alias for the digits of b */ - tmpy = b->dp; - /* compute the columns of the output and propagate the carry */ for (iy = 0; iy < pb; iy++) { /* compute the column as a mp_word */ - r = (mp_word)*tmpt + - ((mp_word)tmpx * (mp_word)*tmpy++) + - (mp_word)u; + mp_word r = (mp_word)t.dp[ix + iy] + + ((mp_word)a->dp[ix] * (mp_word)b->dp[iy]) + + (mp_word)u; /* the new column is the lower part of the result */ - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); /* get the carry word from the result */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } /* set carry if it is placed below digs */ if ((ix + iy) < digs) { - *tmpt = u; + t.dp[ix + pb] = u; } } diff --git a/s_mp_mul_digs_fast.c b/s_mp_mul_digs_fast.c index 8988838fb..44aabd087 100644 --- a/s_mp_mul_digs_fast.c +++ b/s_mp_mul_digs_fast.c @@ -21,7 +21,7 @@ */ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) { - int olduse, pa, ix, iz; + int oldused, pa, ix; mp_err err; mp_digit W[MP_WARRAY]; mp_word _W; @@ -39,18 +39,12 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) /* clear the carry */ _W = 0; for (ix = 0; ix < pa; ix++) { - int tx, ty; - int iy; - mp_digit *tmpx, *tmpy; + int tx, ty, iy, iz; /* get offsets into the two bignums */ ty = MP_MIN(b->used-1, ix); tx = ix - ty; - /* setup temp aliases */ - tmpx = a->dp + tx; - tmpy = b->dp + ty; - /* 
this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ @@ -58,8 +52,7 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) /* execute loop */ for (iz = 0; iz < iy; ++iz) { - _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; - + _W += (mp_word)a->dp[tx + iz] * (mp_word)b->dp[ty - iz]; } /* store term */ @@ -70,20 +63,17 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) } /* setup dest */ - olduse = c->used; + oldused = c->used; c->used = pa; - { - mp_digit *tmpc; - tmpc = c->dp; - for (ix = 0; ix < pa; ix++) { - /* now extract the previous digit [below the carry] */ - *tmpc++ = W[ix]; - } - - /* clear unused digits [that existed in the old copy of c] */ - MP_ZERO_DIGITS(tmpc, olduse - ix); + for (ix = 0; ix < pa; ix++) { + /* now extract the previous digit [below the carry] */ + c->dp[ix] = W[ix]; } + + /* clear unused digits [that existed in the old copy of c] */ + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_mul_high_digs.c b/s_mp_mul_high_digs.c index 87cfbe54f..5a8c0731b 100644 --- a/s_mp_mul_high_digs.c +++ b/s_mp_mul_high_digs.c @@ -9,11 +9,8 @@ mp_err s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) { mp_int t; - int pa, pb, ix, iy; + int pa, pb, ix; mp_err err; - mp_digit u; - mp_word r; - mp_digit tmpx, *tmpt, *tmpy; /* can we use the fast multiplier? 
*/ if (MP_HAS(S_MP_MUL_HIGH_DIGS_FAST) @@ -30,31 +27,22 @@ mp_err s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) pa = a->used; pb = b->used; for (ix = 0; ix < pa; ix++) { - /* clear the carry */ - u = 0; - - /* left hand side of A[ix] * B[iy] */ - tmpx = a->dp[ix]; - - /* alias to the address of where the digits will be stored */ - tmpt = &(t.dp[digs]); - - /* alias for where to read the right hand side from */ - tmpy = b->dp + (digs - ix); + int iy; + mp_digit u = 0; for (iy = digs - ix; iy < pb; iy++) { /* calculate the double precision result */ - r = (mp_word)*tmpt + - ((mp_word)tmpx * (mp_word)*tmpy++) + - (mp_word)u; + mp_word r = (mp_word)t.dp[ix + iy] + + ((mp_word)a->dp[ix] * (mp_word)b->dp[iy]) + + (mp_word)u; /* get the lower part */ - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); /* carry the carry */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } - *tmpt = u; + t.dp[ix + pb] = u; } mp_clamp(&t); mp_exch(&t, c); diff --git a/s_mp_mul_high_digs_fast.c b/s_mp_mul_high_digs_fast.c index 1559ebcf4..138476599 100644 --- a/s_mp_mul_high_digs_fast.c +++ b/s_mp_mul_high_digs_fast.c @@ -14,7 +14,7 @@ */ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) { - int olduse, pa, ix, iz; + int oldused, pa, ix; mp_err err; mp_digit W[MP_WARRAY]; mp_word _W; @@ -31,17 +31,12 @@ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int pa = a->used + b->used; _W = 0; for (ix = digs; ix < pa; ix++) { - int tx, ty, iy; - mp_digit *tmpx, *tmpy; + int tx, ty, iy, iz; /* get offsets into the two bignums */ ty = MP_MIN(b->used-1, ix); tx = ix - ty; - /* setup temp aliases */ - tmpx = a->dp + tx; - tmpy = b->dp + ty; - /* this is the number of times the loop will iterrate, essentially its while (tx++ < a->used && ty-- >= 0) { ... 
} */ @@ -49,7 +44,7 @@ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int /* execute loop */ for (iz = 0; iz < iy; iz++) { - _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; + _W += (mp_word)a->dp[tx + iz] * (mp_word)b->dp[ty - iz]; } /* store term */ @@ -60,21 +55,17 @@ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int } /* setup dest */ - olduse = c->used; + oldused = c->used; c->used = pa; - { - mp_digit *tmpc; + for (ix = digs; ix < pa; ix++) { + /* now extract the previous digit [below the carry] */ + c->dp[ix] = W[ix]; + } - tmpc = c->dp + digs; - for (ix = digs; ix < pa; ix++) { - /* now extract the previous digit [below the carry] */ - *tmpc++ = W[ix]; - } + /* clear unused digits [that existed in the old copy of c] */ + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); - /* clear unused digits [that existed in the old copy of c] */ - MP_ZERO_DIGITS(tmpc, olduse - ix); - } mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_prime_is_divisible.c b/s_mp_prime_is_divisible.c index 0cca5a6f1..63b2405ab 100644 --- a/s_mp_prime_is_divisible.c +++ b/s_mp_prime_is_divisible.c @@ -10,16 +10,12 @@ */ mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) { - int ix; - mp_err err; - mp_digit res; - - /* default to not */ - *result = false; - - for (ix = 0; ix < MP_PRIME_TAB_SIZE; ix++) { - /* what is a mod LBL_prime_tab[ix] */ - if ((err = mp_mod_d(a, s_mp_prime_tab[ix], &res)) != MP_OKAY) { + int i; + for (i = 0; i < MP_PRIME_TAB_SIZE; i++) { + /* what is a mod LBL_prime_tab[i] */ + mp_err err; + mp_digit res; + if ((err = mp_mod_d(a, s_mp_prime_tab[i], &res)) != MP_OKAY) { return err; } @@ -30,6 +26,8 @@ mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) } } + /* default to not */ + *result = false; return MP_OKAY; } #endif diff --git a/s_mp_sqr.c b/s_mp_sqr.c index 61106ed73..4a2030638 100644 --- a/s_mp_sqr.c +++ b/s_mp_sqr.c @@ -7,10 +7,8 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b) { mp_int 
t; - int ix, iy, pa; + int ix, pa; mp_err err; - mp_word r; - mp_digit u, tmpx, *tmpt; pa = a->used; if ((err = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) { @@ -21,10 +19,13 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b) t.used = (2 * pa) + 1; for (ix = 0; ix < pa; ix++) { + mp_digit u; + int iy; + /* first calculate the digit at 2*ix */ /* calculate double precision result */ - r = (mp_word)t.dp[2*ix] + - ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]); + mp_word r = (mp_word)t.dp[2*ix] + + ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]); /* store lower part in result */ t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK); @@ -32,32 +33,27 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b) /* get the carry */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); - /* left hand side of A[ix] * A[iy] */ - tmpx = a->dp[ix]; - - /* alias for where to store the results */ - tmpt = t.dp + ((2 * ix) + 1); - for (iy = ix + 1; iy < pa; iy++) { /* first calculate the product */ - r = (mp_word)tmpx * (mp_word)a->dp[iy]; + r = (mp_word)a->dp[ix] * (mp_word)a->dp[iy]; /* now calculate the double precision result, note we use * addition instead of *2 since it's easier to optimize */ - r = (mp_word)*tmpt + r + r + (mp_word)u; + r = (mp_word)t.dp[ix + iy] + r + r + (mp_word)u; /* store lower part */ - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); /* get carry */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } /* propagate upwards */ while (u != 0uL) { - r = (mp_word)*tmpt + (mp_word)u; - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + r = (mp_word)t.dp[ix + iy] + (mp_word)u; + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); + ++iy; } } diff --git a/s_mp_sqr_fast.c b/s_mp_sqr_fast.c index bcb1f5e6b..675d75db5 100644 --- a/s_mp_sqr_fast.c +++ b/s_mp_sqr_fast.c @@ -15,14 +15,14 @@ After that loop you do the squares and add them in. 
mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) { - int olduse, pa, ix, iz; - mp_digit W[MP_WARRAY], *tmpx; + int oldused, pa, ix; + mp_digit W[MP_WARRAY]; mp_word W1; - mp_err err; /* grow the destination as required */ pa = a->used + a->used; if (b->alloc < pa) { + mp_err err; if ((err = mp_grow(b, pa)) != MP_OKAY) { return err; } @@ -31,9 +31,8 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) /* number of output digits to produce */ W1 = 0; for (ix = 0; ix < pa; ix++) { - int tx, ty, iy; + int tx, ty, iy, iz; mp_word _W; - mp_digit *tmpy; /* clear counter */ _W = 0; @@ -42,10 +41,6 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) ty = MP_MIN(a->used-1, ix); tx = ix - ty; - /* setup temp aliases */ - tmpx = a->dp + tx; - tmpy = a->dp + ty; - /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ @@ -59,7 +54,7 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) /* execute loop */ for (iz = 0; iz < iy; iz++) { - _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; + _W += (mp_word)a->dp[tx + iz] * (mp_word)a->dp[ty - iz]; } /* double the inner product and add carry */ @@ -78,19 +73,16 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) } /* setup dest */ - olduse = b->used; + oldused = b->used; b->used = a->used+a->used; - { - mp_digit *tmpb; - tmpb = b->dp; - for (ix = 0; ix < pa; ix++) { - *tmpb++ = W[ix] & MP_MASK; - } - - /* clear unused digits [that existed in the old copy of c] */ - MP_ZERO_DIGITS(tmpb, olduse - ix); + for (ix = 0; ix < pa; ix++) { + b->dp[ix] = W[ix] & MP_MASK; } + + /* clear unused digits [that existed in the old copy of c] */ + MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used); + mp_clamp(b); return MP_OKAY; } diff --git a/s_mp_sub.c b/s_mp_sub.c index bef1fce53..05386e5f7 100644 --- a/s_mp_sub.c +++ b/s_mp_sub.c @@ -6,64 +6,51 @@ /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) { - int olduse, 
min, max; - mp_err err; - - /* find sizes */ - min = b->used; - max = a->used; + int oldused = c->used, min = b->used, max = a->used, i; + mp_digit u; /* init result */ if (c->alloc < max) { + mp_err err; if ((err = mp_grow(c, max)) != MP_OKAY) { return err; } } - olduse = c->used; - c->used = max; - - { - mp_digit u, *tmpa, *tmpb, *tmpc; - int i; - - /* alias for digit pointers */ - tmpa = a->dp; - tmpb = b->dp; - tmpc = c->dp; - - /* set carry to zero */ - u = 0; - for (i = 0; i < min; i++) { - /* T[i] = A[i] - B[i] - U */ - *tmpc = (*tmpa++ - *tmpb++) - u; - - /* U = carry bit of T[i] - * Note this saves performing an AND operation since - * if a carry does occur it will propagate all the way to the - * MSB. As a result a single shift is enough to get the carry - */ - u = *tmpc >> (MP_SIZEOF_BITS(mp_digit) - 1u); - /* Clear carry from T[i] */ - *tmpc++ &= MP_MASK; - } + c->used = max; - /* now copy higher words if any, e.g. if A has more digits than B */ - for (; i < max; i++) { - /* T[i] = A[i] - U */ - *tmpc = *tmpa++ - u; + /* set carry to zero */ + u = 0; + for (i = 0; i < min; i++) { + /* T[i] = A[i] - B[i] - U */ + c->dp[i] = (a->dp[i] - b->dp[i]) - u; + + /* U = carry bit of T[i] + * Note this saves performing an AND operation since + * if a carry does occur it will propagate all the way to the + * MSB. As a result a single shift is enough to get the carry + */ + u = c->dp[i] >> (MP_SIZEOF_BITS(mp_digit) - 1u); + + /* Clear carry from T[i] */ + c->dp[i] &= MP_MASK; + } - /* U = carry bit of T[i] */ - u = *tmpc >> (MP_SIZEOF_BITS(mp_digit) - 1u); + /* now copy higher words if any, e.g. 
if A has more digits than B */ + for (; i < max; i++) { + /* T[i] = A[i] - U */ + c->dp[i] = a->dp[i] - u; - /* Clear carry from T[i] */ - *tmpc++ &= MP_MASK; - } + /* U = carry bit of T[i] */ + u = c->dp[i] >> (MP_SIZEOF_BITS(mp_digit) - 1u); - /* clear digits above used (since we may not have grown result above) */ - MP_ZERO_DIGITS(tmpc, olduse - c->used); + /* Clear carry from T[i] */ + c->dp[i] &= MP_MASK; } + /* clear digits above used (since we may not have grown result above) */ + MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_toom_sqr.c b/s_mp_toom_sqr.c index 67c465c6a..d8f2f8e0f 100644 --- a/s_mp_toom_sqr.c +++ b/s_mp_toom_sqr.c @@ -21,11 +21,9 @@ mp_err s_mp_toom_sqr(const mp_int *a, mp_int *b) { mp_int S0, a0, a1, a2; - mp_digit *tmpa, *tmpc; int B, count; mp_err err; - /* init temps */ if ((err = mp_init(&S0)) != MP_OKAY) { return err; @@ -42,18 +40,14 @@ mp_err s_mp_toom_sqr(const mp_int *a, mp_int *b) a1.used = B; if ((err = mp_init_size(&a2, B + (a->used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2; - tmpa = a->dp; - tmpc = a0.dp; for (count = 0; count < B; count++) { - *tmpc++ = *tmpa++; + a0.dp[count] = a->dp[count]; } - tmpc = a1.dp; for (; count < (2 * B); count++) { - *tmpc++ = *tmpa++; + a1.dp[count - B] = a->dp[count]; } - tmpc = a2.dp; for (; count < a->used; count++) { - *tmpc++ = *tmpa++; + a2.dp[count - 2 * B] = a->dp[count]; a2.used++; } mp_clamp(&a0); diff --git a/tommath.def b/tommath.def index e93bd1460..b759069bf 100644 --- a/tommath.def +++ b/tommath.def @@ -72,7 +72,6 @@ EXPORTS mp_lshd mp_mod mp_mod_2d - mp_mod_d mp_montgomery_calc_normalization mp_montgomery_reduce mp_montgomery_setup diff --git a/tommath.h b/tommath.h index 3b624b419..ed1446dc0 100644 --- a/tommath.h +++ b/tommath.h @@ -398,7 +398,7 @@ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c) MP_WUR; mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) MP_WUR; /* c = a mod b, 0 <= c < b */ -mp_err 
mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c) MP_WUR; +#define mp_mod_d(a, b, c) mp_div_d((a), (b), NULL, (c)) /* ---> number theory <--- */ diff --git a/tommath_class.h b/tommath_class.h index 0dfc1f183..79f3969e2 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -77,7 +77,6 @@ # define MP_LSHD_C # define MP_MOD_C # define MP_MOD_2D_C -# define MP_MOD_D_C # define MP_MONTGOMERY_CALC_NORMALIZATION_C # define MP_MONTGOMERY_REDUCE_C # define MP_MONTGOMERY_SETUP_C @@ -260,7 +259,6 @@ # define MP_COPY_C # define MP_MOD_2D_C # define MP_RSHD_C -# define MP_ZERO_C #endif #if defined(MP_DIV_3_C) @@ -470,10 +468,10 @@ #if defined(MP_IS_SQUARE_C) # define MP_CLEAR_C # define MP_CMP_MAG_C +# define MP_DIV_D_C # define MP_GET_I32_C # define MP_INIT_U32_C # define MP_MOD_C -# define MP_MOD_D_C # define MP_SQRT_C # define MP_SQR_C #endif @@ -522,10 +520,6 @@ # define MP_ZERO_C #endif -#if defined(MP_MOD_D_C) -# define MP_DIV_D_C -#endif - #if defined(MP_MONTGOMERY_CALC_NORMALIZATION_C) # define MP_2EXPT_C # define MP_CMP_MAG_C @@ -660,8 +654,8 @@ # define MP_ADD_D_C # define MP_CLEAR_C # define MP_CMP_D_C +# define MP_DIV_D_C # define MP_INIT_C -# define MP_MOD_D_C # define MP_PRIME_IS_PRIME_C # define MP_SET_C # define MP_SUB_D_C @@ -905,10 +899,10 @@ # define MP_CMP_D_C # define MP_COPY_C # define MP_DIV_2_C +# define MP_DIV_D_C # define MP_EXPTMOD_C # define MP_INIT_MULTI_C # define MP_KRONECKER_C -# define MP_MOD_D_C # define MP_MULMOD_C # define MP_SET_C # define MP_SQRMOD_C @@ -1187,7 +1181,7 @@ #endif #if defined(S_MP_PRIME_IS_DIVISIBLE_C) -# define MP_MOD_D_C +# define MP_DIV_D_C #endif #if defined(S_MP_RAND_JENKINS_C) diff --git a/tommath_private.h b/tommath_private.h index f0a3265e9..118c4cebe 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -186,7 +186,7 @@ MP_STATIC_ASSERT(prec_geq_min_prec, MP_PREC >= MP_MIN_PREC) extern MP_PRIVATE mp_err(*s_mp_rand_source)(void *out, size_t size); /* lowlevel functions, do not call! 
*/ -MP_PRIVATE bool s_mp_get_bit(const mp_int *a, unsigned int b); +MP_PRIVATE bool s_mp_get_bit(const mp_int *a, int b); MP_PRIVATE mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;