Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add MP_SMALL_STACK_SIZE option #538

Merged
merged 11 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ jobs:
# RSA superclass with tests (no sanitizer, but debug info)
- { BUILDOPTIONS: '--with-cc=gcc --with-m64 --cflags=-DLTM_NOTHING --cflags=-DSC_RSA_1_WITH_TESTS --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '1', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: '' }

# Build with small stack-size
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' }
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --limit-valgrind', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 libc6-dev-i386 gcc-multilib' }
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --multithread --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'libc6-dev-i386 gcc-multilib' }
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --multithread', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' }

# Test "autotuning", the automatic evaluation and setting of the Toom-Cook cut-offs.
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_16BIT --limit-valgrind --make-option=tune'
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_32BIT --limit-valgrind --make-option=tune'
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ if(COMPILE_LTO)
if(COMPILER_SUPPORTS_LTO)
set_property(TARGET ${PROJECT_NAME} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(SEND_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.")
message(FATAL_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.")
endif()
endif()

Expand Down
8 changes: 6 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,20 @@ image:
- Visual Studio 2019
- Visual Studio 2017
- Visual Studio 2015
environment:
matrix:
- CFLAGS_VAR: ""
CFLAGS_VAR_DLL: "CFLAGS=\"/Ox /MD /DLTM_TEST_DYNAMIC\""
build_script:
- cmd: >-
if "Visual Studio 2022"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
if "Visual Studio 2019"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
if "Visual Studio 2017"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
if "Visual Studio 2015"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64
if "Visual Studio 2015"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64
nmake -f makefile.msvc test.exe
nmake -f makefile.msvc test.exe %CFLAGS_VAR%
nmake -f makefile.msvc clean-obj
nmake -f makefile.msvc test_dll.exe CFLAGS="/Ox /MD /DLTM_TEST_DYNAMIC"
nmake -f makefile.msvc test_dll.exe %CFLAGS_VAR_DLL%
test_script:
- cmd: test.exe
- cmd: test_dll.exe
142 changes: 129 additions & 13 deletions demo/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2455,12 +2455,101 @@ static int test_mp_pack_unpack(void)
#define ONLY_PUBLIC_API_C
#endif

/*
 * Select the threading backend of the test driver at compile time.
 * Exactly one of SINGLE_THREADED_C, MULTI_THREADED_PTHREAD_C or
 * MULTI_THREADED_MSVC_C ends up defined (MULTI_THREADED_C is defined in
 * addition for both multi-threaded variants), and thread_id_t is set to the
 * matching thread handle type.
 */
#if !defined(LTM_TEST_MULTITHREAD)
#define SINGLE_THREADED_C
/* no real threads here; thread_id_t only has to be some complete type */
typedef uintptr_t thread_id_t;
#else
#define MULTI_THREADED_C
#if !defined(_WIN32)
#define MULTI_THREADED_PTHREAD_C
#include <pthread.h>
typedef pthread_t thread_id_t;
#else
#define MULTI_THREADED_MSVC_C

/* only set the Windows version macros if the build did not provide them */
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0501
#endif
#ifndef WINVER
#define WINVER 0x0501
#endif

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
typedef HANDLE thread_id_t;
#endif
#endif

/*
 * thread_start() and thread_join() mention the functions of both backends in
 * ordinary `if (MP_HAS(...))` branches, so every build has to see a
 * declaration of all of them. Declare the ones that the selected backend's
 * header did not bring in.
 * NOTE(review): these fallbacks are presumably never called or linked because
 * the corresponding MP_HAS() branch is constant-false - confirm against the
 * definition of MP_HAS.
 */
#if !defined(MULTI_THREADED_PTHREAD_C)
extern int pthread_create(thread_id_t *, const void *, void *(*)(void *), void *);
extern int pthread_join(thread_id_t, void **);
#endif

#if !defined(MULTI_THREADED_MSVC_C)
extern thread_id_t CreateThread(void *, size_t, unsigned long (*)(void *), void *, unsigned long, void *);
extern unsigned long WaitForSingleObject(thread_id_t hHandle, unsigned long dwMilliseconds);
#define INFINITE ((unsigned long)-1)
#endif

/* One entry of the unit-test table: a printable name and the function to run. */
struct test_fn {
/* name printed in the report and matched (via strstr) against argv filters */
const char *name;
/* test entry point, compared against EXIT_SUCCESS; NULL if the test is compiled out */
int (*fn)(void);
};

/* Per-test bookkeeping used when each test runs in its own thread. */
struct thread_info {
/* handle of the thread, filled in by thread_start() */
thread_id_t thread_id;
/* the test this thread executes */
const struct test_fn *t;
/* result of the test, written by run() */
int ret;
};

/*
 * Thread body shared by both backends: execute one test and store its result.
 *
 * tinfo->ret receives the return value of the test function. It is replaced
 * by EXIT_FAILURE if the subsequent mp_warray_free() returns -2.
 * NOTE(review): -2 presumably signals that work buffers are still in use -
 * confirm against the implementation of mp_warray_free().
 *
 * Table entries built with T1()/T2() carry a NULL function pointer when the
 * test is compiled out, and a thread is started for every table entry.
 * Such an entry must not be called; it is skipped here and the caller reports
 * it as NOP based on the NULL pointer alone.
 */
static void run(struct thread_info *tinfo)
{
   if (tinfo->t->fn == NULL) {
      /* disabled test: nothing to execute, result is not evaluated by the caller */
      tinfo->ret = EXIT_SUCCESS;
      return;
   }

   tinfo->ret = tinfo->t->fn();

   if (mp_warray_free() == -2) {
      tinfo->ret = EXIT_FAILURE;
   }
}

/*
 * pthread entry point. Runs the test described by the thread_info passed in
 * `arg` and hands the same pointer back as the thread's return value, so the
 * joining side gets the filled-in record.
 */
static void *run_pthread(void *arg)
{
   struct thread_info *tinfo = arg;

   run(tinfo);
   return tinfo;
}

/*
 * Thread entry point handed to CreateThread(). Runs the test described by the
 * thread_info passed in `arg`; the thread's exit code is always 0 because the
 * test result is delivered through the thread_info record instead.
 */
static unsigned long run_msvc(void *arg)
{
   struct thread_info *tinfo = arg;

   run(tinfo);
   return 0;
}

/*
 * Start a thread that executes the test referenced by `info`.
 *
 * The thread handle is stored in info->thread_id.
 * Returns 0 on success. On failure it returns the pthread_create() error
 * code, or -1 if CreateThread() yielded a NULL handle or no threading backend
 * is enabled.
 */
static int thread_start(struct thread_info *info)
{
   int rc = -1;

   if (MP_HAS(MULTI_THREADED_PTHREAD)) {
      rc = pthread_create(&info->thread_id, NULL, run_pthread, info);
   } else if (MP_HAS(MULTI_THREADED_MSVC)) {
      info->thread_id = CreateThread(NULL, 0, run_msvc, info, 0, NULL);
      if (info->thread_id != (thread_id_t)NULL) {
         rc = 0;
      }
   }

   return rc;
}

/*
 * Wait for the thread started for `info` to finish.
 *
 * On success 0 is returned and *res points to the thread_info record of the
 * finished thread (for pthreads this is the value returned by run_pthread(),
 * for the Win32 backend it is `info` itself).
 * On failure a non-zero value is returned and *res is left untouched:
 * the pthread_join() error code, or -1 if waiting on the Win32 handle failed
 * or no threading backend is enabled.
 */
static int thread_join(struct thread_info *info, struct thread_info **res)
{
   if (MP_HAS(MULTI_THREADED_PTHREAD)) {
      /* receive the result as a real `void *` instead of writing through a
         casted `struct thread_info **`, which is not a compatible type */
      void *retval = NULL;
      int rc = pthread_join(info->thread_id, &retval);
      if (rc == 0) {
         *res = retval;
      }
      return rc;
   }
   if (MP_HAS(MULTI_THREADED_MSVC)) {
      /* 0 is WAIT_OBJECT_0; with an INFINITE timeout anything else means the
         wait failed and the thread's result must not be trusted */
      if (WaitForSingleObject(info->thread_id, INFINITE) != 0) {
         return -1;
      }
      *res = info;
      return 0;
   }
   return -1;
}

static int unit_tests(int argc, char **argv)
{
static const struct {
const char *name;
int (*fn)(void);
} test[] = {
static const struct test_fn test[] = {
#define T0(n) { #n, test_##n }
#define T1(n, o) { #n, MP_HAS(o) ? test_##n : NULL }
#define T2(n, o1, o2) { #n, (MP_HAS(o1) && MP_HAS(o2)) ? test_##n : NULL }
Expand Down Expand Up @@ -2522,31 +2611,54 @@ static int unit_tests(int argc, char **argv)
#undef T2
#undef T1
};
struct thread_info test_threads[sizeof(test)/sizeof(test[0])], *res;
unsigned long i, ok, fail, nop;
uint64_t t;
int j;

ok = fail = nop = 0;

t = (uint64_t)time(NULL);
printf("SEED: 0x%" PRIx64 "\n\n", t);
s_mp_rand_jenkins_init(t);
mp_rand_source(s_mp_rand_jenkins);

if (MP_HAS(MP_SMALL_STACK_SIZE)) {
printf("Small-stack enabled\n\n");
}

if (MP_HAS(MULTI_THREADED)) {
printf("Multi-threading enabled\n\n");
/* we ignore the fact that jenkins is not thread safe */
for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) {
test_threads[i].t = &test[i];
EXPECT(thread_start(&test_threads[i]) == 0);
}
}

for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) {
if (argc > 1) {
for (j = 1; j < argc; ++j) {
if (strstr(test[i].name, argv[j]) != NULL) {
break;
j = -1;
if (MP_HAS(SINGLE_THREADED)) {
if (argc > 1) {
for (j = 1; j < argc; ++j) {
if (strstr(test[i].name, argv[j]) != NULL) {
break;
}
}
if (j == argc) continue;
}
if (j == argc) continue;

if (test[i].fn)
j = test[i].fn();
} else if (MP_HAS(MULTI_THREADED)) {
EXPECT(thread_join(&test_threads[i], &res) == 0);
j = res->ret;
}
printf("TEST %s\n", test[i].name);

if (test[i].fn == NULL) {
nop++;
printf("NOP %s\n\n", test[i].name);
} else if (test[i].fn() == EXIT_SUCCESS) {
} else if (j == EXIT_SUCCESS) {
ok++;
printf("\n");
} else {
Expand All @@ -2556,8 +2668,12 @@ static int unit_tests(int argc, char **argv)
}
fprintf(fail?stderr:stdout, "Tests OK/NOP/FAIL: %lu/%lu/%lu\n", ok, nop, fail);

if (fail != 0) return EXIT_FAILURE;
else return EXIT_SUCCESS;
EXPECT(mp_warray_free() != -2);

if (fail == 0)
return EXIT_SUCCESS;
LBL_ERR:
return EXIT_FAILURE;
}

int main(int argc, char **argv)
Expand Down
47 changes: 46 additions & 1 deletion doc/bn.tex
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,16 @@ \subsubsection{Operand Size Related}
\end{center}
\end{small}

\subsection{Small-Stack option}
\label{ch:SMALL_STACK_INTRO}
The library can be compiled with the symbol \texttt{MP\_SMALL\_STACK\_SIZE} defined, which results in
the temporary \texttt{MP\_WARRAY}-sized stack buffers being put on the heap.
This comes with one caveat: the formerly promised thread-safety is no longer guaranteed.
Therefore, if the Small-Stack option is enabled in a multi-threaded program, one shall always initialize
the library by calling \texttt{mp\_warray\_init()} once with the correct number of threads.

Cf.\ \ref{ch:SMALL_STACK_API} for the API description and further details.

\section{Purpose of LibTomMath}
Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath
was not written with bleeding edge performance in mind. First and foremost LibTomMath was written
Expand Down Expand Up @@ -428,7 +438,11 @@ \chapter{Getting Started with LibTomMath}
\section{Building Programs}
In order to use LibTomMath you must include ``tommath.h'' and link against the appropriate library
file (typically
libtommath.a). There is no library initialization required and the entire library is thread safe.
libtommath.a). There is no library initialization required and the entire library is thread safe
if it is used in its default configuration. The small-stack option makes use of atomic operations
to maintain its internal state and therefore does not require locking, but it MUST be initialized
if used from multiple threads. For further information see \ref{ch:SMALL_STACK_INTRO} and
\ref{ch:SMALL_STACK_API}.

\section{Return Codes}
There are five possible return codes a function may return.
Expand Down Expand Up @@ -813,6 +827,37 @@ \subsection{Adding additional digits}
\end{alltt}
\end{small}

\section{Small-Stack option}
\label{ch:SMALL_STACK_API}

In case the \texttt{MP\_SMALL\_STACK\_SIZE} symbol is defined the following functions
can be useful.

To initialize the internal structure the following function shall be called.

\index{mp\_warray\_init}
\begin{alltt}
mp_err mp_warray_init(size_t n_alloc, bool preallocate);
\end{alltt}

sjaeckel marked this conversation as resolved.
Show resolved Hide resolved
The flag \texttt{preallocate} controls whether the internal buffers --
\texttt{n\_alloc} buffers of size \texttt{MP\_WARRAY} -- will be allocated when
\texttt{mp\_warray\_init()} is called, or whether they will be allocated when required.

To free the internally allocated memory the following function shall be called.

\index{mp\_warray\_free}
\begin{alltt}
int mp_warray_free(void);
\end{alltt}


These two API functions are always available, even if the \texttt{MP\_SMALL\_STACK\_SIZE} option
has been disabled at compile time.
In that case \texttt{mp\_warray\_init()} will return \texttt{MP\_ERR} and \texttt{mp\_warray\_free()}
will return $-1$.


\chapter{Basic Operations}
\section{Copying}

Expand Down
2 changes: 1 addition & 1 deletion helper.pl
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ sub update_dep
foreach my $filename (glob '*mp_*.c') {
my $content;
my $cc = $ENV{'CC'} || 'gcc';
$content = `$cc -E -x c -DLTM_ALL $filename`;
$content = `$cc -E -x c -DLTM_ALL -DMP_SMALL_STACK_SIZE $filename`;
$content =~ s/^# 1 "$filename".*?^# 2 "$filename"//ms;

# convert filename to upper case so we can use it as a define
Expand Down
16 changes: 16 additions & 0 deletions libtommath_VS2008.vcproj
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,10 @@
RelativePath="mp_unpack.c"
>
</File>
<File
RelativePath="mp_warray_free.c"
>
</File>
<File
RelativePath="mp_xor.c"
>
Expand Down Expand Up @@ -928,6 +932,18 @@
RelativePath="s_mp_sub.c"
>
</File>
<File
RelativePath="s_mp_warray.c"
>
</File>
<File
RelativePath="s_mp_warray_get.c"
>
</File>
<File
RelativePath="s_mp_warray_put.c"
>
</File>
<File
RelativePath="s_mp_zero_buf.c"
>
Expand Down
21 changes: 12 additions & 9 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o m
mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \
mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \
mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \
s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \
s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \
s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o
mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \
s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \
s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \
s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \
s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \
s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \
s_mp_zero_buf.o s_mp_zero_digs.o

#END_INS

Expand Down Expand Up @@ -172,9 +173,10 @@ c89:
-e 's/UINT32_MAX/0xFFFFFFFFu/g' \
-e 's/UINT64_MAX/(mp_u64)-1/g' \
-e 's/INT32_MAX/0x7FFFFFFF/g' \
-e 's/INT32_MIN/(-2147483647-1)/g' \
-e 's/INT32_MIN/(-2147483647-1)/g' \
-e 's/INT64_MAX/(mp_i64)(((mp_u64)1<<63)-1)/g' \
-e 's/INT64_MIN/(mp_i64)((mp_u64)1<<63)/g' \
-e 's/uintptr_t/mp_uintptr/g' \
-e 's/SIZE_MAX/((size_t)-1)/g' \
-e 's/\(PRI[ioux]64\)/MP_\1/g' \
-e 's/uint\([0-9][0-9]*\)_t/mp_u\1/g' \
Expand All @@ -195,10 +197,11 @@ c99:
-e 's/false_/MP_NO_/g' \
-e 's/0xFFFFFFFFu/UINT32_MAX/g' \
-e 's/(mp_u64)-1/UINT64_MAX/g' \
-e 's/(-2147483647-1)/INT32_MIN/g' \
-e 's/(-2147483647-1)/INT32_MIN/g' \
-e 's/0x7FFFFFFF/INT32_MAX/g' \
-e 's/(mp_i64)((mp_u64)1<<63)/INT64_MIN/g' \
-e 's/(mp_i64)(((mp_u64)1<<63)-1)/INT64_MAX/g' \
-e 's/mp_uintptr/uintptr_t/g' \
-e 's/((size_t)-1)/SIZE_MAX/g' \
-e 's/MP_\(PRI[ioux]64\)/\1/g' \
-e 's/mp_u\([0-9][0-9]*\)/uint\1_t/g' \
Expand Down
Loading
Loading