From 5fcbfed317a2a1e5a7b676855aea3d6dd1d324d3 Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Tue, 23 Apr 2024 13:21:10 +0530 Subject: [PATCH 01/16] Add a support for debug-stats dumping and resetting on demand Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_exception.c | 69 ++++++++++++++++++++++++- pal/src/host/linux-sgx/host_thread.c | 36 +++++++++++-- pal/src/host/linux-sgx/pal_tcb.h | 1 + 3 files changed, 100 insertions(+), 6 deletions(-) diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index f0aae5bdeb..9b2042ac31 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -15,19 +15,27 @@ * __sigset_t uc_sigmask; */ - +#include #include #include +#include +#include +#include #include "api.h" #include "cpu.h" #include "debug_map.h" +#include "gdb_integration/sgx_gdb.h" #include "host_internal.h" #include "pal_rpc_queue.h" +#include "pal_tcb.h" #include "sigreturn.h" #include "ucontext.h" +#define MAX_DBG_THREADS 4096 + static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; +static int send_sigusr1_signal_to_children(void); static int block_signal(int sig, bool block) { int how = block ? 
SIG_BLOCK : SIG_UNBLOCK; @@ -188,6 +196,61 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc /* we need this handler to interrupt blocking syscalls in RPC threads */ } +static int send_sigusr1_signal_to_children() { + int signal_counter= 0; + + for (size_t i = 1; i < MAX_DBG_THREADS; i++) { + int child_tid = ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i]; + if(child_tid > 0) { + DO_SYSCALL(tkill, child_tid, SIGUSR1); + signal_counter++; + } + } + return signal_counter; +} + +static void handle_async_sigusr1_signal(int signum, siginfo_t* info, struct ucontext* uc) { + __UNUSED(signum); + __UNUSED(info); + __UNUSED(uc); + + static atomic_int no_of_children_visited = 0; + static const uint64_t LOOP_ATTEMPTS_MAX = 10000; /* rather arbitrary */ + static const uint64_t SLEEP_MAX = 100000000; /* nanoseconds (0.1 seconds) */ + static const uint64_t SLEEP_STEP = 1000000; /* 100 steps before capped */ + + if(g_sgx_enable_stats) { + + if(DO_SYSCALL(gettid) == g_host_pid) { + int no_of_children = send_sigusr1_signal_to_children(); + uint64_t loop_attempts = 0; + uint64_t sleep_time = 0; + + while((no_of_children) > (__atomic_load_n(&no_of_children_visited, __ATOMIC_RELAXED))) { + if (loop_attempts == LOOP_ATTEMPTS_MAX) { + if (sleep_time < SLEEP_MAX) + sleep_time += SLEEP_STEP; + struct timespec tv = {.tv_sec = 0, .tv_nsec = sleep_time}; + (void)DO_SYSCALL(nanosleep, &tv, /*rem=*/NULL); + } else { + loop_attempts++; + CPU_RELAX(); + } + } + update_and_print_stats(true); + __atomic_exchange_n(&no_of_children_visited, 0, __ATOMIC_ACQ_REL); + } else { + log_always("----- DUMPTING and RESETTING SGX STATS -----"); + update_and_print_stats(/*process_wide=*/false); + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + int ret = pal_host_tcb_reset_stats(tcb); + if(ret < 0) + return; + __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); + } + } +} + int sgx_signal_setup(void) { int ret; @@ -236,6 +299,10 @@ int sgx_signal_setup(void) { if 
(ret < 0) goto err; + ret = set_signal_handler(SIGUSR1, handle_async_sigusr1_signal); + if (ret < 0) + goto err; + ret = 0; err: return ret; diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index 6db254b192..0da13e5476 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -50,11 +50,11 @@ void update_and_print_stats(bool process_wide) { tid, tcb->eenter_cnt, tcb->eexit_cnt, tcb->aex_cnt, tcb->sync_signal_cnt, tcb->async_signal_cnt); - g_eenter_cnt += tcb->eenter_cnt; - g_eexit_cnt += tcb->eexit_cnt; - g_aex_cnt += tcb->aex_cnt; - g_sync_signal_cnt += tcb->sync_signal_cnt; - g_async_signal_cnt += tcb->async_signal_cnt; + __atomic_fetch_add(&g_eenter_cnt, tcb->eenter_cnt, __ATOMIC_ACQ_REL); + __atomic_fetch_add(&g_eexit_cnt, tcb->eexit_cnt, __ATOMIC_ACQ_REL); + __atomic_fetch_add(&g_aex_cnt, tcb->aex_cnt, __ATOMIC_ACQ_REL); + __atomic_fetch_add(&g_sync_signal_cnt, tcb->sync_signal_cnt, __ATOMIC_ACQ_REL); + __atomic_fetch_add(&g_async_signal_cnt, tcb->async_signal_cnt, __ATOMIC_ACQ_REL); if (process_wide) { int pid = g_host_pid; @@ -67,9 +67,17 @@ void update_and_print_stats(bool process_wide) { " # of async signals: %lu", pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt, g_sync_signal_cnt, g_async_signal_cnt); + + __atomic_exchange_n(&g_eenter_cnt, 0, __ATOMIC_ACQ_REL); + __atomic_exchange_n(&g_eexit_cnt, 0, __ATOMIC_ACQ_REL); + __atomic_exchange_n(&g_aex_cnt, 0, __ATOMIC_ACQ_REL); + __atomic_exchange_n(&g_sync_signal_cnt, 0, __ATOMIC_ACQ_REL); + __atomic_exchange_n(&g_async_signal_cnt, 0, __ATOMIC_ACQ_REL); } } + + void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->self = tcb; tcb->tcs = NULL; /* initialized by child thread */ @@ -87,6 +95,24 @@ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->last_async_event = PAL_EVENT_NO_EVENT; } +int pal_host_tcb_reset_stats(PAL_HOST_TCB* tcb) { + tcb->eenter_cnt = 0; + tcb->eexit_cnt = 0; + 
tcb->aex_cnt = 0; + tcb->sync_signal_cnt = 0; + tcb->async_signal_cnt = 0; + + int ret; + + /* set GS reg of this thread to thread's TCB; */ + ret = DO_SYSCALL(arch_prctl, ARCH_SET_GS, tcb); + if (ret < 0) { + ret = -EPERM; + log_always("error at pal_thread_reset_stats %d", ret); + } + return ret; +} + int create_tcs_mapper(void* tcs_base, unsigned int thread_num) { sgx_arch_tcs_t* enclave_tcs = tcs_base; diff --git a/pal/src/host/linux-sgx/pal_tcb.h b/pal/src/host/linux-sgx/pal_tcb.h index fa5225f955..9230c1fa5b 100644 --- a/pal/src/host/linux-sgx/pal_tcb.h +++ b/pal/src/host/linux-sgx/pal_tcb.h @@ -108,6 +108,7 @@ typedef struct pal_host_tcb { } PAL_HOST_TCB; extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack); +extern int pal_host_tcb_reset_stats(PAL_HOST_TCB* tcb); static inline PAL_HOST_TCB* pal_get_host_tcb(void) { PAL_HOST_TCB* tcb; From 14a8be3adcb801ed4a9a1ff65afb178448111d86 Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Mon, 29 Apr 2024 14:33:33 +0530 Subject: [PATCH 02/16] fixup! 
Dump and Reset stats data on demand using SIGUSR1 signal Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_exception.c | 82 ++++++++++++------------- pal/src/host/linux-sgx/host_thread.c | 11 ++-- 2 files changed, 45 insertions(+), 48 deletions(-) diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 9b2042ac31..15b1740c6b 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -18,9 +18,6 @@ #include #include #include -#include -#include -#include #include "api.h" #include "cpu.h" @@ -32,8 +29,6 @@ #include "sigreturn.h" #include "ucontext.h" -#define MAX_DBG_THREADS 4096 - static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; static int send_sigusr1_signal_to_children(void); @@ -196,7 +191,7 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc /* we need this handler to interrupt blocking syscalls in RPC threads */ } -static int send_sigusr1_signal_to_children() { +static int send_sigusr1_signal_to_children(void) { int signal_counter= 0; for (size_t i = 1; i < MAX_DBG_THREADS; i++) { @@ -206,49 +201,52 @@ static int send_sigusr1_signal_to_children() { signal_counter++; } } + return signal_counter; } -static void handle_async_sigusr1_signal(int signum, siginfo_t* info, struct ucontext* uc) { - __UNUSED(signum); - __UNUSED(info); - __UNUSED(uc); - +static void dump_and_reset_stats(void) +{ static atomic_int no_of_children_visited = 0; static const uint64_t LOOP_ATTEMPTS_MAX = 10000; /* rather arbitrary */ - static const uint64_t SLEEP_MAX = 100000000; /* nanoseconds (0.1 seconds) */ - static const uint64_t SLEEP_STEP = 1000000; /* 100 steps before capped */ - - if(g_sgx_enable_stats) { - - if(DO_SYSCALL(gettid) == g_host_pid) { - int no_of_children = send_sigusr1_signal_to_children(); - uint64_t loop_attempts = 0; - uint64_t sleep_time = 0; - - while((no_of_children) > (__atomic_load_n(&no_of_children_visited, __ATOMIC_RELAXED))) { - 
if (loop_attempts == LOOP_ATTEMPTS_MAX) { - if (sleep_time < SLEEP_MAX) - sleep_time += SLEEP_STEP; - struct timespec tv = {.tv_sec = 0, .tv_nsec = sleep_time}; - (void)DO_SYSCALL(nanosleep, &tv, /*rem=*/NULL); - } else { - loop_attempts++; - CPU_RELAX(); - } + + if(DO_SYSCALL(gettid) == g_host_pid) { + int no_of_children = send_sigusr1_signal_to_children(); + uint64_t loop_attempts = 0; + + /* Wait here until all the children are done processing the signal. */ + while((no_of_children) > (__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE))) { + if (loop_attempts == LOOP_ATTEMPTS_MAX) { + DO_SYSCALL(sched_yield); + } else { + loop_attempts++; + CPU_RELAX(); } - update_and_print_stats(true); - __atomic_exchange_n(&no_of_children_visited, 0, __ATOMIC_ACQ_REL); - } else { - log_always("----- DUMPTING and RESETTING SGX STATS -----"); - update_and_print_stats(/*process_wide=*/false); - PAL_HOST_TCB* tcb = pal_get_host_tcb(); - int ret = pal_host_tcb_reset_stats(tcb); - if(ret < 0) - return; - __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); } + + update_and_print_stats(/*process_wide=*/true); + __atomic_store_n(&no_of_children_visited, 0, __ATOMIC_RELEASE); + } else { + log_always("----- DUMPTING and RESETTING SGX STATS -----"); + update_and_print_stats(/*process_wide=*/false); + __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); } + + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + int ret = pal_host_tcb_reset_stats(tcb); + if(ret < 0) + return; +} + +static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { + __UNUSED(signum); + __UNUSED(info); + __UNUSED(uc); + + if(g_sgx_enable_stats) + dump_and_reset_stats(); + + return; } int sgx_signal_setup(void) { @@ -299,7 +297,7 @@ int sgx_signal_setup(void) { if (ret < 0) goto err; - ret = set_signal_handler(SIGUSR1, handle_async_sigusr1_signal); + ret = set_signal_handler(SIGUSR1, handle_sigusr1); if (ret < 0) goto err; diff --git a/pal/src/host/linux-sgx/host_thread.c 
b/pal/src/host/linux-sgx/host_thread.c index 0da13e5476..d98e4a0233 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -68,11 +68,11 @@ void update_and_print_stats(bool process_wide) { pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt, g_sync_signal_cnt, g_async_signal_cnt); - __atomic_exchange_n(&g_eenter_cnt, 0, __ATOMIC_ACQ_REL); - __atomic_exchange_n(&g_eexit_cnt, 0, __ATOMIC_ACQ_REL); - __atomic_exchange_n(&g_aex_cnt, 0, __ATOMIC_ACQ_REL); - __atomic_exchange_n(&g_sync_signal_cnt, 0, __ATOMIC_ACQ_REL); - __atomic_exchange_n(&g_async_signal_cnt, 0, __ATOMIC_ACQ_REL); + __atomic_store_n(&g_eenter_cnt, 0, __ATOMIC_RELEASE); + __atomic_store_n(&g_eexit_cnt, 0, __ATOMIC_RELEASE); + __atomic_store_n(&g_aex_cnt, 0, __ATOMIC_RELEASE); + __atomic_store_n(&g_sync_signal_cnt, 0, __ATOMIC_RELEASE); + __atomic_store_n(&g_async_signal_cnt, 0, __ATOMIC_RELEASE); } } @@ -104,7 +104,6 @@ int pal_host_tcb_reset_stats(PAL_HOST_TCB* tcb) { int ret; - /* set GS reg of this thread to thread's TCB; */ ret = DO_SYSCALL(arch_prctl, ARCH_SET_GS, tcb); if (ret < 0) { ret = -EPERM; From dfb0310318943fa14aa12c9db79ce30fcf7d5586 Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Mon, 29 Apr 2024 15:34:27 +0530 Subject: [PATCH 03/16] fixup! fixup! Dump and Reset stats data on demand using SIGUSR1 signal Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_exception.c | 1 - pal/src/host/linux-sgx/host_thread.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 15b1740c6b..3aba4bcd9d 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -30,7 +30,6 @@ #include "ucontext.h" static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; -static int send_sigusr1_signal_to_children(void); static int block_signal(int sig, bool block) { int how = block ? 
SIG_BLOCK : SIG_UNBLOCK; diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index d98e4a0233..db00088c9a 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -76,8 +76,6 @@ void update_and_print_stats(bool process_wide) { } } - - void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->self = tcb; tcb->tcs = NULL; /* initialized by child thread */ From b31b7d76c9b99bb66d5439899898df7781e8442e Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Mon, 6 May 2024 16:47:33 +0530 Subject: [PATCH 04/16] fixup! Dump and Reset stats data on demand using SIGUSR1 signal Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_exception.c | 51 +++++++++---------------- pal/src/host/linux-sgx/host_thread.c | 17 +++------ pal/src/host/linux-sgx/pal_tcb.h | 2 +- 3 files changed, 25 insertions(+), 45 deletions(-) diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 3aba4bcd9d..a7da841202 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -14,8 +14,6 @@ * ../../../include/arch/x86_64/linux/ucontext.h:136:5: error: unknown type name ‘__sigset_t’ * __sigset_t uc_sigmask; */ - -#include #include #include @@ -190,39 +188,31 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc /* we need this handler to interrupt blocking syscalls in RPC threads */ } -static int send_sigusr1_signal_to_children(void) { - int signal_counter= 0; +static size_t send_sigusr1_signal_to_children(pid_t main_tid) { + size_t no_of_signal_sent = 0; - for (size_t i = 1; i < MAX_DBG_THREADS; i++) { + for (size_t i = 0; i < MAX_DBG_THREADS; i++) { int child_tid = ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i]; - if(child_tid > 0) { + if (child_tid == main_tid) + continue; + + if (child_tid) { DO_SYSCALL(tkill, child_tid, SIGUSR1); - signal_counter++; + no_of_signal_sent++; } } - - 
return signal_counter; + return no_of_signal_sent; } -static void dump_and_reset_stats(void) -{ - static atomic_int no_of_children_visited = 0; - static const uint64_t LOOP_ATTEMPTS_MAX = 10000; /* rather arbitrary */ - - if(DO_SYSCALL(gettid) == g_host_pid) { - int no_of_children = send_sigusr1_signal_to_children(); - uint64_t loop_attempts = 0; - - /* Wait here until all the children are done processing the signal. */ - while((no_of_children) > (__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE))) { - if (loop_attempts == LOOP_ATTEMPTS_MAX) { - DO_SYSCALL(sched_yield); - } else { - loop_attempts++; - CPU_RELAX(); - } - } +static void dump_and_reset_stats(void) { + static size_t no_of_children_visited = 0; + if (DO_SYSCALL(gettid) == g_host_pid) { + size_t no_of_children = send_sigusr1_signal_to_children(g_host_pid); + + while ((__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE)) < (no_of_children)) { + DO_SYSCALL(sched_yield); + } update_and_print_stats(/*process_wide=*/true); __atomic_store_n(&no_of_children_visited, 0, __ATOMIC_RELEASE); } else { @@ -231,10 +221,7 @@ static void dump_and_reset_stats(void) __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); } - PAL_HOST_TCB* tcb = pal_get_host_tcb(); - int ret = pal_host_tcb_reset_stats(tcb); - if(ret < 0) - return; + pal_host_tcb_reset_stats(); } static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { @@ -242,7 +229,7 @@ static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { __UNUSED(info); __UNUSED(uc); - if(g_sgx_enable_stats) + if (g_sgx_enable_stats) dump_and_reset_stats(); return; diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index db00088c9a..12d43bb9c8 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -65,8 +65,9 @@ void update_and_print_stats(bool process_wide) { " # of AEXs: %lu\n" " # of sync signals: %lu\n" " # of async signals: %lu", - 
pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt, - g_sync_signal_cnt, g_async_signal_cnt); + pid, __atomic_load_n(&g_eenter_cnt, __ATOMIC_ACQUIRE), __atomic_load_n(&g_eexit_cnt, __ATOMIC_ACQUIRE), + __atomic_load_n(&g_aex_cnt, __ATOMIC_ACQUIRE), __atomic_load_n(&g_sync_signal_cnt, __ATOMIC_ACQUIRE), + __atomic_load_n(&g_async_signal_cnt, __ATOMIC_ACQUIRE)); __atomic_store_n(&g_eenter_cnt, 0, __ATOMIC_RELEASE); __atomic_store_n(&g_eexit_cnt, 0, __ATOMIC_RELEASE); @@ -93,21 +94,13 @@ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->last_async_event = PAL_EVENT_NO_EVENT; } -int pal_host_tcb_reset_stats(PAL_HOST_TCB* tcb) { +void pal_host_tcb_reset_stats(void) { + PAL_HOST_TCB* tcb = pal_get_host_tcb(); tcb->eenter_cnt = 0; tcb->eexit_cnt = 0; tcb->aex_cnt = 0; tcb->sync_signal_cnt = 0; tcb->async_signal_cnt = 0; - - int ret; - - ret = DO_SYSCALL(arch_prctl, ARCH_SET_GS, tcb); - if (ret < 0) { - ret = -EPERM; - log_always("error at pal_thread_reset_stats %d", ret); - } - return ret; } int create_tcs_mapper(void* tcs_base, unsigned int thread_num) { diff --git a/pal/src/host/linux-sgx/pal_tcb.h b/pal/src/host/linux-sgx/pal_tcb.h index 9230c1fa5b..dd94c02484 100644 --- a/pal/src/host/linux-sgx/pal_tcb.h +++ b/pal/src/host/linux-sgx/pal_tcb.h @@ -108,7 +108,7 @@ typedef struct pal_host_tcb { } PAL_HOST_TCB; extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack); -extern int pal_host_tcb_reset_stats(PAL_HOST_TCB* tcb); +extern void pal_host_tcb_reset_stats(void); static inline PAL_HOST_TCB* pal_get_host_tcb(void) { PAL_HOST_TCB* tcb; From c876141adb555b4669713d57c17cba6924744705 Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Fri, 10 May 2024 16:24:36 +0530 Subject: [PATCH 05/16] fixup! 
Dump and Reset stats data on demand using SIGUSR1 signal Signed-off-by: TejaswineeL --- Documentation/performance.rst | 9 +++++++++ pal/src/host/linux-sgx/host_exception.c | 13 ++++++------- pal/src/host/linux-sgx/host_thread.c | 6 ++++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/Documentation/performance.rst b/Documentation/performance.rst index 515d1d335b..bf22b4e496 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -103,6 +103,15 @@ How to read this output: counters should be compared against "golden runs" to deduce any interesting trends. +It is also possible to reset the performance statistics interactively, using +``SIGUSR1`` signal. This helps to collect performance statistics only for a +particular period e.g., skipping the Gramine startup and application +initialization time and concentrating only on the actual application processing. +Send ``SIGUSR1`` using command ``kill -SIGUSR1 `` (note the +minus sign before ). Sending multiple ``SIGUSR1`` will result +in a sequential dump and reset of the statistics, each dump and reset of +statistics will be done after the previous ``SIGUSR1``. 
+ Effects of system calls / ocalls -------------------------------- diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index a7da841202..f29d3078af 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -189,7 +189,7 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc } static size_t send_sigusr1_signal_to_children(pid_t main_tid) { - size_t no_of_signal_sent = 0; + size_t no_of_signals_sent = 0; for (size_t i = 0; i < MAX_DBG_THREADS; i++) { int child_tid = ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i]; @@ -198,10 +198,10 @@ static size_t send_sigusr1_signal_to_children(pid_t main_tid) { if (child_tid) { DO_SYSCALL(tkill, child_tid, SIGUSR1); - no_of_signal_sent++; + no_of_signals_sent++; } } - return no_of_signal_sent; + return no_of_signals_sent; } static void dump_and_reset_stats(void) { @@ -210,13 +210,14 @@ static void dump_and_reset_stats(void) { if (DO_SYSCALL(gettid) == g_host_pid) { size_t no_of_children = send_sigusr1_signal_to_children(g_host_pid); - while ((__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE)) < (no_of_children)) { + while ((__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE)) < no_of_children) { DO_SYSCALL(sched_yield); } + log_always("----- DUMPING and RESETTING SGX STATS -----"); update_and_print_stats(/*process_wide=*/true); __atomic_store_n(&no_of_children_visited, 0, __ATOMIC_RELEASE); } else { - log_always("----- DUMPTING and RESETTING SGX STATS -----"); + log_always("----- DUMPING and RESETTING SGX STATS -----"); update_and_print_stats(/*process_wide=*/false); __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); } @@ -231,8 +232,6 @@ static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { if (g_sgx_enable_stats) dump_and_reset_stats(); - - return; } int sgx_signal_setup(void) { diff --git a/pal/src/host/linux-sgx/host_thread.c 
b/pal/src/host/linux-sgx/host_thread.c index 12d43bb9c8..6a8a37164c 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -65,8 +65,10 @@ void update_and_print_stats(bool process_wide) { " # of AEXs: %lu\n" " # of sync signals: %lu\n" " # of async signals: %lu", - pid, __atomic_load_n(&g_eenter_cnt, __ATOMIC_ACQUIRE), __atomic_load_n(&g_eexit_cnt, __ATOMIC_ACQUIRE), - __atomic_load_n(&g_aex_cnt, __ATOMIC_ACQUIRE), __atomic_load_n(&g_sync_signal_cnt, __ATOMIC_ACQUIRE), + pid, __atomic_load_n(&g_eenter_cnt, __ATOMIC_ACQUIRE), + __atomic_load_n(&g_eexit_cnt, __ATOMIC_ACQUIRE), + __atomic_load_n(&g_aex_cnt, __ATOMIC_ACQUIRE), + __atomic_load_n(&g_sync_signal_cnt, __ATOMIC_ACQUIRE), __atomic_load_n(&g_async_signal_cnt, __ATOMIC_ACQUIRE)); __atomic_store_n(&g_eenter_cnt, 0, __ATOMIC_RELEASE); From c8317ba58670f437521b4c996380a1fcbe99da1e Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Tue, 14 May 2024 14:43:27 +0530 Subject: [PATCH 06/16] fixup! Dump and Reset stats data on demand using SIGUSR1 signal Signed-off-by: TejaswineeL --- Documentation/performance.rst | 11 ++++------- pal/src/host/linux-sgx/host_exception.c | 12 ++++++------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/Documentation/performance.rst b/Documentation/performance.rst index bf22b4e496..b733052492 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -103,14 +103,11 @@ How to read this output: counters should be compared against "golden runs" to deduce any interesting trends. -It is also possible to reset the performance statistics interactively, using -``SIGUSR1`` signal. This helps to collect performance statistics only for a -particular period e.g., skipping the Gramine startup and application +It is also possible to dump and reset SGX-related statistics interactively, using +``SIGUSR1`` signal. This helps to collect SGX-related statistics only for a +particular period, e.g. 
skipping the Gramine startup and application initialization time and concentrating only on the actual application processing. -Send ``SIGUSR1`` using command ``kill -SIGUSR1 `` (note the -minus sign before ). Sending multiple ``SIGUSR1`` will result -in a sequential dump and reset of the statistics, each dump and reset of -statistics will be done after the previous ``SIGUSR1``. +Send ``SIGUSR1`` using command ``kill -SIGUSR1 ``. Effects of system calls / ocalls -------------------------------- diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index f29d3078af..cb4d0693ef 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -210,14 +210,14 @@ static void dump_and_reset_stats(void) { if (DO_SYSCALL(gettid) == g_host_pid) { size_t no_of_children = send_sigusr1_signal_to_children(g_host_pid); + log_always("----- DUMPING and RESETTING SGX STATS -----"); while ((__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE)) < no_of_children) { DO_SYSCALL(sched_yield); } - log_always("----- DUMPING and RESETTING SGX STATS -----"); + update_and_print_stats(/*process_wide=*/true); __atomic_store_n(&no_of_children_visited, 0, __ATOMIC_RELEASE); } else { - log_always("----- DUMPING and RESETTING SGX STATS -----"); update_and_print_stats(/*process_wide=*/false); __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); } @@ -272,6 +272,10 @@ int sgx_signal_setup(void) { if (ret < 0) goto err; + ret = set_signal_handler(SIGUSR1, handle_sigusr1); + if (ret < 0) + goto err; + /* SIGUSR2 is reserved for Gramine usage: interrupting blocking syscalls in RPC threads. * We block SIGUSR2 in enclave threads; it is unblocked by each RPC thread explicitly. 
*/ ret = set_signal_handler(SIGUSR2, handle_dummy_signal); @@ -282,10 +286,6 @@ int sgx_signal_setup(void) { if (ret < 0) goto err; - ret = set_signal_handler(SIGUSR1, handle_sigusr1); - if (ret < 0) - goto err; - ret = 0; err: return ret; From 003f35686f0d7b6c8d1b28e4be5b0b1777c95479 Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Thu, 16 May 2024 11:16:22 +0530 Subject: [PATCH 07/16] fixup! Dump and Reset stats data on demand using SIGUSR1 signal Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_thread.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index 6a8a37164c..426a682070 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -28,11 +28,11 @@ bool g_sgx_enable_stats = false; /* this function is called only on thread/process exit (never in the middle of thread exec) */ void update_and_print_stats(bool process_wide) { - static atomic_ulong g_eenter_cnt = 0; - static atomic_ulong g_eexit_cnt = 0; - static atomic_ulong g_aex_cnt = 0; - static atomic_ulong g_sync_signal_cnt = 0; - static atomic_ulong g_async_signal_cnt = 0; + static uint64_t g_eenter_cnt = 0; + static uint64_t g_eexit_cnt = 0; + static uint64_t g_aex_cnt = 0; + static uint64_t g_sync_signal_cnt = 0; + static uint64_t g_async_signal_cnt = 0; if (!g_sgx_enable_stats) return; From 6da00929f3f441054dcfbeeec446618911b8ab3a Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Tue, 2 Jul 2024 15:04:53 +0530 Subject: [PATCH 08/16] fixup! 
Add a support for debug-stats dumping and resetting on demand Signed-off-by: TejaswineeL --- Documentation/performance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/performance.rst b/Documentation/performance.rst index 0aea3a1506..e9e94e4120 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -107,7 +107,7 @@ It is also possible to dump and reset SGX-related statistics interactively, usin ``SIGUSR1`` signal. This helps to collect SGX-related statistics only for a particular period, e.g. skipping the Gramine startup and application initialization time and concentrating only on the actual application processing. -Send ``SIGUSR1`` using command ``kill -SIGUSR1 ``. +Send ``SIGUSR1`` using command ``kill -SIGUSR1 ``. Effects of system calls / ocalls -------------------------------- From 7cdaacf68dde23bbfb35f1be336dbddd9d8a302b Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Wed, 3 Jul 2024 13:50:57 +0530 Subject: [PATCH 09/16] fixup! fixup! Add a support for debug-stats dumping and resetting on demand Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_exception.c | 1 - 1 file changed, 1 deletion(-) diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 9c1c2fe14f..7072ed23b4 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -240,7 +240,6 @@ static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { #endif /* DEBUG */ } - int sgx_signal_setup(void) { int ret; From 26c4684723bad994ab080d12cf59b3d0076852d2 Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Fri, 12 Jul 2024 11:13:01 +0530 Subject: [PATCH 10/16] fixup! 
Add support for dumping and resetting debug stats on demand Signed-off-by: TejaswineeL --- Documentation/performance.rst | 2 +- pal/src/host/linux-sgx/host_exception.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/performance.rst b/Documentation/performance.rst index e9e94e4120..065451d73f 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -107,7 +107,7 @@ It is also possible to dump and reset SGX-related statistics interactively, usin ``SIGUSR1`` signal. This helps to collect SGX-related statistics only for a particular period, e.g. skipping the Gramine startup and application initialization time and concentrating only on the actual application processing. -Send ``SIGUSR1`` using command ``kill -SIGUSR1 ``. +Send ``SIGUSR1`` using command ``kill -SIGUSR1 -``. Effects of system calls / ocalls -------------------------------- diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 7072ed23b4..e14ca96822 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -14,6 +14,7 @@ * ../../../include/arch/x86_64/linux/ucontext.h:136:5: error: unknown type name ‘__sigset_t’ * __sigset_t uc_sigmask; */ + #include #include From 68f4f3a730350780b40993ceae609b99e6e4e278 Mon Sep 17 00:00:00 2001 From: sreeharikax Date: Tue, 30 Jul 2024 12:02:10 +0530 Subject: [PATCH 11/16] fixup! 
Add a support for debug-stats dumping and resetting on demand Signed-off-by: sreeharikax --- pal/src/host/linux-sgx/enclave_framework.c | 10 +--------- pal/src/host/linux-sgx/enclave_ocalls.c | 3 ++- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/pal/src/host/linux-sgx/enclave_framework.c b/pal/src/host/linux-sgx/enclave_framework.c index ed9398ed78..4500f38141 100644 --- a/pal/src/host/linux-sgx/enclave_framework.c +++ b/pal/src/host/linux-sgx/enclave_framework.c @@ -67,27 +67,19 @@ bool sgx_is_valid_untrusted_ptr(const void* _addr, size_t size, size_t alignment } /* - * When DEBUG is enabled, we run sgx_profile_sample() during asynchronous enclave exit (AEX), which + * We run some functions (e.g. sgx_profile_sample()) during asynchronous enclave exit (AEX), which * uses the stack. Make sure to update URSP so that the AEX handler does not overwrite the part of * the stack that we just allocated. * * (Recall that URSP is an outside stack pointer, saved by EENTER and restored on AEX by the SGX * hardware itself.) 
*/ -#ifdef DEBUG - #define UPDATE_USTACK(_ustack) \ do { \ SET_ENCLAVE_TCB(ustack, _ustack); \ GET_ENCLAVE_TCB(gpr)->ursp = (uint64_t)_ustack; \ } while(0) -#else - -#define UPDATE_USTACK(_ustack) SET_ENCLAVE_TCB(ustack, _ustack) - -#endif - void* sgx_prepare_ustack(void) { void* old_ustack = GET_ENCLAVE_TCB(ustack); diff --git a/pal/src/host/linux-sgx/enclave_ocalls.c b/pal/src/host/linux-sgx/enclave_ocalls.c index c7259056e9..6a59fda081 100644 --- a/pal/src/host/linux-sgx/enclave_ocalls.c +++ b/pal/src/host/linux-sgx/enclave_ocalls.c @@ -136,8 +136,9 @@ static long sgx_exitless_ocall(uint64_t code, void* ocall_args) { } } + long result = COPY_UNTRUSTED_VALUE(&req->result); sgx_reset_ustack(old_ustack); - return COPY_UNTRUSTED_VALUE(&req->result); + return result; } __attribute_no_sanitize_address From f09d9ec9253c829216bed01a23cdbbe970a7c1a6 Mon Sep 17 00:00:00 2001 From: sreeharikax Date: Tue, 30 Jul 2024 16:13:55 +0530 Subject: [PATCH 12/16] fixup! Add a support for debug-stats dumping and resetting on demand Signed-off-by: sreeharikax --- pal/src/host/linux-sgx/host_entry.S | 27 +++-- pal/src/host/linux-sgx/host_exception.c | 139 +++++++++++++++++------- pal/src/host/linux-sgx/host_thread.c | 2 + pal/src/host/linux-sgx/pal_tcb.h | 2 + 4 files changed, 121 insertions(+), 49 deletions(-) diff --git a/pal/src/host/linux-sgx/host_entry.S b/pal/src/host/linux-sgx/host_entry.S index 9cd0d5ea7f..00f1c10a29 100644 --- a/pal/src/host/linux-sgx/host_entry.S +++ b/pal/src/host/linux-sgx/host_entry.S @@ -4,6 +4,7 @@ .extern tcs_base .extern g_in_aex_profiling + .extern dump_and_reset_stats .global sgx_ecall .type sgx_ecall, @function @@ -70,9 +71,9 @@ async_exit_pointer: # increment per-thread AEX counter for stats lock incq %gs:PAL_HOST_TCB_AEX_CNT -#ifdef DEBUG # Inform that we are in AEX profiling code movb $1, %gs:PAL_HOST_TCB_IN_AEX_PROF + # Save ERESUME parameters pushq %rax .cfi_adjust_cfa_offset 8 @@ -81,18 +82,25 @@ async_exit_pointer: pushq %rcx 
.cfi_adjust_cfa_offset 8 - # Align stack (required by System V AMD64 ABI) + pushq %rbp + .cfi_adjust_cfa_offset 8 movq %rsp, %rbp + .cfi_offset %rbp, -16 .cfi_def_cfa_register %rbp - andq $~0xF, %rsp + andq $~0xF, %rsp # Required by System V AMD64 ABI. +#ifdef DEBUG # Call sgx_profile_sample_aex with %rdi = TCS movq %rbx, %rdi call sgx_profile_sample_aex +#endif + + call dump_and_reset_stats # Restore stack movq %rbp, %rsp - .cfi_def_cfa_register %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 # Restore ERESUME parameters popq %rcx @@ -101,9 +109,8 @@ async_exit_pointer: .cfi_adjust_cfa_offset -8 popq %rax .cfi_adjust_cfa_offset -8 - movb $0, %gs:PAL_HOST_TCB_IN_AEX_PROF -#endif + movb $0, %gs:PAL_HOST_TCB_IN_AEX_PROF .cfi_endproc # fall-through to ERESUME @@ -143,12 +150,12 @@ sgx_raise: .cfi_offset %rbp, -16 .cfi_def_cfa_register %rbp -#if DEBUG # Adjust stack and save RDI subq $8, %rsp andq $~0xF, %rsp # Required by System V AMD64 ABI. movq %rdi, -8(%rbp) +#if DEBUG # Call sgx_profile_sample_ocall_outer with RBX (ocall handler) movq %rbx, %rdi call sgx_profile_sample_ocall_outer @@ -156,12 +163,12 @@ sgx_raise: # Call sgx_profile_sample_ocall_inner with RDX (pointer to in-enclave context) movq %rdx, %rdi call sgx_profile_sample_ocall_inner +#endif + + call dump_and_reset_stats # Restore RDI movq -8(%rbp), %rdi -#else - andq $~0xF, %rsp # Required by System V AMD64 ABI. 
-#endif callq *%rbx diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index e14ca96822..c317ccb606 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -19,10 +19,12 @@ #include #include "api.h" +#include "assert.h" #include "cpu.h" #include "debug_map.h" #include "gdb_integration/sgx_gdb.h" #include "host_internal.h" +#include "host_syscall.h" #include "pal_rpc_queue.h" #include "pal_tcb.h" #include "sigreturn.h" @@ -30,6 +32,34 @@ static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; +/* + * If no SGX-stats reset is in flight, this variable is zero. + * + * Upon user-induced SIGUSR1 on some thread (below happens in signal handling context): + * 1. If `g_stats_reset_leader_tid == 0`, then it is set to the TID of this thread -- this thread + * is designated to be the "leader" of SGX-stats reset flow, and it will broadcast SIGUSR1 to + * all other threads on the first AEX (since we can't do any complex logic in signal handling + * context, we postpone to the normal context which starts right-after an AEX event). + * 2. If `g_stats_reset_leader_tid != 0`, then it means that an SGX-stats reset flow is in flight. + * Two cases are possible: + * a. If PID of sending process is the current PID, then the signal was sent by the "leader" + * and this thread is a "follower" -- it sets `reset_stats = true` in its TCB, so that + * this thread's statistics are dumped and reset on the next AEX. + * b. If PID of sending process is not the current PID, then the signal was sent by the user + * and this is a new "SGX-stats reset" event from the user. Since the previous flow is + * still in flight, the thread must ignore this signal. + * + * On each AEX, each thread checks (below happens in normal context): + * 1. If `g_stats_reset_leader_tid == 0`, do nothing (no SGX-stats reset is in flight). + * 2. 
If `g_stats_reset_leader_tid == gettid()`, then this is the "leader" thread and it must + * broadcast SIGUSR1 to all other threads and wait until they perform their SGX-stats resets. + * After all threads are done, the "leader" resets `g_stats_reset_leader_tid` to zero. + * 3. Else, this is the "follower" thread and it must perform its SGX-stats reset. + * + * Application threads on Linux can never be 0, so this "no-op" default is safe. + */ +static int g_stats_reset_leader_tid = 0; + static int block_signal(int sig, bool block) { int how = block ? SIG_BLOCK : SIG_UNBLOCK; @@ -189,50 +219,31 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc /* we need this handler to interrupt blocking syscalls in RPC threads */ } -static size_t send_sigusr1_signal_to_children(pid_t main_tid) { - size_t no_of_signals_sent = 0; - - for (size_t i = 0; i < MAX_DBG_THREADS; i++) { - int child_tid = ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i]; - if (child_tid == main_tid) - continue; - - if (child_tid) { - DO_SYSCALL(tkill, child_tid, SIGUSR1); - no_of_signals_sent++; - } - } - return no_of_signals_sent; -} - -static void dump_and_reset_stats(void) { - static size_t no_of_children_visited = 0; - - if (DO_SYSCALL(gettid) == g_host_pid) { - size_t no_of_children = send_sigusr1_signal_to_children(g_host_pid); - - log_always("----- DUMPING and RESETTING SGX STATS -----"); - while ((__atomic_load_n(&no_of_children_visited, __ATOMIC_ACQUIRE)) < no_of_children) { - DO_SYSCALL(sched_yield); - } - - update_and_print_stats(/*process_wide=*/true); - __atomic_store_n(&no_of_children_visited, 0, __ATOMIC_RELEASE); - } else { - update_and_print_stats(/*process_wide=*/false); - __atomic_fetch_add(&no_of_children_visited, 1, __ATOMIC_ACQ_REL); - } - - pal_host_tcb_reset_stats(); -} - static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { __UNUSED(signum); __UNUSED(info); __UNUSED(uc); - if (g_sgx_enable_stats) - 
dump_and_reset_stats(); + if (g_sgx_enable_stats) { + int expected_tid = 0; + if (__atomic_compare_exchange_n(&g_stats_reset_leader_tid, &expected_tid, + DO_SYSCALL(gettid), /*weak=*/false, + __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) == true) { + /* first thread that gets SIGUSR1, the CAS above designated it as the "leader" */ + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + tcb->reset_stats = true; + } else { + /* thread gets SIGUSR1, check if this is a signal from the "leader" */ + if (info->si_pid == g_host_pid) { + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + assert(!tcb->reset_stats); + tcb->reset_stats = true; + } else { + log_warning("Received SIGUSR1 from user, but there is another SGX-stats reset " + "in flight; ignoring it"); + } + } + } #ifdef DEBUG if (g_pal_enclave.profile_enable) { @@ -313,3 +324,53 @@ void pal_describe_location(uintptr_t addr, char* buf, size_t buf_size) { #endif default_describe_location(addr, buf, buf_size); } + +static size_t send_sigusr1_to_followers(pid_t leader_tid) { + size_t followers_num = 0; + + /* we re-use DBGINFO_ADDR special variable (that is primarily used by GDB for debugging), + * fortunately this variable is set up even in non-debug builds */ + for (size_t i = 0; i < MAX_DBG_THREADS; i++) { + int follower_tid = ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i]; + if (!follower_tid || follower_tid == leader_tid) + continue; + + DO_SYSCALL(tkill, follower_tid, SIGUSR1); + followers_num++; + } + return followers_num; +} + +/* called on each AEX and OCALL (in normal context), see host_entry.S */ +void dump_and_reset_stats(void) { + static size_t followers_visited_num = 0; /* note `static`, it is a global var */ + + if (!g_sgx_enable_stats) + return; + + int leader_tid = __atomic_load_n(&g_stats_reset_leader_tid, __ATOMIC_ACQUIRE); + if (!leader_tid) + return; + + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + if (!tcb->reset_stats) + return; + + if (DO_SYSCALL(gettid) == leader_tid) { + log_always("----- DUMPING and RESETTING SGX 
STATS -----"); + size_t followers_num = send_sigusr1_to_followers(leader_tid); + + while ((__atomic_load_n(&followers_visited_num, __ATOMIC_ACQUIRE)) < followers_num) + DO_SYSCALL(sched_yield); + + update_and_print_stats(/*process_wide=*/true); + pal_host_tcb_reset_stats(); + __atomic_store_n(&followers_visited_num, 0, __ATOMIC_RELEASE); + + __atomic_store_n(&g_stats_reset_leader_tid, 0, __ATOMIC_RELEASE); + } else { + update_and_print_stats(/*process_wide=*/false); + pal_host_tcb_reset_stats(); + __atomic_fetch_add(&followers_visited_num, 1, __ATOMIC_ACQ_REL); + } +} diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index 426a682070..52e471fe5a 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -90,6 +90,7 @@ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->aex_cnt = 0; tcb->sync_signal_cnt = 0; tcb->async_signal_cnt = 0; + tcb->reset_stats = false; tcb->profile_sample_time = 0; @@ -103,6 +104,7 @@ void pal_host_tcb_reset_stats(void) { tcb->aex_cnt = 0; tcb->sync_signal_cnt = 0; tcb->async_signal_cnt = 0; + tcb->reset_stats = false; } int create_tcs_mapper(void* tcs_base, unsigned int thread_num) { diff --git a/pal/src/host/linux-sgx/pal_tcb.h b/pal/src/host/linux-sgx/pal_tcb.h index dd94c02484..0cade48f64 100644 --- a/pal/src/host/linux-sgx/pal_tcb.h +++ b/pal/src/host/linux-sgx/pal_tcb.h @@ -105,10 +105,12 @@ typedef struct pal_host_tcb { uint64_t profile_sample_time; /* last time sgx_profile_sample() recorded a sample */ int32_t last_async_event; /* last async signal, reported to the enclave on ocall return */ int* start_status_ptr; /* pointer to return value of clone_thread */ + bool reset_stats; /* if true, dump SGX stats and reset them on next AEX event */ } PAL_HOST_TCB; extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack); extern void pal_host_tcb_reset_stats(void); +void dump_and_reset_stats(void); static inline 
PAL_HOST_TCB* pal_get_host_tcb(void) { PAL_HOST_TCB* tcb; From 2966f33f1417a190960b486c9e26b255b13149cf Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Wed, 14 Aug 2024 16:38:19 +0530 Subject: [PATCH 13/16] fixup! Add a support for debug-stats dumping and resetting on demand Signed-off-by: TejaswineeL --- pal/src/host/linux-sgx/host_entry.S | 13 ++-- pal/src/host/linux-sgx/host_exception.c | 52 +++++++------- pal/src/host/linux-sgx/host_internal.h | 5 +- pal/src/host/linux-sgx/host_main.c | 5 +- pal/src/host/linux-sgx/host_ocalls.c | 4 +- pal/src/host/linux-sgx/host_thread.c | 95 +++++++++++++++---------- pal/src/host/linux-sgx/pal_tcb.h | 12 ++-- 7 files changed, 106 insertions(+), 80 deletions(-) diff --git a/pal/src/host/linux-sgx/host_entry.S b/pal/src/host/linux-sgx/host_entry.S index 00f1c10a29..9b55f3eba7 100644 --- a/pal/src/host/linux-sgx/host_entry.S +++ b/pal/src/host/linux-sgx/host_entry.S @@ -4,8 +4,9 @@ .extern tcs_base .extern g_in_aex_profiling - .extern dump_and_reset_stats - +#ifdef DEBUG + .extern maybe_dump_and_reset_stats +#endif .global sgx_ecall .type sgx_ecall, @function @@ -93,9 +94,9 @@ async_exit_pointer: # Call sgx_profile_sample_aex with %rdi = TCS movq %rbx, %rdi call sgx_profile_sample_aex -#endif - call dump_and_reset_stats + call maybe_dump_and_reset_stats +#endif # Restore stack movq %rbp, %rsp @@ -163,9 +164,9 @@ sgx_raise: # Call sgx_profile_sample_ocall_inner with RDX (pointer to in-enclave context) movq %rdx, %rdi call sgx_profile_sample_ocall_inner -#endif - call dump_and_reset_stats + call maybe_dump_and_reset_stats +#endif # Restore RDI movq -8(%rbp), %rdi diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index c317ccb606..47e37c701f 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -30,8 +30,8 @@ #include "sigreturn.h" #include "ucontext.h" -static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; - +static const int 
ASYNC_SIGNALS[] = {SIGTERM, SIGCONT, SIGUSR1}; + #ifdef DEBUG /* * If no SGX-stats reset is in flight, this variable is zero. * @@ -44,7 +44,8 @@ static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; * Two cases are possible: * a. If PID of sending process is the current PID, then the signal was sent by the "leader" * and this thread is a "follower" -- it sets `reset_stats = true` in its TCB, so that - * this thread's statistics are dumped and reset on the next AEX. + * this thread's statistics are dumped and reset on the next AEX or right-before + * executing the next OCALL in untrusted runtime. * b. If PID of sending process is not the current PID, then the signal was sent by the user * and this is a new "SGX-stats reset" event from the user. Since the previous flow is * still in flight, the thread must ignore this signal. @@ -58,7 +59,9 @@ static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; * * Application threads on Linux can never be 0, so this "no-op" default is safe. */ -static int g_stats_reset_leader_tid = 0; + int g_stats_reset_leader_tid = 0; + long long int g_stats_reset_epoch = 0; +#endif /* DEBUG */ static int block_signal(int sig, bool block) { int how = block ? 
SIG_BLOCK : SIG_UNBLOCK; @@ -219,6 +222,7 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc /* we need this handler to interrupt blocking syscalls in RPC threads */ } +#ifdef DEBUG static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { __UNUSED(signum); __UNUSED(info); @@ -245,12 +249,11 @@ static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { } } -#ifdef DEBUG if (g_pal_enclave.profile_enable) { __atomic_store_n(&g_trigger_profile_reinit, true, __ATOMIC_RELEASE); } -#endif /* DEBUG */ } +#endif /* DEBUG */ int sgx_signal_setup(void) { int ret; @@ -290,9 +293,11 @@ int sgx_signal_setup(void) { if (ret < 0) goto err; +#ifdef DEBUG ret = set_signal_handler(SIGUSR1, handle_sigusr1); if (ret < 0) goto err; +#endif /* DEBUG */ /* SIGUSR2 is reserved for Gramine usage: interrupting blocking syscalls in RPC threads. * We block SIGUSR2 in enclave threads; it is unblocked by each RPC thread explicitly. */ @@ -325,24 +330,9 @@ void pal_describe_location(uintptr_t addr, char* buf, size_t buf_size) { default_describe_location(addr, buf, buf_size); } -static size_t send_sigusr1_to_followers(pid_t leader_tid) { - size_t followers_num = 0; - - /* we re-use DBGINFO_ADDR special variable (that is primarily used by GDB for debugging), - * fortunately this variable is set up even in non-debug builds */ - for (size_t i = 0; i < MAX_DBG_THREADS; i++) { - int follower_tid = ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i]; - if (!follower_tid || follower_tid == leader_tid) - continue; - - DO_SYSCALL(tkill, follower_tid, SIGUSR1); - followers_num++; - } - return followers_num; -} - +#ifdef DEBUG /* called on each AEX and OCALL (in normal context), see host_entry.S */ -void dump_and_reset_stats(void) { +void maybe_dump_and_reset_stats(void) { static size_t followers_visited_num = 0; /* note `static`, it is a global var */ if (!g_sgx_enable_stats) @@ -360,17 +350,23 @@ void dump_and_reset_stats(void) 
{ log_always("----- DUMPING and RESETTING SGX STATS -----"); size_t followers_num = send_sigusr1_to_followers(leader_tid); - while ((__atomic_load_n(&followers_visited_num, __ATOMIC_ACQUIRE)) < followers_num) + __atomic_fetch_add(&g_stats_reset_epoch, 1, __ATOMIC_ACQ_REL); + long long int noted_epoch = __atomic_load_n(&g_stats_reset_epoch, __ATOMIC_ACQUIRE); + while ((__atomic_load_n(&followers_visited_num, __ATOMIC_ACQUIRE)) < followers_num) { + + if(__atomic_load_n(&g_stats_reset_epoch, __ATOMIC_ACQUIRE) > noted_epoch) + log_warning("One of the worker threads exited"); DO_SYSCALL(sched_yield); + } + log_always("STATS notes_epoch %lld", noted_epoch); - update_and_print_stats(/*process_wide=*/true); - pal_host_tcb_reset_stats(); + update_print_and_reset_stats(/*process_wide=*/true); __atomic_store_n(&followers_visited_num, 0, __ATOMIC_RELEASE); __atomic_store_n(&g_stats_reset_leader_tid, 0, __ATOMIC_RELEASE); } else { - update_and_print_stats(/*process_wide=*/false); - pal_host_tcb_reset_stats(); + update_print_and_reset_stats(/*process_wide=*/false); __atomic_fetch_add(&followers_visited_num, 1, __ATOMIC_ACQ_REL); } } +#endif /* DEBUG */ diff --git a/pal/src/host/linux-sgx/host_internal.h b/pal/src/host/linux-sgx/host_internal.h index 24d23758a8..4031390ada 100644 --- a/pal/src/host/linux-sgx/host_internal.h +++ b/pal/src/host/linux-sgx/host_internal.h @@ -60,8 +60,9 @@ extern struct pal_enclave g_pal_enclave; #ifdef DEBUG extern bool g_trigger_profile_reinit; extern char g_profile_filename[128]; +extern int g_stats_reset_leader_tid; +extern long long int g_stats_reset_epoch; #endif /* DEBUG */ - void* realloc(void* ptr, size_t new_size); int open_sgx_driver(void); @@ -130,9 +131,9 @@ void thread_exit(int status); int sgx_signal_setup(void); int block_async_signals(bool block); +#ifdef DEBUG int set_tcs_debug_flag_if_debugging(void* tcs_addrs[], size_t count); -#ifdef DEBUG /* SGX profiling (sgx_profile.c) */ /* diff --git a/pal/src/host/linux-sgx/host_main.c 
b/pal/src/host/linux-sgx/host_main.c index 0440f42892..3f9d106ce3 100644 --- a/pal/src/host/linux-sgx/host_main.c +++ b/pal/src/host/linux-sgx/host_main.c @@ -617,12 +617,12 @@ static int initialize_enclave(struct pal_enclave* enclave, const char* manifest_ dbg->tcs_addrs[t] = tcs_addrs[t]; } +#ifdef DEBUG ret = set_tcs_debug_flag_if_debugging(tcs_addrs, enclave->thread_num); if (ret < 0) { goto out; } -#ifdef DEBUG /* * Report libpal map. All subsequent files will be reported via PalDebugMapAdd(), but this * one has to be handled separately. @@ -1057,7 +1057,7 @@ static int load_enclave(struct pal_enclave* enclave, char* args, size_t args_siz uint64_t end_time; DO_SYSCALL(gettimeofday, &tv, NULL); end_time = tv.tv_sec * 1000000UL + tv.tv_usec; - +#ifdef DEBUG if (g_sgx_enable_stats) { /* This shows the time for Gramine + the Intel SGX driver to initialize the untrusted * PAL, config and create the SGX enclave, add enclave pages, measure and init it. @@ -1065,6 +1065,7 @@ static int load_enclave(struct pal_enclave* enclave, char* args, size_t args_siz log_always("----- SGX enclave loading time = %10lu microseconds -----", end_time - start_time); } +#endif /* DEBUG */ /* start running trusted PAL */ ecall_enclave_start(enclave->libpal_uri, args, args_size, env, env_size, parent_stream_fd, diff --git a/pal/src/host/linux-sgx/host_ocalls.c b/pal/src/host/linux-sgx/host_ocalls.c index eba742e55c..a26d499464 100644 --- a/pal/src/host/linux-sgx/host_ocalls.c +++ b/pal/src/host/linux-sgx/host_ocalls.c @@ -41,8 +41,8 @@ static long sgx_ocall_exit(void* args) { /* exit the whole process if exit_group() */ if (ocall_exit_args->is_exitgroup) { - update_and_print_stats(/*process_wide=*/true); #ifdef DEBUG + update_print_and_reset_stats(/*process_wide=*/true); sgx_profile_finish(); #endif @@ -64,8 +64,8 @@ static long sgx_ocall_exit(void* args) { if (!current_enclave_thread_cnt()) { /* no enclave threads left, kill the whole process */ - 
update_and_print_stats(/*process_wide=*/true); #ifdef DEBUG + update_print_and_reset_stats(/*process_wide=*/true); sgx_profile_finish(); #endif #ifdef SGX_VTUNE_PROFILE diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index 52e471fe5a..d13bcb18a0 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -26,13 +26,13 @@ static size_t g_enclave_thread_num = 0; bool g_sgx_enable_stats = false; -/* this function is called only on thread/process exit (never in the middle of thread exec) */ -void update_and_print_stats(bool process_wide) { - static uint64_t g_eenter_cnt = 0; - static uint64_t g_eexit_cnt = 0; - static uint64_t g_aex_cnt = 0; - static uint64_t g_sync_signal_cnt = 0; - static uint64_t g_async_signal_cnt = 0; +#ifdef DEBUG +void update_print_and_reset_stats(bool process_wide) { + static atomic_ulong g_eenter_cnt = 0; + static atomic_ulong g_eexit_cnt = 0; + static atomic_ulong g_aex_cnt = 0; + static atomic_ulong g_sync_signal_cnt = 0; + static atomic_ulong g_async_signal_cnt = 0; if (!g_sgx_enable_stats) return; @@ -50,11 +50,18 @@ void update_and_print_stats(bool process_wide) { tid, tcb->eenter_cnt, tcb->eexit_cnt, tcb->aex_cnt, tcb->sync_signal_cnt, tcb->async_signal_cnt); - __atomic_fetch_add(&g_eenter_cnt, tcb->eenter_cnt, __ATOMIC_ACQ_REL); - __atomic_fetch_add(&g_eexit_cnt, tcb->eexit_cnt, __ATOMIC_ACQ_REL); - __atomic_fetch_add(&g_aex_cnt, tcb->aex_cnt, __ATOMIC_ACQ_REL); - __atomic_fetch_add(&g_sync_signal_cnt, tcb->sync_signal_cnt, __ATOMIC_ACQ_REL); - __atomic_fetch_add(&g_async_signal_cnt, tcb->async_signal_cnt, __ATOMIC_ACQ_REL); + g_eenter_cnt += tcb->eenter_cnt; + g_eexit_cnt += tcb->eexit_cnt; + g_aex_cnt += tcb->aex_cnt; + g_sync_signal_cnt += tcb->sync_signal_cnt; + g_async_signal_cnt += tcb->async_signal_cnt; + + tcb->eenter_cnt = 0; + tcb->eexit_cnt = 0; + tcb->aex_cnt = 0; + tcb->sync_signal_cnt = 0; + tcb->async_signal_cnt = 0; + tcb->reset_stats = false; 
if (process_wide) { int pid = g_host_pid; @@ -65,48 +72,34 @@ void update_and_print_stats(bool process_wide) { " # of AEXs: %lu\n" " # of sync signals: %lu\n" " # of async signals: %lu", - pid, __atomic_load_n(&g_eenter_cnt, __ATOMIC_ACQUIRE), - __atomic_load_n(&g_eexit_cnt, __ATOMIC_ACQUIRE), - __atomic_load_n(&g_aex_cnt, __ATOMIC_ACQUIRE), - __atomic_load_n(&g_sync_signal_cnt, __ATOMIC_ACQUIRE), - __atomic_load_n(&g_async_signal_cnt, __ATOMIC_ACQUIRE)); - - __atomic_store_n(&g_eenter_cnt, 0, __ATOMIC_RELEASE); - __atomic_store_n(&g_eexit_cnt, 0, __ATOMIC_RELEASE); - __atomic_store_n(&g_aex_cnt, 0, __ATOMIC_RELEASE); - __atomic_store_n(&g_sync_signal_cnt, 0, __ATOMIC_RELEASE); - __atomic_store_n(&g_async_signal_cnt, 0, __ATOMIC_RELEASE); + pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt, + g_sync_signal_cnt, g_async_signal_cnt); + + g_eenter_cnt = 0; + g_eexit_cnt = 0; + g_aex_cnt = 0; + g_sync_signal_cnt = 0; + g_async_signal_cnt = 0; } } +#endif /* DEBUG */ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->self = tcb; tcb->tcs = NULL; /* initialized by child thread */ tcb->stack = stack; tcb->alt_stack = alt_stack; - tcb->eenter_cnt = 0; tcb->eexit_cnt = 0; tcb->aex_cnt = 0; tcb->sync_signal_cnt = 0; tcb->async_signal_cnt = 0; tcb->reset_stats = false; - tcb->profile_sample_time = 0; tcb->last_async_event = PAL_EVENT_NO_EVENT; } -void pal_host_tcb_reset_stats(void) { - PAL_HOST_TCB* tcb = pal_get_host_tcb(); - tcb->eenter_cnt = 0; - tcb->eexit_cnt = 0; - tcb->aex_cnt = 0; - tcb->sync_signal_cnt = 0; - tcb->async_signal_cnt = 0; - tcb->reset_stats = false; -} - int create_tcs_mapper(void* tcs_base, unsigned int thread_num) { sgx_arch_tcs_t* enclave_tcs = tcs_base; @@ -127,10 +120,12 @@ static int add_dynamic_tcs(sgx_arch_tcs_t* tcs) { int ret; struct enclave_dbginfo* dbginfo = (struct enclave_dbginfo*)DBGINFO_ADDR; +#ifdef DEBUG ret = set_tcs_debug_flag_if_debugging((void**)&tcs, /*count=*/1); if (ret < 0) { return ret; } +#endif /* DEBUG 
*/ size_t i = 0; spinlock_lock(&g_enclave_thread_map_lock); @@ -306,7 +301,15 @@ noreturn void thread_exit(int status) { * (by sgx_ocall_exit()) but we keep it here for future proof */ block_async_signals(true); - update_and_print_stats(/*process_wide=*/false); +#ifdef DEBUG + if(__atomic_load_n(&g_stats_reset_leader_tid, __ATOMIC_ACQUIRE) == DO_SYSCALL(gettid)) { + __atomic_store_n(&g_stats_reset_leader_tid, 0, __ATOMIC_RELEASE); + log_warning("Main thread exited. The SIGUSR1 signal may be lost"); + } + __atomic_fetch_add(&g_stats_reset_epoch, 1, __ATOMIC_ACQ_REL); + + update_print_and_reset_stats(/*process_wide=*/false); +#endif /* DEBUG */ if (tcb->alt_stack) { stack_t ss; @@ -410,6 +413,7 @@ int get_tid_from_tcs(void* tcs) { return tid ? tid : -EINVAL; } +#ifdef DEBUG int set_tcs_debug_flag_if_debugging(void* tcs_addrs[], size_t count) { if (!g_sgx_enable_stats && !g_vtune_profile_enabled) return 0; @@ -446,3 +450,22 @@ int set_tcs_debug_flag_if_debugging(void* tcs_addrs[], size_t count) { DO_SYSCALL(close, enclave_mem); return ret; } + +int send_sigusr1_to_followers(pid_t leader_tid) { + size_t followers_num = 0; + size_t enclave_thread_count = current_enclave_thread_cnt(); + + spinlock_lock(&g_enclave_thread_map_lock); + // enclave_thread_count limits the repeatative looping over thread ids + for (size_t i = 0; i < enclave_thread_count; i++) { + int follower_tid = g_enclave_thread_map[i].tid; + if (!follower_tid || follower_tid == leader_tid) + continue; + + DO_SYSCALL(tgkill, g_host_pid, follower_tid, SIGUSR1); + followers_num++; + } + spinlock_unlock(&g_enclave_thread_map_lock); + return followers_num; +} +#endif /* DEBUG */ diff --git a/pal/src/host/linux-sgx/pal_tcb.h b/pal/src/host/linux-sgx/pal_tcb.h index 0cade48f64..179298c65a 100644 --- a/pal/src/host/linux-sgx/pal_tcb.h +++ b/pal/src/host/linux-sgx/pal_tcb.h @@ -5,6 +5,7 @@ #include #include "pal.h" +#include "pal_linux_types.h" #include "sgx_arch.h" struct untrusted_area { @@ -105,12 +106,11 @@ 
typedef struct pal_host_tcb { uint64_t profile_sample_time; /* last time sgx_profile_sample() recorded a sample */ int32_t last_async_event; /* last async signal, reported to the enclave on ocall return */ int* start_status_ptr; /* pointer to return value of clone_thread */ - bool reset_stats; /* if true, dump SGX stats and reset them on next AEX event */ + bool reset_stats; /* if true, dump SGX stats and reset them on next AEX event or + after next enclave exit for an OCALL */ } PAL_HOST_TCB; extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack); -extern void pal_host_tcb_reset_stats(void); -void dump_and_reset_stats(void); static inline PAL_HOST_TCB* pal_get_host_tcb(void) { PAL_HOST_TCB* tcb; @@ -122,5 +122,9 @@ static inline PAL_HOST_TCB* pal_get_host_tcb(void) { } extern bool g_sgx_enable_stats; -void update_and_print_stats(bool process_wide); +#ifdef DEBUG +void update_print_and_reset_stats(bool process_wide); +int send_sigusr1_to_followers(pid_t leader_tid); +void maybe_dump_and_reset_stats(void); +#endif /* DEBUG */ #endif /* IN_ENCLAVE */ From 5e4aaa16394be70fb09d36e515aaaefba3356fbd Mon Sep 17 00:00:00 2001 From: TejaswineeL Date: Wed, 14 Aug 2024 18:41:37 +0530 Subject: [PATCH 14/16] fixup! Add a support for debug-stats dumping and resetting on demand Signed-off-by: TejaswineeL --- Documentation/manifest-syntax.rst | 2 ++ Documentation/performance.rst | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/manifest-syntax.rst b/Documentation/manifest-syntax.rst index b96fa1892f..35544a2ff8 100644 --- a/Documentation/manifest-syntax.rst +++ b/Documentation/manifest-syntax.rst @@ -1197,6 +1197,8 @@ Enabling per-thread and process-wide SGX stats sgx.enable_stats = [true|false] (Default: false) +Gramine must be compiled with --buildtype=debug or --buildtype=debugoptimized +for this option to work. In addition, the manifest must contain sgx.debug = true. 
This syntax specifies whether to enable SGX enclave-specific statistics: #. ``TCS.FLAGS.DBGOPTIN`` flag. This flag is set in all enclave threads and diff --git a/Documentation/performance.rst b/Documentation/performance.rst index 065451d73f..56c0237c53 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -18,11 +18,12 @@ Enabling per-thread and process-wide SGX stats See also :ref:`perf` below for installing ``perf``. -Enable statistics using ``sgx.enable_stats = true`` manifest option. Now your -graminized application correctly reports performance counters. This is useful -when using e.g. ``perf stat`` to collect performance statistics. This manifest -option also forces Gramine to dump SGX-related information on each -thread/process exit. Here is an example: +Enable statistics using ``sgx.enable_stats = true`` manifest option (note that +Gramine must be compiled with ``--buildtype=debug`` or ``--buildtype=debugoptimized`` +for this option to work). Now your graminized application correctly reports +performance counters. This is useful when using e.g. ``perf stat`` to collect +performance statistics. This manifest option also forces Gramine to dump +SGX-related information on each thread/process exit. Here is an example: :: From b5fcb6064315a4fdbff2e404664f09f31c020681 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Mon, 19 Aug 2024 06:49:05 -0700 Subject: [PATCH 15/16] fixup! 
Add a support for debug-stats dumping and resetting on demand Signed-off-by: Dmitrii Kuvaiskii --- Documentation/manifest-syntax.rst | 6 +- Documentation/performance.rst | 11 +-- .../fork_and_access_file.manifest.template | 3 + .../multi_pthread.manifest.template | 1 - .../multi_pthread_exitless.manifest.template | 1 - pal/regression/Thread2.manifest.template | 1 - .../Thread2_exitless.manifest.template | 1 - pal/src/host/linux-sgx/host_entry.S | 25 +++---- pal/src/host/linux-sgx/host_exception.c | 75 +++++++++++++++---- pal/src/host/linux-sgx/host_internal.h | 6 +- pal/src/host/linux-sgx/host_main.c | 17 +++-- pal/src/host/linux-sgx/host_thread.c | 36 ++++----- pal/src/host/linux-sgx/pal_tcb.h | 9 +-- 13 files changed, 114 insertions(+), 78 deletions(-) diff --git a/Documentation/manifest-syntax.rst b/Documentation/manifest-syntax.rst index 35544a2ff8..df9d12faf7 100644 --- a/Documentation/manifest-syntax.rst +++ b/Documentation/manifest-syntax.rst @@ -1197,8 +1197,6 @@ Enabling per-thread and process-wide SGX stats sgx.enable_stats = [true|false] (Default: false) -Gramine must be compiled with --buildtype=debug or --buildtype=debugoptimized -for this option to work. In addition, the manifest must contain sgx.debug = true. This syntax specifies whether to enable SGX enclave-specific statistics: #. ``TCS.FLAGS.DBGOPTIN`` flag. This flag is set in all enclave threads and @@ -1215,6 +1213,10 @@ This syntax specifies whether to enable SGX enclave-specific statistics: includes creating the enclave, adding enclave pages, measuring them and initializing the enclave. +For this option to take effect, Gramine must be compiled with +``--buildtype=debug`` or ``--buildtype=debugoptimized``. Otherwise (if built in +release mode), Gramine will exit with an error. + .. warning:: This option is insecure and cannot be used with production enclaves (``sgx.debug = false``). 
If a production enclave is started with this option diff --git a/Documentation/performance.rst b/Documentation/performance.rst index 56c0237c53..cb664cee80 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -19,11 +19,12 @@ Enabling per-thread and process-wide SGX stats See also :ref:`perf` below for installing ``perf``. Enable statistics using ``sgx.enable_stats = true`` manifest option (note that -Gramine must be compiled with ``--buildtype=debug`` or ``--buildtype=debugoptimized`` -for this option to work). Now your graminized application correctly reports -performance counters. This is useful when using e.g. ``perf stat`` to collect -performance statistics. This manifest option also forces Gramine to dump -SGX-related information on each thread/process exit. Here is an example: +Gramine must be compiled with ``--buildtype=debug`` or +``--buildtype=debugoptimized`` for this option to work). Now your graminized +application correctly reports performance counters. This is useful when using +e.g. ``perf stat`` to collect performance statistics. This manifest option also +forces Gramine to dump SGX-related information on each thread/process exit. 
Here +is an example: :: diff --git a/libos/test/regression/fork_and_access_file.manifest.template b/libos/test/regression/fork_and_access_file.manifest.template index 490ca9633a..6e359ce796 100644 --- a/libos/test/regression/fork_and_access_file.manifest.template +++ b/libos/test/regression/fork_and_access_file.manifest.template @@ -12,6 +12,9 @@ sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }} sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} +# this is only to test that `sgx.enable_stats` works (it can only be specified for debug-mode tests) +sgx.enable_stats = true + sgx.trusted_files = [ "file:{{ gramine.libos }}", "file:{{ gramine.runtimedir(libc) }}/", diff --git a/libos/test/regression/multi_pthread.manifest.template b/libos/test/regression/multi_pthread.manifest.template index 57ab9bcc86..2174dcbf98 100644 --- a/libos/test/regression/multi_pthread.manifest.template +++ b/libos/test/regression/multi_pthread.manifest.template @@ -13,7 +13,6 @@ sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '8' }} sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} -sgx.enable_stats = true sgx.trusted_files = [ "file:{{ gramine.libos }}", diff --git a/libos/test/regression/multi_pthread_exitless.manifest.template b/libos/test/regression/multi_pthread_exitless.manifest.template index 5e147b6242..aee2c4be0b 100644 --- a/libos/test/regression/multi_pthread_exitless.manifest.template +++ b/libos/test/regression/multi_pthread_exitless.manifest.template @@ -14,7 +14,6 @@ sgx.insecure__rpc_thread_num = 8 sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} -sgx.enable_stats = true sgx.trusted_files = [ "file:{{ gramine.libos }}", diff --git a/pal/regression/Thread2.manifest.template b/pal/regression/Thread2.manifest.template index 304080bcad..536567203a 100644 --- a/pal/regression/Thread2.manifest.template +++ 
b/pal/regression/Thread2.manifest.template @@ -1,7 +1,6 @@ loader.entrypoint = "file:{{ binary_dir }}/{{ entrypoint }}" sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '2' }} -sgx.enable_stats = true sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} diff --git a/pal/regression/Thread2_exitless.manifest.template b/pal/regression/Thread2_exitless.manifest.template index f5c9fc276c..4005174e97 100644 --- a/pal/regression/Thread2_exitless.manifest.template +++ b/pal/regression/Thread2_exitless.manifest.template @@ -4,7 +4,6 @@ loader.entrypoint = "file:{{ binary_dir }}/{{ entrypoint }}" sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '2' }} sgx.insecure__rpc_thread_num = 2 -sgx.enable_stats = true sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} diff --git a/pal/src/host/linux-sgx/host_entry.S b/pal/src/host/linux-sgx/host_entry.S index 9b55f3eba7..18e0bd9c26 100644 --- a/pal/src/host/linux-sgx/host_entry.S +++ b/pal/src/host/linux-sgx/host_entry.S @@ -4,9 +4,8 @@ .extern tcs_base .extern g_in_aex_profiling -#ifdef DEBUG .extern maybe_dump_and_reset_stats -#endif + .global sgx_ecall .type sgx_ecall, @function @@ -72,9 +71,9 @@ async_exit_pointer: # increment per-thread AEX counter for stats lock incq %gs:PAL_HOST_TCB_AEX_CNT +#ifdef DEBUG # Inform that we are in AEX profiling code movb $1, %gs:PAL_HOST_TCB_IN_AEX_PROF - # Save ERESUME parameters pushq %rax .cfi_adjust_cfa_offset 8 @@ -83,25 +82,20 @@ async_exit_pointer: pushq %rcx .cfi_adjust_cfa_offset 8 - pushq %rbp - .cfi_adjust_cfa_offset 8 + # Align stack (required by System V AMD64 ABI) movq %rsp, %rbp - .cfi_offset %rbp, -16 .cfi_def_cfa_register %rbp - andq $~0xF, %rsp # Required by System V AMD64 ABI. 
+ andq $~0xF, %rsp -#ifdef DEBUG # Call sgx_profile_sample_aex with %rdi = TCS movq %rbx, %rdi call sgx_profile_sample_aex call maybe_dump_and_reset_stats -#endif # Restore stack movq %rbp, %rsp - popq %rbp - .cfi_def_cfa %rsp, 8 + .cfi_def_cfa_register %rsp # Restore ERESUME parameters popq %rcx @@ -110,8 +104,9 @@ async_exit_pointer: .cfi_adjust_cfa_offset -8 popq %rax .cfi_adjust_cfa_offset -8 - movb $0, %gs:PAL_HOST_TCB_IN_AEX_PROF +#endif + .cfi_endproc # fall-through to ERESUME @@ -151,12 +146,12 @@ sgx_raise: .cfi_offset %rbp, -16 .cfi_def_cfa_register %rbp +#if DEBUG # Adjust stack and save RDI subq $8, %rsp andq $~0xF, %rsp # Required by System V AMD64 ABI. movq %rdi, -8(%rbp) -#if DEBUG # Call sgx_profile_sample_ocall_outer with RBX (ocall handler) movq %rbx, %rdi call sgx_profile_sample_ocall_outer @@ -166,10 +161,12 @@ sgx_raise: call sgx_profile_sample_ocall_inner call maybe_dump_and_reset_stats -#endif # Restore RDI movq -8(%rbp), %rdi +#else + andq $~0xF, %rsp # Required by System V AMD64 ABI. +#endif callq *%rbx diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 47e37c701f..127b8ab630 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -22,7 +22,6 @@ #include "assert.h" #include "cpu.h" #include "debug_map.h" -#include "gdb_integration/sgx_gdb.h" #include "host_internal.h" #include "host_syscall.h" #include "pal_rpc_queue.h" @@ -30,16 +29,18 @@ #include "sigreturn.h" #include "ucontext.h" -static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT, SIGUSR1}; - #ifdef DEBUG +static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT}; + +#ifdef DEBUG /* - * If no SGX-stats reset is in flight, this variable is zero. + * If no SGX-stats reset is in flight, `g_stats_reset_leader_tid` is zero. * * Upon user-induced SIGUSR1 on some thread (below happens in signal handling context): * 1. 
If `g_stats_reset_leader_tid == 0`, then it is set to the TID of this thread -- this thread * is designated to be the "leader" of SGX-stats reset flow, and it will broadcast SIGUSR1 to - * all other threads on the first AEX (since we can't do any complex logic in signal handling - * context, we postpone to the normal context which starts right-after an AEX event). + * all other threads on the first AEX or right-before executing the next OCALL in untrusted + * runtime (since we can't do any complex logic in signal handling context, we postpone it to + * the normal context). * 2. If `g_stats_reset_leader_tid != 0`, then it means that an SGX-stats reset flow is in flight. * Two cases are possible: * a. If PID of sending process is the current PID, then the signal was sent by the "leader" @@ -50,7 +51,7 @@ static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT, SIGUSR1}; * and this is a new "SGX-stats reset" event from the user. Since the previous flow is * still in flight, the thread must ignore this signal. * - * On each AEX, each thread checks (below happens in normal context): + * On each AEX and on each OCALL execution, each thread checks (below happens in normal context): * 1. If `g_stats_reset_leader_tid == 0`, do nothing (no SGX-stats reset is in flight). * 2. If `g_stats_reset_leader_tid == gettid()`, then this is the "leader" thread and it must * broadcast SIGUSR1 to all other threads and wait until they perform their SGX-stats resets. @@ -59,8 +60,26 @@ static const int ASYNC_SIGNALS[] = {SIGTERM, SIGCONT, SIGUSR1}; * * Application threads on Linux can never be 0, so this "no-op" default is safe. */ - int g_stats_reset_leader_tid = 0; - long long int g_stats_reset_epoch = 0; +static int g_stats_reset_leader_tid = 0; + +/* + * Each "SGX stats reset" is supposed to be executed in one epoch. Epoch is changed (i.e. + * `g_stats_reset_epoch` is atomically incremented) when any thread exits. 
If an "SGX stats reset" + * round detects that the epoch has changed before the leader thread got responses from all follower + * threads, this "SGX stats reset" round is aborted, see the while loop in maybe_dump_and_reset_stats(). + * + * This epoch mechanism is required to avoid data races: + * - If the leader thread exits in the meantime (e.g., upon SIGTERM), the + * `g_stats_reset_leader_tid` variable would never be reset and future rounds would become + * impossible (all threads would think that some previous round is still in flight). + * - If some follower thread exits in the meantime, the wait-for-all-followers loop in + * maybe_dump_and_reset_stats() would never break. + * + * Note that the epoch is *not* changed when a new thread is spawned, i.e. the "SGX stats reset" + * would successfully finish but without taking into account the newly spawned thread. This is a + * benign scenario, though the quality of SGX-stats reporting will be lower in this case. + */ +static uint32_t g_stats_reset_epoch = 0; #endif /* DEBUG */ static int block_signal(int sig, bool block) { @@ -348,17 +367,18 @@ void maybe_dump_and_reset_stats(void) { if (DO_SYSCALL(gettid) == leader_tid) { log_always("----- DUMPING and RESETTING SGX STATS -----"); - size_t followers_num = send_sigusr1_to_followers(leader_tid); + uint32_t epoch = __atomic_load_n(&g_stats_reset_epoch, __ATOMIC_ACQUIRE); - __atomic_fetch_add(&g_stats_reset_epoch, 1, __ATOMIC_ACQ_REL); - long long int noted_epoch = __atomic_load_n(&g_stats_reset_epoch, __ATOMIC_ACQUIRE); - while ((__atomic_load_n(&followers_visited_num, __ATOMIC_ACQUIRE)) < followers_num) { + size_t followers_num = broadcast_signal_to_threads(SIGUSR1, /*exclude_tid=*/leader_tid); - if(__atomic_load_n(&g_stats_reset_epoch, __ATOMIC_ACQUIRE) > noted_epoch) - log_warning("One of the worker threads exited"); + while ((__atomic_load_n(&followers_visited_num, __ATOMIC_ACQUIRE)) < followers_num) { DO_SYSCALL(sched_yield); + if
(__atomic_load_n(&g_stats_reset_epoch, __ATOMIC_ACQUIRE) != epoch) { + log_warning("SGX stats reset (started due to SIGUSR1) was interrupted because at " + "least one thread exited in the meantime; stats may be incomplete"); + break; + } } - log_always("STATS notes_epoch %lld", noted_epoch); update_print_and_reset_stats(/*process_wide=*/true); __atomic_store_n(&followers_visited_num, 0, __ATOMIC_RELEASE); @@ -369,4 +389,27 @@ void maybe_dump_and_reset_stats(void) { __atomic_fetch_add(&followers_visited_num, 1, __ATOMIC_ACQ_REL); } } + +/* called when some thread exits -- a possible "SGX stats reset" round must be aborted, see above */ +void abort_current_reset_stats(int exiting_tid) { + if (!g_sgx_enable_stats) + return; + + /* make sure that an exiting thread does not receive SIGUSR1; this prevents a data race when + * this thread receives SIGUSR1, initiates a new "SGX stats reset" round and immediately exits, + * leaving `g_stats_reset_leader_tid` set to a dangling-tid value */ + block_signal(SIGUSR1, /*block=*/true); + + /* unconditionally increment the "SGX stats reset" epoch, reacting to every thread exit */ + __atomic_fetch_add(&g_stats_reset_epoch, 1, __ATOMIC_ACQ_REL); + + int leader_tid = __atomic_load_n(&g_stats_reset_leader_tid, __ATOMIC_ACQUIRE); + if (leader_tid == exiting_tid) { + /* unset leader, otherwise no other thread would be able to initiate "SGX stats reset" + * rounds in the future */ + __atomic_store_n(&g_stats_reset_leader_tid, 0, __ATOMIC_RELEASE); + log_warning("SGX stats reset (started due to SIGUSR1) was aborted because initiating " + "thread exited; stats may be incomplete"); + } +} #endif /* DEBUG */ diff --git a/pal/src/host/linux-sgx/host_internal.h b/pal/src/host/linux-sgx/host_internal.h index 4031390ada..3db8e2912a 100644 --- a/pal/src/host/linux-sgx/host_internal.h +++ b/pal/src/host/linux-sgx/host_internal.h @@ -60,9 +60,8 @@ extern struct pal_enclave g_pal_enclave; #ifdef DEBUG extern bool g_trigger_profile_reinit; extern char 
g_profile_filename[128]; -extern int g_stats_reset_leader_tid; -extern long long int g_stats_reset_epoch; #endif /* DEBUG */ + void* realloc(void* ptr, size_t new_size); int open_sgx_driver(void); @@ -130,10 +129,11 @@ void thread_exit(int status); int sgx_signal_setup(void); int block_async_signals(bool block); +size_t broadcast_signal_to_threads(int sig, int exclude_tid); -#ifdef DEBUG int set_tcs_debug_flag_if_debugging(void* tcs_addrs[], size_t count); +#ifdef DEBUG /* SGX profiling (sgx_profile.c) */ /* diff --git a/pal/src/host/linux-sgx/host_main.c b/pal/src/host/linux-sgx/host_main.c index 3f9d106ce3..6722c727c7 100644 --- a/pal/src/host/linux-sgx/host_main.c +++ b/pal/src/host/linux-sgx/host_main.c @@ -617,12 +617,12 @@ static int initialize_enclave(struct pal_enclave* enclave, const char* manifest_ dbg->tcs_addrs[t] = tcs_addrs[t]; } -#ifdef DEBUG ret = set_tcs_debug_flag_if_debugging(tcs_addrs, enclave->thread_num); if (ret < 0) { goto out; } +#ifdef DEBUG /* * Report libpal map. All subsequent files will be reported via PalDebugMapAdd(), but this * one has to be handled separately. @@ -761,6 +761,14 @@ static int parse_loader_config(char* manifest, struct pal_enclave* enclave_info, goto out; } +#ifndef DEBUG + if (g_sgx_enable_stats) { + log_error("'sgx.enable_stats = true' is specified in non-debug mode, this is disallowed"); + ret = -EINVAL; + goto out; + } +#endif /* !DEBUG */ + ret = toml_string_in(manifest_root, "sgx.sigfile", &dummy_sigfile_str); if (ret < 0 || dummy_sigfile_str) { log_error("sgx.sigfile is not supported anymore. 
Please update your manifest according to " @@ -1057,15 +1065,12 @@ static int load_enclave(struct pal_enclave* enclave, char* args, size_t args_siz uint64_t end_time; DO_SYSCALL(gettimeofday, &tv, NULL); end_time = tv.tv_sec * 1000000UL + tv.tv_usec; -#ifdef DEBUG if (g_sgx_enable_stats) { - /* This shows the time for Gramine + the Intel SGX driver to initialize the untrusted - * PAL, config and create the SGX enclave, add enclave pages, measure and init it. - */ + /* Show the time for Gramine + the Intel SGX driver to initialize the untrusted PAL, config + * and create the SGX enclave, add enclave pages, measure and init it. */ log_always("----- SGX enclave loading time = %10lu microseconds -----", end_time - start_time); } -#endif /* DEBUG */ /* start running trusted PAL */ ecall_enclave_start(enclave->libpal_uri, args, args_size, env, env_size, parent_stream_fd, diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index d13bcb18a0..fa71d8b342 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -26,7 +26,6 @@ static size_t g_enclave_thread_num = 0; bool g_sgx_enable_stats = false; -#ifdef DEBUG void update_print_and_reset_stats(bool process_wide) { static atomic_ulong g_eenter_cnt = 0; static atomic_ulong g_eexit_cnt = 0; @@ -82,19 +81,20 @@ void update_print_and_reset_stats(bool process_wide) { g_async_signal_cnt = 0; } } -#endif /* DEBUG */ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->self = tcb; tcb->tcs = NULL; /* initialized by child thread */ tcb->stack = stack; tcb->alt_stack = alt_stack; + tcb->eenter_cnt = 0; tcb->eexit_cnt = 0; tcb->aex_cnt = 0; tcb->sync_signal_cnt = 0; tcb->async_signal_cnt = 0; tcb->reset_stats = false; + tcb->profile_sample_time = 0; tcb->last_async_event = PAL_EVENT_NO_EVENT; @@ -120,12 +120,10 @@ static int add_dynamic_tcs(sgx_arch_tcs_t* tcs) { int ret; struct enclave_dbginfo* dbginfo = (struct 
enclave_dbginfo*)DBGINFO_ADDR; -#ifdef DEBUG ret = set_tcs_debug_flag_if_debugging((void**)&tcs, /*count=*/1); if (ret < 0) { return ret; } -#endif /* DEBUG */ size_t i = 0; spinlock_lock(&g_enclave_thread_map_lock); @@ -302,12 +300,7 @@ noreturn void thread_exit(int status) { block_async_signals(true); #ifdef DEBUG - if(__atomic_load_n(&g_stats_reset_leader_tid, __ATOMIC_ACQUIRE) == DO_SYSCALL(gettid)) { - __atomic_store_n(&g_stats_reset_leader_tid, 0, __ATOMIC_RELEASE); - log_warning("Main thread exited. The SIGUSR1 signal may be lost"); - } - __atomic_fetch_add(&g_stats_reset_epoch, 1, __ATOMIC_ACQ_REL); - + abort_current_reset_stats(DO_SYSCALL(gettid)); update_print_and_reset_stats(/*process_wide=*/false); #endif /* DEBUG */ @@ -413,7 +406,6 @@ int get_tid_from_tcs(void* tcs) { return tid ? tid : -EINVAL; } -#ifdef DEBUG int set_tcs_debug_flag_if_debugging(void* tcs_addrs[], size_t count) { if (!g_sgx_enable_stats && !g_vtune_profile_enabled) return 0; @@ -451,21 +443,19 @@ int set_tcs_debug_flag_if_debugging(void* tcs_addrs[], size_t count) { return ret; } -int send_sigusr1_to_followers(pid_t leader_tid) { - size_t followers_num = 0; - size_t enclave_thread_count = current_enclave_thread_cnt(); - +size_t broadcast_signal_to_threads(int sig, int exclude_tid) { + size_t threads_num = 0; spinlock_lock(&g_enclave_thread_map_lock); - // enclave_thread_count limits the repeatative looping over thread ids - for (size_t i = 0; i < enclave_thread_count; i++) { - int follower_tid = g_enclave_thread_map[i].tid; - if (!follower_tid || follower_tid == leader_tid) + + for (size_t i = 0; i < g_enclave_thread_num; i++) { + int thread_tid = g_enclave_thread_map[i].tid; + if (!thread_tid || thread_tid == exclude_tid) continue; - DO_SYSCALL(tgkill, g_host_pid, follower_tid, SIGUSR1); - followers_num++; + DO_SYSCALL(tgkill, g_host_pid, thread_tid, sig); + threads_num++; } + spinlock_unlock(&g_enclave_thread_map_lock); - return followers_num; + return threads_num; } -#endif /* 
DEBUG */ diff --git a/pal/src/host/linux-sgx/pal_tcb.h b/pal/src/host/linux-sgx/pal_tcb.h index 179298c65a..02ee085eb0 100644 --- a/pal/src/host/linux-sgx/pal_tcb.h +++ b/pal/src/host/linux-sgx/pal_tcb.h @@ -5,7 +5,6 @@ #include #include "pal.h" -#include "pal_linux_types.h" #include "sgx_arch.h" struct untrusted_area { @@ -106,8 +105,7 @@ typedef struct pal_host_tcb { uint64_t profile_sample_time; /* last time sgx_profile_sample() recorded a sample */ int32_t last_async_event; /* last async signal, reported to the enclave on ocall return */ int* start_status_ptr; /* pointer to return value of clone_thread */ - bool reset_stats; /* if true, dump SGX stats and reset them on next AEX event or - after next enclave exit for an OCALL */ + bool reset_stats; /* if true, dump SGX stats and reset them on next AEX/OCALL */ } PAL_HOST_TCB; extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack); @@ -122,9 +120,10 @@ static inline PAL_HOST_TCB* pal_get_host_tcb(void) { } extern bool g_sgx_enable_stats; -#ifdef DEBUG void update_print_and_reset_stats(bool process_wide); -int send_sigusr1_to_followers(pid_t leader_tid); +#ifdef DEBUG void maybe_dump_and_reset_stats(void); +void abort_current_reset_stats(int exiting_tid); #endif /* DEBUG */ + #endif /* IN_ENCLAVE */ From 8daa0cb8901454565c6a34a7895b0cc6a7992f82 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Mon, 19 Aug 2024 07:39:06 -0700 Subject: [PATCH 16/16] fixup! 
Add a support for debug-stats dumping and resetting on demand Signed-off-by: Dmitrii Kuvaiskii --- pal/src/host/linux-sgx/enclave_framework.c | 10 +++++++++- pal/src/host/linux-sgx/host_main.c | 6 ++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pal/src/host/linux-sgx/enclave_framework.c b/pal/src/host/linux-sgx/enclave_framework.c index 4500f38141..ed9398ed78 100644 --- a/pal/src/host/linux-sgx/enclave_framework.c +++ b/pal/src/host/linux-sgx/enclave_framework.c @@ -67,19 +67,27 @@ bool sgx_is_valid_untrusted_ptr(const void* _addr, size_t size, size_t alignment } /* - * We run some functions (e.g. sgx_profile_sample()) during asynchronous enclave exit (AEX), which + * When DEBUG is enabled, we run sgx_profile_sample() during asynchronous enclave exit (AEX), which * uses the stack. Make sure to update URSP so that the AEX handler does not overwrite the part of * the stack that we just allocated. * * (Recall that URSP is an outside stack pointer, saved by EENTER and restored on AEX by the SGX * hardware itself.) */ +#ifdef DEBUG + #define UPDATE_USTACK(_ustack) \ do { \ SET_ENCLAVE_TCB(ustack, _ustack); \ GET_ENCLAVE_TCB(gpr)->ursp = (uint64_t)_ustack; \ } while(0) +#else + +#define UPDATE_USTACK(_ustack) SET_ENCLAVE_TCB(ustack, _ustack) + +#endif + void* sgx_prepare_ustack(void) { void* old_ustack = GET_ENCLAVE_TCB(ustack); diff --git a/pal/src/host/linux-sgx/host_main.c b/pal/src/host/linux-sgx/host_main.c index 6722c727c7..96ff6dbb37 100644 --- a/pal/src/host/linux-sgx/host_main.c +++ b/pal/src/host/linux-sgx/host_main.c @@ -1065,9 +1065,11 @@ static int load_enclave(struct pal_enclave* enclave, char* args, size_t args_siz uint64_t end_time; DO_SYSCALL(gettimeofday, &tv, NULL); end_time = tv.tv_sec * 1000000UL + tv.tv_usec; + if (g_sgx_enable_stats) { - /* Show the time for Gramine + the Intel SGX driver to initialize the untrusted PAL, config - * and create the SGX enclave, add enclave pages, measure and init it. 
*/ + /* This shows the time for Gramine + the Intel SGX driver to initialize the untrusted + * PAL, config and create the SGX enclave, add enclave pages, measure and init it. + */ log_always("----- SGX enclave loading time = %10lu microseconds -----", end_time - start_time); }