From 18a30fb1684fb944a3d4e6d0d805a5836721cf08 Mon Sep 17 00:00:00 2001 From: Ian Ziemba Date: Wed, 18 Sep 2024 08:30:56 -0500 Subject: [PATCH 1/4] fabtests: Move pingpong logic into pre-posted func Move the current pingpong logic into a pingpong pre-posted RX function. This better describes the behavior of this pingpong test. In addition, this change will allow for a pingpong without pre-posted RX buffers to be defined. Signed-off-by: Ian Ziemba --- fabtests/benchmarks/benchmark_shared.c | 62 ++++++++++++++++---------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/fabtests/benchmarks/benchmark_shared.c b/fabtests/benchmarks/benchmark_shared.c index 935b7961cb8..32dc4304c11 100644 --- a/fabtests/benchmarks/benchmark_shared.c +++ b/fabtests/benchmarks/benchmark_shared.c @@ -86,27 +86,10 @@ void ft_benchmark_usage(void) "# of iterations > window size"); } -int pingpong(void) +/* Pingpong latency test with pre-posted receive buffers. */ +static int pingpong_pre_posted_rx(size_t inject_size) { int ret, i; - size_t inject_size = fi->tx_attr->inject_size; - - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_INJECT_MSG_SIZE, - &inject_size, &(size_t){sizeof inject_size}); - if (ret && ret != -FI_ENOPROTOOPT) { - FT_PRINTERR("fi_getopt(FI_OPT_INJECT_MSG_SIZE)", ret); - return ret; - } - - if (inject_size_set) - inject_size = opts.inject_size; - - if (opts.options & FT_OPT_ENABLE_HMEM) - inject_size = 0; - - ret = ft_sync(); - if (ret) - return ret; if (opts.dst_addr) { for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { @@ -114,9 +97,11 @@ int pingpong(void) ft_start(); if (opts.transfer_size <= inject_size) - ret = ft_inject(ep, remote_fi_addr, opts.transfer_size); + ret = ft_inject(ep, remote_fi_addr, + opts.transfer_size); else - ret = ft_tx(ep, remote_fi_addr, opts.transfer_size, &tx_ctx); + ret = ft_tx(ep, remote_fi_addr, + opts.transfer_size, &tx_ctx); if (ret) return ret; @@ -134,15 +119,46 @@ int pingpong(void) return ret; if 
(opts.transfer_size <= inject_size) - ret = ft_inject(ep, remote_fi_addr, opts.transfer_size); + ret = ft_inject(ep, remote_fi_addr, + opts.transfer_size); else - ret = ft_tx(ep, remote_fi_addr, opts.transfer_size, &tx_ctx); + ret = ft_tx(ep, remote_fi_addr, + opts.transfer_size, &tx_ctx); if (ret) return ret; } } ft_stop(); + return FI_SUCCESS; +} + +int pingpong(void) +{ + int ret; + size_t inject_size = fi->tx_attr->inject_size; + + ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_INJECT_MSG_SIZE, + &inject_size, &(size_t){sizeof inject_size}); + if (ret && ret != -FI_ENOPROTOOPT) { + FT_PRINTERR("fi_getopt(FI_OPT_INJECT_MSG_SIZE)", ret); + return ret; + } + + if (inject_size_set) + inject_size = opts.inject_size; + + if (opts.options & FT_OPT_ENABLE_HMEM) + inject_size = 0; + + ret = ft_sync(); + if (ret) + return ret; + + ret = pingpong_pre_posted_rx(inject_size); + if (ret) + return ret; + if (opts.machr) show_perf_mr(opts.transfer_size, opts.iterations, &start, &end, 2, opts.argc, opts.argv); From f86ac48fe628f262b7d0787e7e88a1edd18da9dd Mon Sep 17 00:00:00 2001 From: Ian Ziemba Date: Wed, 18 Sep 2024 09:27:00 -0500 Subject: [PATCH 2/4] fabtests: Define common run pingpong function The run() logic in rdm_pingpong and rdm_tagged_pingpong is the same. Consolidate this logic into a single run_pingpong() function. 
Signed-off-by: Ian Ziemba --- fabtests/benchmarks/benchmark_shared.c | 28 ++++++++++++++++++++ fabtests/benchmarks/benchmark_shared.h | 1 + fabtests/benchmarks/rdm_pingpong.c | 30 +-------------------- fabtests/benchmarks/rdm_tagged_pingpong.c | 32 +---------------------- 4 files changed, 31 insertions(+), 60 deletions(-) diff --git a/fabtests/benchmarks/benchmark_shared.c b/fabtests/benchmarks/benchmark_shared.c index 32dc4304c11..32b0fdc926c 100644 --- a/fabtests/benchmarks/benchmark_shared.c +++ b/fabtests/benchmarks/benchmark_shared.c @@ -168,6 +168,34 @@ int pingpong(void) return 0; } +int run_pingpong(void) +{ + int i, ret = 0; + + ret = ft_init_fabric(); + if (ret) + return ret; + + if (!(opts.options & FT_OPT_SIZE)) { + for (i = 0; i < TEST_CNT; i++) { + if (!ft_use_size(i, opts.sizes_enabled)) + continue; + opts.transfer_size = test_size[i].size; + init_test(&opts, test_name, sizeof(test_name)); + ret = pingpong(); + if (ret) + return ret; + } + } else { + init_test(&opts, test_name, sizeof(test_name)); + ret = pingpong(); + if (ret) + return ret; + } + + return ft_finalize(); +} + int pingpong_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote) { int ret, i; diff --git a/fabtests/benchmarks/benchmark_shared.h b/fabtests/benchmarks/benchmark_shared.h index 1dcc7352fea..57f0facb087 100644 --- a/fabtests/benchmarks/benchmark_shared.h +++ b/fabtests/benchmarks/benchmark_shared.h @@ -46,6 +46,7 @@ extern "C" { void ft_parse_benchmark_opts(int op, char *optarg); void ft_benchmark_usage(void); int pingpong(void); +int run_pingpong(void); int bandwidth(void); int pingpong_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote); int bandwidth_rma(enum ft_rma_opcodes op, struct fi_rma_iov *remote); diff --git a/fabtests/benchmarks/rdm_pingpong.c b/fabtests/benchmarks/rdm_pingpong.c index 9eb5a0e0de5..f5c5871e22d 100644 --- a/fabtests/benchmarks/rdm_pingpong.c +++ b/fabtests/benchmarks/rdm_pingpong.c @@ -36,34 +36,6 @@ #include "shared.h" #include 
"benchmark_shared.h" -static int run(void) -{ - int i, ret = 0; - - ret = ft_init_fabric(); - if (ret) - return ret; - - if (!(opts.options & FT_OPT_SIZE)) { - for (i = 0; i < TEST_CNT; i++) { - if (!ft_use_size(i, opts.sizes_enabled)) - continue; - opts.transfer_size = test_size[i].size; - init_test(&opts, test_name, sizeof(test_name)); - ret = pingpong(); - if (ret) - return ret; - } - } else { - init_test(&opts, test_name, sizeof(test_name)); - ret = pingpong(); - if (ret) - return ret; - } - - return ft_finalize(); -} - int main(int argc, char **argv) { int op, ret; @@ -106,7 +78,7 @@ int main(int argc, char **argv) hints->tx_attr->tclass = FI_TC_LOW_LATENCY; hints->addr_format = opts.address_format; - ret = run(); + ret = run_pingpong(); ft_free_res(); return ft_exit_code(ret); diff --git a/fabtests/benchmarks/rdm_tagged_pingpong.c b/fabtests/benchmarks/rdm_tagged_pingpong.c index a0288ad7ee1..36a11152eb8 100644 --- a/fabtests/benchmarks/rdm_tagged_pingpong.c +++ b/fabtests/benchmarks/rdm_tagged_pingpong.c @@ -36,36 +36,6 @@ #include #include "benchmark_shared.h" -static int run(void) -{ - int i, ret = 0; - - ret = ft_init_fabric(); - if (ret) - return ret; - - if (!(opts.options & FT_OPT_SIZE)) { - for (i = 0; i < TEST_CNT; i++) { - if (!ft_use_size(i, opts.sizes_enabled)) - continue; - opts.transfer_size = test_size[i].size; - init_test(&opts, test_name, sizeof(test_name)); - ret = pingpong(); - if (ret) - goto out; - } - } else { - init_test(&opts, test_name, sizeof(test_name)); - ret = pingpong(); - if (ret) - goto out; - } - - ft_finalize(); -out: - return ret; -} - int main(int argc, char **argv) { int op, ret; @@ -108,7 +78,7 @@ int main(int argc, char **argv) hints->tx_attr->tclass = FI_TC_LOW_LATENCY; hints->addr_format = opts.address_format; - ret = run(); + ret = run_pingpong(); ft_free_res(); return ft_exit_code(ret); From b46d82649c91c400b8ef49e64140f677605c2f22 Mon Sep 17 00:00:00 2001 From: Ian Ziemba Date: Thu, 19 Sep 2024 11:54:39 -0500 
Subject: [PATCH 3/4] fabtests: Split out ft_sync logic Split out the ft_sync logic into two separate functions: inband sync (ft_sync_inband) and out-of-band sync (ft_sync_oob). The inband sync supports the option to conditionally repost buffers after the sync. The breaking out of the sync logic is needed to support fi_rdm_pingpong/fi_rdm_tagged_pingpong with a no pre-posted RX buffer option. Signed-off-by: Ian Ziemba --- fabtests/common/shared.c | 87 +++++++++++++++++++++++++-------------- fabtests/include/shared.h | 2 + 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index 11b7a638fa2..2d4387ba4df 100644 --- a/fabtests/common/shared.c +++ b/fabtests/common/shared.c @@ -3012,49 +3012,74 @@ void eq_readerr(struct fid_eq *eq, const char *eq_str) } } -int ft_sync() +int ft_sync_oob(void) { char buf = 'a'; int ret; if (opts.dst_addr) { - if (!(opts.options & FT_OPT_OOB_SYNC)) { - ret = ft_tx_msg(ep, remote_fi_addr, tx_buf, 1, &tx_ctx, - FI_DELIVERY_COMPLETE); - if (ret) - return ret; + ret = ft_sock_send(oob_sock, &buf, 1); + if (ret) + return ret; - ret = ft_rx(ep, 1); - } else { - ret = ft_sock_send(oob_sock, &buf, 1); - if (ret) - return ret; + ret = ft_sock_recv(oob_sock, &buf, 1); + if (ret) + return ret; + } else { + ret = ft_sock_recv(oob_sock, &buf, 1); + if (ret) + return ret; - ret = ft_sock_recv(oob_sock, &buf, 1); - if (ret) - return ret; - } + ret = ft_sock_send(oob_sock, &buf, 1); + if (ret) + return ret; + } + + return FI_SUCCESS; +} + +int ft_sync_inband(bool repost_rx) +{ + int ret; + + if (opts.dst_addr) { + ret = ft_tx_msg(ep, remote_fi_addr, tx_buf, 1, &tx_ctx, + FI_DELIVERY_COMPLETE); + if (ret) + return ret; + + ret = ft_get_rx_comp(rx_seq); + if (ret) + return ret; } else { - if (!(opts.options & FT_OPT_OOB_SYNC)) { - ret = ft_rx(ep, 1); - if (ret) - return ret; + ret = ft_get_rx_comp(rx_seq); + if (ret) + return ret; - ret = ft_tx_msg(ep, remote_fi_addr, tx_buf, 1, &tx_ctx, - 
FI_DELIVERY_COMPLETE); - if (ret) - return ret; - } else { - ret = ft_sock_recv(oob_sock, &buf, 1); - if (ret) - return ret; + ret = ft_tx_msg(ep, remote_fi_addr, tx_buf, 1, &tx_ctx, + FI_DELIVERY_COMPLETE); + if (ret) + return ret; + } - ret = ft_sock_send(oob_sock, &buf, 1); - if (ret) - return ret; - } + if (repost_rx) { + ret = ft_post_rx(ep, rx_size, &rx_ctx); + if (ret) + return ret; } + return FI_SUCCESS; +} + +int ft_sync() +{ + int ret; + + if (ft_check_opts(FT_OPT_OOB_SYNC)) + ret = ft_sync_oob(); + else + ret = ft_sync_inband(true); + return ret; } diff --git a/fabtests/include/shared.h b/fabtests/include/shared.h index 4bed8cb572b..80fd5538fe4 100644 --- a/fabtests/include/shared.h +++ b/fabtests/include/shared.h @@ -560,6 +560,8 @@ void *ft_get_aligned_addr(void *ptr, size_t alignment) int ft_read_cq(struct fid_cq *cq, uint64_t *cur, uint64_t total, int timeout, uint64_t tag); +int ft_sync_oob(void); +int ft_sync_inband(bool repost_rx); int ft_sync(void); int ft_sync_pair(int status); int ft_fork_and_pair(void); From 353d62aa4735f9da61e3dbd1352916ecf3037dd4 Mon Sep 17 00:00:00 2001 From: Ian Ziemba Date: Wed, 18 Sep 2024 09:09:28 -0500 Subject: [PATCH 4/4] fabtests: Support no prepost RX pingpong test The new pingpong test allows for TX operations to be posted and processed, if necessary, before posting the RX buffer. This better aligns with how OSU latency works. By doing this, fi_rdm_tagged_latency is now lower than OSU latency, which makes sense since less SW is involved. The no prepost RX pingpong test can be enabled by using the -r option.
Signed-off-by: Ian Ziemba --- fabtests/benchmarks/benchmark_shared.c | 102 +++++++++++++++++++++++-- fabtests/benchmarks/benchmark_shared.h | 2 +- fabtests/common/shared.c | 8 ++ fabtests/include/shared.h | 1 + 4 files changed, 106 insertions(+), 7 deletions(-) diff --git a/fabtests/benchmarks/benchmark_shared.c b/fabtests/benchmarks/benchmark_shared.c index 32b0fdc926c..6c863bbcf3a 100644 --- a/fabtests/benchmarks/benchmark_shared.c +++ b/fabtests/benchmarks/benchmark_shared.c @@ -70,6 +70,9 @@ void ft_parse_benchmark_opts(int op, char *optarg) case 'W': opts.window_size = atoi(optarg); break; + case 'r': + opts.options |= FT_OPT_NO_PRE_POSTED_RX; + break; default: break; } @@ -84,6 +87,10 @@ void ft_benchmark_usage(void) "* The following condition is required to have at least " "one window\nsize # of messsages to be sent: " "# of iterations > window size"); + FT_PRINT_OPTS_USAGE("-r", "Do not pre post RX buffers"); + FT_PRINT_OPTS_USAGE("", "Only the following tests support this option for now:"); + FT_PRINT_OPTS_USAGE("", "\tfi_rdm_tagged_pingpong"); + FT_PRINT_OPTS_USAGE("", "\tfi_rdm_pingpong"); } /* Pingpong latency test with pre-posted receive buffers. */ @@ -133,6 +140,68 @@ static int pingpong_pre_posted_rx(size_t inject_size) return FI_SUCCESS; } +/* Pingpong latency test without pre-posted receive buffers. 
*/ +static int pingpong_no_pre_posted_rx(size_t inject_size) +{ + int ret, i; + + if (opts.dst_addr) { + for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { + if (i == opts.warmup_iterations) + ft_start(); + + if (opts.transfer_size <= inject_size) + ret = ft_inject(ep, remote_fi_addr, + opts.transfer_size); + else + ret = ft_tx(ep, remote_fi_addr, + opts.transfer_size, &tx_ctx); + if (ret) + return ret; + + ret = ft_post_rx(ep, opts.transfer_size, &rx_ctx); + if (ret) + return ret; + + ret = ft_get_rx_comp(rx_seq); + if (ret) + return ret; + } + } else { + for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { + if (i == opts.warmup_iterations) + ft_start(); + + ret = ft_post_rx(ep, opts.transfer_size, &rx_ctx); + if (ret) + return ret; + + ret = ft_get_rx_comp(rx_seq); + if (ret) + return ret; + + if (ft_check_opts(FT_OPT_VERIFY_DATA | FT_OPT_ACTIVE)) { + ret = ft_check_buf((char *) rx_buf + ft_rx_prefix_size(), + opts.transfer_size); + if (ret) + return ret; + } + + if (opts.transfer_size <= inject_size) + ret = ft_inject(ep, remote_fi_addr, + opts.transfer_size); + else + ret = ft_tx(ep, remote_fi_addr, + opts.transfer_size, &tx_ctx); + if (ret) + return ret; + } + } + ft_stop(); + + return FI_SUCCESS; +} + int pingpong(void) { int ret; @@ -151,13 +220,34 @@ int pingpong(void) if (opts.options & FT_OPT_ENABLE_HMEM) inject_size = 0; - ret = ft_sync(); - if (ret) - return ret; + if (ft_check_opts(FT_OPT_NO_PRE_POSTED_RX)) { + if (ft_check_opts(FT_OPT_OOB_SYNC)) { + ret = ft_sync_oob(); + if (ret) + return ret; + } else { + /* Repost RX buffers to support inband sync. 
*/ + ret = ft_post_rx(ep, rx_size, &rx_ctx); + if (ret) + return ret; - ret = pingpong_pre_posted_rx(inject_size); - if (ret) - return ret; + ret = ft_sync_inband(false); + if (ret) + return ret; + } + + ret = pingpong_no_pre_posted_rx(inject_size); + if (ret) + return ret; + } else { + ret = ft_sync(); + if (ret) + return ret; + + ret = pingpong_pre_posted_rx(inject_size); + if (ret) + return ret; + } if (opts.machr) show_perf_mr(opts.transfer_size, opts.iterations, &start, &end, 2, diff --git a/fabtests/benchmarks/benchmark_shared.h b/fabtests/benchmarks/benchmark_shared.h index 57f0facb087..fbaf3eb3075 100644 --- a/fabtests/benchmarks/benchmark_shared.h +++ b/fabtests/benchmarks/benchmark_shared.h @@ -40,7 +40,7 @@ extern "C" { #include -#define BENCHMARK_OPTS "vkj:W:" +#define BENCHMARK_OPTS "rvkj:W:" #define FT_BENCHMARK_MAX_MSG_SIZE (test_size[TEST_CNT - 1].size) void ft_parse_benchmark_opts(int op, char *optarg); diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index 2d4387ba4df..eb95127b6f0 100644 --- a/fabtests/common/shared.c +++ b/fabtests/common/shared.c @@ -1340,6 +1340,14 @@ int ft_init_fabric(void) if (ft_check_opts(FT_OPT_FORK_CHILD)) ft_fork_child(); + if (ft_check_opts(FT_OPT_NO_PRE_POSTED_RX) && + !ft_check_opts(FT_OPT_SKIP_MSG_ALLOC) && + (fi->caps & (FI_MSG | FI_TAGGED))) { + ret = ft_sync_inband(false); + if (ret) + return ret; + } + return 0; } diff --git a/fabtests/include/shared.h b/fabtests/include/shared.h index 80fd5538fe4..7d56fdd7257 100644 --- a/fabtests/include/shared.h +++ b/fabtests/include/shared.h @@ -145,6 +145,7 @@ enum { FT_OPT_DISABLE_TAG_VALIDATION = 1 << 25, FT_OPT_ADDR_IS_OOB = 1 << 26, FT_OPT_REG_DMABUF_MR = 1 << 27, + FT_OPT_NO_PRE_POSTED_RX = 1 << 28, FT_OPT_OOB_CTRL = FT_OPT_OOB_SYNC | FT_OPT_OOB_ADDR_EXCH, };