diff --git a/src/cart/README.env b/src/cart/README.env index e8f57d338ef..656f2ab73e4 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -132,6 +132,12 @@ This file lists the environment variables used in CaRT. It its value exceed 256, then will use 256 for flow control. Set it to zero means disable the flow control in cart. + . D_QUOTA_RPCS + Set it to the maximum number of per-context inflight RPCs that a sender will + put onto the wire. The quota of each context is independent of the others. + If it is not set, the default is 64 on clients and 0 (quota disabled) on servers. + Setting it to 0 disables the quota. + . CRT_CTX_SHARE_ADDR Set it to non-zero to make all the contexts share one network address, in this case CaRT will create one SEP and each context maps to one tx/rx diff --git a/src/cart/crt_context.c b/src/cart/crt_context.c index 19f72ebbd07..0efbf705f13 100644 --- a/src/cart/crt_context.c +++ b/src/cart/crt_context.c @@ -11,6 +11,10 @@ #include "crt_internal.h" static void crt_epi_destroy(struct crt_ep_inflight *epi); +static int +context_quotas_init(crt_context_t crt_ctx); +static int +context_quotas_finalize(crt_context_t crt_ctx); static struct crt_ep_inflight * epi_link2ptr(d_list_t *rlink) @@ -141,6 +145,13 @@ crt_context_init(crt_context_t crt_ctx) if (rc != 0) D_GOTO(out, rc); + rc = D_MUTEX_INIT(&ctx->cc_quotas.mutex, NULL); + if (rc != 0) { + D_MUTEX_DESTROY(&ctx->cc_mutex); + D_GOTO(out, rc); + } + + D_INIT_LIST_HEAD(&ctx->cc_quotas.rpc_waitq); D_INIT_LIST_HEAD(&ctx->cc_link); /* create timeout binheap */ @@ -162,6 +173,8 @@ crt_context_init(crt_context_t crt_ctx) D_GOTO(out_binheap_destroy, rc); } + rc = context_quotas_init(crt_ctx); + D_GOTO(out, rc); out_binheap_destroy: @@ -684,10 +697,17 @@ crt_context_destroy(crt_context_t crt_ctx, int force) D_GOTO(out, rc = -DER_UNINIT); } + rc = context_quotas_finalize(crt_ctx); + if (rc) { + DL_ERROR(rc, "context_quotas_finalize() failed"); + if (!force) + D_GOTO(out, rc); + } + ctx = crt_ctx; rc = crt_grp_ctx_invalid(ctx, false /*
locked */); if (rc) { - D_ERROR("crt_grp_ctx_invalid failed, rc: %d.\n", rc); + DL_ERROR(rc, "crt_grp_ctx_invalid() failed"); if (!force) D_GOTO(out, rc); } @@ -1167,6 +1187,7 @@ crt_context_req_track(struct crt_rpc_priv *rpc_priv) d_list_t *rlink; d_rank_t ep_rank; int rc = 0; + int quota_rc = 0; struct crt_grp_priv *grp_priv; D_ASSERT(crt_ctx != NULL); @@ -1177,6 +1198,9 @@ crt_context_req_track(struct crt_rpc_priv *rpc_priv) D_GOTO(out, rc = CRT_REQ_TRACK_IN_INFLIGHQ); } + /* check inflight quota. if exceeded, queue this rpc */ + quota_rc = crt_context_get_quota_resource(rpc_priv->crp_pub.cr_ctx, CRT_QUOTA_RPCS); + grp_priv = crt_grp_pub2priv(rpc_priv->crp_pub.cr_ep.ep_grp); ep_rank = crt_grp_priv_get_primary_rank(grp_priv, rpc_priv->crp_pub.cr_ep.ep_rank); @@ -1228,15 +1252,16 @@ crt_context_req_track(struct crt_rpc_priv *rpc_priv) rpc_priv->crp_epi = epi; RPC_ADDREF(rpc_priv); - if (crt_gdata.cg_credit_ep_ctx != 0 && - (epi->epi_req_num - epi->epi_reply_num) >= crt_gdata.cg_credit_ep_ctx) { - if (rpc_priv->crp_opc_info->coi_queue_front) { - d_list_add(&rpc_priv->crp_epi_link, - &epi->epi_req_waitq); - } else { - d_list_add_tail(&rpc_priv->crp_epi_link, - &epi->epi_req_waitq); - } + if (quota_rc == -DER_QUOTA_LIMIT) { + epi->epi_req_num++; + rpc_priv->crp_state = RPC_STATE_QUEUED; + rc = CRT_REQ_TRACK_IN_WAITQ; + } else if (crt_gdata.cg_credit_ep_ctx != 0 && + (epi->epi_req_num - epi->epi_reply_num) >= crt_gdata.cg_credit_ep_ctx) { + if (rpc_priv->crp_opc_info->coi_queue_front) + d_list_add(&rpc_priv->crp_epi_link, &epi->epi_req_waitq); + else + d_list_add_tail(&rpc_priv->crp_epi_link, &epi->epi_req_waitq); epi->epi_req_wait_num++; rpc_priv->crp_state = RPC_STATE_QUEUED; @@ -1246,13 +1271,11 @@ crt_context_req_track(struct crt_rpc_priv *rpc_priv) rc = crt_req_timeout_track(rpc_priv); D_MUTEX_UNLOCK(&crt_ctx->cc_mutex); if (rc == 0) { - d_list_add_tail(&rpc_priv->crp_epi_link, - &epi->epi_req_q); + d_list_add_tail(&rpc_priv->crp_epi_link, &epi->epi_req_q); 
epi->epi_req_num++; rc = CRT_REQ_TRACK_IN_INFLIGHQ; } else { - RPC_ERROR(rpc_priv, - "crt_req_timeout_track failed, rc: %d.\n", rc); + RPC_ERROR(rpc_priv, "crt_req_timeout_track failed, rc: %d.\n", rc); /* roll back the addref above */ RPC_DECREF(rpc_priv); } @@ -1264,6 +1287,10 @@ crt_context_req_track(struct crt_rpc_priv *rpc_priv) /* reference taken by d_hash_rec_find or "epi->epi_ref = 1" above */ D_MUTEX_LOCK(&crt_ctx->cc_mutex); d_hash_rec_decref(&crt_ctx->cc_epi_table, &epi->epi_link); + + if (quota_rc == -DER_QUOTA_LIMIT) + d_list_add_tail(&rpc_priv->crp_waitq_link, &crt_ctx->cc_quotas.rpc_waitq); + D_MUTEX_UNLOCK(&crt_ctx->cc_mutex); out: @@ -1280,9 +1307,11 @@ credits_available(struct crt_ep_inflight *epi) { int64_t inflight = epi->epi_req_num - epi->epi_reply_num; - D_ASSERTF(inflight >= 0 && inflight <= crt_gdata.cg_credit_ep_ctx, - "req_num=%ld reply_num=%ld credit_ep_ctx=%u\n", epi->epi_req_num, - epi->epi_reply_num, crt_gdata.cg_credit_ep_ctx); + /* TODO: inflight right now includes items queued in quota waitq, and can exceed credit + * limit */ + if (inflight > crt_gdata.cg_credit_ep_ctx) + return 0; + return crt_gdata.cg_credit_ep_ctx - inflight; } @@ -1324,6 +1353,7 @@ crt_context_req_untrack_internal(struct crt_rpc_priv *rpc_priv) } else {/* RPC_CANCELED or RPC_INITED or RPC_TIMEOUT */ epi->epi_req_num--; } + D_ASSERT(epi->epi_req_num >= epi->epi_reply_num); D_MUTEX_UNLOCK(&epi->epi_mutex); @@ -1340,6 +1370,29 @@ crt_context_req_untrack_internal(struct crt_rpc_priv *rpc_priv) RPC_DECREF(rpc_priv); } +static void +dispatch_rpc(struct crt_rpc_priv *rpc) +{ + int rc; + + if (rpc == NULL) + return; + + crt_rpc_lock(rpc); + + rc = crt_req_send_internal(rpc); + if (rc == 0) { + crt_rpc_unlock(rpc); + } else { + RPC_ADDREF(rpc); + RPC_ERROR(rpc, "crt_req_send_internal failed, rc: %d\n", rc); + rpc->crp_state = RPC_STATE_INITED; + crt_context_req_untrack_internal(rpc); + /* for error case here */ + crt_rpc_complete_and_unlock(rpc, rc); + } +} + void 
crt_context_req_untrack(struct crt_rpc_priv *rpc_priv) { @@ -1351,17 +1404,26 @@ crt_context_req_untrack(struct crt_rpc_priv *rpc_priv) D_ASSERT(crt_ctx != NULL); - if (rpc_priv->crp_pub.cr_opc == CRT_OPC_URI_LOOKUP) { - RPC_TRACE(DB_NET, rpc_priv, "bypass untracking for URI_LOOKUP.\n"); + if (rpc_priv->crp_pub.cr_opc == CRT_OPC_URI_LOOKUP) return; - } epi = rpc_priv->crp_epi; D_ASSERT(epi != NULL); + /* Dispatch one rpc from wait_q if any or return resource back */ + D_MUTEX_LOCK(&crt_ctx->cc_mutex); + tmp_rpc = + d_list_pop_entry(&crt_ctx->cc_quotas.rpc_waitq, struct crt_rpc_priv, crp_waitq_link); + D_MUTEX_UNLOCK(&crt_ctx->cc_mutex); + + if (tmp_rpc != NULL) + dispatch_rpc(tmp_rpc); + else + crt_context_put_quota_resource(rpc_priv->crp_pub.cr_ctx, CRT_QUOTA_RPCS); + crt_context_req_untrack_internal(rpc_priv); - /* done if flow control disabled */ + /* done if ep credit flow control is disabled */ if (crt_gdata.cg_credit_ep_ctx == 0) return; @@ -1408,20 +1470,8 @@ crt_context_req_untrack(struct crt_rpc_priv *rpc_priv) D_MUTEX_UNLOCK(&epi->epi_mutex); /* re-submit the rpc req */ - while ((tmp_rpc = d_list_pop_entry(&submit_list, struct crt_rpc_priv, crp_tmp_link))) { - crt_rpc_lock(tmp_rpc); - rc = crt_req_send_internal(tmp_rpc); - if (rc == 0) { - crt_rpc_unlock(tmp_rpc); - } else { - RPC_ADDREF(tmp_rpc); - RPC_ERROR(tmp_rpc, "crt_req_send_internal failed, rc: %d\n", rc); - tmp_rpc->crp_state = RPC_STATE_INITED; - crt_context_req_untrack_internal(tmp_rpc); - /* for error case here */ - crt_rpc_complete_and_unlock(tmp_rpc, rc); - } - } + while ((tmp_rpc = d_list_pop_entry(&submit_list, struct crt_rpc_priv, crp_tmp_link))) + dispatch_rpc(tmp_rpc); } /* TODO: Need per-provider call */ @@ -1910,3 +1960,152 @@ crt_req_force_completion(struct crt_rpc_priv *rpc_priv) crt_req_timeout_track(rpc_priv); D_MUTEX_UNLOCK(&crt_ctx->cc_mutex); } + +static int +context_quotas_init(crt_context_t crt_ctx) +{ + struct crt_context *ctx = crt_ctx; + struct crt_quotas *quotas; + int rc 
= 0; + + if (ctx == NULL) { + D_ERROR("NULL context\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + quotas = &ctx->cc_quotas; + + quotas->limit[CRT_QUOTA_RPCS] = crt_gdata.cg_rpc_quota; + quotas->current[CRT_QUOTA_RPCS] = 0; + quotas->enabled[CRT_QUOTA_RPCS] = crt_gdata.cg_rpc_quota > 0 ? true : false; +out: + return rc; +} + +static int +context_quotas_finalize(crt_context_t crt_ctx) +{ + struct crt_context *ctx = crt_ctx; + + if (ctx == NULL) { + D_ERROR("NULL context\n"); + return -DER_INVAL; + } + + for (int i = 0; i < CRT_QUOTA_COUNT; i++) + ctx->cc_quotas.enabled[i] = false; + + return DER_SUCCESS; +} + +int +crt_context_quota_limit_set(crt_context_t crt_ctx, crt_quota_type_t quota, int value) +{ + struct crt_context *ctx = crt_ctx; + int rc = 0; + + if (ctx == NULL) { + D_ERROR("NULL context\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (quota < 0 || quota >= CRT_QUOTA_COUNT) { + D_ERROR("Invalid quota %d passed\n", quota); + D_GOTO(out, rc = -DER_INVAL); + } + + D_MUTEX_LOCK(&ctx->cc_quotas.mutex); + ctx->cc_quotas.limit[quota] = value; + D_MUTEX_UNLOCK(&ctx->cc_quotas.mutex); + +out: + return rc; +} + +int +crt_context_quota_limit_get(crt_context_t crt_ctx, crt_quota_type_t quota, int *value) +{ + struct crt_context *ctx = crt_ctx; + int rc = 0; + + if (ctx == NULL) { + D_ERROR("NULL context\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (quota < 0 || quota >= CRT_QUOTA_COUNT) { + D_ERROR("Invalid quota %d passed\n", quota); + D_GOTO(out, rc = -DER_INVAL); + } + + if (value == NULL) { + D_ERROR("NULL value\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + *value = ctx->cc_quotas.limit[quota]; + +out: + return rc; +} + +int +crt_context_get_quota_resource(crt_context_t crt_ctx, crt_quota_type_t quota) +{ + struct crt_context *ctx = crt_ctx; + int rc = 0; + + if (ctx == NULL) { + D_ERROR("NULL context\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (quota < 0 || quota >= CRT_QUOTA_COUNT) { + D_ERROR("Invalid quota %d passed\n", quota); + D_GOTO(out, rc = 
-DER_INVAL); + } + + /* If quotas not enabled or unlimited quota */ + if (!ctx->cc_quotas.enabled[quota] || ctx->cc_quotas.limit[quota] == 0) + return 0; + + D_MUTEX_LOCK(&ctx->cc_quotas.mutex); + if (ctx->cc_quotas.current[quota] < ctx->cc_quotas.limit[quota]) + ctx->cc_quotas.current[quota]++; + else { + D_WARN("Quota limit reached for quota_type=%d\n", quota); + rc = -DER_QUOTA_LIMIT; + } + D_MUTEX_UNLOCK(&ctx->cc_quotas.mutex); +out: + return rc; +} + +int +crt_context_put_quota_resource(crt_context_t crt_ctx, crt_quota_type_t quota) +{ + struct crt_context *ctx = crt_ctx; + int rc = 0; + + if (ctx == NULL) { + D_ERROR("NULL context\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (quota < 0 || quota >= CRT_QUOTA_COUNT) { + D_ERROR("Invalid quota %d passed\n", quota); + D_GOTO(out, rc = -DER_INVAL); + } + + /* If quotas not enabled or unlimited quota */ + if (!ctx->cc_quotas.enabled[quota] || ctx->cc_quotas.limit[quota] == 0) + return 0; + + D_MUTEX_LOCK(&ctx->cc_quotas.mutex); + D_ASSERTF(ctx->cc_quotas.current[quota] > 0, "Invalid current limit"); + ctx->cc_quotas.current[quota]--; + D_MUTEX_UNLOCK(&ctx->cc_quotas.mutex); + +out: + return rc; +} diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 2b74f882bc1..4bc4b551044 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -1392,7 +1392,7 @@ crt_hg_req_send_cb(const struct hg_cb_info *hg_cbinfo) void crt_hg_req_send(struct crt_rpc_priv *rpc_priv) { - hg_return_t hg_ret; + hg_return_t hg_ret; D_ASSERT(rpc_priv != NULL); diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 308b9c946f4..54f8caf5877 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -95,6 +95,8 @@ dump_envariables(void) "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST", "D_POLL_TIMEOUT", + "D_LOG_FILE_APPEND_RANK", + "D_QUOTA_RPCS", "D_POST_INIT", "D_POST_INCR"}; @@ -323,6 +325,11 @@ static int data_init(int server, crt_init_options_t *opt) d_getenv_int("CRT_CREDIT_EP_CTX", &credits); } + /* Enable quotas by default only on 
clients */ + crt_gdata.cg_rpc_quota = server ? 0 : CRT_QUOTA_RPCS_DEFAULT; + + d_getenv_int("D_QUOTA_RPCS", &crt_gdata.cg_rpc_quota); + /* Must be set on the server when using UCX, will not affect OFI */ d_getenv_char("UCX_IB_FORK_INIT", &ucx_ib_fork_init); if (ucx_ib_fork_init) { diff --git a/src/cart/crt_internal_fns.h b/src/cart/crt_internal_fns.h index c88c794852b..8188f6ce877 100644 --- a/src/cart/crt_internal_fns.h +++ b/src/cart/crt_internal_fns.h @@ -31,6 +31,11 @@ enum { CRT_REQ_TRACK_IN_WAITQ, }; +int +crt_context_get_quota_resource(crt_context_t crt_ctx, crt_quota_type_t quota); +int + crt_context_put_quota_resource(crt_context_t crt_ctx, crt_quota_type_t quota); + int crt_context_req_track(struct crt_rpc_priv *rpc_priv); bool crt_context_empty(int provider, int locked); void crt_context_req_untrack(struct crt_rpc_priv *rpc_priv); diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index a09bc2b4c49..9eb59002ad8 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -88,7 +88,7 @@ struct crt_gdata { /** Provider specific data */ struct crt_prov_gdata cg_prov_gdata_primary; - /** */ + /** Placeholder for secondary provider data */ struct crt_prov_gdata *cg_prov_gdata_secondary; /** Hints to mercury for request post init (ignored for clients) */ @@ -110,6 +110,7 @@ struct crt_gdata { /** HG level global data */ struct crt_hg_gdata *cg_hg; + /** Points to default group */ struct crt_grp_gdata *cg_grp; /** refcount to protect crt_init/crt_finalize */ @@ -145,6 +146,8 @@ struct crt_gdata { struct d_tm_node_t *cg_uri_other; /** Number of cores on a system */ long cg_num_cores; + /** Inflight rpc quota limit */ + uint32_t cg_rpc_quota; }; extern struct crt_gdata crt_gdata; @@ -189,6 +192,14 @@ extern struct crt_plugin_gdata crt_plugin_gdata; #define CRT_DEFAULT_CREDITS_PER_EP_CTX (32) #define CRT_MAX_CREDITS_PER_EP_CTX (256) +struct crt_quotas { + int limit[CRT_QUOTA_COUNT]; + int current[CRT_QUOTA_COUNT]; + 
bool enabled[CRT_QUOTA_COUNT]; + pthread_mutex_t mutex; + d_list_t rpc_waitq; +}; + /* crt_context */ struct crt_context { d_list_t cc_link; /** link to gdata.cg_ctx_list */ @@ -227,6 +238,9 @@ struct crt_context { /** Stores self uri for the current context */ char cc_self_uri[CRT_ADDR_STR_MAX_LEN]; + + /** Stores quotas */ + struct crt_quotas cc_quotas; }; /* in-flight RPC req list, be tracked per endpoint for every crt_context */ diff --git a/src/cart/crt_rpc.c b/src/cart/crt_rpc.c index e834064250b..380a9434ecd 100644 --- a/src/cart/crt_rpc.c +++ b/src/cart/crt_rpc.c @@ -560,9 +560,9 @@ int crt_req_create(crt_context_t crt_ctx, crt_endpoint_t *tgt_ep, crt_opcode_t opc, crt_rpc_t **req) { - int rc = 0; - struct crt_grp_priv *grp_priv = NULL; + struct crt_grp_priv *grp_priv = NULL; struct crt_rpc_priv *rpc_priv; + int rc = 0; if (crt_ctx == CRT_CONTEXT_NULL || req == NULL) { D_ERROR("invalid parameter (NULL crt_ctx or req).\n"); diff --git a/src/cart/crt_rpc.h b/src/cart/crt_rpc.h index d297d2f13f0..ea2b1a48518 100644 --- a/src/cart/crt_rpc.h +++ b/src/cart/crt_rpc.h @@ -18,6 +18,8 @@ #define CRT_DEFAULT_TIMEOUT_S (60) /* second */ #define CRT_DEFAULT_TIMEOUT_US (CRT_DEFAULT_TIMEOUT_S * 1e6) /* micro-second */ +#define CRT_QUOTA_RPCS_DEFAULT 64 + /* uri lookup max retry times */ #define CRT_URI_LOOKUP_RETRY_MAX (8) @@ -130,6 +132,8 @@ struct crt_rpc_priv { d_list_t crp_epi_link; /* tmp_link used in crt_context_req_untrack */ d_list_t crp_tmp_link; + /* link for crt_context::cc_quotas.rpc_waitq */ + d_list_t crp_waitq_link; /* link to parent RPC crp_opc_info->co_child_rpcs/co_replied_rpcs */ d_list_t crp_parent_link; /* binheap node for timeout management, in crt_context::cc_bh_timeout */ diff --git a/src/gurt/tests/test_gurt.c b/src/gurt/tests/test_gurt.c index 75043590d65..82de7082547 100644 --- a/src/gurt/tests/test_gurt.c +++ b/src/gurt/tests/test_gurt.c @@ -198,15 +198,24 @@ void test_d_errstr(void **state) assert_string_equal(value, "DER_SUCCESS"); value = 
d_errstr(-DER_IVCB_FORWARD); assert_string_equal(value, "DER_IVCB_FORWARD"); -#ifdef TEST_OLD_ERROR - value = d_errstr(-DER_FREE_MEM); - assert_string_equal(value, "DER_FREE_MEM"); - value = d_errstr(-DER_STALE); - assert_string_equal(value, "DER_STALE"); - (void)test_d_errstr_v2; -#else - test_d_errstr_v2(state); -#endif + + /* Check the boundary at the end of the GURT error numbers; this will need updating if + * additional error numbers are added. + */ + value = d_errstr(-DER_QUOTA_LIMIT); + assert_string_equal(value, "DER_QUOTA_LIMIT"); + value = d_errstr(-1046); + assert_string_equal(value, "DER_QUOTA_LIMIT"); + value = d_errstr(-(DER_QUOTA_LIMIT + 1)); + assert_string_equal(value, "DER_UNKNOWN"); + + /* Check the end of the DAOS error numbers. */ + value = d_errstr(-DER_DIV_BY_ZERO); + assert_string_equal(value, "DER_DIV_BY_ZERO"); + value = d_errstr(-2047); + assert_string_equal(value, "DER_DIV_BY_ZERO"); + value = d_errstr(-(DER_DIV_BY_ZERO + 1)); + assert_string_equal(value, "DER_UNKNOWN"); } void test_d_errdesc(void **state) diff --git a/src/include/cart/api.h b/src/include/cart/api.h index 402d17f83ed..c55d98445a9 100644 --- a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -2249,6 +2249,42 @@ crt_quiet_error(int err) return err == -DER_GRPVER; } +/** + * Change the quota limit. + * + * \param[in] crt_ctx CaRT context + * \param[in] quota Quota type + * \param[in] value New limit value + * + * \return DER_SUCCESS on success, negative value on + * failure. + */ +int +crt_context_quota_limit_set(crt_context_t crt_ctx, crt_quota_type_t quota, int value); + +/** + * Query the quota limit. + * + * \param[in] crt_ctx CaRT context + * \param[in] quota Quota type + * \param[out] value Returned limit value + * + * \return DER_SUCCESS on success, negative value on + * failure. + */ +int +crt_context_quota_limit_get(crt_context_t crt_ctx, crt_quota_type_t quota, int *value); + +/** + * Get the proto version of an RPC request.
+ * + * \param[in] req pointer to RPC request + * + * \return positive version or negative error. + */ +int +crt_req_get_proto_ver(crt_rpc_t *req); + /** @} */ diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 0ce7dd79815..d02e2881047 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -440,6 +440,17 @@ typedef enum { CRT_GROUP_MOD_OP_COUNT, } crt_group_mod_op_t; +/** + * Quotas supported by CaRT. + */ +typedef enum { + /** Limit of number of inflight rpcs */ + CRT_QUOTA_RPCS, + + /** Total count of supported quotas */ + CRT_QUOTA_COUNT, +} crt_quota_type_t; + /** @} */ #endif /* __CRT_TYPES_H__ */ diff --git a/src/include/daos_errno.h b/src/include/daos_errno.h index 6564abe54db..f388db928fe 100644 --- a/src/include/daos_errno.h +++ b/src/include/daos_errno.h @@ -25,143 +25,109 @@ extern "C" { */ /** Preprocessor macro defining GURT errno values and internal definition of d_errstr */ -#define D_FOREACH_GURT_ERR(ACTION) \ - /** no permission */ \ - ACTION(DER_NO_PERM, (DER_ERR_GURT_BASE + 1), \ - Operation not permitted) \ - /** invalid handle */ \ - ACTION(DER_NO_HDL, (DER_ERR_GURT_BASE + 2), \ - Invalid handle) \ - /** invalid parameters */ \ - ACTION(DER_INVAL, (DER_ERR_GURT_BASE + 3), \ - Invalid parameters) \ - /** entity already exists */ \ - ACTION(DER_EXIST, (DER_ERR_GURT_BASE + 4), \ - Entity already exists) \ - /** nonexistent entity */ \ - ACTION(DER_NONEXIST, (DER_ERR_GURT_BASE + 5), \ - The specified entity does not exist) \ - /** unreachable node */ \ - ACTION(DER_UNREACH, (DER_ERR_GURT_BASE + 6), \ - Unreachable node) \ - /** no space on storage target */ \ - ACTION(DER_NOSPACE, (DER_ERR_GURT_BASE + 7), \ - No space on storage target) \ - /** already did sth */ \ - ACTION(DER_ALREADY, (DER_ERR_GURT_BASE + 8), \ - Operation already performed) \ - /** NO memory */ \ - ACTION(DER_NOMEM, (DER_ERR_GURT_BASE + 9), \ - Out of memory) \ - /** Function not implemented */ \ - ACTION(DER_NOSYS, (DER_ERR_GURT_BASE + 10), 
\ - Function not implemented) \ - /** timed out */ \ - ACTION(DER_TIMEDOUT, (DER_ERR_GURT_BASE + 11), \ - Time out) \ - /** Busy */ \ - ACTION(DER_BUSY, (DER_ERR_GURT_BASE + 12), \ - Device or resource busy) \ - /** Try again */ \ - ACTION(DER_AGAIN, (DER_ERR_GURT_BASE + 13), \ - Try again) \ - /** Incompatible protocol */ \ - ACTION(DER_PROTO, (DER_ERR_GURT_BASE + 14), \ - Incompatible protocol) \ - /** not initialized */ \ - ACTION(DER_UNINIT, (DER_ERR_GURT_BASE + 15), \ - Device or resource not initialized) \ - /** buffer too short (larger buffer needed) */ \ - ACTION(DER_TRUNC, (DER_ERR_GURT_BASE + 16), \ - Buffer too short) \ - /** data too long for defined data type or buffer size */ \ +#define D_FOREACH_GURT_ERR(ACTION) \ + /** no permission */ \ + ACTION(DER_NO_PERM, (DER_ERR_GURT_BASE + 1), Operation not permitted) \ + /** invalid handle */ \ + ACTION(DER_NO_HDL, (DER_ERR_GURT_BASE + 2), Invalid handle) \ + /** invalid parameters */ \ + ACTION(DER_INVAL, (DER_ERR_GURT_BASE + 3), Invalid parameters) \ + /** entity already exists */ \ + ACTION(DER_EXIST, (DER_ERR_GURT_BASE + 4), Entity already exists) \ + /** nonexistent entity */ \ + ACTION(DER_NONEXIST, (DER_ERR_GURT_BASE + 5), The specified entity does not exist) \ + /** unreachable node */ \ + ACTION(DER_UNREACH, (DER_ERR_GURT_BASE + 6), Unreachable node) \ + /** no space on storage target */ \ + ACTION(DER_NOSPACE, (DER_ERR_GURT_BASE + 7), No space on storage target) \ + /** already did sth */ \ + ACTION(DER_ALREADY, (DER_ERR_GURT_BASE + 8), Operation already performed) \ + /** NO memory */ \ + ACTION(DER_NOMEM, (DER_ERR_GURT_BASE + 9), Out of memory) \ + /** Function not implemented */ \ + ACTION(DER_NOSYS, (DER_ERR_GURT_BASE + 10), Function not implemented) \ + /** timed out */ \ + ACTION(DER_TIMEDOUT, (DER_ERR_GURT_BASE + 11), Time out) \ + /** Busy */ \ + ACTION(DER_BUSY, (DER_ERR_GURT_BASE + 12), Device or resource busy) \ + /** Try again */ \ + ACTION(DER_AGAIN, (DER_ERR_GURT_BASE + 13), Try 
again) \ + /** Incompatible protocol */ \ + ACTION(DER_PROTO, (DER_ERR_GURT_BASE + 14), Incompatible protocol) \ + /** not initialized */ \ + ACTION(DER_UNINIT, (DER_ERR_GURT_BASE + 15), Device or resource not initialized) \ + /** buffer too short (larger buffer needed) */ \ + ACTION(DER_TRUNC, (DER_ERR_GURT_BASE + 16), Buffer too short) \ + /** data too long for defined data type or buffer size */ \ ACTION(DER_OVERFLOW, (DER_ERR_GURT_BASE + 17), \ - Data too long for defined data type or buffer size) \ - /** operation canceled */ \ - ACTION(DER_CANCELED, (DER_ERR_GURT_BASE + 18), \ - Operation canceled) \ - /** Out-Of-Group or member list */ \ - ACTION(DER_OOG, (DER_ERR_GURT_BASE + 19), \ - Out of group or member list) \ - /** transport layer mercury error */ \ - ACTION(DER_HG, (DER_ERR_GURT_BASE + 20), \ - Transport layer mercury error) \ - /** RPC or protocol version not registered */ \ - ACTION(DER_UNREG, (DER_ERR_GURT_BASE + 21), \ - RPC or protocol version not registered) \ - /** failed to generate an address string */ \ - ACTION(DER_ADDRSTR_GEN, (DER_ERR_GURT_BASE + 22), \ - Failed to generate an address string) \ - /** PMIx layer error */ \ - ACTION(DER_PMIX, (DER_ERR_GURT_BASE + 23), \ - PMIx layer error) \ - /** IV callback - cannot handle locally */ \ - ACTION(DER_IVCB_FORWARD, (DER_ERR_GURT_BASE + 24), \ - Incast variable unavailable locally. 
Must forward) \ - /** miscellaneous error */ \ - ACTION(DER_MISC, (DER_ERR_GURT_BASE + 25), \ - Miscellaneous error) \ - /** Bad path name */ \ - ACTION(DER_BADPATH, (DER_ERR_GURT_BASE + 26), \ - Bad path name) \ - /** Not a directory */ \ - ACTION(DER_NOTDIR, (DER_ERR_GURT_BASE + 27), \ - Not a directory) \ - /** corpc failed */ \ - ACTION(DER_CORPC_INCOMPLETE, (DER_ERR_GURT_BASE + 28), \ - Collective RPC failed) \ - /** no rank is subscribed to RAS */ \ - ACTION(DER_NO_RAS_RANK, (DER_ERR_GURT_BASE + 29), \ - No rank is subscribed to RAS) \ - /** service group not attached */ \ - ACTION(DER_NOTATTACH, (DER_ERR_GURT_BASE + 30), \ - Service group not attached) \ - /** version mismatch */ \ - ACTION(DER_MISMATCH, (DER_ERR_GURT_BASE + 31), \ - Version mismatch) \ - /** rank has been excluded */ \ - ACTION(DER_EXCLUDED, (DER_ERR_GURT_BASE + 32), \ - Rank has been excluded) \ - /** user-provided RPC handler didn't send reply back */ \ - ACTION(DER_NOREPLY, (DER_ERR_GURT_BASE + 33), \ - User provided RPC handler did not send reply back) \ - /** denial-of-service */ \ - ACTION(DER_DOS, (DER_ERR_GURT_BASE + 34), \ - Denial of service) \ - /** Incorrect target for the RPC */ \ + Data too long for defined data type or buffer size) \ + /** operation canceled */ \ + ACTION(DER_CANCELED, (DER_ERR_GURT_BASE + 18), Operation canceled) \ + /** Out-Of-Group or member list */ \ + ACTION(DER_OOG, (DER_ERR_GURT_BASE + 19), Out of group or member list) \ + /** transport layer mercury error */ \ + ACTION(DER_HG, (DER_ERR_GURT_BASE + 20), Transport layer mercury error) \ + /** RPC or protocol version not registered */ \ + ACTION(DER_UNREG, (DER_ERR_GURT_BASE + 21), RPC or protocol version not registered) \ + /** failed to generate an address string */ \ + ACTION(DER_ADDRSTR_GEN, (DER_ERR_GURT_BASE + 22), Failed to generate an address string) \ + /** PMIx layer error */ \ + ACTION(DER_PMIX, (DER_ERR_GURT_BASE + 23), PMIx layer error) \ + /** IV callback - cannot handle locally */ \ + 
ACTION(DER_IVCB_FORWARD, (DER_ERR_GURT_BASE + 24), \ + Incast variable unavailable locally. Must forward) \ + /** miscellaneous error */ \ + ACTION(DER_MISC, (DER_ERR_GURT_BASE + 25), Miscellaneous error) \ + /** Bad path name */ \ + ACTION(DER_BADPATH, (DER_ERR_GURT_BASE + 26), Bad path name) \ + /** Not a directory */ \ + ACTION(DER_NOTDIR, (DER_ERR_GURT_BASE + 27), Not a directory) \ + /** corpc failed */ \ + ACTION(DER_CORPC_INCOMPLETE, (DER_ERR_GURT_BASE + 28), Collective RPC failed) \ + /** no rank is subscribed to RAS */ \ + ACTION(DER_NO_RAS_RANK, (DER_ERR_GURT_BASE + 29), No rank is subscribed to RAS) \ + /** service group not attached */ \ + ACTION(DER_NOTATTACH, (DER_ERR_GURT_BASE + 30), Service group not attached) \ + /** version mismatch */ \ + ACTION(DER_MISMATCH, (DER_ERR_GURT_BASE + 31), Version mismatch) \ + /** rank has been excluded */ \ + ACTION(DER_EXCLUDED, (DER_ERR_GURT_BASE + 32), Rank has been excluded) \ + /** user-provided RPC handler didn't send reply back */ \ + ACTION(DER_NOREPLY, (DER_ERR_GURT_BASE + 33), \ + User provided RPC handler did not send reply back) \ + /** denial-of-service */ \ + ACTION(DER_DOS, (DER_ERR_GURT_BASE + 34), Denial of service) \ + /** Incorrect target for the RPC */ \ ACTION(DER_BAD_TARGET, (DER_ERR_GURT_BASE + 35), \ - Incorrect target for the RPC) \ - /** Group versioning mismatch */ \ - ACTION(DER_GRPVER, (DER_ERR_GURT_BASE + 36), \ - Group versioning mismatch) \ - /** HLC synchronization error */ \ - ACTION(DER_HLC_SYNC, (DER_ERR_GURT_BASE + 37), \ - HLC synchronization error) \ - /** No shared memory available */ \ - ACTION(DER_NO_SHMEM, (DER_ERR_GURT_BASE + 38), \ - Not enough shared memory free) \ - /** Failed to add metric */ \ - ACTION(DER_ADD_METRIC_FAILED, (DER_ERR_GURT_BASE + 39), \ - Failed to add the specified metric) \ - /** Duration start/end mismatch */ \ - ACTION(DER_DURATION_MISMATCH, (DER_ERR_GURT_BASE + 40), \ - Duration end not paired with duration start) \ - /** Operation not permitted on
metric type*/ \ + Incorrect target for the RPC) \ + /** Group versioning mismatch */ \ + ACTION(DER_GRPVER, (DER_ERR_GURT_BASE + 36), Group versioning mismatch) \ + /** HLC synchronization error */ \ + ACTION(DER_HLC_SYNC, (DER_ERR_GURT_BASE + 37), HLC synchronization error) \ + /** No shared memory available */ \ + ACTION(DER_NO_SHMEM, (DER_ERR_GURT_BASE + 38), Not enough shared memory free) \ + /** Failed to add metric */ \ + ACTION(DER_ADD_METRIC_FAILED, (DER_ERR_GURT_BASE + 39), \ + Failed to add the specified metric) \ + /** Duration start/end mismatch */ \ + ACTION(DER_DURATION_MISMATCH, (DER_ERR_GURT_BASE + 40), \ + Duration end not paired with duration start) \ + /** Operation not permitted on metric type*/ \ ACTION(DER_OP_NOT_PERMITTED, (DER_ERR_GURT_BASE + 41), \ - Operation not permitted for metric type provided) \ - /** Metric path name exceeds permitted length*/ \ - ACTION(DER_EXCEEDS_PATH_LEN, (DER_ERR_GURT_BASE + 42), \ - Path name exceeds permitted length) \ - /** Metric was not found.*/ \ - ACTION(DER_METRIC_NOT_FOUND, (DER_ERR_GURT_BASE + 43), \ - Read failed because metric not found) \ - /** Invalid user/group permissions.*/ \ - ACTION(DER_SHMEM_PERMS, (DER_ERR_GURT_BASE + 44), \ - Unable to access shared memory segment due to \ - incompatible user or group permissions) \ - /** Fatal (non-retry-able) transport layer mercury error */ \ - ACTION(DER_HG_FATAL, (DER_ERR_GURT_BASE + 45), \ - Fatal transport layer mercury error) + Operation not permitted for metric type provided) \ + /** Metric path name exceeds permitted length*/ \ + ACTION(DER_EXCEEDS_PATH_LEN, (DER_ERR_GURT_BASE + 42), Path name exceeds permitted length) \ + /** Metric was not found.*/ \ + ACTION(DER_METRIC_NOT_FOUND, (DER_ERR_GURT_BASE + 43), \ + Read failed because metric not found) \ + /** Invalid user/group permissions.*/ \ + ACTION(DER_SHMEM_PERMS, (DER_ERR_GURT_BASE + 44), \ + Unable to access shared memory segment due to incompatible user or \ + group permissions) \ + /** 
Fatal (non-retry-able) transport layer mercury error */ \ + ACTION(DER_HG_FATAL, (DER_ERR_GURT_BASE + 45), Fatal transport layer mercury error) \ + /** Quota limit reached on the requested resource */ \ + ACTION(DER_QUOTA_LIMIT, (DER_ERR_GURT_BASE + 46), Quota limit reached) /** TODO: add more error numbers */ /** Preprocessor macro defining DAOS errno values and internal definition of d_errstr */