DAOS-14679 pool: Report on stopping sp_stopping #14374

Merged: 9 commits, merged on Jun 3, 2024
Changes from 4 commits
2 changes: 1 addition & 1 deletion src/include/daos_srv/pool.h
@@ -278,7 +278,7 @@ int ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf,
bool ds_pool_skip_for_check(struct ds_pool *pool);
int ds_pool_start_after_check(uuid_t uuid);
int ds_pool_start(uuid_t uuid, bool aft_chk);
void ds_pool_stop(uuid_t uuid);
int ds_pool_stop(uuid_t uuid);
int dsc_pool_svc_extend(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uint64_t deadline, int ntargets,
const d_rank_list_t *rank_list, int ndomains, const uint32_t *domains);
int dsc_pool_svc_update_target_state(uuid_t pool_uuid, d_rank_list_t *ranks, uint64_t deadline,
3 changes: 2 additions & 1 deletion src/mgmt/rpc.h
@@ -34,7 +34,8 @@
#define MGMT_PROTO_SRV_RPC_LIST \
X(MGMT_TGT_CREATE, 0, &CQF_mgmt_tgt_create, ds_mgmt_hdlr_tgt_create, \
&ds_mgmt_hdlr_tgt_create_co_ops) \
X(MGMT_TGT_DESTROY, 0, &CQF_mgmt_tgt_destroy, ds_mgmt_hdlr_tgt_destroy, NULL) \
X(MGMT_TGT_DESTROY, 0, &CQF_mgmt_tgt_destroy, ds_mgmt_hdlr_tgt_destroy, \
&ds_mgmt_hdlr_tgt_destroy_co_ops) \
X(MGMT_TGT_PARAMS_SET, 0, &CQF_mgmt_tgt_params_set, ds_mgmt_tgt_params_set_hdlr, NULL) \
X(MGMT_TGT_PROFILE, 0, &CQF_mgmt_profile, ds_mgmt_tgt_profile_hdlr, NULL) \
X(MGMT_TGT_MAP_UPDATE, 0, &CQF_mgmt_tgt_map_update, ds_mgmt_hdlr_tgt_map_update, \
4 changes: 4 additions & 0 deletions src/mgmt/srv.c
@@ -33,6 +33,10 @@ static struct crt_corpc_ops ds_mgmt_hdlr_tgt_create_co_ops = {
.co_post_reply = ds_mgmt_tgt_create_post_reply,
};

static struct crt_corpc_ops ds_mgmt_hdlr_tgt_destroy_co_ops = {
.co_aggregate = ds_mgmt_tgt_destroy_aggregator
};

static struct crt_corpc_ops ds_mgmt_hdlr_tgt_map_update_co_ops = {
.co_aggregate = ds_mgmt_tgt_map_update_aggregator,
.co_pre_forward = ds_mgmt_tgt_map_update_pre_forward,
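Context for why the rpc.h and srv.c hunks above change together: MGMT_PROTO_SRV_RPC_LIST is an X-macro list, so whatever is passed as the fifth X() argument becomes the collective-RPC (corpc) ops pointer in the expanded RPC table, and the new ds_mgmt_hdlr_tgt_destroy_co_ops defined in srv.c is what makes ds_mgmt_tgt_destroy_aggregator run when per-target MGMT_TGT_DESTROY replies are merged. The sketch below shows only the general expansion pattern; struct rpc_entry and mgmt_srv_rpc_table are invented names rather than the actual DAOS table, while crt_req_format, crt_rpc_cb_t, and crt_corpc_ops are the CaRT types the list already references, and the entry macros/handlers are assumed to be declared in rpc.h.

```c
/* Purely illustrative expansion; the real DAOS/CaRT table types differ. */
struct rpc_entry {
	unsigned int	       opcode;	/* e.g. MGMT_TGT_DESTROY */
	unsigned int	       flags;
	struct crt_req_format *req_fmt;	/* e.g. &CQF_mgmt_tgt_destroy */
	crt_rpc_cb_t	       hdlr;	/* e.g. ds_mgmt_hdlr_tgt_destroy */
	struct crt_corpc_ops  *co_ops;	/* aggregation callbacks for collective RPCs */
};

/* Expand each X(...) entry of the list into one table row. */
#define X(opc, flags, fmt, hdlr, co_ops)  { opc, flags, fmt, hdlr, co_ops },
static struct rpc_entry mgmt_srv_rpc_table[] = {
	MGMT_PROTO_SRV_RPC_LIST
};
#undef X
```

In other words, with the fifth argument of the MGMT_TGT_DESTROY entry changed from NULL to &ds_mgmt_hdlr_tgt_destroy_co_ops, a non-zero td_rc reported by any engine can be aggregated into the reply seen by the coordinator instead of being dropped.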
1 change: 1 addition & 0 deletions src/mgmt/srv_internal.h
@@ -158,6 +158,7 @@ int ds_mgmt_tgt_setup(void);
void ds_mgmt_tgt_cleanup(void);
void ds_mgmt_hdlr_tgt_create(crt_rpc_t *rpc_req);
void ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *rpc_req);
int ds_mgmt_tgt_destroy_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv);
void ds_mgmt_hdlr_tgt_shard_destroy(crt_rpc_t *rpc_req);
int ds_mgmt_tgt_create_aggregator(crt_rpc_t *source, crt_rpc_t *result,
void *priv);
15 changes: 14 additions & 1 deletion src/mgmt/srv_target.c
@@ -1404,7 +1404,9 @@ ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *td_req)
goto out;
}

ds_pool_stop(td_in->td_pool_uuid);
rc = ds_pool_stop(td_in->td_pool_uuid);
if (rc != 0)
goto out;

/** generate path to the target directory */
rc = ds_mgmt_tgt_file(td_in->td_pool_uuid, NULL, NULL, &path);
@@ -1449,6 +1451,17 @@ ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *td_req)
crt_reply_send(td_req);
}

int
ds_mgmt_tgt_destroy_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
{
struct mgmt_tgt_destroy_out *out_source = crt_reply_get(source);
struct mgmt_tgt_destroy_out *out_result = crt_reply_get(result);

if (out_source->td_rc != 0)
out_result->td_rc = out_source->td_rc;
return 0;
}

/**
* Set parameter on a single target.
*/
134 changes: 68 additions & 66 deletions src/pool/srv_target.c
@@ -715,6 +715,42 @@ pool_child_delete_one(void *uuid)
return 0;
}

static void
pool_child_delete_all(struct ds_pool *pool)
{
int rc;

rc = ds_pool_thread_collective(pool->sp_uuid, 0, pool_child_delete_one, pool->sp_uuid, 0);
if (rc == 0)
D_INFO(DF_UUID ": deleted\n", DP_UUID(pool->sp_uuid));
else if (rc == -DER_CANCELED)
D_INFO(DF_UUID ": no ESs\n", DP_UUID(pool->sp_uuid));
else
DL_ERROR(rc, DF_UUID ": failed to delete ES pool caches", DP_UUID(pool->sp_uuid));
}

static int
pool_child_add_all(struct ds_pool *pool)
{
struct pool_child_lookup_arg collective_arg = {
.pla_pool = pool,
.pla_uuid = pool->sp_uuid,
.pla_map_version = pool->sp_map_version
};
int rc;

rc = ds_pool_thread_collective(pool->sp_uuid, 0, pool_child_add_one, &collective_arg,
DSS_ULT_DEEP_STACK);
if (rc == 0) {
D_INFO(DF_UUID ": added\n", DP_UUID(pool->sp_uuid));
} else {
DL_ERROR(rc, DF_UUID ": failed to add ES pool caches", DP_UUID(pool->sp_uuid));
pool_child_delete_all(pool);
return rc;
}
return 0;
}

/* ds_pool ********************************************************************/

static struct daos_lru_cache *pool_cache;
@@ -725,16 +761,6 @@ pool_obj(struct daos_llink *llink)
return container_of(llink, struct ds_pool, sp_entry);
}

static inline void
pool_put_sync(void *args)
{
struct ds_pool *pool = args;

D_ASSERT(pool != NULL);
D_ASSERT(dss_get_module_info()->dmi_xs_id == 0);
daos_lru_ref_release(pool_cache, &pool->sp_entry);
}

struct ds_pool_create_arg {
uint32_t pca_map_version;
};
@@ -745,7 +771,6 @@
{
struct ds_pool_create_arg *arg = varg;
struct ds_pool *pool;
struct pool_child_lookup_arg collective_arg;
char group_id[DAOS_UUID_STR_SIZE];
struct dss_module_info *info = dss_get_module_info();
unsigned int iv_ns_id;
@@ -817,22 +842,9 @@
goto err_group;
}

collective_arg.pla_pool = pool;
collective_arg.pla_uuid = key;
collective_arg.pla_map_version = arg->pca_map_version;
rc = ds_pool_thread_collective(pool->sp_uuid, 0, pool_child_add_one,
&collective_arg, DSS_ULT_DEEP_STACK);
if (rc != 0) {
D_ERROR(DF_UUID": failed to add ES pool caches: "DF_RC"\n",
DP_UUID(key), DP_RC(rc));
goto err_iv_ns;
}

*link = &pool->sp_entry;
return 0;

err_iv_ns:
ds_iv_ns_put(pool->sp_iv_ns);
err_group:
rc_tmp = crt_group_secondary_destroy(pool->sp_group);
if (rc_tmp != 0)
@@ -865,13 +877,6 @@
D_DEBUG(DB_MGMT, DF_UUID": freeing\n", DP_UUID(pool->sp_uuid));

D_ASSERT(d_list_empty(&pool->sp_hdls));
rc = ds_pool_thread_collective(pool->sp_uuid, 0, pool_child_delete_one,
pool->sp_uuid, 0);
if (rc == -DER_CANCELED)
D_DEBUG(DB_MD, DF_UUID": no ESs\n", DP_UUID(pool->sp_uuid));
else if (rc != 0)
D_ERROR(DF_UUID": failed to delete ES pool caches: "DF_RC"\n",
DP_UUID(pool->sp_uuid), DP_RC(rc));

ds_cont_ec_eph_free(pool);

@@ -970,7 +975,7 @@ ds_pool_lookup(const uuid_t uuid, struct ds_pool **pool)

if ((*pool)->sp_stopping) {
D_DEBUG(DB_MD, DF_UUID": is in stopping\n", DP_UUID(uuid));
pool_put_sync(*pool);
ds_pool_put(*pool);
*pool = NULL;
return -DER_SHUTDOWN;
}
@@ -989,31 +994,9 @@
void
ds_pool_put(struct ds_pool *pool)
{
int rc;

/*
* Someone has stopped the pool. Current user may be the one that is holding the last
* reference on the pool, then drop such reference will trigger pool_free_ref() as to
* stop related container that may wait current user (ULT) to exit. To avoid deadlock,
* let's use independent ULT to drop the reference asynchronously and make current ULT
* to go ahead.
*
* An example of the deadlock scenarios is something like that:
*
* cont_iv_prop_fetch_ult => ds_pool_put => pool_free_ref [WAIT]=> cont_child_stop =>
* cont_stop_agg [WAIT]=> cont_agg_ult => ds_cont_csummer_init => ds_cont_get_props =>
* cont_iv_prop_fetch [WAIT]=> cont_iv_prop_fetch_ult
*/
if (unlikely(pool->sp_stopping) && daos_lru_is_last_user(&pool->sp_entry)) {
rc = dss_ult_create(pool_put_sync, pool, DSS_XS_SELF, 0, 0, NULL);
if (unlikely(rc != 0)) {
D_ERROR("Failed to create ULT to async put ref on the pool "DF_UUID"\n",
DP_UUID(pool->sp_uuid));
pool_put_sync(pool);
}
} else {
pool_put_sync(pool);
}
D_ASSERT(pool != NULL);
D_ASSERT(dss_get_module_info()->dmi_xs_id == 0);
daos_lru_ref_release(pool_cache, &pool->sp_entry);
}

void
@@ -1187,13 +1170,17 @@ ds_pool_start(uuid_t uuid, bool aft_chk)
else
pool->sp_cr_checked = 0;

rc = pool_child_add_all(pool);
if (rc != 0)
goto failure_pool;

if (!ds_pool_skip_for_check(pool)) {
rc = dss_ult_create(pool_fetch_hdls_ult, pool, DSS_XS_SYS, 0,
DSS_DEEP_STACK_SZ, NULL);
if (rc != 0) {
D_ERROR(DF_UUID": failed to create fetch ult: "DF_RC"\n",
DP_UUID(uuid), DP_RC(rc));
D_GOTO(failure_pool, rc);
goto failure_children;
}

pool->sp_fetch_hdls = 1;
@@ -1212,8 +1199,10 @@

failure_ult:
pool_fetch_hdls_ult_abort(pool);
failure_children:
pool_child_delete_all(pool);
failure_pool:
pool_put_sync(pool);
ds_pool_put(pool);
return rc;
}

@@ -1242,18 +1231,27 @@ pool_tgt_disconnect_all(struct ds_pool *pool)
* Stop a pool. Must be called on the system xstream. Release the ds_pool
* object reference held by ds_pool_start. Only for mgmt and pool modules.
*/
void
int
ds_pool_stop(uuid_t uuid)
{
struct ds_pool *pool;

ds_pool_failed_remove(uuid);

ds_pool_lookup(uuid, &pool);
if (pool == NULL)
return;
D_ASSERT(!pool->sp_stopping);
ds_pool_lookup_internal(uuid, &pool);
if (pool == NULL) {
D_INFO(DF_UUID ": not found\n", DP_UUID(uuid));
return 0;
}
if (pool->sp_stopping) {
int rc = -DER_BUSY;

DL_WARN(rc, DF_UUID ": already stopping", DP_UUID(uuid));
ds_pool_put(pool);
return rc;
}
pool->sp_stopping = 1;
D_INFO(DF_UUID ": stopping\n", DP_UUID(uuid));

pool_tgt_disconnect_all(pool);

@@ -1263,9 +1261,13 @@ ds_pool_stop(uuid_t uuid)

ds_rebuild_abort(pool->sp_uuid, -1, -1, -1);
ds_migrate_stop(pool, -1, -1);

pool_child_delete_all(pool);
Review thread on this change:

Reviewer (Contributor):
If the caller of ds_pool_stop() is not the last user of the ds_pool, the other reference holders may still be using the related pool_child objects. In that case, could this cause a race?

Author (Contributor):
Yes, I thought about this question too. If there are no other bugs, there should be no other user of this ds_pool object at this point. Moreover, even if there were such a user, ds_pool holds no ds_pool_child pointers, so although that user might hit some errors, it should not hit anything as serious as a segfault.

Reviewer (@Nasf-Fan, May 21, 2024):
The race may happen, for example, if someone triggers ds_pool_stop() during ds_pool_tgt_map_update(): some pool_child may be released before the collective tasks for ds_pool_tgt_map_update() have completed, so an operation such as dtx_resync may fail. That is risky, because dtx_resync is a global operation; if it misses some pool_child, the DTX check result, and therefore the commit/abort decision, can be affected. Furthermore, if the DTX leader wants to commit or abort some DTX but the related pool shard is already closed, it may leave an orphan DTX entry on that shard. If the pool is about to be destroyed that is fine; otherwise it will be trouble. I am not aware of any known segmentation faults in this case, but I am really not sure.

Author (Contributor):
I think I can pull in one more change originally planned as future work: wait for all other ds_pool references to be released. If we do that before calling pool_child_delete_all(), the situation should become easier for the other ds_pool reference holders (see the sketch after this thread).

The dtx_resync concern sounds like a different problem. Let's chat to understand where the problem is.

Reviewer (Contributor):
The root cause of the dtx_resync trouble is not related to this patch, but the changes in this patch increase the risk of exposing it.
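Editor's note: the sketch below illustrates the follow-up the author proposes above, i.e. making ds_pool_stop() wait until its own reference and the one held by ds_pool_start() are the only ones left before calling pool_child_delete_all(). It is a sketch under assumptions, not code from this PR: pool_wait_for_users() and daos_lru_ref_count() are hypothetical names (the LRU cache in this diff only shows helpers such as daos_lru_is_last_user()), while D_DEBUG() and dss_sleep() are existing engine facilities.

```c
/*
 * Hypothetical helper for src/pool/srv_target.c: block the stopping ULT until
 * the caller and ds_pool_start() hold the only remaining ds_pool references,
 * so pool_child_delete_all() cannot remove per-xstream pool children while a
 * concurrent user (e.g. ds_pool_tgt_map_update()) is still running collective
 * work against them.
 */
static void
pool_wait_for_users(struct ds_pool *pool)
{
	/* daos_lru_ref_count() is assumed here for illustration only. */
	while (daos_lru_ref_count(&pool->sp_entry) > 2) {
		D_DEBUG(DB_MD, DF_UUID ": waiting for other ds_pool users\n",
			DP_UUID(pool->sp_uuid));
		dss_sleep(1000 /* ms */);
	}
}
```

In ds_pool_stop() this would sit after ds_rebuild_abort()/ds_migrate_stop() and immediately before pool_child_delete_all(pool), with sp_stopping already set so no new lookups can take references in the meantime.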


ds_pool_put(pool); /* held by ds_pool_start */
pool_put_sync(pool);
D_INFO(DF_UUID": pool stopped\n", DP_UUID(uuid));
ds_pool_put(pool);
D_INFO(DF_UUID ": stopped\n", DP_UUID(uuid));
return 0;
}

/* ds_pool_hdl ****************************************************************/