Skip to content

Commit

Permalink
DAOS-14679 pool: Report on stopping sp_stopping
Browse files Browse the repository at this point in the history
When destroying a pool on an engine, we may block in

  ds_pool_stop
    ds_pool_put
      pool_free_ref
        pool_child_delete_one
          pool_child_stop

waiting for some ds_pool_child references. If the user retries the pool
destroy command, the new ds_pool_stop call can't find the ds_pool object
that is waiting for ds_pool_child references, and the new pool destroy
command usually succeeds, even though the storage capacity allocated to
the pool hasn't been released yet.

This patch makes the following changes:

  - Move the pool_child_delete_one collective call from pool_free_ref to
    before the ds_pool_put call in ds_pool_stop. This makes sure that
    the ds_pool object remains in the LRU cache in the sp_stopping state
    while we wait for ds_pool_child references (or other upper layer
    resources).
      - Remove the pool_put_sync trick, which is no longer necessary.

  - Return an error from ds_pool_stop when the ds_pool object is in the
    sp_stopping state, so that the new pool destroy command mentioned
    above will return an explicit error.

Features: pool
Signed-off-by: Li Wei <wei.g.li@intel.com>
Required-githooks: true
  • Loading branch information
liw committed May 15, 2024
1 parent b81d4f6 commit 1e67120
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 51 deletions.
2 changes: 1 addition & 1 deletion src/include/daos_srv/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ int ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf,
bool ds_pool_skip_for_check(struct ds_pool *pool);
int ds_pool_start_after_check(uuid_t uuid);
int ds_pool_start(uuid_t uuid, bool aft_chk);
void ds_pool_stop(uuid_t uuid);
int ds_pool_stop(uuid_t uuid);
int ds_pool_extend(uuid_t pool_uuid, int ntargets, const d_rank_list_t *rank_list, int ndomains,
const uint32_t *domains, d_rank_list_t *svc_ranks);
int ds_pool_target_update_state(uuid_t pool_uuid, d_rank_list_t *ranks,
Expand Down
4 changes: 3 additions & 1 deletion src/mgmt/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -1404,7 +1404,9 @@ ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *td_req)
goto out;
}

ds_pool_stop(td_in->td_pool_uuid);
rc = ds_pool_stop(td_in->td_pool_uuid);
if (rc != 0)
goto out;

/** generate path to the target directory */
rc = ds_mgmt_tgt_file(td_in->td_pool_uuid, NULL, NULL, &path);
Expand Down
80 changes: 31 additions & 49 deletions src/pool/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -725,16 +725,6 @@ pool_obj(struct daos_llink *llink)
return container_of(llink, struct ds_pool, sp_entry);
}

static inline void
pool_put_sync(void *args)
{
struct ds_pool *pool = args;

D_ASSERT(pool != NULL);
D_ASSERT(dss_get_module_info()->dmi_xs_id == 0);
daos_lru_ref_release(pool_cache, &pool->sp_entry);
}

struct ds_pool_create_arg {
uint32_t pca_map_version;
};
Expand Down Expand Up @@ -865,13 +855,6 @@ pool_free_ref(struct daos_llink *llink)
D_DEBUG(DB_MGMT, DF_UUID": freeing\n", DP_UUID(pool->sp_uuid));

D_ASSERT(d_list_empty(&pool->sp_hdls));
rc = ds_pool_thread_collective(pool->sp_uuid, 0, pool_child_delete_one,
pool->sp_uuid, 0);
if (rc == -DER_CANCELED)
D_DEBUG(DB_MD, DF_UUID": no ESs\n", DP_UUID(pool->sp_uuid));
else if (rc != 0)
D_ERROR(DF_UUID": failed to delete ES pool caches: "DF_RC"\n",
DP_UUID(pool->sp_uuid), DP_RC(rc));

ds_cont_ec_eph_free(pool);

Expand Down Expand Up @@ -970,7 +953,7 @@ ds_pool_lookup(const uuid_t uuid, struct ds_pool **pool)

if ((*pool)->sp_stopping) {
D_DEBUG(DB_MD, DF_UUID": is in stopping\n", DP_UUID(uuid));
pool_put_sync(*pool);
ds_pool_put(*pool);
*pool = NULL;
return -DER_SHUTDOWN;
}
Expand All @@ -989,31 +972,9 @@ ds_pool_get(struct ds_pool *pool)
void
ds_pool_put(struct ds_pool *pool)
{
int rc;

/*
* Someone has stopped the pool. Current user may be the one that is holding the last
* reference on the pool, then drop such reference will trigger pool_free_ref() as to
* stop related container that may wait current user (ULT) to exit. To avoid deadlock,
* let's use independent ULT to drop the reference asynchronously and make current ULT
* to go ahead.
*
* An example of the deadlock scenarios is something like that:
*
* cont_iv_prop_fetch_ult => ds_pool_put => pool_free_ref [WAIT]=> cont_child_stop =>
* cont_stop_agg [WAIT]=> cont_agg_ult => ds_cont_csummer_init => ds_cont_get_props =>
* cont_iv_prop_fetch [WAIT]=> cont_iv_prop_fetch_ult
*/
if (unlikely(pool->sp_stopping) && daos_lru_is_last_user(&pool->sp_entry)) {
rc = dss_ult_create(pool_put_sync, pool, DSS_XS_SELF, 0, 0, NULL);
if (unlikely(rc != 0)) {
D_ERROR("Failed to create ULT to async put ref on the pool "DF_UUID"\n",
DP_UUID(pool->sp_uuid));
pool_put_sync(pool);
}
} else {
pool_put_sync(pool);
}
D_ASSERT(pool != NULL);
D_ASSERT(dss_get_module_info()->dmi_xs_id == 0);
daos_lru_ref_release(pool_cache, &pool->sp_entry);
}

void
Expand Down Expand Up @@ -1213,7 +1174,7 @@ ds_pool_start(uuid_t uuid, bool aft_chk)
failure_ult:
pool_fetch_hdls_ult_abort(pool);
failure_pool:
pool_put_sync(pool);
ds_pool_put(pool);
return rc;
}

Expand All @@ -1238,22 +1199,39 @@ pool_tgt_disconnect_all(struct ds_pool *pool)
}
}

static void
pool_stop_children(struct ds_pool *pool)
{
int rc;

rc = ds_pool_thread_collective(pool->sp_uuid, 0, pool_child_delete_one, pool->sp_uuid, 0);
if (rc == -DER_CANCELED)
D_DEBUG(DB_MD, DF_UUID ": no ESs\n", DP_UUID(pool->sp_uuid));
else if (rc != 0)
DL_ERROR(rc, DF_UUID ": failed to delete ES pool caches", DP_UUID(pool->sp_uuid));
}

/*
* Stop a pool. Must be called on the system xstream. Release the ds_pool
* object reference held by ds_pool_start. Only for mgmt and pool modules.
*/
void
int
ds_pool_stop(uuid_t uuid)
{
struct ds_pool *pool;

ds_pool_failed_remove(uuid);

ds_pool_lookup(uuid, &pool);
ds_pool_lookup_internal(uuid, &pool);
if (pool == NULL)
return;
D_ASSERT(!pool->sp_stopping);
return 0;
if (pool->sp_stopping) {
D_INFO(DF_UUID ": already stopping\n", DP_UUID(uuid));
ds_pool_put(pool);
return -DER_BUSY;
}
pool->sp_stopping = 1;
D_INFO(DF_UUID ": pool stopping\n", DP_UUID(uuid));

pool_tgt_disconnect_all(pool);

Expand All @@ -1263,9 +1241,13 @@ ds_pool_stop(uuid_t uuid)

ds_rebuild_abort(pool->sp_uuid, -1, -1, -1);
ds_migrate_stop(pool, -1, -1);

pool_stop_children(pool);

ds_pool_put(pool); /* held by ds_pool_start */
pool_put_sync(pool);
ds_pool_put(pool);
D_INFO(DF_UUID": pool stopped\n", DP_UUID(uuid));
return 0;
}

/* ds_pool_hdl ****************************************************************/
Expand Down

0 comments on commit 1e67120

Please sign in to comment.