Skip to content

Commit

Permalink
prov/efa: update efa shm implementation to allocate fi_peer_srx_context
Browse files Browse the repository at this point in the history
The previous definition of the peer API didn't specify who allocates the
second peer structure (the one referenced by the peer). The shm implementation
chose to duplicate the imported srx and set it internally. The new
definition specifies that the owner handles the duplication of the peer resource,
which the peer then imports and simply sets. Shm has been updated accordingly,
but efa needs to be updated to create a second peer_srx and copy the fields of the
original one into it so that the peer references the owner_ops correctly.

This also adds a missing fi_close for the shm srx resource.

Signed-off-by: Alexia Ingerson <alexia.ingerson@intel.com>
  • Loading branch information
aingerson committed Sep 17, 2024
1 parent 2f5cf27 commit e200ca3
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
4 changes: 4 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ struct efa_rdm_ep {

/* shm provider fid */
struct fid_ep *shm_ep;
/* shm srx fid (shm-owned) */
struct fid_ep *shm_srx;
/* shm peer_srx (efa-owned) */
struct fid_peer_srx *shm_peer_srx;

/*
* EFA RDM endpoint rx/tx queue sizes. These may be different from the core
Expand Down
24 changes: 20 additions & 4 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,13 @@ static int efa_rdm_ep_close(struct fid *fid)
}

if (efa_rdm_ep->shm_ep) {
ret = fi_close(&efa_rdm_ep->shm_srx->fid);
if (ret) {
EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm srx\n");
retv = ret;
}
free(efa_rdm_ep->shm_peer_srx);

ret = fi_close(&efa_rdm_ep->shm_ep->fid);
if (ret) {
EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm EP\n");
Expand Down Expand Up @@ -1238,7 +1245,6 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg)
int ret = 0;
struct fi_peer_srx_context peer_srx_context = {0};
struct fi_rx_attr peer_srx_attr = {0};
struct fid_ep *peer_srx_ep = NULL;
struct util_srx_ctx *srx_ctx;

switch (command) {
Expand Down Expand Up @@ -1302,10 +1308,20 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg)
* shared memory region.
*/
if (ep->shm_ep) {
peer_srx_context.srx = util_get_peer_srx(ep->peer_srx_ep);
ep->shm_peer_srx = calloc(1, sizeof(*ep->shm_peer_srx));
if (!ep->shm_peer_srx) {
ret = -FI_ENOMEM;
goto err_unlock;
}
memcpy(ep->shm_peer_srx, util_get_peer_srx(ep->peer_srx_ep),
sizeof(*ep->shm_peer_srx));

peer_srx_context.size = sizeof(peer_srx_context);
peer_srx_context.srx = ep->shm_peer_srx;

peer_srx_attr.op_flags |= FI_PEER;
ret = fi_srx_context(efa_rdm_ep_domain(ep)->shm_domain,
&peer_srx_attr, &peer_srx_ep, &peer_srx_context);
&peer_srx_attr, &ep->shm_srx, &peer_srx_context);
if (ret)
goto err_unlock;
shm_ep_name_len = EFA_SHM_NAME_MAX;
Expand All @@ -1315,7 +1331,7 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg)
fi_setname(&ep->shm_ep->fid, shm_ep_name, shm_ep_name_len);

/* Bind srx to shm ep */
ret = fi_ep_bind(ep->shm_ep, &ep->peer_srx_ep->fid, 0);
ret = fi_ep_bind(ep->shm_ep, &ep->shm_srx->fid, 0);
if (ret)
goto err_unlock;

Expand Down

0 comments on commit e200ca3

Please sign in to comment.