diff --git a/Chap_API_Fabric.tex b/Chap_API_Fabric.tex index 366ab84f..2723ca61 100644 --- a/Chap_API_Fabric.tex +++ b/Chap_API_Fabric.tex @@ -37,7 +37,7 @@ \chapter{Fabric Support Definitions} \begin{itemize} \item An array of information on fabric devices for a node by passing \refattr{PMIX_FABRIC_DEVICES} as the key to \refapi{PMIx_Get} along with the \refattr{PMIX_HOSTNAME} of the node as a directive - \item An array of information on a specific fabric device by passing \refattr{PMIX_FABRIC_DEVICE} as the key to \refapi{PMIx_Get} along with the \refattr{PMIX_FABRIC_DEVICE_ID} of the device as a directive + \item An array of information on a specific fabric device by passing \refattr{PMIX_FABRIC_DEVICE} as the key to \refapi{PMIx_Get} along with the \refattr{PMIX_DEVICE_ID} of the device as a directive \item An array of information on a specific fabric device by passing \refattr{PMIX_FABRIC_DEVICE} as the key to \refapi{PMIx_Get} along with both \refattr{PMIX_FABRIC_DEVICE_NAME} of the device and the \refattr{PMIX_HOSTNAME} of the node as directives \end{itemize} @@ -47,7 +47,7 @@ \chapter{Fabric Support Definitions} \begin{itemize} \item \pasteAttributeItemBegin{PMIX_HOSTNAME} The \refattr{PMIX_NODEID} may be returned in its place, or in addition to the hostname. \pasteAttributeItemEnd - \item \pasteAttributeItem{PMIX_FABRIC_DEVICE_ID} + \item \pasteAttributeItem{PMIX_DEVICE_ID} \item \pasteAttributeItem{PMIX_FABRIC_DEVICE_NAME} \item \pasteAttributeItem{PMIX_FABRIC_DEVICE_VENDOR} \item \pasteAttributeItem{PMIX_FABRIC_DEVICE_BUS_TYPE} @@ -111,12 +111,13 @@ \subsection{Fabric Endpoint Structure} \begin{codepar} typedef struct pmix_endpoint \{ char *uuid; + char *osname; pmix_byte_object_t endpt; \} pmix_endpoint_t; \end{codepar} \cspecificend -The \refarg{uuid} field contains the \ac{UUID} of the fabric device and the \refarg{endpt} field contains a fabric vendor-specific object identifying the communication endpoint assigned to the process. 
+The \refarg{uuid} field contains the \ac{UUID} of the fabric device, the \refarg{osname} is the local operating system's name for the device, and the \refarg{endpt} field contains a fabric vendor-specific object identifying the communication endpoint assigned to the process. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -196,107 +197,6 @@ \subsection{Fabric endpoint support macros} \end{arglist} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Fabric Device Distance Structure} -\declarestruct{pmix_device_distance_t} - -The \refstruct{pmix_device_distance_t} structure contains the minimum and maximum relative distance from the caller to a given fabric device. - -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -typedef struct pmix_device_distance \{ - char *uuid; - uint16_t mindist; - uint16_t maxdist; -\} pmix_device_distance_t; -\end{codepar} -\cspecificend - -The two distance fields provide the minimum and maximum relative distance to the device from the binding location (as sampled at the time of the request) of the process, expressed as a 16-bit integer value where a smaller number indicates that this device is closer to the process than a device with a larger distance value. - -Relative distances only apply to similar devices (i.e., devices from the same fabric) and cannot be used to compare devices from different fabrics. Both minimum and maximum distances are provided to support cases where the process may be bound to more than one location, and the locations are at different distances from the device. - -A relative distance value of \code{UINT16_MAX} indicates that the distance from the process to the device could not be provided. This may be due to lack of available information (e.g., the \ac{PMIx} library not having access to device locations) or other factors. 
- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Fabric device distance support macros} -\label{api:netenddist:macros} - -The following macros are provided to support the \refstruct{pmix_device_distance_t} structure. - -%%%% -\littleheader{Initialize the device distance structure} -\declaremacro{PMIX_DEVICE_DIST_CONSTRUCT} - -Initialize the \refstruct{pmix_device_distance_t} fields. - -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -PMIX_DEVICE_DIST_CONSTRUCT(m) -\end{codepar} -\cspecificend - -\begin{arglist} -\argin{m}{Pointer to the structure to be initialized (pointer to \refstruct{pmix_device_distance_t})} -\end{arglist} - -%%%% -\littleheader{Destruct the device distance structure} -\declaremacro{PMIX_DEVICE_DIST_DESTRUCT} - -Destruct the \refstruct{pmix_device_distance_t} fields. - -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -PMIX_DEVICE_DIST_DESTRUCT(m) -\end{codepar} -\cspecificend - -\begin{arglist} -\argin{m}{Pointer to the structure to be destructed (pointer to \refstruct{pmix_device_distance_t})} -\end{arglist} - -%%%% -\littleheader{Create an device distance array} -\declaremacro{PMIX_DEVICE_DIST_CREATE} - -Allocate and initialize a \refstruct{pmix_device_distance_t} array. - -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -PMIX_DEVICE_DIST_CREATE(m, n) -\end{codepar} -\cspecificend - -\begin{arglist} -\arginout{m}{Address where the pointer to the array of \refstruct{pmix_device_distance_t} structures shall be stored (handle)} -\argin{n}{Number of structures to be allocated (\code{size_t})} -\end{arglist} - -%%%% -\littleheader{Release an device distance array} -\declaremacro{PMIX_DEVICE_DIST_FREE} - -Release an array of \refstruct{pmix_device_distance_t} structures. 
- -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -PMIX_DEVICE_DIST_FREE(m, n) -\end{codepar} -\cspecificend - -\begin{arglist} -\argin{m}{Pointer to the array of \refstruct{pmix_device_distance_t} structures (handle)} -\argin{n}{Number of structures in the array (\code{size_t})} -\end{arglist} - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Fabric Coordinate Structure} \declarestruct{pmix_coord_t} @@ -410,6 +310,7 @@ \subsection{Fabric Geometry Structure} typedef struct pmix_geometry \{ size_t fabric; char *uuid; + char *osname; pmix_coord_t *coordinates; size_t ncoords; \} pmix_geometry_t; @@ -631,7 +532,7 @@ \subsection{Fabric registration structure} \pasteAttributeItem{PMIX_FABRIC_DEVICE_NAME} \pasteAttributeItem{PMIX_FABRIC_DEVICE_VENDOR} -\pasteAttributeItem{PMIX_FABRIC_DEVICE_ID} +\pasteAttributeItem{PMIX_DEVICE_ID} \pasteAttributeItem{PMIX_HOSTNAME} \pasteAttributeItem{PMIX_FABRIC_DEVICE_DRIVER} \pasteAttributeItem{PMIX_FABRIC_DEVICE_FIRMWARE} @@ -743,11 +644,7 @@ \section{Fabric Support Attributes} } % \declareAttributeNEW{PMIX_FABRIC_DEVICE}{"pmix.fabdev"}{\refstruct{pmix_data_array_t}}{ -An array of \refstruct{pmix_info_t} describing a particular fabric device using one or more of the attributes defined below. The first element in the array shall be the \refattr{PMIX_FABRIC_DEVICE_ID} of the device. -} -% -\declareAttributeNEW{PMIX_FABRIC_DEVICE_ID}{"pmix.fabdev.id"}{string}{ -System-wide \ac{UUID} of a particular fabric device. +An array of \refstruct{pmix_info_t} describing a particular fabric device using one or more of the attributes defined below. The first element in the array shall be the \refattr{PMIX_DEVICE_ID} of the device. } % \declareAttributeNEW{PMIX_FABRIC_DEVICE_INDEX}{"pmix.fabdev.idx"}{uint32_t}{ @@ -814,17 +711,13 @@ \section{Fabric Support Attributes} Fabric endpoints for a specified process. 
As multiple endpoints may be assigned to a given process (e.g., in the case where multiple devices are associated with a package to which the process is bound), the returned values will be provided in a \refstruct{pmix_data_array_t} of \refstruct{pmix_endpoint_t} elements. } % -\declareAttributeNEW{PMIX_FABRIC_DEVICE_DIST}{"pmix.fab.endptdist"}{pmix_data_array_t}{ -Relative distance from the location of the calling process (either as sampled at the time of a \refapi{PMIx_Fabric_update_distances} request, or based on the initial binding location set at time of start of execution) to each local fabric device, returned as an array of \refstruct{pmix_device_distance_t} elements in no particular order. -} -% \vspace{\baselineskip} -The following attributes are related to the \emph{job realm} (as described in Section \ref{chap:res:jrealm}) and are retrieved according to those rules. +The following attributes are related to the \emph{job realm} (as described in Section \ref{chap:res:jrealm}) and are retrieved according to those rules. Note that distances to fabric devices are retrieved using the \refattr{PMIX_DEVICE_DISTANCES} key with the appropriate \refstruct{pmix_device_type_t} qualifier. % \declareAttributeNEW{PMIX_SWITCH_PEERS}{"pmix.speers"}{pmix_data_array_t}{ Peer ranks that share the same switch as the process specified in the call to \refapi{PMIx_Get}. Returns a \refstruct{pmix_data_array_t} array of \refstruct{pmix_info_t} results, each element containing the \refattr{PMIX_SWITCH_PEERS} key with a three-element \refstruct{pmix_data_array_t} array of -\refstruct{pmix_info_t} containing the \refattr{PMIX_FABRIC_DEVICE_ID} of the local fabric device, the \refattr{PMIX_FABRIC_SWITCH} identifying the switch to which it is connected, and a comma-delimited string of peer ranks sharing the switch to which that device is connected. 
+\refstruct{pmix_info_t} containing the \refattr{PMIX_DEVICE_ID} of the local fabric device, the \refattr{PMIX_FABRIC_SWITCH} identifying the switch to which it is connected, and a comma-delimited string of peer ranks sharing the switch to which that device is connected. } % @@ -1082,84 +975,5 @@ \subsection{\code{PMIx_Fabric_deregister_nb}} Non-blocking form of \refapi{PMIx_Fabric_deregister}. Provided \refarg{fabric} must not be accessed until after callback function has been executed. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{\code{PMIx_Fabric_update_distances}} -\declareapi{PMIx_Fabric_update_distances} - -%%%% -\summary - -Update distances from current process location to local fabric devices. - -%%%% -\format - -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -pmix_status_t -PMIx_Fabric_update_distances(pmix_device_distance_t *distances[], - size_t *ndist); -\end{codepar} -\cspecificend - -\begin{arglist} -\arginout{distances}{Pointer to an address where the array of \refstruct{pmix_device_distance_t} structures containing the distances from the caller to local fabric devices is to be returned (handle)} -\arginout{ndist}{Pointer to an address where the number of elements in the \refarg{distances} array is to be returned (handle)} -\end{arglist} - -Returns one of the following: - -\begin{itemize} -\item \refconst{PMIX_SUCCESS} indicating that the distances were returned. -\item a non-zero \ac{PMIx} error constant indicating the reason the request failed. -\end{itemize} - - -%%%% -\descr - -Both the minimum and maximum distance fields in the elements of the array shall be filled with the respective distances between the current process location and the respective fabric device. Any distance information stored in the local \ac{PMIx} server's cache should also be updated so that subsequent queries return the updated values. 
- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{\code{PMIx_Fabric_update_distances_nb}} -\declareapi{PMIx_Fabric_update_distances_nb} - -%%%% -\summary - -Update distances from current process location to local fabric devices. - -%%%% -\format - -\versionMarker{4.0} -\cspecificstart -\begin{codepar} -pmix_status_t -PMIx_Fabric_update_distances_nb(pmix_info_cbfunc_t cbfunc, - void *cbdata); -\end{codepar} -\cspecificend - -\begin{arglist} -\argin{cbfunc}{Callback function \refapi{pmix_info_cbfunc_t} (function reference)} -\argin{cbdata}{Data to be passed to the callback function (memory reference)} -\end{arglist} - -Returns one of the following: - -\begin{itemize} -\item \refconst{PMIX_SUCCESS} indicating that the request has been accepted for processing and the provided callback function will be executed upon completion of the operation. Note that the library must not invoke the callback function prior to returning from the \ac{API}. -\item a non-zero \ac{PMIx} error constant indicating a reason for the request to have been rejected. In this case, the provided callback function will not be executed -\end{itemize} - - -%%%% -\descr - -Non-blocking form of the \refapi{PMIx_Fabric_update_distances} \ac{API}. If successful, the requested data will be returned under the \refattr{PMIX_FABRIC_DEVICE_DIST} attribute in the \refarg{info} array of the callback function. - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/Chap_API_Job_Mgmt.tex b/Chap_API_Job_Mgmt.tex index 47577320..cabe6b66 100644 --- a/Chap_API_Job_Mgmt.tex +++ b/Chap_API_Job_Mgmt.tex @@ -1143,7 +1143,7 @@ \subsection{Log attributes} } % \declareAttribute{PMIX_LOG_EMAIL_SERVER}{"pmix.log.esrvr"}{char*}{ -Hostname (or IP address) of SMTP server. +Hostname (or \ac{IP} address) of SMTP server. 
} % \declareAttribute{PMIX_LOG_EMAIL_SRVR_PORT}{"pmix.log.esrvrprt"}{int32_t}{ diff --git a/Chap_API_Proc_Mgmt.tex b/Chap_API_Proc_Mgmt.tex index 084ad4ae..db2e350e 100644 --- a/Chap_API_Proc_Mgmt.tex +++ b/Chap_API_Proc_Mgmt.tex @@ -1169,6 +1169,23 @@ \subsubsection{Topology support macros} \end{arglist} +\littleheader{Destruct the topology structure} +\declaremacro{PMIX_TOPOLOGY_DESTRUCT} + +Destruct the \refstruct{pmix_topology_t} fields. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_TOPOLOGY_DESTRUCT(m) +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{m}{Pointer to the structure to be destructed (pointer to \refstruct{pmix_topology_t})} +\end{arglist} + + \littleheader{Create a topology array} \declaremacro{PMIX_TOPOLOGY_CREATE} @@ -1187,6 +1204,24 @@ \subsubsection{Topology support macros} \end{arglist} +\littleheader{Release a topology array} +\declaremacro{PMIX_TOPOLOGY_FREE} + +Release a \refstruct{pmix_topology_t} array. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_TOPOLOGY_FREE(m, n) +\end{codepar} +\cspecificend + +\begin{arglist} +\arginout{m}{Address of the array of \refstruct{pmix_topology_t} structures to be released (handle)} +\argin{n}{Number of structures in the array (\code{size_t})} +\end{arglist} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsubsection{Relative locality of two processes} \declarestruct{pmix_locality_t} @@ -1238,6 +1273,10 @@ \subsubsection{Relative locality of two processes} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsubsection{Locality keys} +% +\declareAttributeNEW{PMIX_LOCALITY}{"pmix.loc"}{pmix_locality_t}{ +Bitmask describing the location of the referenced process. 
+} % \declareAttribute{PMIX_LOCALITY_STRING}{"pmix.locstr"}{char*}{ String describing a process's bound location - referenced using the process's @@ -1252,13 +1291,13 @@ \subsubsection{Locality keys} } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{\code{PMIx_Get_cpuset}} -\declareapi{PMIx_Get_cpuset} +\subsection{\code{PMIx_Parse_cpuset_string}} +\declareapi{PMIx_Parse_cpuset_string} %%%% \summary -Get the \ac{PU} binding bitmap from its string representation. +Parse the \ac{PU} binding bitmap from its string representation. %%%% \format @@ -1267,8 +1306,8 @@ \subsection{\code{PMIx_Get_cpuset}} \cspecificstart \begin{codepar} pmix_status_t -PMIx_Get_cpuset(const char *cpuset_string, - pmix_cpuset_t *cpuset); +PMIx_Parse_cpuset_string(const char *cpuset_string, + pmix_cpuset_t *cpuset); \end{codepar} \cspecificend @@ -1285,3 +1324,351 @@ \subsection{\code{PMIx_Get_cpuset}} Parse the string representation of the binding bitmap (as returned by \refapi{PMIx_Get} using the \refattr{PMIX_CPUSET} key) and set the appropriate \ac{PU} binding location information in the provided memory location. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{\code{PMIx_Get_cpuset}} +\declareapi{PMIx_Get_cpuset} + +%%%% +\summary + +Get the \ac{PU} binding bitmap of the current process. + +%%%% +\format + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +pmix_status_t +PMIx_Get_cpuset(pmix_cpuset_t *cpuset, pmix_bind_envelope_t ref); +\end{codepar} +\cspecificend + +\begin{arglist} +\arginout{cpuset}{Address of an object where the bitmap is to be stored (memory reference)} +\argin{ref}{The binding envelope to be considered when formulating the bitmap (\refstruct{pmix_bind_envelope_t})} +\end{arglist} + +Returns \refconst{PMIX_SUCCESS}, indicating that the \refarg{cpuset} was successfully loaded, or an appropriate \ac{PMIx} error constant. 
+ +%%%% +\descr + +Obtain and set the appropriate \ac{PU} binding location information in the provided memory location based on the specified binding envelope. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsubsection{Binding envelope} +\declarestruct{pmix_bind_envelope_t} + +\versionMarker{4.0} +The \refstruct{pmix_bind_envelope_t} data type +defines the envelope of threads within a possibly multi-threaded process that are to be considered when getting the cpuset associated with the process. Valid values include: + +\begin{constantdesc} +% +\declareconstitemNEW{PMIX_CPUBIND_PROCESS} +Use the location of all threads in the possibly multi-threaded process. +% +\declareconstitemNEW{PMIX_CPUBIND_THREAD} +Use only the location of the thread calling the \ac{API}. +% +\end{constantdesc} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{\code{PMIx_Compute_distances}} +\declareapi{PMIx_Compute_distances} + +%%%% +\summary + +Compute distances from specified process location to local devices. 
+ +%%%% +\format + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +pmix_status_t +PMIx_Compute_distances(pmix_topology_t *topo, + pmix_cpuset_t *cpuset, + pmix_info_t info[], size_t ninfo, + pmix_device_distance_t *distances[], + size_t *ndist); +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{topo}{Pointer to the topology description of the node where the process is located (\code{NULL} indicates the local node) (\refstruct{pmix_topology_t})} +\argin{cpuset}{Pointer to the location of the process (\refstruct{pmix_cpuset_t})} +\argin{info}{Array of \refstruct{pmix_info_t} describing the devices whose distance is to be computed (handle)} +\argin{ninfo}{Number of elements in \refarg{info} (integer)} +\arginout{distances}{Pointer to an address where the array of \refstruct{pmix_device_distance_t} structures containing the distances from the specified process location to the requested devices is to be returned (handle)} +\arginout{ndist}{Pointer to an address where the number of elements in the \refarg{distances} array is to be returned (handle)} +\end{arglist} + +Returns one of the following: + +\begin{itemize} +\item \refconst{PMIX_SUCCESS} indicating that the distances were returned. +\item a non-zero \ac{PMIx} error constant indicating the reason the request failed. +\end{itemize} + + +%%%% +\descr + +Both the minimum and maximum distance fields in the elements of the array shall be filled with the respective distances between the specified process location and the types of devices or specific device identified in the \refarg{info} directives. In the absence of directives, distances to all supported device types shall be returned. + +\adviceuserstart +A process whose threads are not all bound to the same location may return inconsistent results from calls to this \ac{API} by different threads if the \refconst{PMIX_CPUBIND_THREAD} binding envelope was used when generating the \refarg{cpuset}. 
+\adviceuserend + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{\code{PMIx_Compute_distances_nb}} +\declareapi{PMIx_Compute_distances_nb} + +%%%% +\summary + +Compute distances from specified process location to local devices. + +%%%% +\format + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +pmix_status_t +PMIx_Compute_distances_nb(pmix_topology_t *topo, + pmix_cpuset_t *cpuset, + pmix_info_t info[], size_t ninfo, + pmix_device_dist_cbfunc_t cbfunc, + void *cbdata); +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{topo}{Pointer to the topology description of the node where the process is located (\code{NULL} indicates the local node) (\refstruct{pmix_topology_t})} +\argin{cpuset}{Pointer to the location of the process (\refstruct{pmix_cpuset_t})} +\argin{info}{Array of \refstruct{pmix_info_t} describing the devices whose distance is to be computed (handle)} +\argin{ninfo}{Number of elements in \refarg{info} (integer)} +\argin{cbfunc}{Callback function \refapi{pmix_device_dist_cbfunc_t} (function reference)} +\argin{cbdata}{Data to be passed to the callback function (memory reference)} +\end{arglist} + +Returns one of the following: + +\begin{itemize} +\item \refconst{PMIX_SUCCESS} indicating that the request has been accepted for processing and the provided callback function will be executed upon completion of the operation. Note that the library must not invoke the callback function prior to returning from the \ac{API}. +\item a non-zero \ac{PMIx} error constant indicating a reason for the request to have been rejected. In this case, the provided callback function will not be executed. +\end{itemize} + + +%%%% +\descr + +Non-blocking form of the \refapi{PMIx_Compute_distances} \ac{API}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Device Distance Callback Function} +\declareapi{pmix_device_dist_cbfunc_t} + +%%%% +\summary + +The \refapi{pmix_device_dist_cbfunc_t} is used to return an array of device distances. 
+ +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +typedef void (*pmix_device_dist_cbfunc_t) + (pmix_status_t status, + pmix_device_distance_t *dist, + size_t ndist, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata); +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{status}{Status associated with the operation (\refstruct{pmix_status_t})} +\argin{dist}{Array of \refstruct{pmix_device_distance_t} returned by the operation (pointer)} +\argin{ndist}{Number of elements in the \refarg{dist} array (\code{size_t})} +\argin{cbdata}{Callback data passed to original \ac{API} call (memory reference)} +\argin{release_fn}{Function to be called when done with the \refarg{dist} data (function pointer)} +\argin{release_cbdata}{Callback data to be passed to \refarg{release_fn} (memory reference)} +\end{arglist} + + +%%%% +\descr + +The \refarg{status} indicates if requested data was found or not. +The array of \refstruct{pmix_device_distance_t} will contain the distance information. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Device type} +\declarestruct{pmix_device_type_t} + +The \refstruct{pmix_device_type_t} is a \code{uint64_t} bitmask for identifying the type(s) whose distances are being requested, or the type of a specific device being referenced (e.g., in a \refstruct{pmix_device_distance_t} object). + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +typedef uint64_t pmix_device_type_t; +\end{codepar} +\cspecificend + +The following constants can be used to set a variable of the type \refstruct{pmix_device_type_t}. + +\begin{constantdesc} +% +\declareconstitemNEW{PMIX_DEVTYPE_UNKNOWN} +The device is of an unknown type - will not be included in returned device distances. +% +\declareconstitemNEW{PMIX_DEVTYPE_BLOCK} +Operating system block device, or non-volatile memory device (e.g., "sda" or "dax2.0" on Linux). 
+% +\declareconstitemNEW{PMIX_DEVTYPE_GPU} +Operating system \ac{GPU} device (e.g., "card0" for a Linux \ac{DRM} device). +% +\declareconstitemNEW{PMIX_DEVTYPE_NETWORK} +Operating system network device (e.g., the "eth0" interface on Linux). +% +\declareconstitemNEW{PMIX_DEVTYPE_OPENFABRICS} +Operating system OpenFabrics device (e.g., an "mlx4_0" InfiniBand \ac{HCA}, or "hfi1_0" Omni-Path interface on Linux). +% +\declareconstitemNEW{PMIX_DEVTYPE_DMA} +Operating system \ac{DMA} engine device (e.g., the "dma0chan0" \ac{DMA} channel on Linux). +% +\declareconstitemNEW{PMIX_DEVTYPE_COPROC} +Operating system co-processor device (e.g., "mic0" for a Xeon Phi on Linux, "opencl0d0" for a OpenCL device, or "cuda0" for a \ac{CUDA} device). +% +\end{constantdesc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Device Distance Structure} +\declarestruct{pmix_device_distance_t} + +The \refstruct{pmix_device_distance_t} structure contains the minimum and maximum relative distance from the caller to a given device. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +typedef struct pmix_device_distance \{ + char *uuid; + char *osname; + pmix_device_type_t type; + uint16_t mindist; + uint16_t maxdist; +\} pmix_device_distance_t; +\end{codepar} +\cspecificend + +The \refarg{uuid} is a string identifier guaranteed to be unique within the cluster and is typically assembled from discovered device attributes (e.g., the \ac{IP} address of the device). The \refarg{osname} is the local operating system name of the device and is only unique to that node. + +The two distance fields provide the minimum and maximum relative distance to the device from the specified location of the process, expressed as a 16-bit integer value where a smaller number indicates that this device is closer to the process than a device with a larger distance value. 
Note that relative distance values are not necessarily correlated to a physical property - e.g., a device at twice the distance from another device does not necessarily have twice the latency for communication with it. + +Relative distances only apply to similar devices and cannot be used to compare devices of different types. Both minimum and maximum distances are provided to support cases where the process may be bound to more than one location, and the locations are at different distances from the device. + +A relative distance value of \code{UINT16_MAX} indicates that the distance from the process to the device could not be provided. This may be due to lack of available information (e.g., the \ac{PMIx} library not having access to device locations) or other factors. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Device distance support macros} +\label{api:netenddist:macros} + +The following macros are provided to support the \refstruct{pmix_device_distance_t} structure. + +%%%% +\littleheader{Initialize the device distance structure} +\declaremacro{PMIX_DEVICE_DIST_CONSTRUCT} + +Initialize the \refstruct{pmix_device_distance_t} fields. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_DEVICE_DIST_CONSTRUCT(m) +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{m}{Pointer to the structure to be initialized (pointer to \refstruct{pmix_device_distance_t})} +\end{arglist} + +%%%% +\littleheader{Destruct the device distance structure} +\declaremacro{PMIX_DEVICE_DIST_DESTRUCT} + +Destruct the \refstruct{pmix_device_distance_t} fields. 
+ +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_DEVICE_DIST_DESTRUCT(m) +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{m}{Pointer to the structure to be destructed (pointer to \refstruct{pmix_device_distance_t})} +\end{arglist} + +%%%% +\littleheader{Create a device distance array} +\declaremacro{PMIX_DEVICE_DIST_CREATE} + +Allocate and initialize a \refstruct{pmix_device_distance_t} array. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_DEVICE_DIST_CREATE(m, n) +\end{codepar} +\cspecificend + +\begin{arglist} +\arginout{m}{Address where the pointer to the array of \refstruct{pmix_device_distance_t} structures shall be stored (handle)} +\argin{n}{Number of structures to be allocated (\code{size_t})} +\end{arglist} + +%%%% +\littleheader{Release a device distance array} +\declaremacro{PMIX_DEVICE_DIST_FREE} + +Release an array of \refstruct{pmix_device_distance_t} structures. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_DEVICE_DIST_FREE(m, n) +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{m}{Pointer to the array of \refstruct{pmix_device_distance_t} structures (handle)} +\argin{n}{Number of structures in the array (\code{size_t})} +\end{arglist} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Device distance attributes} +\label{api:netenddist:attrs} + +The following attributes can be used to retrieve device distances from the \ac{PMIx} data store. Note that distances stored by the host environment are based on the process location at the time of start of execution and may not reflect changes to location imposed by the process itself. +% +\declareAttributeNEW{PMIX_DEVICE_DISTANCES}{"pmix.dev.dist"}{pmix_data_array_t}{ +Return an array of \refstruct{pmix_device_distance_t} containing the minimum and maximum distances of the given process location to all devices of the specified type on the local node. 
+} +% +\declareAttributeNEW{PMIX_DEVICE_TYPE}{"pmix.dev.type"}{pmix_device_type_t}{ +Bitmask specifying the type(s) of device(s) whose information is being requested. Only used as a directive/qualifier. +} +% +\declareAttributeNEW{PMIX_DEVICE_ID}{"pmix.dev.id"}{string}{ +System-wide \ac{UUID} or node-local \ac{OS} name of a particular device. +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/Chap_API_Reserved_Keys.tex b/Chap_API_Reserved_Keys.tex index 4b501692..eb6930be 100644 --- a/Chap_API_Reserved_Keys.tex +++ b/Chap_API_Reserved_Keys.tex @@ -402,6 +402,10 @@ \subsection{Process realm attributes} A string representation of the \ac{PU} binding bitmap applied to the process upon launch. The string shall begin with the name of the library that generated it (e.g., "hwloc") followed by a colon and the bitmap string itself. } % +\declareAttributeNEW{PMIX_CPUSET_BITMAP}{"pmix.bitmap"}{pmix_cpuset_t*}{ +Bitmap applied to the process upon launch. +} +% \declareAttribute{PMIX_CREDENTIAL}{"pmix.cred"}{char*}{ Security credential assigned to the process. } @@ -412,11 +416,11 @@ \subsection{Process realm attributes} % \declareAttributeNEW{PMIX_REINCARNATION}{"pmix.reinc"}{uint32_t}{ Number of times this process has been re-instantiated - i.e, a value of zero indicates that the process has never been restarted. -} +} \vspace{\baselineskip} -In addition, process-level information includes functional attributes directly associated with a process - for example, the process-related fabric attributes included in Section \ref{api:fabric:attrs}. +In addition, process-level information includes functional attributes directly associated with a process - for example, the process-related fabric attributes included in Section \ref{api:fabric:attrs} or the distance attributes of Section \ref{api:netenddist:attrs}. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/Chap_API_Server.tex b/Chap_API_Server.tex index 6a6cb395..715a0bf0 100644 --- a/Chap_API_Server.tex +++ b/Chap_API_Server.tex @@ -368,7 +368,7 @@ \subsection{\code{PMIx_server_register_nspace}} Host environments are required to provide a wide range of session-, job-, application-, node-, and process-realm information, and may choose to provide a similarly wide range of optional information. The information is broadly separated into categories based on the \emph{data realm} definitions explained in Section \ref{api:struct:attributes:retrieval}, and retrieved according to the rules detailed in Section \ref{chap:rkeys:retrules}. -Session-realm information may be passed as individual \refstruct{pmix_info_t} entries, or as part of a \refstruct{pmix_data_array_t} using the \refattr{PMIX_SESSION_INFO_ARRAY} attribute.The list of data referenced in this way shall include: +Session-realm information may be passed as individual \refstruct{pmix_info_t} entries, or as part of a \refstruct{pmix_data_array_t} using the \refattr{PMIX_SESSION_INFO_ARRAY} attribute. 
The list of data referenced in this way shall include: \begin{itemize} \item \pasteAttributeItem{PMIX_UNIV_SIZE} @@ -503,12 +503,16 @@ \subsection{\code{PMIx_server_register_nspace}} \begin{itemize} \item \pasteAttributeItem{PMIX_LOCALITY_STRING} \item \pasteAttributeItem{PMIX_PROCDIR} + \item \pasteAttributeItem{PMIX_PACKAGE_RANK} \end{itemize} -and the following optional information - note that this information can be derived from information already provided by other attributes, but it may be included here for ease of retrieval by users: +and the following optional information - note that some of this information can be derived from information already provided by other attributes, but it may be included here for ease of retrieval by users: \begin{itemize} \item \pasteAttributeItem{PMIX_HOSTNAME} + \item \pasteAttributeItem{PMIX_CPUSET} + \item \pasteAttributeItem{PMIX_CPUSET_BITMAP} + \item \pasteAttributeItem{PMIX_DEVICE_DISTANCES} \end{itemize} \divider @@ -941,7 +945,7 @@ \subsection{\code{PMIx_server_deregister_resources}} %%%% \descr -Remove information about resources not associated with a given namespace from the \ac{PMIx} server library. Only the \refarg{key} fields of the provided \refarg{info} array shall be used for the operation - the associated values shall be ignored except where they serve as qualifiers to the request. For example, to remove a specific fabric device from a given node, the \refarg{info} array might include a \refattr{PMIX_NODE_INFO_ARRAY} containing the \refattr{PMIX_NODEID} or \refattr{PMIX_HOSTNAME} identifying the node hosting the device, and the \refattr{PMIX_FABRIC_DEVICE_NAME} specifying the device to be removed. Alternatively, the device could be removed using only the \refattr{PMIX_FABRIC_DEVICE_ID} as this is unique across the overall system. +Remove information about resources not associated with a given namespace from the \ac{PMIx} server library. 
Only the \refarg{key} fields of the provided \refarg{info} array shall be used for the operation - the associated values shall be ignored except where they serve as qualifiers to the request. For example, to remove a specific fabric device from a given node, the \refarg{info} array might include a \refattr{PMIX_NODE_INFO_ARRAY} containing the \refattr{PMIX_NODEID} or \refattr{PMIX_HOSTNAME} identifying the node hosting the device, and the \refattr{PMIX_FABRIC_DEVICE_NAME} specifying the device to be removed. Alternatively, the device could be removed using only the \refattr{PMIX_DEVICE_ID} as this is unique across the overall system. \advicermstart As information not related to namespaces is considered \emph{static}, there is no requirement that the host environment deregister resources prior to finalizing the \ac{PMIx} server library. The server library shall properly cleanup as part of its normal finalize operations. Deregistration of resources is only required, therefore, when the host environment determines that client processes should no longer have access to that information. @@ -1802,7 +1806,7 @@ \subsection{\code{PMIx_server_generate_cpuset_string}} %%%% \descr -Provide a function by which the host environment can generate a string representation of the cpuset bitmap for inclusion in the call to \refapi{PMIx_server_register_nspace}. This function shall only be called for local client processes, with the returned string included in the job-level information (via the \refattr{PMIX_CPUSET} attribute) provided to local clients. Local clients can use these strings as input to obtain their \ac{PU} bindings via the \refapi{PMIx_Get_cpuset} \ac{API}. +Provide a function by which the host environment can generate a string representation of the cpuset bitmap for inclusion in the call to \refapi{PMIx_server_register_nspace}. 
This function shall only be called for local client processes, with the returned string included in the job-level information (via the \refattr{PMIX_CPUSET} attribute) provided to local clients. Local clients can use these strings as input to obtain their \ac{PU} bindings via the \refapi{PMIx_Parse_cpuset_string} \ac{API}. The function is required to return a string prefixed by the \refarg{source} field of the provided \refarg{cpuset} followed by a colon. The remainder of the string shall represent the \acp{PU} to which the process is bound as expressed by the underlying implementation. @@ -1842,6 +1846,22 @@ \subsubsection{Cpuset support macros} \argin{m}{Pointer to the structure to be initialized (pointer to \refstruct{pmix_cpuset_t})} \end{arglist} +\littleheader{Destruct the cpuset structure} +\declaremacro{PMIX_CPUSET_DESTRUCT} + +Destruct the \refstruct{pmix_cpuset_t} fields. + +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_CPUSET_DESTRUCT(m) +\end{codepar} +\cspecificend + +\begin{arglist} +\argin{m}{Pointer to the structure to be destructed (pointer to \refstruct{pmix_cpuset_t})} +\end{arglist} + \littleheader{Create a cpuset array} @@ -1861,6 +1881,24 @@ \subsubsection{Cpuset support macros} \argin{n}{Number of structures to be allocated (size_t)} \end{arglist} +\littleheader{Release a cpuset array} +\declaremacro{PMIX_CPUSET_FREE} + +Deconstruct and free a \refstruct{pmix_cpuset_t} array. 
+ +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +PMIX_CPUSET_FREE(m, n) +\end{codepar} +\cspecificend + +\begin{arglist} +\arginout{m}{Address of the array of \refstruct{pmix_cpuset_t} structures to be released (handle)} +\argin{n}{Number of structures in the array (size_t)} +\end{arglist} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\code{PMIx_server_define_process_set}} diff --git a/Chap_API_Struct.tex b/Chap_API_Struct.tex index 314bf872..749f8d97 100644 --- a/Chap_API_Struct.tex +++ b/Chap_API_Struct.tex @@ -2107,6 +2107,12 @@ \section{Generalized Data Types Used for Packing/Unpacking} \declareconstitemNEW{PMIX_TOPO} Structure containing the topology for a given node. (\refstruct{pmix_topology_t}). % +\declareconstitemNEW{PMIX_DEVTYPE} +Bitmask containing the types of devices being referenced. (\refstruct{pmix_device_type_t}). +% +\declareconstitemNEW{PMIX_LOCTYPE} +Bitmask describing the relative location of another process. (\refstruct{pmix_locality_t}). +% \declareconstitemNEW{PMIX_DATA_TYPE_MAX} A starting point for implementer-specific data types. Values above this are guaranteed not to conflict with \ac{PMIx} values. @@ -2476,5 +2482,19 @@ \section{PMIx Datatype Value String Representations} \end{codepar} \cspecificend +%%%% +\summary +\declareapi{PMIx_Device_type_string} + +String representation of a \refstruct{pmix_device_type_t}. 
+ +\versionMarker{4.0} +\cspecificstart +\begin{codepar} +const char* +PMIx_Device_type_string(pmix_device_type_t type); +\end{codepar} +\cspecificend + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/Chap_Revisions.tex b/Chap_Revisions.tex index 2f9cbe07..a54a5d2b 100644 --- a/Chap_Revisions.tex +++ b/Chap_Revisions.tex @@ -354,12 +354,14 @@ \section{Version 4.0: Sept 2020} \item \refapi{PMIx_Group_join}, \refapi{PMIx_Group_join_nb} \item \refapi{PMIx_Group_leave}, \refapi{PMIx_Group_leave_nb} \item \refapi{PMIx_Get_relative_locality}, \refapi{PMIx_Load_topology} - \item \refapi{PMIx_Get_cpuset} + \item \refapi{PMIx_Parse_cpuset_string}, \refapi{PMIx_Get_cpuset} \item \refapi{PMIx_Link_state_string}, \refapi{PMIx_Job_state_string} + \item \refapi{PMIx_Device_type_string} \item \refapi{PMIx_Fabric_register}, \refapi{PMIx_Fabric_register_nb} \item \refapi{PMIx_Fabric_update}, \refapi{PMIx_Fabric_update_nb} \item \refapi{PMIx_Fabric_deregister}, \refapi{PMIx_Fabric_deregister_nb} - \item \refapi{PMIx_Fabric_update_distances}, \refapi{PMIx_Fabric_update_distances_nb} + \item \refapi{PMIx_Compute_distances}, \refapi{PMIx_Compute_distances_nb} + \item \refapi{PMIx_Get_attribute_string}, \refapi{PMIx_Get_attribute_name} \end{compactitemize} \item Server \acp{API} @@ -387,6 +389,7 @@ \section{Version 4.0: Sept 2020} \item \refstruct{pmix_cpuset_t} \item \refstruct{pmix_topology_t} \item \refstruct{pmix_locality_t} + \item \refstruct{pmix_bind_envelope_t} \item \refstruct{pmix_group_opt_t} \item \refstruct{pmix_group_operation_t} \item \refstruct{pmix_fabric_t} @@ -396,7 +399,14 @@ \section{Version 4.0: Sept 2020} \item \refstruct{pmix_geometry_t} \item \refstruct{pmix_link_state_t} \item \refstruct{pmix_job_state_t} + \item \refstruct{pmix_device_type_t} \end{compactitemize} + + \item Callback functions + \begin{compactitemize} + \item \refapi{pmix_device_dist_cbfunc_t} + \end{compactitemize} + \end{itemize} \subsection{Added Constants} @@ -411,6 +421,9 @@ 
\subsection{Added Constants} \refconst{PMIX_GEOMETRY} \refconst{PMIX_DEVICE_DIST} \refconst{PMIX_ENDPOINT} +\refconst{PMIX_TOPO} +\refconst{PMIX_DEVTYPE} +\refconst{PMIX_LOCTYPE} \refconst{PMIX_DATA_TYPE_MAX} \littleheader{Query constants} @@ -574,6 +587,7 @@ \subsection{Added Attributes} \pasteAttributeItem{PMIX_REINCARNATION} \pasteAttributeItem{PMIX_HOSTNAME_ALIASES} \pasteAttributeItem{PMIX_HOSTNAME_KEEP_FQDN} +\pasteAttributeItem{PMIX_CPUSET_BITMAP} % % \littleheader{Tool attributes} @@ -605,14 +619,12 @@ \subsection{Added Attributes} \pasteAttributeItem{PMIX_FABRIC_COORDINATES} \pasteAttributeItem{PMIX_FABRIC_DIMS} \pasteAttributeItem{PMIX_FABRIC_ENDPT} -\pasteAttributeItem{PMIX_FABRIC_DEVICE_DIST} \pasteAttributeItem{PMIX_FABRIC_SHAPE} \pasteAttributeItem{PMIX_FABRIC_SHAPE_STRING} \pasteAttributeItem{PMIX_SWITCH_PEERS} \pasteAttributeItem{PMIX_FABRIC_PLANE} \pasteAttributeItem{PMIX_FABRIC_SWITCH} \pasteAttributeItem{PMIX_FABRIC_DEVICE} -\pasteAttributeItem{PMIX_FABRIC_DEVICE_ID} \pasteAttributeItem{PMIX_FABRIC_DEVICE_INDEX} \pasteAttributeItem{PMIX_FABRIC_DEVICE_NAME} \pasteAttributeItem{PMIX_FABRIC_DEVICE_VENDOR} @@ -629,6 +641,12 @@ \subsection{Added Attributes} \pasteAttributeItem{PMIX_FABRIC_DEVICE_PCI_DEVID} % % +\littleheader{Device attributes} +\pasteAttributeItem{PMIX_DEVICE_DISTANCES} +\pasteAttributeItem{PMIX_DEVICE_TYPE} +\pasteAttributeItem{PMIX_DEVICE_ID} +% +% \littleheader{Sets-Groups attributes} \pasteAttributeItem{PMIX_QUERY_NUM_PSETS} \pasteAttributeItem{PMIX_QUERY_PSET_NAMES} diff --git a/pmix-standard.tex b/pmix-standard.tex index fe0e2fc8..b9a515c9 100644 --- a/pmix-standard.tex +++ b/pmix-standard.tex @@ -129,6 +129,11 @@ \acrodef{NUMA}{Non-Uniform Memory Access} \acrodef{UUID}{Universally Unique IDentifier} \acrodef{GPU}{Graphics Processing Unit} +\acrodef{DRM}{Direct Rendering Manager} +\acrodef{DMA}{Direct Memory Access} +\acrodef{CUDA}{Compute Unified Device Architecture} +\acrodef{HCA}{Host Channel Adapter} +\acrodef{IP}{Internet 
Protocol} %%%%%%%%%%%%%%%%%%%