Skip to content

Commit

Permalink
REPO: Transfer internal updates for 1.1906.1 to github
Browse files Browse the repository at this point in the history
  • Loading branch information
tkordenbrock committed Jul 8, 2019
1 parent 2279f32 commit 90b5b06
Show file tree
Hide file tree
Showing 731 changed files with 62,288 additions and 10,067 deletions.
42 changes: 28 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.8.0)
project( Faodel
LANGUAGES CXX C
VERSION 1.1811.1
VERSION 1.1906.1
)


Expand Down Expand Up @@ -32,6 +32,9 @@ project( Faodel
if( POLICY CMP0074 )
cmake_policy( SET CMP0074 NEW ) # find_package() uses PackageName_ROOT vars
endif()
# CMP0075 governs whether the include-file check macros honor CMAKE_REQUIRED_LIBRARIES.
# The policy is pinned to OLD here, i.e. check_include_file() ignores that variable.
if( POLICY CMP0075 )
cmake_policy( SET CMP0075 OLD ) # OLD: check_include_file() does not use CMAKE_REQUIRED_LIBRARIES var
endif()

# CMake built-ins that we'll use
include( FindPkgConfig )
Expand Down Expand Up @@ -112,12 +115,14 @@ endfunction()
# Options
option( BUILD_SHARED_LIBS "Build Faodel as shared libs" OFF )
option( BUILD_TESTS "Faodel testing gtest and ctest" ON )
option( BUILD_DOCS "Generate documentation using Doxygen" ON )
option( BUILD_DOCS "Generate documentation using Doxygen" ON )

option( Faodel_ENABLE_MPI_SUPPORT "Enable use of MPI communication" ON )
option( Faodel_ENABLE_TCMALLOC "Use tcmalloc from gperftools in Lunasa, potentially other places" ON )
option( Faodel_ENABLE_CEREAL "Enable use of Cereal for serialization in NNTI (disables XDR)" OFF )
option( Faodel_ENABLE_MPI_SUPPORT "Enable use of MPI communication" ON )
option( Faodel_ENABLE_TCMALLOC "Use tcmalloc from gperftools in Lunasa, potentially other places" ON )
option( Faodel_ENABLE_IOM_HDF5 "Build the HDF5-based IOM in Kelpie" OFF )
option( Faodel_ENABLE_IOM_LEVELDB "Build the LevelDB-based IOM in Kelpie" OFF )
option( Faodel_ENABLE_IOM_CASSANDRA "Build the Cassandra-based IOM in Kelpie" OFF )

set( Faodel_NETWORK_LIBRARY "nnti" CACHE STRING "RDMA Network library to use for low-level communication" )
set_property(CACHE Faodel_NETWORK_LIBRARY PROPERTY STRINGS nnti libfabric)
Expand Down Expand Up @@ -337,8 +342,7 @@ set( CMAKE_NO_SYSTEM_FROM_IMPORTED ${savevar} )
if( Faodel_ENABLE_TCMALLOC )

# Bail out if we're on an architecture that doesn't support tcmalloc
if( (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" ) OR
(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le" ) )
if( CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le" )
message( FATAL_ERROR "The TCMALLOC library included with FAODEL does not currently have "
" support for the ${CMAKE_SYSTEM_PROCESSOR} CPU (it has optimizations"
" for x86_64). You must set Faodel_ENABLE_TCMALLOC to OFF in order to"
Expand All @@ -349,11 +353,13 @@ if( Faodel_ENABLE_TCMALLOC )
message( STATUS "Faodel_ENABLE_TCMALLOC is set, gperftools TPL will be built and tcmalloc will be used" )
set( PKGCONFIG_TCMALLOC "-ltcmalloc -lspinlock" )
endif()


add_subdirectory( tpl/cereal )

add_subdirectory( src/sbl )
add_subdirectory(src/faodel-common)
add_subdirectory( src/webhook )
add_subdirectory(src/faodel-services)
add_subdirectory( src/faodel-common)
add_subdirectory(src/whookie)
add_subdirectory( src/faodel-services)
if( BUILD_NNTI )
add_subdirectory( src/nnti )
endif()
Expand All @@ -363,7 +369,7 @@ add_subdirectory( src/dirman )
add_subdirectory( src/kelpie )

# Set some useful properties on our targets, now that they're defined
foreach( COMP sbl common webhook services lunasa opbox kelpie )
foreach( COMP sbl common whookie services lunasa opbox kelpie )

target_include_directories( ${COMP}
INTERFACE
Expand Down Expand Up @@ -395,9 +401,8 @@ if (Faodel_NETWORK_LIBRARY STREQUAL "libfabric")
endif()

# Build tools
add_subdirectory( tools/faodel-info )
add_subdirectory( tools/faodel-cli )
add_subdirectory( tools/kelpie-server )
add_subdirectory( tools/whookie )


# Do the tests
Expand All @@ -407,7 +412,7 @@ if( BUILD_TESTS )

add_subdirectory( tests/sbl )
add_subdirectory( tests/common )
add_subdirectory( tests/webhook )
add_subdirectory( tests/whookie )
add_subdirectory( tests/services )
if( BUILD_NNTI )
add_subdirectory( tests/nnti )
Expand Down Expand Up @@ -605,6 +610,15 @@ else()
message( STATUS " Not building the MPI Transport" )
endif()
endif()
# Summarize the serialization choice indicated by NNTI_USE_XDR / NNTI_USE_CEREAL.
# XDR is reported first; Cereal is reported only when XDR is not in use.
if( ${NNTI_USE_XDR} )
  message( STATUS " Using XDR for serialization" )
elseif( ${NNTI_USE_CEREAL} )
  message( STATUS " Using Cereal for serialization" )
else()
  # Neither flag is set: report it in the summary without stopping configuration.
  message( STATUS " ERROR - Couldn't find a serialization library" )
endif()
message( STATUS "" )
message( STATUS "Opbox Config:" )
message( STATUS " Network Library: ${Faodel_NETWORK_LIBRARY}" )
Expand Down
77 changes: 69 additions & 8 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ what happened in the test. Common problems include:

export OMPI_MCA_rmaps_base_oversubscribe=1

- Check your webhook.interfaces list: Compute nodes often have several
network interfaces and webhook often guesses wrong. Log into a compute
- Check your whookie.interfaces list: Compute nodes often have several
network interfaces and whookie often guesses wrong. Log into a compute
node, do "ifconfig" or "ip addr" to find a live network interface,
and set it in your config.

Expand All @@ -155,15 +155,15 @@ what happened in the test. Common problems include:
Given that Atomics are not currently used in FAODEL's current libs, it
may not be essential for these tests to work in order to use FAODEL.

FAODEL provides the faodel_info tool as a sanity check for your build. This
FAODEL provides the faodel tool as a sanity check for your build. This
tool prints out build information and performs basic checks to determine
if the libraries will work. You should run this test on your platform's
login node, as well as a compute node (some platforms have different
hardware).

build/tools/faodel-info/faodel_info
build/tools/faodel-cli/faodel build-info
salloc -N 1
srun build/tools/faodel-info/faodel_info
srun build/tools/faodel-cli/faodel build-info
exit


Expand Down Expand Up @@ -211,7 +211,7 @@ can be found on the FAODEL wiki. Common settings that a
user may wish to change include:

```
webhook.interfaces ib0,eth0 # Change the nic used for webhook
whookie.interfaces ib0,eth0 # Change the nic used for whookie
net.transport.name ibverbs # Select net driver when using nnti or libfabric
Expand Down Expand Up @@ -311,7 +311,20 @@ configure and build on any node and run on any node.



Selecting An Infiniband Network Device
--------------------------------------
When FAODEL bootstraps the network, it searches for an Infiniband device
with an active port. By default, FAODEL queries the verbs library for a
list of devices and chooses the first one with an active port. If there
are multiple devices or multiple active ports, FAODEL may not choose the
correct device.

In this case, add the following to the configuration file:
```
net.transport.interfaces ib1,ib0 # Prefer ib1 over ib0
```
When `net.transport.interfaces` is defined, FAODEL will search for these
devices (and only these devices) in the order given.


Building Third-Party Libraries (TPLs)
Expand Down Expand Up @@ -383,6 +396,32 @@ the libfabric library in order to run applications.



Data Structure Serialization
============================

NNTI data structures can be sent to peers both implicitly (command
messages) and explicitly (buffer references). To support
heterogeneous platforms, these data structures are serialized to a
portable format, sent to the peer and deserialized at the recipient.

NNTI has historically used XDR for serialization because it is
fast, tight and ubiquitous. In recent releases (e.g., Mojave) of
MacOS, XDR is not fully implemented. NNTI detects this condition
during configuration and uses the bundled Cereal library as an
alternative.

If you prefer Cereal over XDR, you can force the use of Cereal
using the `Faodel_ENABLE_CEREAL` option.

Note 1: Cereal is a header-only implementation and is only used
internally by NNTI. It does not get installed as part of Faodel.

Note 2: There is no way to reference an external installation of
Cereal.




Platform-Specific Notes: Installing on Mutrino (Cray XC40)
==========================================================

Expand Down Expand Up @@ -520,7 +559,7 @@ defined as ipogif0). Thus, you should add the following info to the
configuration file specified by `FAODEL_CONFIG`:

```
webhook.interfaces ipogif0
whookie.interfaces ipogif0
net.transport.name ugni
```

Expand Down Expand Up @@ -554,6 +593,27 @@ find . -name CTestTestfile.cmake | xargs sed -i 's@/opt/cray/elogin/eproxy/2.0.2
FAODEL.


Installing on Astra (ARM-based Mellanox InfiniBand Cluster)
===========================================================

Astra is an ARM-based cluster with a Mellanox InfiniBand interconnect.
In general, Faodel operates the same on Astra as on any other
InfiniBand platform. One extra feature of Astra is that it
has full support for Mellanox's On-Demand Paging feature that
allows a process' entire virtual address space to be registered
without pinning (locking) pages in memory. The availability of
ODP is detected during configuration, but it is disabled by
default because it is still experimental. To enable it, add the
following to your configuration file.

```
net.transport.use_odp true
```

This feature is still experimental with no guarantees of
performance or correctness.


Installing on Kahuna (Generic InfiniBand Cluster)
=================================================

Expand Down Expand Up @@ -591,7 +651,7 @@ ports for sockets. Thus, you should add the following info to the
configuration file specified by `FAODEL_CONFIG`:

```
webhook.interfaces eth0,ib0
whookie.interfaces eth0,ib0
net.transport.name ibverbs
```

Expand Down Expand Up @@ -625,6 +685,7 @@ Advanced Options
| ------------------------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------- |
| HDF5_DIR | Path | Location of HDF5 libs. Only used when FAODEL_ENABLE_IOM_HDF5 used |
| leveldb_DIR | Path | Location of leveldb libs. Only used when FAODEL_ENABLE_IOM_LEVELDB used |
| Faodel_ENABLE_CEREAL | Boolean | If XDR is not fully implemented (eg. MacOS Mojave), Cereal is used. CMake should autodetect, but this forces Cereal. |
| Faodel_NO_ISYSTEM_FLAG | Boolean | Some compilers use "-isystem" to identify system libs instead of "-I". CMake should autodetect, but this can override |
| Faodel_OPBOX_NET_NNTI | Boolean | Set to true if Faodel_NETWORK_LIBRARY is nnti |
| Faodel_PERFTOOLS_* | - | These variables are used by the tpl/gperftools library. See their documentation for more info |
Expand Down
44 changes: 34 additions & 10 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,36 @@ faodel tools. Releases are named alphabetically and have a
4-digit ID associated with them that designates the year and month of
the release.

DIO (1.1811.2)
--------------
- Summary: Fix build problems discovered during Spack integration
- Release Improvements:
- Common Logging failed to build when SBL was selected for output (thanks to @fbudin69500)
- Kelpie failed to build when MPI was disabled (thanks to @fbudin69500)
Excelsior! (1.1906.1)
---------------------
- Summary: Job-to-Job improvements via new cli tool
- Release Improvements
- New faodel-cli tool for manipulating many things
- Gets build/configure info (replaces faodel-info)
- Start/stop services (dirman, kelpie)
- Define/query/remove dirman resources
- Put/get/list kelpie objects
- New example/kelpie-cli script shows how to use it
- Support for ARM platform
- NNTI adds On-Demand Paging capability
- NNTI adds Cereal as alternative for serialization
- NNTI has better detection and selection of IB devices
- Fixes
- SBL could segfault due to Boost if exit without calling finish
- FAODEL couldn't be included in a larger project's cmake
- LDO had a race condition in destructor
- Significant User-Visible Changes:
- faodel-info and whookie tools replaced by faodel cli tool
- Dirman's DirInfo "children" renamed to "members"
- Faodel now has a package in the Spack develop branch
- Known Issues
- FAODEL's libfabric transport is still experimental. It does not fully
implement Atomics or Long Sends. While Kelpie does not require
these operations, other OpBox-based applications may break
without this support.
- On Cray machines with the Aries interconnect, FAODEL can be overwhelmed
by a sustained stream of sends larger than the MTU. To avoid this problem,
the sender should limit itself to bursts of 32 long sends at a time.

DIO (1.1811.1)
--------------
Expand All @@ -24,10 +48,10 @@ DIO (1.1811.1)
- Lunasa has templated containers for storing a bundle of items in an LDO
- GenericSequentialDataBundle : When data is accessed in order
- GenericRandomDataBundle : When data is accessed out of order
- Users can now define webhooks for rendering specific DataObject types
- Users can now define whookies for rendering specific DataObject types
- Bootstrap Start/Stops are much more robust
- General build fixes for use with EMPIRE
- Significant User Visible Changes:
- Significant User-Visible Changes:
- Common and services directories renamed to faodel-common and faodel-services
- Kelpie pools now have a "behavior" that controls how data is copied
- Some components/tools changed directories. Check your includes
Expand All @@ -42,7 +66,7 @@ DIO (1.1811.1)
these operations, other OpBox-based applications may break
without this support.
- On Cray machines with the Aries interconnect, FAODEL can be overwhelmed
by a sustrained stream of sends larger than the MTU. To avoid this problem,
by a sustained stream of sends larger than the MTU. To avoid this problem,
the sender should limit itself to bursts of 32 long sends at a time.
- This version does not have support for ARM8 or POWER cpus.

Expand Down Expand Up @@ -98,7 +122,7 @@ Amigo (0.1707.1)
----------------
- Summary: First packaged release, for friendly users
- Release Improvements:
- Stable versions of SBL, Gutties, WebHook, NNTI, Lunasa, and Opbox
- Stable versions of SBL, Gutties, Whookie, NNTI, Lunasa, and Opbox
- Experimental version of Kelpie (nonet)
- Switched to Graith CMake modules
- Initial doxygen and readme documentation
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ FAODEL is composed of multiple libraries:
layer for high-performance networks. It provides applications with
the ability to send messages and coordinate RDMA transfers via
registered memory.
- [WebHook](src/webhook/README_WebHook.md): WebHook is a network service for
- [Whookie](src/whookie/README_Whookie.md): Whookie is a network service for
FAODEL nodes that enables users and applications to query
and change the state of a node via an HTTP connection.
- [Services](src/faodel-services/README_Services.md): Basic services that make it
Expand Down Expand Up @@ -81,8 +81,9 @@ The following helped contribute ideas and provided feedback for the project:

This release includes third-party software that contains its own licensing
and copyright info:
- cereal (in tpl/cereal)
- gperftools (in tpl/gperftools)
- Boost ASIO examples (in src/webhook/server)
- Boost ASIO examples (in src/whookie/server)

Copyright
=========
Expand Down
29 changes: 28 additions & 1 deletion cmake/FaodelTPLs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,33 @@ endif()



#######################
## Cassandra IOM
#######################

if( Faodel_ENABLE_IOM_CASSANDRA )

  # The DataStax Cassandra C/C++ driver generates a pkg-config module, so
  # locate the driver through pkg-config. PKG_CONFIG_USE_CMAKE_PREFIX_PATH
  # makes the search also consider the entries of CMAKE_PREFIX_PATH.
  set( PKG_CONFIG_USE_CMAKE_PREFIX_PATH 1 )

  # REQUIRED is intentionally not passed: it would abort inside
  # pkg_search_module() and the not-found branch below (which carries the
  # CMAKE_PREFIX_PATH hint) could never run.
  pkg_search_module( cassandra_pc cassandra )

  if( cassandra_pc_FOUND )
    set( CASSANDRA_FOUND TRUE )
    set( FAODEL_HAVE_CASSANDRA TRUE )

    # Wrap the pkg-config results in an imported interface target so that
    # consumers only need to link against Faodel::Cassandra.
    add_library( Faodel::Cassandra INTERFACE IMPORTED )
    target_include_directories( Faodel::Cassandra INTERFACE ${cassandra_pc_INCLUDE_DIRS} )
    target_link_libraries( Faodel::Cassandra INTERFACE ${cassandra_pc_LDFLAGS} )
    # CFLAGS_OTHER holds raw compiler flags (not bare NAME=VALUE definitions),
    # so they are attached as compile options rather than compile definitions.
    target_compile_options( Faodel::Cassandra INTERFACE ${cassandra_pc_CFLAGS_OTHER} )

    message( STATUS "Will build Cassandra IOM, Faodel_ENABLE_IOM_CASSANDRA set and Cassandra driver found" )
  else()
    # The IOM was explicitly requested, so a missing driver stops the
    # configuration (as REQUIRED did before) while still printing the hint.
    message( FATAL_ERROR "Cannot build Cassandra IOM as requested, Cassandra driver not found. Set CMAKE_PREFIX_PATH" )
  endif()

endif()

########################
## MPI
########################
Expand Down Expand Up @@ -198,7 +225,7 @@ if (Faodel_NETWORK_LIBRARY STREQUAL "libfabric")
endif()

LIST( APPEND FaodelNetlib_TARGETS Libfabric )
set( PKGCONFIG_REQUIRES "libfabric" )
set( PKGCONFIG_REQUIRES "${PKGCONFIG_REQUIRES} libfabric" )
message( STATUS "Found Libfabric, target appended to FaodelNetlib_TARGETS" )
endif()

Expand Down
Loading

0 comments on commit 90b5b06

Please sign in to comment.