Skip to content

Commit

Permalink
Merge branch 'branch-24.10' of https://github.com/rapidsai/cuvs into …
Browse files Browse the repository at this point in the history
…ivfpq-cosine
  • Loading branch information
tarang-jain committed Sep 15, 2024
2 parents 744c011 + a6b71d7 commit a692dfb
Show file tree
Hide file tree
Showing 16 changed files with 126 additions and 111 deletions.
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ DEPENDENCIES=(
dask-cuda
cuvs
pylibraft
librmm
rmm
rapids-dask-dependency
)
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- make
- nccl>=2.9.9
- ninja
Expand All @@ -49,7 +50,6 @@ dependencies:
- pytest==7.*
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- recommonmark
- rmm==24.10.*,>=0.0.0a0
- rust
- scikit-build-core>=0.10.0
- scikit-learn
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- make
- nccl>=2.9.9
- ninja
Expand All @@ -49,7 +50,6 @@ dependencies:
- pytest==7.*
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- recommonmark
- rmm==24.10.*,>=0.0.0a0
- rust
- scikit-build-core>=0.10.0
- scikit-learn
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies:
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- make
- nccl>=2.9.9
- ninja
Expand All @@ -45,7 +46,6 @@ dependencies:
- pytest==7.*
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- recommonmark
- rmm==24.10.*,>=0.0.0a0
- rust
- scikit-build-core>=0.10.0
- scikit-learn
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies:
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- make
- nccl>=2.9.9
- ninja
Expand All @@ -45,7 +46,6 @@ dependencies:
- pytest==7.*
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- recommonmark
- rmm==24.10.*,>=0.0.0a0
- rust
- scikit-build-core>=0.10.0
- scikit-learn
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- matplotlib
- nccl>=2.9.9
- ninja
Expand All @@ -42,6 +43,5 @@ dependencies:
- pandas
- pylibraft==24.10.*,>=0.0.0a0
- pyyaml
- rmm==24.10.*,>=0.0.0a0
- sysroot_linux-aarch64==2.17
name: bench_ann_cuda-118_arch-aarch64
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- matplotlib
- nccl>=2.9.9
- ninja
Expand All @@ -42,6 +43,5 @@ dependencies:
- pandas
- pylibraft==24.10.*,>=0.0.0a0
- pyyaml
- rmm==24.10.*,>=0.0.0a0
- sysroot_linux-64==2.17
name: bench_ann_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies:
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- matplotlib
- nccl>=2.9.9
- ninja
Expand All @@ -38,6 +39,5 @@ dependencies:
- pandas
- pylibraft==24.10.*,>=0.0.0a0
- pyyaml
- rmm==24.10.*,>=0.0.0a0
- sysroot_linux-aarch64==2.17
name: bench_ann_cuda-125_arch-aarch64
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies:
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- matplotlib
- nccl>=2.9.9
- ninja
Expand All @@ -38,6 +39,5 @@ dependencies:
- pandas
- pylibraft==24.10.*,>=0.0.0a0
- pyyaml
- rmm==24.10.*,>=0.0.0a0
- sysroot_linux-64==2.17
name: bench_ann_cuda-125_arch-x86_64
6 changes: 1 addition & 5 deletions conda/recipes/cuvs/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

# Usage:
# conda build . -c conda-forge -c numba -c rapidsai -c pytorch
# conda build . -c rapidsai -c conda-forge -c nvidia
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
Expand Down Expand Up @@ -54,10 +54,8 @@ requirements:
- pylibraft {{ minor_version }}
- libcuvs {{ version }}
- python x.x
- rmm ={{ minor_version }}
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
- setuptools
run:
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
{% if cuda_major == "11" %}
Expand All @@ -68,7 +66,6 @@ requirements:
- pylibraft {{ minor_version }}
- libcuvs {{ version }}
- python x.x
- rmm ={{ minor_version }}
- cuda-python
- numpy >=1.23,<3.0a0

Expand All @@ -81,5 +78,4 @@ tests:
about:
home: https://rapids.ai/
license: Apache-2.0
# license_file: LICENSE
summary: cuvs python library
33 changes: 24 additions & 9 deletions conda/recipes/libcuvs/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

# Usage:
# conda build . -c conda-forge -c nvidia -c rapidsai
# conda build . -c rapidsai -c conda-forge -c nvidia
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
Expand Down Expand Up @@ -63,7 +63,8 @@ outputs:
- ninja
- {{ stdlib("c") }}
host:
- libraft ={{ minor_version }}
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }}
Expand All @@ -84,7 +85,6 @@ outputs:
- libcusparse-dev
{% endif %}
run:
- libraft ={{ minor_version }}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
{% if cuda_major != "11" %}
- cuda-cudart
Expand Down Expand Up @@ -129,7 +129,8 @@ outputs:
- ninja
- {{ stdlib("c") }}
host:
- libraft ={{ minor_version }}
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }}
Expand All @@ -150,7 +151,6 @@ outputs:
- libcusparse-dev
{% endif %}
run:
- libraft ={{ minor_version }}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
{% if cuda_major != "11" %}
- cuda-cudart
Expand Down Expand Up @@ -195,7 +195,8 @@ outputs:
- ninja
- {{ stdlib("c") }}
host:
- libraft ={{ minor_version }}
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
- {{ pin_subpackage('libcuvs', exact=True) }}
- cuda-version ={{ cuda_version }}
- openblas # required by some CPU algos in benchmarks
Expand Down Expand Up @@ -228,7 +229,6 @@ outputs:
- libcusolver
- libcusparse
{% endif %}
- libraft ={{ minor_version }}
- {{ pin_subpackage('libcuvs', exact=True) }}
about:
home: https://rapids.ai/
Expand All @@ -248,6 +248,9 @@ outputs:
- {{ compiler('cuda') }}
- cuda-cudart-dev
- libcublas-dev
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
{% endif %}
requirements:
build:
Expand All @@ -263,17 +266,27 @@ outputs:
- ninja
- {{ stdlib("c") }}
host:
- libraft ={{ minor_version }}
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
- {{ pin_subpackage('libcuvs', exact=True) }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
- libcublas {{ cuda11_libcublas_host_version }}
- libcublas-dev {{ cuda11_libcublas_host_version }}
- libcurand {{ cuda11_libcurand_host_version }}
- libcurand-dev {{ cuda11_libcurand_host_version }}
- libcusolver {{ cuda11_libcusolver_host_version }}
- libcusolver-dev {{ cuda11_libcusolver_host_version }}
- libcusparse {{ cuda11_libcusparse_host_version }}
- libcusparse-dev {{ cuda11_libcusparse_host_version }}
{% else %}
- cuda-cudart-dev
- cuda-profiler-api
- libcublas-dev
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
{% endif %}
run:
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand All @@ -282,8 +295,10 @@ outputs:
{% else %}
- cuda-cudart
- libcublas
- libcurand
- libcusolver
- libcusparse
{% endif %}
- libraft ={{ minor_version }}
- {{ pin_subpackage('libcuvs', exact=True) }}
about:
home: https://rapids.ai/
Expand Down
4 changes: 2 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

option(BUILD_SHARED_LIBS "Build cuvs shared libraries" ON)
option(BUILD_TESTS "Build cuvs unit-tests" ON)
option(BUILD_C_LIBRARY "Build raft C API library" OFF)
option(BUILD_C_TESTS "Build raft C API tests" OFF)
option(BUILD_C_LIBRARY "Build cuVS C API library" OFF)
option(BUILD_C_TESTS "Build cuVS C API tests" OFF)
option(BUILD_ANN_BENCH "Build cuVS ann benchmarks" OFF)
option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON)
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
Expand Down
42 changes: 21 additions & 21 deletions cpp/src/neighbors/detail/faiss_distance_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,39 +14,39 @@ inline void chooseTileSize(size_t numQueries,
size_t numCentroids,
size_t dim,
size_t elementSize,
size_t totalMem,
size_t& tileRows,
size_t& tileCols)
{
// 512 seems to be a batch size sweetspot for float32.
// The same value (512) is used for float16; element size does not change it.
// If the k size (vec dim) of the matrix multiplication is small (<= 32),
// increase to 1024.
size_t preferredTileRows = 512;
if (dim <= 32) { preferredTileRows = 1024; }

tileRows = std::min(preferredTileRows, numQueries);

// The matrix multiplication should be large enough to be efficient, but if
// it is too large, we seem to lose efficiency as opposed to
// double-streaming. Each tile size here defines 1/2 of the memory use due
// to double streaming. We ignore available temporary memory, as that is
// adjusted independently by the user and can thus meet these requirements
// (or not). For <= 4 GB GPUs, prefer 512 MB of usage. For <= 8 GB GPUs,
// prefer 768 MB of usage. Otherwise, prefer 1 GB of usage.
size_t targetUsage = 0;

if (totalMem <= ((size_t)4) * 1024 * 1024 * 1024) {
targetUsage = 512 * 1024 * 1024;
} else if (totalMem <= ((size_t)8) * 1024 * 1024 * 1024) {
targetUsage = 768 * 1024 * 1024;
size_t targetUsage = 512 * 1024 * 1024;
if (tileRows * numCentroids * elementSize * 2 <= targetUsage) {
tileCols = numCentroids;
} else {
targetUsage = 1024 * 1024 * 1024;
}
// only query total memory in case it potentially impacts tilesize
size_t totalMem = rmm::available_device_memory().second;

targetUsage /= 2 * elementSize;
if (totalMem > ((size_t)8) * 1024 * 1024 * 1024) {
targetUsage = 1024 * 1024 * 1024;
} else if (totalMem > ((size_t)4) * 1024 * 1024 * 1024) {
targetUsage = 768 * 1024 * 1024;
}

// 512 seems to be a batch size sweetspot for float32.
// If we are on float16, increase to 512.
// If the k size (vec dim) of the matrix multiplication is small (<= 32),
// increase to 1024.
size_t preferredTileRows = 512;
if (dim <= 32) { preferredTileRows = 1024; }

tileRows = std::min(preferredTileRows, numQueries);

// tileCols is the remainder size
tileCols = std::min(targetUsage / preferredTileRows, numCentroids);
tileCols = std::min(targetUsage / (2 * elementSize * tileRows), numCentroids);
}
}
} // namespace cuvs::neighbors::detail::faiss_select
Loading

0 comments on commit a692dfb

Please sign in to comment.