Skip to content

Commit

Permalink
Add type hints to python-defined ops, run and tfrecord APIs (#5118)
Browse files Browse the repository at this point in the history
Add annotations to operators with custom Python wrappers:
* External source
* TFRecord reader
* Python function family
* nvidia.dali.math module
* ops.Compose

Add stub file for nvidia.dali.tfrecord module.

Add return type annotations to the Pipeline.run function.
The TensorList types are, again, not fully visible because they are generated
by the backend at runtime; the next step should provide a dedicated stub file
or an alternative implementation.

The stubs are based on the output of mypy `stubgen` and the 
`nvidia.dali.ops._signatures._gen_[fn/ops]_signature`.

The stub generation is reworked: the operators are first grouped into
4 categories, so that the generated stubs contain the imports first.
We utilize the fact that the operators with custom Python wrappers
now have dedicated implementation modules, and re-expose them
in the interface files, allowing the type hints to be picked up.

The external source has non-trivial defaults, expressed mostly via `None`
and cross-dependent on other parameters, so creating meaningful
annotations there is hard.
There are two overloads provided for the `external_source` function,
which make it possible to disambiguate between single and multiple outputs
(when the `num_outputs` parameter is used, the return type is
always a tuple). Such a distinction can't be easily made in the ops API.

The Numba and PyTorch functions are left for a follow-up.

Signed-off-by: Krzysztof Lecki <klecki@nvidia.com>
  • Loading branch information
klecki committed Nov 3, 2023
1 parent 2ad5be4 commit 1327f5b
Show file tree
Hide file tree
Showing 12 changed files with 733 additions and 81 deletions.
4 changes: 2 additions & 2 deletions dali/operators/python_function/python_function.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -65,6 +65,6 @@ as PyTorch tensors.)code")
.NoPrune()
.AddParent("PythonFunctionBase")
.AddOptionalArg("batch_processing", R"code(Determines whether the function gets
an entire batch as an input.)code", false);
an entire batch as an input.)code", true);

} // namespace dali
4 changes: 0 additions & 4 deletions dali/python/nvidia/dali/external_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,10 +432,6 @@ class ExternalSource():
.. note::
This is applicable only when copying data to and from GPU memory.
`blocking` : bool, optional
Determines whether the external source should wait until data is available or just fail
when the data is not available.
`no_copy` : bool, optional
Determines whether DALI should copy the buffer when feed_input is called.
Expand Down
124 changes: 124 additions & 0 deletions dali/python/nvidia/dali/external_source.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, Optional, overload
from typing import Sequence, Any, Callable, Iterable

from nvidia.dali.data_node import DataNode
from nvidia.dali.types import DALIDataType, DALIImageType, DALIInterpType


class ExternalSource:
    # Type stub for the operator-class form of the external source.
    # Only signatures are declared here; every body is `...`.

    # The `source` parameter represents the Union of types accepted by the `fn.external_source`,
    # check the comment there for the explanation.
    #
    # `source` and `num_outputs` may be passed positionally; every parameter after the
    # bare `*` is keyword-only.
    def __init__(
        self,
        source: Optional[Union[Callable[..., Any], Iterable[Any], Callable[..., Sequence[Any]], Iterable[Sequence[Any]]]] = None,
        num_outputs: Optional[int] = None,
        *,
        batch: Optional[bool] = None,
        batch_info: Optional[bool] = False,
        dtype: Union[Sequence[DALIDataType], DALIDataType, None] = None,
        ndim: Union[Sequence[int], int, None] = None,
        layout: Union[Sequence[str], str, None] = None,
        name: Optional[str] = None,
        device: Optional[str] = "cpu",
        cuda_stream: Optional[Any] = None,
        use_copy_kernel: Optional[bool] = False,
        cycle: Union[str, bool, None] = None,
        repeat_last: Optional[bool] = False,
        parallel: Optional[bool] = False,
        no_copy: Optional[bool] = None,
        prefetch_queue_depth: Optional[int] = 1,
        # Stub only: this list default documents the value, it is not executed at runtime.
        bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
    ) -> None:
        ...

    # Signature of calling an operator instance. All arguments are keyword-only and,
    # compared to `__init__`, `num_outputs` and `device` are not accepted here.
    #
    # The return annotation is always a single `DataNode`.
    # NOTE(review): an instance constructed with `num_outputs` presumably produces several
    # outputs, which this annotation does not express - confirm against the implementation.
    def __call__(
        self,
        *,
        source: Optional[Union[Callable[..., Any], Iterable[Any], Callable[..., Sequence[Any]], Iterable[Sequence[Any]]]] = None,
        batch: Optional[bool] = None,
        batch_info: Optional[bool] = False,
        dtype: Union[Sequence[DALIDataType], DALIDataType, None] = None,
        ndim: Union[Sequence[int], int, None] = None,
        layout: Union[Sequence[str], str, None] = None,
        name: Optional[str] = None,
        cuda_stream: Optional[Any] = None,
        use_copy_kernel: Optional[bool] = False,
        cycle: Union[str, bool, None] = None,
        repeat_last: Optional[bool] = False,
        parallel: Optional[bool] = False,
        no_copy: Optional[bool] = None,
        prefetch_queue_depth: Optional[int] = 1,
        # Stub only: this list default documents the value, it is not executed at runtime.
        bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
    ) -> DataNode:
        ...


# The overload representing a call without specifying `num_outputs`. It expects a function
# returning a tensor or a batch of tensors directly, corresponding to exactly one DataNode output.
# `Any` can be replaced to represent TensorLike and BatchLike values.
# Only `source` may be passed positionally here; every parameter after the bare `*` is
# keyword-only, and this overload has no `num_outputs` parameter at all.
# TODO(klecki): overloads with specific `batch` values can be considered
@overload
def external_source(
    source: Optional[Union[Callable[..., Any], Iterable[Any]]] = None,
    *,
    batch: Optional[bool] = None,
    batch_info: Optional[bool] = False,
    dtype: Union[DALIDataType, Sequence[DALIDataType], None] = None,
    ndim: Union[int, Sequence[int], None] = None,
    layout: Union[str, Sequence[str], None] = None,
    name: Optional[str] = None,
    device: Optional[str] = "cpu",
    cuda_stream: Optional[Any] = None,
    use_copy_kernel: Optional[bool] = False,
    cycle: Union[str, bool, None] = None,
    repeat_last: Optional[bool] = False,
    parallel: Optional[bool] = False,
    no_copy: Optional[bool] = None,
    prefetch_queue_depth: Optional[int] = 1,
    # Stub only: this list default documents the value, it is not executed at runtime.
    bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
) -> DataNode:
    ...


# The overload representing a call with `num_outputs` specified. It expects a function
# returning a tuple/sequence of tensors or batches, corresponding to a tuple of `num_outputs`
# DataNode outputs.
# `Any` can be replaced to represent TensorLike and BatchLike values.
# `source` and `num_outputs` may be passed positionally; every parameter after the bare `*`
# is keyword-only. The return value is annotated as `Sequence[DataNode]`.
# TODO(klecki): overloads with specific `batch` values can be considered
@overload
def external_source(
    source: Optional[Union[Callable[..., Sequence[Any]], Iterable[Sequence[Any]]]] = None,
    # `...` is a placeholder default: a parameter following the defaulted `source` must
    # itself have a default to be syntactically valid.
    num_outputs: int = ...,
    *,
    batch: Optional[bool] = None,
    batch_info: Optional[bool] = False,
    dtype: Union[Sequence[DALIDataType], DALIDataType, None] = None,
    ndim: Union[Sequence[int], int, None] = None,
    layout: Union[Sequence[str], str, None] = None,
    name: Optional[str] = None,
    device: Optional[str] = "cpu",
    cuda_stream: Optional[Any] = None,
    use_copy_kernel: Optional[bool] = False,
    cycle: Union[str, bool, None] = None,
    repeat_last: Optional[bool] = False,
    parallel: Optional[bool] = False,
    no_copy: Optional[bool] = None,
    prefetch_queue_depth: Optional[int] = 1,
    # Stub only: this list default documents the value, it is not executed at runtime.
    bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
) -> Sequence[DataNode]:
    ...
Loading

0 comments on commit 1327f5b

Please sign in to comment.