Add type hints to python-defined ops, run and tfrecord APIs (#5118)

Add annotations to operators with custom Python wrappers: * External source * TFRecord reader * Python function family * nvidia.dali.math module * ops.Compose Add a stub file for the nvidia.dali.tfrecord module. Add return type annotations to the Pipeline.run function. The TensorList types are again not fully visible because they are generated by the backend at runtime; the next step should provide a dedicated stub file or an alternative implementation. The stubs are based on the output of mypy `stubgen` and the `nvidia.dali.ops._signatures._gen_[fn/ops]_signature`. The stub generation is reworked, first grouping the operators into 4 categories, so that the generated stubs contain imports first. We utilize the fact that the operators with custom Python wrappers now have dedicated implementation modules, and re-expose them in the interface files, allowing the type hints to be picked up. The external source has non-trivial defaults, expressed mostly via `None` and cross-dependent on other parameters, so creating meaningful annotations there is hard. There are two overloads provided for the `external_source` function, making it possible to disambiguate between a single output and multiple outputs (when the `num_outputs` parameter is used, and the return type is always a tuple). Such a distinction can't be easily made in the ops API. The Numba and PyTorch functions are left for a follow-up. Signed-off-by: Krzysztof Lecki <klecki@nvidia.com>
NVIDIA · Nov 3, 2023 · 1327f5b · 1327f5b
1 parent 2ad5be4
commit 1327f5b
Show file tree

Hide file tree

Showing 12 changed files with 733 additions and 81 deletions.
diff --git a/dali/operators/python_function/python_function.cc b/dali/operators/python_function/python_function.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -65,6 +65,6 @@ as PyTorch tensors.)code")
         .NoPrune()
         .AddParent("PythonFunctionBase")
         .AddOptionalArg("batch_processing", R"code(Determines whether the function gets
-an entire batch as an input.)code", false);
+an entire batch as an input.)code", true);
 
 }  // namespace dali
diff --git a/dali/python/nvidia/dali/external_source.py b/dali/python/nvidia/dali/external_source.py
@@ -432,10 +432,6 @@ class ExternalSource():
     .. note::
         This is applicable only when copying data to and from GPU memory.
 
-`blocking` : bool, optional
-    Determines whether the external source should wait until data is available or just fail
-    when the data is not available.
-
 `no_copy` : bool, optional
     Determines whether DALI should copy the buffer when feed_input is called.
 

diff --git a/dali/python/nvidia/dali/external_source.pyi b/dali/python/nvidia/dali/external_source.pyi
@@ -0,0 +1,124 @@
+# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Union, Optional, overload
+from typing import Sequence, Any, Callable, Iterable
+
+from nvidia.dali.data_node import DataNode
+from nvidia.dali.types import DALIDataType, DALIImageType, DALIInterpType
+
+
+class ExternalSource:
+
+    # The `source` parameter represents the Union of types accepted by the `fn.external_source`,
+    # check the comment there for the explanation.
+    def __init__(
+        self,
+        source: Optional[Union[Callable[..., Any], Iterable[Any], Callable[..., Sequence[Any]], Iterable[Sequence[Any]]]] = None,
+        num_outputs: Optional[int] = None,
+        *,
+        batch: Optional[bool] = None,
+        batch_info: Optional[bool] = False,
+        dtype: Union[Sequence[DALIDataType], DALIDataType, None] = None,
+        ndim: Union[Sequence[int], int, None] = None,
+        layout: Union[Sequence[str], str, None] = None,
+        name: Optional[str] = None,
+        device: Optional[str] = "cpu",
+        cuda_stream: Optional[Any] = None,
+        use_copy_kernel: Optional[bool] = False,
+        cycle: Union[str, bool, None] = None,
+        repeat_last: Optional[bool] = False,
+        parallel: Optional[bool] = False,
+        no_copy: Optional[bool] = None,
+        prefetch_queue_depth: Optional[int] = 1,
+        bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
+    ) -> None:
+        ...
+
+    def __call__(
+        self,
+        *,
+        source: Optional[Union[Callable[..., Any], Iterable[Any], Callable[..., Sequence[Any]], Iterable[Sequence[Any]]]] = None,
+        batch: Optional[bool] = None,
+        batch_info: Optional[bool] = False,
+        dtype: Union[Sequence[DALIDataType], DALIDataType, None] = None,
+        ndim: Union[Sequence[int], int, None] = None,
+        layout: Union[Sequence[str], str, None] = None,
+        name: Optional[str] = None,
+        cuda_stream: Optional[Any] = None,
+        use_copy_kernel: Optional[bool] = False,
+        cycle: Union[str, bool, None] = None,
+        repeat_last: Optional[bool] = False,
+        parallel: Optional[bool] = False,
+        no_copy: Optional[bool] = None,
+        prefetch_queue_depth: Optional[int] = 1,
+        bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
+    ) -> DataNode:
+        ...
+
+
+# The overload representing a call without specifying `num_outputs`. It expects a function
+# returning a tensor or a batch of tensors directly, corresponding to exactly one DataNode output.
+# `Any` can be replaced to represent TensorLike and BatchLike values.
+# TODO(klecki): overloads with specific `batch` values can be considered
+@overload
+def external_source(
+    source: Optional[Union[Callable[..., Any], Iterable[Any]]] = None,
+    *,
+    batch: Optional[bool] = None,
+    batch_info: Optional[bool] = False,
+    dtype: Union[DALIDataType, Sequence[DALIDataType], None] = None,
+    ndim: Union[int, Sequence[int], None] = None,
+    layout: Union[str, Sequence[str], None] = None,
+    name: Optional[str] = None,
+    device: Optional[str] = "cpu",
+    cuda_stream: Optional[Any] = None,
+    use_copy_kernel: Optional[bool] = False,
+    cycle: Union[str, bool, None] = None,
+    repeat_last: Optional[bool] = False,
+    parallel: Optional[bool] = False,
+    no_copy: Optional[bool] = None,
+    prefetch_queue_depth: Optional[int] = 1,
+    bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
+) -> DataNode:
+    ...
+
+
+# The overload representing a call with `num_outputs` specified. It expects a function
+# returning a tuple/sequence of tensors or batches, corresponding to a tuple of `num_outputs`
+# DataNode outputs.
+# `Any` can be replaced to represent TensorLike and BatchLike values.
+# TODO(klecki): overloads with specific `batch` values can be considered
+@overload
+def external_source(
+    source: Optional[Union[Callable[..., Sequence[Any]], Iterable[Sequence[Any]]]] = None,
+    num_outputs: int = ...,
+    *,
+    batch: Optional[bool] = None,
+    batch_info: Optional[bool] = False,
+    dtype: Union[Sequence[DALIDataType], DALIDataType, None] = None,
+    ndim: Union[Sequence[int], int, None] = None,
+    layout: Union[Sequence[str], str, None] = None,
+    name: Optional[str] = None,
+    device: Optional[str] = "cpu",
+    cuda_stream: Optional[Any] = None,
+    use_copy_kernel: Optional[bool] = False,
+    cycle: Union[str, bool, None] = None,
+    repeat_last: Optional[bool] = False,
+    parallel: Optional[bool] = False,
+    no_copy: Optional[bool] = None,
+    prefetch_queue_depth: Optional[int] = 1,
+    bytes_per_sample_hint: Union[Sequence[int], int, None] = [0],
+) -> Sequence[DataNode]:
+    ...