Skip to content

Commit

Permalink
Enable sanitizers in regular CI runs (NVIDIA#3422)
Browse files Browse the repository at this point in the history
* Enable sanitizers in regular CI runs

- adds a suppression list that should silence all detected errors
  that do not originate from DALI
- makes any detected leak fail the test job
- turns off `fast_unwind_on_malloc` to make the stack trace
  more accurate (but slower) and allow more accurate suppressions,
  when the frame pointer is not preserved (python binary)
- shortens the TF-based L0 test and excludes the numpy_reader test when sanitizers are on

Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com>
  • Loading branch information
JanuszL authored and cyyever committed Jan 23, 2022
1 parent 11986cd commit 7af1c1b
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 20 deletions.
10 changes: 9 additions & 1 deletion qa/TL0_python-self-test-readers-decoders/test_body.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
#!/bin/bash -e

# Run nosetests on each reader, decoder, external-source and pool test script
# produced by the `ls` listing below, one script per nosetests invocation.
# Reads DALI_ENABLE_SANITIZERS: when it is non-empty the numpy reader test
# script is filtered out of the list.
test_nose() {
# the numpy reader test seems to be extremely slow with sanitizers, so disable it
if [ -n "$DALI_ENABLE_SANITIZERS" ]; then
FILTER_PATTERN="test_operator_readers_numpy.py"
else
# sed deletes every listed name that matches FILTER_PATTERN; "#" is presumably
# contained in none of the test file names, so nothing is filtered out here
FILTER_PATTERN="#"
fi

# NOTE(review): the `test_pool.py ...` line appears twice below - once closing the
# command substitution directly and once continuing into the sed filter. This looks
# like the pre-change and post-change variants of the same line shown together;
# confirm which one is live (the sed variant is the one that applies FILTER_PATTERN).
for test_script in $(ls test_operator_readers_*.py test_operator_decoders_*.py \
test_external_source_dali.py test_external_source_numpy.py \
test_external_source_parallel_garbage_collection_order.py \
test_external_source_parallel_custom_serialization.py \
test_pool.py test_external_source_parallel.py test_external_source_parallel_shared_batch.py); do
test_pool.py test_external_source_parallel.py test_external_source_parallel_shared_batch.py \
| sed "/$FILTER_PATTERN/d"); do
# --attr '!slow' asks nose to skip tests carrying the `slow` attribute
nosetests --verbose --attr '!slow' ${test_script}
done
}
Expand Down
16 changes: 11 additions & 5 deletions qa/TL0_tensorflow_plugin/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@
# pip packages and test directory for this job; both are only assigned here
# (presumably consumed by the shared test template - confirm against caller)
pip_packages="nose tensorflow-gpu"
target_dir=./dali/test/python

# NOTE(review): prolog/epilog are assigned unconditionally here and again in the
# `else` branch below; this looks like the pre-change and post-change variants
# shown together - confirm which one is live.
# populate epilog and prolog with variants to enable/disable conda and virtual env
# every test will be executed for the configs below
prolog=(: enable_virtualenv)
epilog=(: disable_virtualenv)
# reduce the length of the sanitizer tests as much as possible
# use only one TF version, don't test virtual env
if [ -n "$DALI_ENABLE_SANITIZERS" ]; then
one_config_only=true
else
# populate epilog and prolog with variants to enable/disable conda and virtual env
# every test will be executed for the configs below
prolog=(: enable_virtualenv)
epilog=(: disable_virtualenv)
fi

test_body() {
# The package name can be nvidia-dali-tf-plugin, nvidia-dali-tf-plugin-weekly or nvidia-dali-tf-plugin-nightly
Expand All @@ -26,7 +32,7 @@ test_body() {
nosetests --verbose test_dali_tf_dataset.py
nosetests --verbose test_dali_tf_dataset_shape.py
nosetests --verbose test_dali_tf_dataset_eager.py
nosetests --verbose test_dali_tf_dataset_graph.py
nosetests --verbose test_dali_tf_dataset_graph.py
}

pushd ../..
Expand Down
49 changes: 41 additions & 8 deletions qa/leak.sup
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
# TODO when all test passes in CI we can set suppress false-positives
# NVJPEG
leak:libnvcuvid.so
# external libraries
leak:paddle
leak:llvm
#leak:PyCode_NewWithPosOnlyArgs
#leak:PyLong_FromLong
#leak:_PyLong_New
#leak:PyBytes_FromString
#leak:PyCapsule_New
#leak:av_realloc_f
#leak:memalign
leak:zmq
leak:sklearn
leak:scipy
leak:numpy
leak:libcublas
leak:libcudnn
leak:numba
leak:libmxnet
leak:libtorch
leak:libc10
leak:av_realloc_f
leak:av_malloc
# tensorflow
leak:PyInit__pywrap_debug_events_writer
leak:_pywrap
leak:PyInit__op_def_registry
leak:PyInit__tf_stack
leak:PyInit__dtypes
# we know of no way to suppress the leaks below other than by listing these functions; they are not caused by DALI
# there is still some danger that one of the functions below appears in a genuine leak and hides it
leak:std::string::_Rep::_S_create
leak:PyLong_FromLong
leak:PyCode_NewWithPosOnlyArgs
leak:_PyLong_New
leak:PyBytes_FromString
leak:PyType_GenericAlloc
leak:PyCapsule_New
leak:_PyObject_GC_NewVar
leak:_PyObject_GC_New
leak:PyDict_Copy
leak:_PyObject_New
leak:PyUnicode_New
leak:PyTuple_Pack
leak:PyTuple_New
leak:_PyObject_MakeTpCall
leak:PyDict_SetDefault
leak:PySequence_List
leak:_PyFunction_Vectorcall
14 changes: 8 additions & 6 deletions qa/test_template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,21 @@ numer_of_prolog_elms=${#prolog[@]}
# Export the sanitizer-related environment (LSAN_OPTIONS, ASAN_OPTIONS,
# ASAN_SYMBOLIZER_PATH, PYTHONMALLOC) and preload a no-op dlclose library.
# Reads $topdir (defined outside this function) to locate the suppression file
# and saves the previous LD_PRELOAD in OLD_LD_PRELOAD.
enable_sanitizer() {
# suppress leaks that are false positive or not related to DALI
export LSAN_OPTIONS=suppressions=$topdir/qa/leak.sup
# NOTE(review): ASAN_OPTIONS is exported twice; the second export overrides the
# first. This looks like the pre-change and post-change variants shown together.
# The second one drops the stray "::" and adds fast_unwind_on_malloc=0.
export ASAN_OPTIONS=symbolize=1:protect_shadow_gap=0:log_path=sanitizer.log:start_deactivated=true:allocator_may_return_null=1::detect_leaks=1
export ASAN_OPTIONS=symbolize=1:protect_shadow_gap=0:log_path=sanitizer.log:start_deactivated=true:allocator_may_return_null=1:detect_leaks=1:fast_unwind_on_malloc=0
# point ASan at whatever llvm-symbolizer `which` finds on PATH (empty if none is found)
export ASAN_SYMBOLIZER_PATH=$(which llvm-symbolizer)
# avoid python false positives
export PYTHONMALLOC=malloc
# if something calls dlclose on a module that leaks and it happens before asan can extract symbols we get "unknown module"
# in the stack trace, to prevent this provide dlclose that does nothing
echo "int dlclose(void* a) { return 0; }" > /tmp/fake_dlclose.c && gcc -shared -o /tmp/libfakeclose.so /tmp/fake_dlclose.c
# remember the current LD_PRELOAD so that disable_sanitizer can restore it
export OLD_LD_PRELOAD=${LD_PRELOAD}
# append the no-op dlclose library to the preload list
export LD_PRELOAD="${LD_PRELOAD} /tmp/libfakeclose.so"
}

# Switch the sanitizer environment back off so that unrelated system
# built-ins are not affected by it:
#   - ASAN_OPTIONS is reduced to a deactivated, no-leak-detection setting
#   - LD_PRELOAD is restored from OLD_LD_PRELOAD
#   - ASAN_SYMBOLIZER_PATH and PYTHONMALLOC are removed from the environment
disable_sanitizer() {
  ASAN_OPTIONS="start_deactivated=true:detect_leaks=0"
  LD_PRELOAD="${OLD_LD_PRELOAD}"
  export ASAN_OPTIONS LD_PRELOAD
  unset ASAN_SYMBOLIZER_PATH PYTHONMALLOC
}
Expand Down Expand Up @@ -85,12 +91,8 @@ process_sanitizers_logs() {
find $topdir -iname "sanitizer.log.*" -print0 | xargs -0 -I file cat file > $topdir/sanitizer.log
if [ -e $topdir/sanitizer.log ]; then
cat $topdir/sanitizer.log
grep -q ERROR $topdir/sanitizer.log || true
# ToDo - enable when the suppression file is completed
# grep -q ERROR $topdir/sanitizer.log && exit 1 || true
grep -q ERROR $topdir/sanitizer.log && exit 1 || true
fi
# rm so that consecutive tests won't reread the same logs over and over
find $topdir -iname "sanitizer.log.*" -delete
}

# get extra index url for given packages
Expand Down

0 comments on commit 7af1c1b

Please sign in to comment.