diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/README.md b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/README.md
new file mode 100644
index 000000000000..dbd350bfe1e5
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/README.md
@@ -0,0 +1,56 @@
+# DataLake Performance Tests
+
+In order to run the performance tests, the `azure-devtools` package must be installed. This is done as part of the `dev_requirements`.
+Start by creating a new virtual environment for your perf tests. This will need to be a Python 3 environment, preferably >=3.7.
+Note that there are no T1 tests for this project.
+
+### Setup for test resources
+
+These tests will run against a pre-configured Storage account. The following environment variable will need to be set for the tests to access the live resources:
+```
+AZURE_STORAGE_CONNECTION_STRING=<live storage account connection string>
+```
+
+### Setup for T2 perf test runs
+
+```cmd
+(env) ~/azure-storage-file-datalake> pip install -r dev_requirements.txt
+(env) ~/azure-storage-file-datalake> pip install -e .
+```
+
+## Test commands
+
+When `azure-devtools` is installed, you will have access to the `perfstress` command line tool, which will scan the current module for runnable perf tests. Only a specific test can be run at a time (i.e. there is no "run all" feature).
+
+```cmd
+(env) ~/azure-storage-file-datalake> cd tests
+(env) ~/azure-storage-file-datalake/tests> perfstress
+```
+Using the `perfstress` command alone will list the available perf tests found.
+
+### Common perf command line options
+These options are available for all perf tests:
+- `--duration=10` Number of seconds to run as many operations (the "run" function) as possible. Default is 10.
+- `--iterations=1` Number of test iterations to run. Default is 1.
+- `--parallel=1` Number of tests to run in parallel. Default is 1.
+- `--no-client-share` Whether each parallel test instance should share a single client, or use their own. Default is False (sharing).
+- `--warm-up=5` Number of seconds to spend warming up the connection before measuring begins. Default is 5.
+- `--sync` Whether to run the tests in sync or async. Default is False (async).
+- `--no-cleanup` Whether to keep newly created resources after test run. Default is False (resources will be deleted).
+
+### Common DataLake command line options
+These options are available for all DataLake perf tests:
+- `--size=10240` Size in bytes of data to be transferred in upload or download tests. Default is 10240.
+- `--max-concurrency=1` Number of threads to concurrently upload/download a single operation using the SDK API parameter. Default is 1.
+
+### T2 Tests
+The tests currently written for the T2 SDK:
+- `UploadTest` Uploads a stream of `size` bytes to a new File.
+- `UploadFromFileTest` Uploads a local file of `size` bytes to a new File.
+- `DownloadTest` Downloads a stream of `size` bytes.
+- `AppendTest` Appends `size` bytes to an existing file.
+
+## Example command
+```cmd
+(env) ~/azure-storage-file-datalake/tests> perfstress UploadTest --parallel=2 --size=10240
+```
\ No newline at end of file
diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/__init__.py b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/_test_base.py b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/_test_base.py
new file mode 100644
index 000000000000..cf8728fb9cb1
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/_test_base.py
@@ -0,0 +1,71 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import os
+import uuid
+
+from azure_devtools.perfstress_tests import PerfStressTest
+
+from azure.core.exceptions import ResourceNotFoundError
+from azure.storage.filedatalake import DataLakeServiceClient as SyncDataLakeServiceClient
+from azure.storage.filedatalake.aio import DataLakeServiceClient as AsyncDataLakeServiceClient
+
+
+class _ServiceTest(PerfStressTest):
+    service_client = None
+    async_service_client = None
+
+    def __init__(self, arguments):
+        super().__init__(arguments)
+        connection_string = self.get_from_env("AZURE_STORAGE_CONNECTION_STRING")
+        if not _ServiceTest.service_client or self.args.no_client_share:
+            _ServiceTest.service_client = SyncDataLakeServiceClient.from_connection_string(conn_str=connection_string)
+            _ServiceTest.async_service_client = AsyncDataLakeServiceClient.from_connection_string(conn_str=connection_string)
+        self.service_client = _ServiceTest.service_client
+        self.async_service_client = _ServiceTest.async_service_client
+
+    async def close(self):
+        await self.async_service_client.close()
+        await super().close()
+
+    @staticmethod
+    def add_arguments(parser):
+        super(_ServiceTest, _ServiceTest).add_arguments(parser)
+        parser.add_argument('-c', '--max-concurrency', nargs='?', type=int, help='Maximum number of concurrent threads used for data transfer. Defaults to 1', default=1)
+        parser.add_argument('-s', '--size', nargs='?', type=int, help='Size of data to transfer. Default is 10240.', default=10240)
+        parser.add_argument('--no-client-share', action='store_true', help='Create one ServiceClient per test instance. Default is to share a single ServiceClient.', default=False)
+
+
+class _FileSystemTest(_ServiceTest):
+    fs_name = "perfstress-" + str(uuid.uuid4())
+
+    def __init__(self, arguments):
+        super().__init__(arguments)
+        self.fs_client = self.service_client.get_file_system_client(self.fs_name)
+        self.async_fs_client = self.async_service_client.get_file_system_client(self.fs_name)
+
+    async def global_setup(self):
+        await super().global_setup()
+        await self.async_fs_client.create_file_system()
+
+    async def global_cleanup(self):
+        await self.async_fs_client.delete_file_system()
+        await super().global_cleanup()
+
+    async def close(self):
+        await self.async_fs_client.close()
+        await super().close()
+
+
+class _FileTest(_FileSystemTest):
+    def __init__(self, arguments):
+        super().__init__(arguments)
+        file_name = "sharefiletest-" + str(uuid.uuid4())
+        self.file_client = self.fs_client.get_file_client(file_name)
+        self.async_file_client = self.async_fs_client.get_file_client(file_name)
+
+    async def close(self):
+        await self.async_file_client.close()
+        await super().close()
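To show how these base classes are meant to be consumed, here is a purely illustrative test (not part of this change) that measures a metadata round-trip. It relies only on members defined above (`_FileTest`, `file_client`, `async_file_client`) plus `DataLakeFileClient.get_file_properties`, which this PR does not itself exercise; the lifecycle hooks mirror the real tests that follow.

```python
# Hypothetical example (not in this PR): a metadata-only perf test built on _FileTest.
# The perfstress runner drives the same hooks used by the real tests in this change:
# setup() once per test instance, then run_sync()/run_async() in a loop for --duration seconds.
from ._test_base import _FileTest


class GetPropertiesTest(_FileTest):
    async def setup(self):
        # _FileTest gives every instance its own file client; create the file it will query.
        await self.async_file_client.create_file()

    def run_sync(self):
        self.file_client.get_file_properties()

    async def run_async(self):
        await self.async_file_client.get_file_properties()
```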
diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/append.py b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/append.py
new file mode 100644
index 000000000000..a12ba507e91a
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/append.py
@@ -0,0 +1,37 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import uuid
+
+from azure_devtools.perfstress_tests import RandomStream, AsyncRandomStream
+
+from ._test_base import _FileSystemTest
+
+
+class AppendTest(_FileSystemTest):
+    def __init__(self, arguments):
+        super().__init__(arguments)
+        file_name = "filetest-" + str(uuid.uuid4())
+        self.file_client = self.fs_client.get_file_client(file_name)
+        self.async_file_client = self.async_fs_client.get_file_client(file_name)
+        self.upload_stream = RandomStream(self.args.size)
+        self.upload_stream_async = AsyncRandomStream(self.args.size)
+
+    async def setup(self):
+        await self.async_file_client.create_file()
+
+    def run_sync(self):
+        self.upload_stream.reset()
+        self.file_client.append_data(
+            self.upload_stream,
+            length=self.args.size,
+            offset=0)
+
+    async def run_async(self):
+        self.upload_stream_async.reset()
+        await self.async_file_client.append_data(
+            self.upload_stream_async,
+            length=self.args.size,
+            offset=0)
diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/read.py b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/read.py
new file mode 100644
index 000000000000..e0addee7b440
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/read.py
@@ -0,0 +1,37 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from azure_devtools.perfstress_tests import get_random_bytes, WriteStream
+
+from ._test_base import _FileSystemTest
+
+
+class DownloadTest(_FileSystemTest):
+    def __init__(self, arguments):
+        super().__init__(arguments)
+        file_name = "downloadtest"
+        self.file_client = self.fs_client.get_file_client(file_name)
+        self.async_file_client = self.async_fs_client.get_file_client(file_name)
+        self.download_stream = WriteStream()
+
+    async def global_setup(self):
+        await super().global_setup()
+        data = get_random_bytes(self.args.size)
+        await self.async_file_client.create_file()
+        await self.async_file_client.upload_data(data, overwrite=True)
+
+    def run_sync(self):
+        self.download_stream.reset()
+        stream = self.file_client.download_file(max_concurrency=self.args.max_concurrency)
+        stream.readinto(self.download_stream)
+
+    async def run_async(self):
+        self.download_stream.reset()
+        stream = await self.async_file_client.download_file(max_concurrency=self.args.max_concurrency)
+        await stream.readinto(self.download_stream)
+
+    async def close(self):
+        await self.async_file_client.close()
+        await super().close()
diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/upload.py b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/upload.py
new file mode 100644
index 000000000000..09af7ba6cb4f
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/upload.py
@@ -0,0 +1,32 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from ._test_base import _FileTest
+
+from azure_devtools.perfstress_tests import RandomStream
+from azure_devtools.perfstress_tests import AsyncRandomStream
+
+
+class UploadTest(_FileTest):
+    def __init__(self, arguments):
+        super().__init__(arguments)
+        self.upload_stream = RandomStream(self.args.size)
+        self.upload_stream_async = AsyncRandomStream(self.args.size)
+
+    def run_sync(self):
+        self.upload_stream.reset()
+        self.file_client.upload_data(
+            self.upload_stream,
+            length=self.args.size,
+            overwrite=True,
+            max_concurrency=self.args.max_concurrency)
+
+    async def run_async(self):
+        self.upload_stream_async.reset()
+        await self.async_file_client.upload_data(
+            self.upload_stream_async,
+            length=self.args.size,
+            overwrite=True,
+            max_concurrency=self.args.max_concurrency)
diff --git a/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/upload_from_file.py b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/upload_from_file.py
new file mode 100644
index 000000000000..00370b1d1e41
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/perfstress_tests/upload_from_file.py
@@ -0,0 +1,40 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import os
+import tempfile
+
+from azure_devtools.perfstress_tests import get_random_bytes
+
+from ._test_base import _FileTest
+
+
+class UploadFromFileTest(_FileTest):
+    temp_file = None
+
+    async def global_setup(self):
+        await super().global_setup()
+        data = get_random_bytes(self.args.size)
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            UploadFromFileTest.temp_file = temp_file.name
+            temp_file.write(data)
+
+    async def global_cleanup(self):
+        os.remove(UploadFromFileTest.temp_file)
+        await super().global_cleanup()
+
+    def run_sync(self):
+        with open(UploadFromFileTest.temp_file, 'rb') as fp:
+            self.file_client.upload_data(
+                fp,
+                overwrite=True,
+                max_concurrency=self.args.max_concurrency)
+
+    async def run_async(self):
+        with open(UploadFromFileTest.temp_file, 'rb') as fp:
+            await self.async_file_client.upload_data(
+                fp,
+                overwrite=True,
+                max_concurrency=self.args.max_concurrency)
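Outside the perf harness, the same client surface these upload and download tests exercise can be driven directly. The following is a minimal standalone sketch, not part of this change: the file system name "perf-demo", the file name "hello.txt", and the `create_file_system`/`readall` calls are assumptions beyond what the diff shows, and it presumes the `AZURE_STORAGE_CONNECTION_STRING` variable from the README is set.

```python
# Standalone sketch (illustrative only): upload a small in-memory payload the same way
# the perf tests do, then read it back and clean up. Assumes AZURE_STORAGE_CONNECTION_STRING
# points at a pre-configured account and that "perf-demo" is free to create.
import os

from azure.storage.filedatalake import DataLakeServiceClient

service = DataLakeServiceClient.from_connection_string(
    conn_str=os.environ["AZURE_STORAGE_CONNECTION_STRING"])
fs_client = service.create_file_system("perf-demo")
file_client = fs_client.get_file_client("hello.txt")

file_client.create_file()
file_client.upload_data(b"hello datalake", overwrite=True)

downloaded = file_client.download_file().readall()
assert downloaded == b"hello datalake"

fs_client.delete_file_system()
```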
diff --git a/tools/azure-devtools/src/azure_devtools/perfstress_tests/async_random_stream.py b/tools/azure-devtools/src/azure_devtools/perfstress_tests/async_random_stream.py
index 56ceea2af1d6..098a019fa36e 100644
--- a/tools/azure-devtools/src/azure_devtools/perfstress_tests/async_random_stream.py
+++ b/tools/azure-devtools/src/azure_devtools/perfstress_tests/async_random_stream.py
@@ -5,11 +5,11 @@
 
 from io import BytesIO
 
-from .random_stream import get_random_bytes
+from .random_stream import get_random_bytes, _DEFAULT_LENGTH
 
 
 class AsyncRandomStream(BytesIO):
-    def __init__(self, length, initial_buffer_length=1024 * 1024):
+    def __init__(self, length, initial_buffer_length=_DEFAULT_LENGTH):
         super().__init__()
         self._base_data = get_random_bytes(initial_buffer_length)
         self._data_length = length
@@ -45,6 +45,17 @@ def seek(self, index):
     def tell(self):
         return self._position
 
+    def seek(self, index, whence=0):
+        if whence == 0:
+            self._position = index
+        elif whence == 1:
+            self._position = self._position + index
+        elif whence == 2:
+            self._position = self._length - 1 + index
+
+    def tell(self):
+        return self._position
+
     def remaining(self):
         return self._remaining
 
diff --git a/tools/azure-devtools/src/azure_devtools/perfstress_tests/random_stream.py b/tools/azure-devtools/src/azure_devtools/perfstress_tests/random_stream.py
index 393bd5dcf809..324a3eb67553 100644
--- a/tools/azure-devtools/src/azure_devtools/perfstress_tests/random_stream.py
+++ b/tools/azure-devtools/src/azure_devtools/perfstress_tests/random_stream.py
@@ -43,12 +43,17 @@ def read(self, size=None):
         self._remaining = self._remaining - e
         self._position += e
         return self._base_data[:e]
-    
+
     def tell(self):
         return self._position
 
-    def seek(self, index):
-        self._position = index
+    def seek(self, index, whence=0):
+        if whence == 0:
+            self._position = index
+        elif whence == 1:
+            self._position = self._position + index
+        elif whence == 2:
+            self._position = self._length - 1 + index
 
     def remaining(self):
         return self._remaining
@@ -72,6 +77,6 @@ def seek(self, index):
 
     def seekable(self):
         return True
-    
+
     def tell(self):
         return self._position
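The `whence` handling added to these helper streams follows the positional `seek` convention from Python's `io` module (`os.SEEK_SET`, `os.SEEK_CUR`, `os.SEEK_END`), which the SDK's transfer code may use when repositioning a stream. A minimal illustration of the absolute and relative modes against `RandomStream`, illustrative only and based solely on the branches added in this diff:

```python
# Illustration only: exercising the new seek(index, whence) behavior on RandomStream.
# SEEK_SET (0) and SEEK_CUR (1) map directly onto the branches added above.
import os

from azure_devtools.perfstress_tests import RandomStream

stream = RandomStream(10 * 1024)   # perf-test stream yielding 10 KiB of pseudo-random data

stream.seek(4096)                  # whence defaults to 0: absolute position
print(stream.tell())               # -> 4096

stream.seek(1024, os.SEEK_CUR)     # relative to the current position
print(stream.tell())               # -> 5120

stream.seek(0, os.SEEK_SET)        # rewind, e.g. before re-reading the payload
print(stream.tell())               # -> 0
```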