diff --git a/docs/en/api.rst b/docs/en/api.rst index 3cb68d27eb..ab75ca6b7b 100644 --- a/docs/en/api.rst +++ b/docs/en/api.rst @@ -1,8 +1,3 @@ -fileio -------- -.. automodule:: mmcv.fileio - :members: - image ------ .. automodule:: mmcv.image diff --git a/docs/en/index.rst b/docs/en/index.rst index bccbc37297..6840da3d45 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -18,7 +18,6 @@ You can switch between Chinese and English documents in the lower-left corner of understand_mmcv/config.md understand_mmcv/registry.md understand_mmcv/runner.md - understand_mmcv/io.md understand_mmcv/data_process.md understand_mmcv/visualization.md understand_mmcv/cnn.md diff --git a/docs/en/understand_mmcv/io.md b/docs/en/understand_mmcv/io.md deleted file mode 100644 index 64fbc8b8e6..0000000000 --- a/docs/en/understand_mmcv/io.md +++ /dev/null @@ -1,247 +0,0 @@ -## File IO - -This module provides two universal API to load and dump files of different formats. - -```{note} -Since v1.3.16, the IO modules support loading (dumping) data from (to) different backends, respectively. More details are in PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330). -``` - -### Load and dump data - -`mmcv` provides a universal api for loading and dumping data, currently -supported formats are json, yaml and pickle. - -#### Load from disk or dump to disk - -```python -import mmcv - -# load data from a file -data = mmcv.load('test.json') -data = mmcv.load('test.yaml') -data = mmcv.load('test.pkl') -# load data from a file-like object -with open('test.json', 'r') as f: - data = mmcv.load(f, file_format='json') - -# dump data to a string -json_str = mmcv.dump(data, file_format='json') - -# dump data to a file with a filename (infer format from file extension) -mmcv.dump(data, 'out.pkl') - -# dump data to a file with a file-like object -with open('test.yaml', 'w') as f: - data = mmcv.dump(data, f, file_format='yaml') -``` - -#### Load from other backends or dump to other backends - -```python -import mmcv - -# load data from a file -data = mmcv.load('s3://bucket-name/test.json') -data = mmcv.load('s3://bucket-name/test.yaml') -data = mmcv.load('s3://bucket-name/test.pkl') - -# dump data to a file with a filename (infer format from file extension) -mmcv.dump(data, 's3://bucket-name/out.pkl') -``` - -It is also very convenient to extend the api to support more file formats. -All you need to do is to write a file handler inherited from `BaseFileHandler` -and register it with one or several file formats. - -You need to implement at least 3 methods. - -```python -import mmcv - -# To register multiple file formats, a list can be used as the argument. -# @mmcv.register_handler(['txt', 'log']) -@mmcv.register_handler('txt') -class TxtHandler1(mmcv.BaseFileHandler): - - def load_from_fileobj(self, file): - return file.read() - - def dump_to_fileobj(self, obj, file): - file.write(str(obj)) - - def dump_to_str(self, obj, **kwargs): - return str(obj) -``` - -Here is an example of `PickleHandler`. - -```python -import pickle - -class PickleHandler(mmcv.BaseFileHandler): - - def load_from_fileobj(self, file, **kwargs): - return pickle.load(file, **kwargs) - - def load_from_path(self, filepath, **kwargs): - return super(PickleHandler, self).load_from_path( - filepath, mode='rb', **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('protocol', 2) - return pickle.dumps(obj, **kwargs) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('protocol', 2) - pickle.dump(obj, file, **kwargs) - - def dump_to_path(self, obj, filepath, **kwargs): - super(PickleHandler, self).dump_to_path( - obj, filepath, mode='wb', **kwargs) -``` - -### Load a text file as a list or dict - -For example `a.txt` is a text file with 5 lines. - -``` -a -b -c -d -e -``` - -#### Load from disk - -Use `list_from_file` to load the list from a.txt. - -```python ->>> mmcv.list_from_file('a.txt') -['a', 'b', 'c', 'd', 'e'] ->>> mmcv.list_from_file('a.txt', offset=2) -['c', 'd', 'e'] ->>> mmcv.list_from_file('a.txt', max_num=2) -['a', 'b'] ->>> mmcv.list_from_file('a.txt', prefix='/mnt/') -['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] -``` - -For example `b.txt` is a text file with 3 lines. - -``` -1 cat -2 dog cow -3 panda -``` - -Then use `dict_from_file` to load the dict from `b.txt`. - -```python ->>> mmcv.dict_from_file('b.txt') -{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} ->>> mmcv.dict_from_file('b.txt', key_type=int) -{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} -``` - -#### Load from other backends - -Use `list_from_file` to load the list from `s3://bucket-name/a.txt`. - -```python ->>> mmcv.list_from_file('s3://bucket-name/a.txt') -['a', 'b', 'c', 'd', 'e'] ->>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2) -['c', 'd', 'e'] ->>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2) -['a', 'b'] ->>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/') -['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] -``` - -Use `dict_from_file` to load the dict from `s3://bucket-name/b.txt`. - -```python ->>> mmcv.dict_from_file('s3://bucket-name/b.txt') -{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} ->>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int) -{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} -``` - -### Load and dump checkpoints - -#### Load checkpoints from disk or save to disk - -We can read the checkpoints from disk or save to disk in the following way. - -```python -import torch - -filepath1 = '/path/of/your/checkpoint1.pth' -filepath2 = '/path/of/your/checkpoint2.pth' -# read from filepath1 -checkpoint = torch.load(filepath1) -# save to filepath2 -torch.save(checkpoint, filepath2) -``` - -MMCV provides many backends. `HardDiskBackend` is one of them and we can use it to read or save checkpoints. - -```python -import io -from mmcv.fileio.file_client import HardDiskBackend - -disk_backend = HardDiskBackend() -with io.BytesIO(disk_backend.get(filepath1)) as buffer: - checkpoint = torch.load(buffer) -with io.BytesIO() as buffer: - torch.save(checkpoint, buffer) - disk_backend.put(buffer.getvalue(), filepath2) -``` - -If we want to implement an interface which automatically select the corresponding -backend based on the file path, we can use the `FileClient`. -For example, we want to implement two methods for reading checkpoints as well as saving checkpoints, -which need to support different types of file paths, either disk paths, network paths or other paths. - -```python -from mmcv.fileio.file_client import FileClient - -def load_checkpoint(path): - file_client = FileClient.infer(uri=path) - with io.BytesIO(file_client.get(path)) as buffer: - checkpoint = torch.load(buffer) - return checkpoint - -def save_checkpoint(checkpoint, path): - with io.BytesIO() as buffer: - torch.save(checkpoint, buffer) - file_client.put(buffer.getvalue(), path) - -file_client = FileClient.infer_client(uri=filepath1) -checkpoint = load_checkpoint(filepath1) -save_checkpoint(checkpoint, filepath2) -``` - -#### Load checkpoints from the Internet - -```{note} -Currently, it only supports reading checkpoints from the Internet, and does not support saving checkpoints to the Internet. -``` - -```python -import io -import torch -from mmcv.fileio.file_client import HTTPBackend, FileClient - -filepath = 'http://path/of/your/checkpoint.pth' -checkpoint = torch.utils.model_zoo.load_url(filepath) - -http_backend = HTTPBackend() -with io.BytesIO(http_backend.get(filepath)) as buffer: - checkpoint = torch.load(buffer) - -file_client = FileClient.infer_client(uri=filepath) -with io.BytesIO(file_client.get(filepath)) as buffer: - checkpoint = torch.load(buffer) -``` diff --git a/docs/zh_cn/api.rst b/docs/zh_cn/api.rst index 747aa659aa..6d2c744204 100644 --- a/docs/zh_cn/api.rst +++ b/docs/zh_cn/api.rst @@ -1,8 +1,3 @@ -fileio -------- -.. automodule:: mmcv.fileio - :members: - image ------ .. automodule:: mmcv.image diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index cd2c833c6d..1b93345b67 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -18,7 +18,6 @@ understand_mmcv/config.md understand_mmcv/registry.md understand_mmcv/runner.md - understand_mmcv/io.md understand_mmcv/data_process.md understand_mmcv/data_transform.md understand_mmcv/visualization.md diff --git a/docs/zh_cn/understand_mmcv/io.md b/docs/zh_cn/understand_mmcv/io.md deleted file mode 100644 index eb4fe14ba1..0000000000 --- a/docs/zh_cn/understand_mmcv/io.md +++ /dev/null @@ -1,241 +0,0 @@ -## 文件输入输出 - -文件输入输出模块提供了两个通用的 API 接口用于读取和保存不同格式的文件。 - -```{note} -在 v1.3.16 及之后的版本中,IO 模块支持从不同后端读取数据并支持将数据至不同后端。更多细节请访问 PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330)。 -``` - -### 读取和保存数据 - -`mmcv` 提供了一个通用的 api 用于读取和保存数据,目前支持的格式有 json、yaml 和 pickle。 - -#### 从硬盘读取数据或者将数据保存至硬盘 - -```python -import mmcv - -# 从文件中读取数据 -data = mmcv.load('test.json') -data = mmcv.load('test.yaml') -data = mmcv.load('test.pkl') -# 从文件对象中读取数据 -with open('test.json', 'r') as f: - data = mmcv.load(f, file_format='json') - -# 将数据序列化为字符串 -json_str = mmcv.dump(data, file_format='json') - -# 将数据保存至文件 (根据文件名后缀反推文件类型) -mmcv.dump(data, 'out.pkl') - -# 将数据保存至文件对象 -with open('test.yaml', 'w') as f: - data = mmcv.dump(data, f, file_format='yaml') -``` - -#### 从其他后端加载或者保存至其他后端 - -```python -import mmcv - -# 从 s3 文件读取数据 -data = mmcv.load('s3://bucket-name/test.json') -data = mmcv.load('s3://bucket-name/test.yaml') -data = mmcv.load('s3://bucket-name/test.pkl') - -# 将数据保存至 s3 文件 (根据文件名后缀反推文件类型) -mmcv.dump(data, 's3://bucket-name/out.pkl') -``` - -我们提供了易于拓展的方式以支持更多的文件格式。我们只需要创建一个继承自 `BaseFileHandler` 的 -文件句柄类并将其注册到 `mmcv` 中即可。句柄类至少需要重写三个方法。 - -```python -import mmcv - -# 支持为文件句柄类注册多个文件格式 -# @mmcv.register_handler(['txt', 'log']) -@mmcv.register_handler('txt') -class TxtHandler1(mmcv.BaseFileHandler): - - def load_from_fileobj(self, file): - return file.read() - - def dump_to_fileobj(self, obj, file): - file.write(str(obj)) - - def dump_to_str(self, obj, **kwargs): - return str(obj) -``` - -以 `PickleHandler` 为例 - -```python -import pickle - -class PickleHandler(mmcv.BaseFileHandler): - - def load_from_fileobj(self, file, **kwargs): - return pickle.load(file, **kwargs) - - def load_from_path(self, filepath, **kwargs): - return super(PickleHandler, self).load_from_path( - filepath, mode='rb', **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('protocol', 2) - return pickle.dumps(obj, **kwargs) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('protocol', 2) - pickle.dump(obj, file, **kwargs) - - def dump_to_path(self, obj, filepath, **kwargs): - super(PickleHandler, self).dump_to_path( - obj, filepath, mode='wb', **kwargs) -``` - -### 读取文件并返回列表或字典 - -例如, `a.txt` 是文本文件,一共有5行内容。 - -``` -a -b -c -d -e -``` - -#### 从硬盘读取 - -使用 `list_from_file` 读取 `a.txt` - -```python ->>> mmcv.list_from_file('a.txt') -['a', 'b', 'c', 'd', 'e'] ->>> mmcv.list_from_file('a.txt', offset=2) -['c', 'd', 'e'] ->>> mmcv.list_from_file('a.txt', max_num=2) -['a', 'b'] ->>> mmcv.list_from_file('a.txt', prefix='/mnt/') -['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] -``` - -同样, `b.txt` 也是文本文件,一共有3行内容 - -``` -1 cat -2 dog cow -3 panda -``` - -使用 `dict_from_file` 读取 `b.txt` - -```python ->>> mmcv.dict_from_file('b.txt') -{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} ->>> mmcv.dict_from_file('b.txt', key_type=int) -{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} -``` - -#### 从其他后端读取 - -使用 `list_from_file` 读取 `s3://bucket-name/a.txt` - -```python ->>> mmcv.list_from_file('s3://bucket-name/a.txt') -['a', 'b', 'c', 'd', 'e'] ->>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2) -['c', 'd', 'e'] ->>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2) -['a', 'b'] ->>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/') -['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] -``` - -使用 `dict_from_file` 读取 `b.txt` - -```python ->>> mmcv.dict_from_file('s3://bucket-name/b.txt') -{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} ->>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int) -{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} -``` - -### 读取和保存权重文件 - -#### 从硬盘读取权重文件或者将权重文件保存至硬盘 - -我们可以通过下面的方式从磁盘读取权重文件或者将权重文件保存至磁盘 - -```python -import torch - -filepath1 = '/path/of/your/checkpoint1.pth' -filepath2 = '/path/of/your/checkpoint2.pth' -# 从 filepath1 读取权重文件 -checkpoint = torch.load(filepath1) -# 将权重文件保存至 filepath2 -torch.save(checkpoint, filepath2) -``` - -MMCV 提供了很多后端,`HardDiskBackend` 是其中一个,我们可以通过它来读取或者保存权重文件。 - -```python -import io -from mmcv.fileio.file_client import HardDiskBackend - -disk_backend = HardDiskBackend() -with io.BytesIO(disk_backend.get(filepath1)) as buffer: - checkpoint = torch.load(buffer) -with io.BytesIO() as buffer: - torch.save(checkpoint, f) - disk_backend.put(f.getvalue(), filepath2) -``` - -如果我们想在接口中实现根据文件路径自动选择对应的后端,我们可以使用 `FileClient`。 -例如,我们想实现两个方法,分别是读取权重以及保存权重,它们需支持不同类型的文件路径,可以是磁盘路径,也可以是网络路径或者其他路径。 - -```python -from mmcv.fileio.file_client import FileClient - -def load_checkpoint(path): - file_client = FileClient.infer(uri=path) - with io.BytesIO(file_client.get(path)) as buffer: - checkpoint = torch.load(buffer) - return checkpoint - -def save_checkpoint(checkpoint, path): - with io.BytesIO() as buffer: - torch.save(checkpoint, buffer) - file_client.put(buffer.getvalue(), path) - -file_client = FileClient.infer_client(uri=filepath1) -checkpoint = load_checkpoint(filepath1) -save_checkpoint(checkpoint, filepath2) -``` - -#### 从网络远端读取权重文件 - -```{note} -目前只支持从网络远端读取权重文件,暂不支持将权重文件写入网络远端 -``` - -```python -import io -import torch -from mmcv.fileio.file_client import HTTPBackend, FileClient - -filepath = 'http://path/of/your/checkpoint.pth' -checkpoint = torch.utils.model_zoo.load_url(filepath) - -http_backend = HTTPBackend() -with io.BytesIO(http_backend.get(filepath)) as buffer: - checkpoint = torch.load(buffer) - -file_client = FileClient.infer_client(uri=filepath) -with io.BytesIO(file_client.get(filepath)) as buffer: - checkpoint = torch.load(buffer) -``` diff --git a/mmcv/__init__.py b/mmcv/__init__.py index 0ed3b97d48..57ac414727 100644 --- a/mmcv/__init__.py +++ b/mmcv/__init__.py @@ -1,7 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # flake8: noqa from .arraymisc import * -from .fileio import * from .image import * from .transforms import * from .utils import * diff --git a/mmcv/engine/test.py b/mmcv/engine/test.py index 83546caec4..9baad4e0bf 100644 --- a/mmcv/engine/test.py +++ b/mmcv/engine/test.py @@ -6,6 +6,7 @@ import time from typing import Optional +import mmengine import torch import torch.distributed as dist import torch.nn as nn @@ -135,7 +136,7 @@ def collect_results_cpu(result_part: list, mmcv.mkdir_or_exist(tmpdir) # dump the part result to the dir part_file = osp.join(tmpdir, f'part_{rank}.pkl') # type: ignore - mmcv.dump(result_part, part_file) + mmengine.dump(result_part, part_file) dist.barrier() # collect all parts if rank != 0: @@ -145,7 +146,7 @@ def collect_results_cpu(result_part: list, part_list = [] for i in range(world_size): part_file = osp.join(tmpdir, f'part_{i}.pkl') # type: ignore - part_result = mmcv.load(part_file) + part_result = mmengine.load(part_file) # When data is severely insufficient, an empty part_result # on a certain gpu could makes the overall outputs empty. if part_result: diff --git a/mmcv/fileio/__init__.py b/mmcv/fileio/__init__.py deleted file mode 100644 index 2051b85f7e..0000000000 --- a/mmcv/fileio/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .file_client import BaseStorageBackend, FileClient -from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler -from .io import dump, load, register_handler -from .parse import dict_from_file, list_from_file - -__all__ = [ - 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', - 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', - 'list_from_file', 'dict_from_file' -] diff --git a/mmcv/fileio/file_client.py b/mmcv/fileio/file_client.py deleted file mode 100644 index ee7c3164e2..0000000000 --- a/mmcv/fileio/file_client.py +++ /dev/null @@ -1,1173 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import inspect -import os -import os.path as osp -import re -import tempfile -import warnings -from abc import ABCMeta, abstractmethod -from contextlib import contextmanager -from pathlib import Path -from typing import Any, Generator, Iterator, Optional, Tuple, Union -from urllib.request import urlopen - -import mmcv -from mmcv.utils.misc import has_method -from mmcv.utils.path import is_filepath - - -class BaseStorageBackend(metaclass=ABCMeta): - """Abstract class of storage backends. - - All backends need to implement two apis: ``get()`` and ``get_text()``. - ``get()`` reads the file as a byte stream and ``get_text()`` reads the file - as texts. - """ - - # a flag to indicate whether the backend can create a symlink for a file - _allow_symlink = False - - @property - def name(self): - return self.__class__.__name__ - - @property - def allow_symlink(self): - return self._allow_symlink - - @abstractmethod - def get(self, filepath): - pass - - @abstractmethod - def get_text(self, filepath): - pass - - -class CephBackend(BaseStorageBackend): - """Ceph storage backend (for internal use). - - Args: - path_mapping (dict|None): path mapping dict from local path to Petrel - path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` - will be replaced by ``dst``. Default: None. - - .. warning:: - :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, - please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. - """ - - def __init__(self, path_mapping=None): - try: - import ceph - except ImportError: - raise ImportError('Please install ceph to enable CephBackend.') - - warnings.warn( - 'CephBackend will be deprecated, please use PetrelBackend instead', - DeprecationWarning) - self._client = ceph.S3Client() - assert isinstance(path_mapping, dict) or path_mapping is None - self.path_mapping = path_mapping - - def get(self, filepath): - filepath = str(filepath) - if self.path_mapping is not None: - for k, v in self.path_mapping.items(): - filepath = filepath.replace(k, v) - value = self._client.Get(filepath) - value_buf = memoryview(value) - return value_buf - - def get_text(self, filepath, encoding=None): - raise NotImplementedError - - -class PetrelBackend(BaseStorageBackend): - """Petrel storage backend (for internal use). - - PetrelBackend supports reading and writing data to multiple clusters. - If the file path contains the cluster name, PetrelBackend will read data - from specified cluster or write data to it. Otherwise, PetrelBackend will - access the default cluster. - - Args: - path_mapping (dict, optional): Path mapping dict from local path to - Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in - ``filepath`` will be replaced by ``dst``. Default: None. - enable_mc (bool, optional): Whether to enable memcached support. - Default: True. - - Examples: - >>> filepath1 = 's3://path/of/file' - >>> filepath2 = 'cluster-name:s3://path/of/file' - >>> client = PetrelBackend() - >>> client.get(filepath1) # get data from default cluster - >>> client.get(filepath2) # get data from 'cluster-name' cluster - """ - - def __init__(self, - path_mapping: Optional[dict] = None, - enable_mc: bool = True): - try: - from petrel_client import client - except ImportError: - raise ImportError('Please install petrel_client to enable ' - 'PetrelBackend.') - - self._client = client.Client(enable_mc=enable_mc) - assert isinstance(path_mapping, dict) or path_mapping is None - self.path_mapping = path_mapping - - def _map_path(self, filepath: Union[str, Path]) -> str: - """Map ``filepath`` to a string path whose prefix will be replaced by - :attr:`self.path_mapping`. - - Args: - filepath (str): Path to be mapped. - """ - filepath = str(filepath) - if self.path_mapping is not None: - for k, v in self.path_mapping.items(): - filepath = filepath.replace(k, v) - return filepath - - def _format_path(self, filepath: str) -> str: - """Convert a ``filepath`` to standard format of petrel oss. - - If the ``filepath`` is concatenated by ``os.path.join``, in a Windows - environment, the ``filepath`` will be the format of - 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the - above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. - - Args: - filepath (str): Path to be formatted. - """ - return re.sub(r'\\+', '/', filepath) - - def get(self, filepath: Union[str, Path]) -> memoryview: - """Read data from a given ``filepath`` with 'rb' mode. - - Args: - filepath (str or Path): Path to read data. - - Returns: - memoryview: A memory view of expected bytes object to avoid - copying. The memoryview object can be converted to bytes by - ``value_buf.tobytes()``. - """ - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - value = self._client.Get(filepath) - value_buf = memoryview(value) - return value_buf - - def get_text(self, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> str: - """Read data from a given ``filepath`` with 'r' mode. - - Args: - filepath (str or Path): Path to read data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - - Returns: - str: Expected text reading from ``filepath``. - """ - return str(self.get(filepath), encoding=encoding) - - def put(self, obj: bytes, filepath: Union[str, Path]) -> None: - """Save data to a given ``filepath``. - - Args: - obj (bytes): Data to be saved. - filepath (str or Path): Path to write data. - """ - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - self._client.put(filepath, obj) - - def put_text(self, - obj: str, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> None: - """Save data to a given ``filepath``. - - Args: - obj (str): Data to be written. - filepath (str or Path): Path to write data. - encoding (str): The encoding format used to encode the ``obj``. - Default: 'utf-8'. - """ - self.put(bytes(obj, encoding=encoding), filepath) - - def remove(self, filepath: Union[str, Path]) -> None: - """Remove a file. - - Args: - filepath (str or Path): Path to be removed. - """ - if not has_method(self._client, 'delete'): - raise NotImplementedError( - 'Current version of Petrel Python SDK has not supported ' - 'the `delete` method, please use a higher version or dev' - ' branch instead.') - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - self._client.delete(filepath) - - def exists(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path exists. - - Args: - filepath (str or Path): Path to be checked whether exists. - - Returns: - bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. - """ - if not (has_method(self._client, 'contains') - and has_method(self._client, 'isdir')): - raise NotImplementedError( - 'Current version of Petrel Python SDK has not supported ' - 'the `contains` and `isdir` methods, please use a higher' - 'version or dev branch instead.') - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - return self._client.contains(filepath) or self._client.isdir(filepath) - - def isdir(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a directory. - - Args: - filepath (str or Path): Path to be checked whether it is a - directory. - - Returns: - bool: Return ``True`` if ``filepath`` points to a directory, - ``False`` otherwise. - """ - if not has_method(self._client, 'isdir'): - raise NotImplementedError( - 'Current version of Petrel Python SDK has not supported ' - 'the `isdir` method, please use a higher version or dev' - ' branch instead.') - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - return self._client.isdir(filepath) - - def isfile(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a file. - - Args: - filepath (str or Path): Path to be checked whether it is a file. - - Returns: - bool: Return ``True`` if ``filepath`` points to a file, ``False`` - otherwise. - """ - if not has_method(self._client, 'contains'): - raise NotImplementedError( - 'Current version of Petrel Python SDK has not supported ' - 'the `contains` method, please use a higher version or ' - 'dev branch instead.') - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - return self._client.contains(filepath) - - def join_path(self, filepath: Union[str, Path], - *filepaths: Union[str, Path]) -> str: - """Concatenate all file paths. - - Args: - filepath (str or Path): Path to be concatenated. - - Returns: - str: The result after concatenation. - """ - filepath = self._format_path(self._map_path(filepath)) - if filepath.endswith('/'): - filepath = filepath[:-1] - formatted_paths = [filepath] - for path in filepaths: - formatted_paths.append(self._format_path(self._map_path(path))) - return '/'.join(formatted_paths) - - @contextmanager - def get_local_path( - self, - filepath: Union[str, - Path]) -> Generator[Union[str, Path], None, None]: - """Download a file from ``filepath`` and return a temporary path. - - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It - can be called with ``with`` statement, and when exists from the - ``with`` statement, the temporary path will be released. - - Args: - filepath (str | Path): Download a file from ``filepath``. - - Examples: - >>> client = PetrelBackend() - >>> # After existing from the ``with`` clause, - >>> # the path will be removed - >>> with client.get_local_path('s3://path/of/your/file') as path: - ... # do something here - - Yields: - Iterable[str]: Only yield one temporary path. - """ - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - assert self.isfile(filepath) - try: - f = tempfile.NamedTemporaryFile(delete=False) - f.write(self.get(filepath)) - f.close() - yield f.name - finally: - os.remove(f.name) - - def list_dir_or_file(self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False) -> Iterator[str]: - """Scan a directory to find the interested directories or files in - arbitrary order. - - Note: - Petrel has no concept of directories but it simulates the directory - hierarchy in the filesystem through public prefixes. In addition, - if the returned path ends with '/', it means the path is a public - prefix which is a logical directory. - - Note: - :meth:`list_dir_or_file` returns the path relative to ``dir_path``. - In addition, the returned path of directory will not contains the - suffix '/' which is consistent with other backends. - - Args: - dir_path (str | Path): Path of the directory. - list_dir (bool): List the directories. Default: True. - list_file (bool): List the path of files. Default: True. - suffix (str or tuple[str], optional): File suffix - that we are interested in. Default: None. - recursive (bool): If set to True, recursively scan the - directory. Default: False. - - Yields: - Iterable[str]: A relative path to ``dir_path``. - """ - if not has_method(self._client, 'list'): - raise NotImplementedError( - 'Current version of Petrel Python SDK has not supported ' - 'the `list` method, please use a higher version or dev' - ' branch instead.') - - dir_path = self._map_path(dir_path) - dir_path = self._format_path(dir_path) - if list_dir and suffix is not None: - raise TypeError( - '`list_dir` should be False when `suffix` is not None') - - if (suffix is not None) and not isinstance(suffix, (str, tuple)): - raise TypeError('`suffix` must be a string or tuple of strings') - - # Petrel's simulated directory hierarchy assumes that directory paths - # should end with `/` - if not dir_path.endswith('/'): - dir_path += '/' - - root = dir_path - - def _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive): - for path in self._client.list(dir_path): - # the `self.isdir` is not used here to determine whether path - # is a directory, because `self.isdir` relies on - # `self._client.list` - if path.endswith('/'): # a directory path - next_dir_path = self.join_path(dir_path, path) - if list_dir: - # get the relative path and exclude the last - # character '/' - rel_dir = next_dir_path[len(root):-1] - yield rel_dir - if recursive: - yield from _list_dir_or_file(next_dir_path, list_dir, - list_file, suffix, - recursive) - else: # a file path - absolute_path = self.join_path(dir_path, path) - rel_path = absolute_path[len(root):] - if (suffix is None - or rel_path.endswith(suffix)) and list_file: - yield rel_path - - return _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive) - - -class MemcachedBackend(BaseStorageBackend): - """Memcached storage backend. - - Attributes: - server_list_cfg (str): Config file for memcached server list. - client_cfg (str): Config file for memcached client. - sys_path (str | None): Additional path to be appended to `sys.path`. - Default: None. - """ - - def __init__(self, server_list_cfg, client_cfg, sys_path=None): - if sys_path is not None: - import sys - sys.path.append(sys_path) - try: - import mc - except ImportError: - raise ImportError( - 'Please install memcached to enable MemcachedBackend.') - - self.server_list_cfg = server_list_cfg - self.client_cfg = client_cfg - self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, - self.client_cfg) - # mc.pyvector servers as a point which points to a memory cache - self._mc_buffer = mc.pyvector() - - def get(self, filepath): - filepath = str(filepath) - import mc - self._client.Get(filepath, self._mc_buffer) - value_buf = mc.ConvertBuffer(self._mc_buffer) - return value_buf - - def get_text(self, filepath, encoding=None): - raise NotImplementedError - - -class LmdbBackend(BaseStorageBackend): - """Lmdb storage backend. - - Args: - db_path (str): Lmdb database path. - readonly (bool, optional): Lmdb environment parameter. If True, - disallow any write operations. Default: True. - lock (bool, optional): Lmdb environment parameter. If False, when - concurrent access occurs, do not lock the database. Default: False. - readahead (bool, optional): Lmdb environment parameter. If False, - disable the OS filesystem readahead mechanism, which may improve - random read performance when a database is larger than RAM. - Default: False. - - Attributes: - db_path (str): Lmdb database path. - """ - - def __init__(self, - db_path, - readonly=True, - lock=False, - readahead=False, - **kwargs): - try: - import lmdb # NOQA - except ImportError: - raise ImportError('Please install lmdb to enable LmdbBackend.') - - self.db_path = str(db_path) - self.readonly = readonly - self.lock = lock - self.readahead = readahead - self.kwargs = kwargs - self._client = None - - def get(self, filepath): - """Get values according to the filepath. - - Args: - filepath (str | obj:`Path`): Here, filepath is the lmdb key. - """ - if self._client is None: - self._client = self._get_client() - - with self._client.begin(write=False) as txn: - value_buf = txn.get(str(filepath).encode('utf-8')) - return value_buf - - def get_text(self, filepath, encoding=None): - raise NotImplementedError - - def _get_client(self): - import lmdb - - return lmdb.open( - self.db_path, - readonly=self.readonly, - lock=self.lock, - readahead=self.readahead, - **self.kwargs) - - def __del__(self): - self._client.close() - - -class HardDiskBackend(BaseStorageBackend): - """Raw hard disks storage backend.""" - - _allow_symlink = True - - def get(self, filepath: Union[str, Path]) -> bytes: - """Read data from a given ``filepath`` with 'rb' mode. - - Args: - filepath (str or Path): Path to read data. - - Returns: - bytes: Expected bytes object. - """ - with open(filepath, 'rb') as f: - value_buf = f.read() - return value_buf - - def get_text(self, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> str: - """Read data from a given ``filepath`` with 'r' mode. - - Args: - filepath (str or Path): Path to read data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - - Returns: - str: Expected text reading from ``filepath``. - """ - with open(filepath, encoding=encoding) as f: - value_buf = f.read() - return value_buf - - def put(self, obj: bytes, filepath: Union[str, Path]) -> None: - """Write data to a given ``filepath`` with 'wb' mode. - - Note: - ``put`` will create a directory if the directory of ``filepath`` - does not exist. - - Args: - obj (bytes): Data to be written. - filepath (str or Path): Path to write data. - """ - mmcv.mkdir_or_exist(osp.dirname(filepath)) - with open(filepath, 'wb') as f: - f.write(obj) - - def put_text(self, - obj: str, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> None: - """Write data to a given ``filepath`` with 'w' mode. - - Note: - ``put_text`` will create a directory if the directory of - ``filepath`` does not exist. - - Args: - obj (str): Data to be written. - filepath (str or Path): Path to write data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - """ - mmcv.mkdir_or_exist(osp.dirname(filepath)) - with open(filepath, 'w', encoding=encoding) as f: - f.write(obj) - - def remove(self, filepath: Union[str, Path]) -> None: - """Remove a file. - - Args: - filepath (str or Path): Path to be removed. - """ - os.remove(filepath) - - def exists(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path exists. - - Args: - filepath (str or Path): Path to be checked whether exists. - - Returns: - bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. - """ - return osp.exists(filepath) - - def isdir(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a directory. - - Args: - filepath (str or Path): Path to be checked whether it is a - directory. - - Returns: - bool: Return ``True`` if ``filepath`` points to a directory, - ``False`` otherwise. - """ - return osp.isdir(filepath) - - def isfile(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a file. - - Args: - filepath (str or Path): Path to be checked whether it is a file. - - Returns: - bool: Return ``True`` if ``filepath`` points to a file, ``False`` - otherwise. - """ - return osp.isfile(filepath) - - def join_path(self, filepath: Union[str, Path], - *filepaths: Union[str, Path]) -> str: - """Concatenate all file paths. - - Join one or more filepath components intelligently. The return value - is the concatenation of filepath and any members of *filepaths. - - Args: - filepath (str or Path): Path to be concatenated. - - Returns: - str: The result of concatenation. - """ - return osp.join(filepath, *filepaths) - - @contextmanager - def get_local_path( - self, - filepath: Union[str, - Path]) -> Generator[Union[str, Path], None, None]: - """Only for unified API and do nothing.""" - yield filepath - - def list_dir_or_file(self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False) -> Iterator[str]: - """Scan a directory to find the interested directories or files in - arbitrary order. - - Note: - :meth:`list_dir_or_file` returns the path relative to ``dir_path``. - - Args: - dir_path (str | Path): Path of the directory. - list_dir (bool): List the directories. Default: True. - list_file (bool): List the path of files. Default: True. - suffix (str or tuple[str], optional): File suffix - that we are interested in. Default: None. - recursive (bool): If set to True, recursively scan the - directory. Default: False. - - Yields: - Iterable[str]: A relative path to ``dir_path``. - """ - if list_dir and suffix is not None: - raise TypeError('`suffix` should be None when `list_dir` is True') - - if (suffix is not None) and not isinstance(suffix, (str, tuple)): - raise TypeError('`suffix` must be a string or tuple of strings') - - root = dir_path - - def _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive): - for entry in os.scandir(dir_path): - if not entry.name.startswith('.') and entry.is_file(): - rel_path = osp.relpath(entry.path, root) - if (suffix is None - or rel_path.endswith(suffix)) and list_file: - yield rel_path - elif osp.isdir(entry.path): - if list_dir: - rel_dir = osp.relpath(entry.path, root) - yield rel_dir - if recursive: - yield from _list_dir_or_file(entry.path, list_dir, - list_file, suffix, - recursive) - - return _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive) - - -class HTTPBackend(BaseStorageBackend): - """HTTP and HTTPS storage bachend.""" - - def get(self, filepath): - value_buf = urlopen(filepath).read() - return value_buf - - def get_text(self, filepath, encoding='utf-8'): - value_buf = urlopen(filepath).read() - return value_buf.decode(encoding) - - @contextmanager - def get_local_path( - self, filepath: str) -> Generator[Union[str, Path], None, None]: - """Download a file from ``filepath``. - - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It - can be called with ``with`` statement, and when exists from the - ``with`` statement, the temporary path will be released. - - Args: - filepath (str): Download a file from ``filepath``. - - Examples: - >>> client = HTTPBackend() - >>> # After existing from the ``with`` clause, - >>> # the path will be removed - >>> with client.get_local_path('http://path/of/your/file') as path: - ... # do something here - """ - try: - f = tempfile.NamedTemporaryFile(delete=False) - f.write(self.get(filepath)) - f.close() - yield f.name - finally: - os.remove(f.name) - - -class FileClient: - """A general file client to access files in different backends. - - The client loads a file or text in a specified backend from its path - and returns it as a binary or text file. There are two ways to choose a - backend, the name of backend and the prefix of path. Although both of them - can be used to choose a storage backend, ``backend`` has a higher priority - that is if they are all set, the storage backend will be chosen by the - backend argument. If they are all `None`, the disk backend will be chosen. - Note that It can also register other backend accessor with a given name, - prefixes, and backend class. In addition, We use the singleton pattern to - avoid repeated object creation. If the arguments are the same, the same - object will be returned. - - Args: - backend (str, optional): The storage backend type. Options are "disk", - "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. - prefix (str, optional): The prefix of the registered storage backend. - Options are "s3", "http", "https". Default: None. - - Examples: - >>> # only set backend - >>> file_client = FileClient(backend='petrel') - >>> # only set prefix - >>> file_client = FileClient(prefix='s3') - >>> # set both backend and prefix but use backend to choose client - >>> file_client = FileClient(backend='petrel', prefix='s3') - >>> # if the arguments are the same, the same object is returned - >>> file_client1 = FileClient(backend='petrel') - >>> file_client1 is file_client - True - - Attributes: - client (:obj:`BaseStorageBackend`): The backend object. - """ - - _backends = { - 'disk': HardDiskBackend, - 'ceph': CephBackend, - 'memcached': MemcachedBackend, - 'lmdb': LmdbBackend, - 'petrel': PetrelBackend, - 'http': HTTPBackend, - } - - _prefix_to_backends = { - 's3': PetrelBackend, - 'http': HTTPBackend, - 'https': HTTPBackend, - } - - _instances: dict = {} - - client: Any - - def __new__(cls, backend=None, prefix=None, **kwargs): - if backend is None and prefix is None: - backend = 'disk' - if backend is not None and backend not in cls._backends: - raise ValueError( - f'Backend {backend} is not supported. Currently supported ones' - f' are {list(cls._backends.keys())}') - if prefix is not None and prefix not in cls._prefix_to_backends: - raise ValueError( - f'prefix {prefix} is not supported. Currently supported ones ' - f'are {list(cls._prefix_to_backends.keys())}') - - # concatenate the arguments to a unique key for determining whether - # objects with the same arguments were created - arg_key = f'{backend}:{prefix}' - for key, value in kwargs.items(): - arg_key += f':{key}:{value}' - - if arg_key in cls._instances: - _instance = cls._instances[arg_key] - else: - # create a new object and put it to _instance - _instance = super().__new__(cls) - if backend is not None: - _instance.client = cls._backends[backend](**kwargs) - else: - _instance.client = cls._prefix_to_backends[prefix](**kwargs) - - cls._instances[arg_key] = _instance - - return _instance - - @property - def name(self): - return self.client.name - - @property - def allow_symlink(self): - return self.client.allow_symlink - - @staticmethod - def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: - """Parse the prefix of a uri. - - Args: - uri (str | Path): Uri to be parsed that contains the file prefix. - - Examples: - >>> FileClient.parse_uri_prefix('s3://path/of/your/file') - 's3' - - Returns: - str | None: Return the prefix of uri if the uri contains '://' else - ``None``. - """ - assert is_filepath(uri) - uri = str(uri) - if '://' not in uri: - return None - else: - prefix, _ = uri.split('://') - # In the case of PetrelBackend, the prefix may contains the cluster - # name like clusterName:s3 - if ':' in prefix: - _, prefix = prefix.split(':') - return prefix - - @classmethod - def infer_client(cls, - file_client_args: Optional[dict] = None, - uri: Optional[Union[str, Path]] = None) -> 'FileClient': - """Infer a suitable file client based on the URI and arguments. - - Args: - file_client_args (dict, optional): Arguments to instantiate a - FileClient. Default: None. - uri (str | Path, optional): Uri to be parsed that contains the file - prefix. Default: None. - - Examples: - >>> uri = 's3://path/of/your/file' - >>> file_client = FileClient.infer_client(uri=uri) - >>> file_client_args = {'backend': 'petrel'} - >>> file_client = FileClient.infer_client(file_client_args) - - Returns: - FileClient: Instantiated FileClient object. - """ - assert file_client_args is not None or uri is not None - if file_client_args is None: - file_prefix = cls.parse_uri_prefix(uri) # type: ignore - return cls(prefix=file_prefix) - else: - return cls(**file_client_args) - - @classmethod - def _register_backend(cls, name, backend, force=False, prefixes=None): - if not isinstance(name, str): - raise TypeError('the backend name should be a string, ' - f'but got {type(name)}') - if not inspect.isclass(backend): - raise TypeError( - f'backend should be a class but got {type(backend)}') - if not issubclass(backend, BaseStorageBackend): - raise TypeError( - f'backend {backend} is not a subclass of BaseStorageBackend') - if not force and name in cls._backends: - raise KeyError( - f'{name} is already registered as a storage backend, ' - 'add "force=True" if you want to override it') - - if name in cls._backends and force: - for arg_key, instance in list(cls._instances.items()): - if isinstance(instance.client, cls._backends[name]): - cls._instances.pop(arg_key) - cls._backends[name] = backend - - if prefixes is not None: - if isinstance(prefixes, str): - prefixes = [prefixes] - else: - assert isinstance(prefixes, (list, tuple)) - for prefix in prefixes: - if prefix not in cls._prefix_to_backends: - cls._prefix_to_backends[prefix] = backend - elif (prefix in cls._prefix_to_backends) and force: - overridden_backend = cls._prefix_to_backends[prefix] - if isinstance(overridden_backend, list): - overridden_backend = tuple(overridden_backend) - for arg_key, instance in list(cls._instances.items()): - if isinstance(instance.client, overridden_backend): - cls._instances.pop(arg_key) - cls._prefix_to_backends[prefix] = backend - else: - raise KeyError( - f'{prefix} is already registered as a storage backend,' - ' add "force=True" if you want to override it') - - @classmethod - def register_backend(cls, name, backend=None, force=False, prefixes=None): - """Register a backend to FileClient. - - This method can be used as a normal class method or a decorator. - - .. code-block:: python - - class NewBackend(BaseStorageBackend): - - def get(self, filepath): - return filepath - - def get_text(self, filepath): - return filepath - - FileClient.register_backend('new', NewBackend) - - or - - .. code-block:: python - - @FileClient.register_backend('new') - class NewBackend(BaseStorageBackend): - - def get(self, filepath): - return filepath - - def get_text(self, filepath): - return filepath - - Args: - name (str): The name of the registered backend. - backend (class, optional): The backend class to be registered, - which must be a subclass of :class:`BaseStorageBackend`. - When this method is used as a decorator, backend is None. - Defaults to None. - force (bool, optional): Whether to override the backend if the name - has already been registered. Defaults to False. - prefixes (str or list[str] or tuple[str], optional): The prefixes - of the registered storage backend. Default: None. - `New in version 1.3.15.` - """ - if backend is not None: - cls._register_backend( - name, backend, force=force, prefixes=prefixes) - return - - def _register(backend_cls): - cls._register_backend( - name, backend_cls, force=force, prefixes=prefixes) - return backend_cls - - return _register - - def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: - """Read data from a given ``filepath`` with 'rb' mode. - - Note: - There are two types of return values for ``get``, one is ``bytes`` - and the other is ``memoryview``. The advantage of using memoryview - is that you can avoid copying, and if you want to convert it to - ``bytes``, you can use ``.tobytes()``. - - Args: - filepath (str or Path): Path to read data. - - Returns: - bytes | memoryview: Expected bytes object or a memory view of the - bytes object. - """ - return self.client.get(filepath) - - def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: - """Read data from a given ``filepath`` with 'r' mode. - - Args: - filepath (str or Path): Path to read data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - - Returns: - str: Expected text reading from ``filepath``. - """ - return self.client.get_text(filepath, encoding) - - def put(self, obj: bytes, filepath: Union[str, Path]) -> None: - """Write data to a given ``filepath`` with 'wb' mode. - - Note: - ``put`` should create a directory if the directory of ``filepath`` - does not exist. - - Args: - obj (bytes): Data to be written. - filepath (str or Path): Path to write data. - """ - self.client.put(obj, filepath) - - def put_text(self, obj: str, filepath: Union[str, Path]) -> None: - """Write data to a given ``filepath`` with 'w' mode. - - Note: - ``put_text`` should create a directory if the directory of - ``filepath`` does not exist. - - Args: - obj (str): Data to be written. - filepath (str or Path): Path to write data. - encoding (str, optional): The encoding format used to open the - `filepath`. Default: 'utf-8'. - """ - self.client.put_text(obj, filepath) - - def remove(self, filepath: Union[str, Path]) -> None: - """Remove a file. - - Args: - filepath (str, Path): Path to be removed. - """ - self.client.remove(filepath) - - def exists(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path exists. - - Args: - filepath (str or Path): Path to be checked whether exists. - - Returns: - bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. - """ - return self.client.exists(filepath) - - def isdir(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a directory. - - Args: - filepath (str or Path): Path to be checked whether it is a - directory. - - Returns: - bool: Return ``True`` if ``filepath`` points to a directory, - ``False`` otherwise. - """ - return self.client.isdir(filepath) - - def isfile(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a file. - - Args: - filepath (str or Path): Path to be checked whether it is a file. - - Returns: - bool: Return ``True`` if ``filepath`` points to a file, ``False`` - otherwise. - """ - return self.client.isfile(filepath) - - def join_path(self, filepath: Union[str, Path], - *filepaths: Union[str, Path]) -> str: - """Concatenate all file paths. - - Join one or more filepath components intelligently. The return value - is the concatenation of filepath and any members of *filepaths. - - Args: - filepath (str or Path): Path to be concatenated. - - Returns: - str: The result of concatenation. - """ - return self.client.join_path(filepath, *filepaths) - - @contextmanager - def get_local_path( - self, - filepath: Union[str, - Path]) -> Generator[Union[str, Path], None, None]: - """Download data from ``filepath`` and write the data to local path. - - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It - can be called with ``with`` statement, and when exists from the - ``with`` statement, the temporary path will be released. - - Note: - If the ``filepath`` is a local path, just return itself. - - .. warning:: - ``get_local_path`` is an experimental interface that may change in - the future. - - Args: - filepath (str or Path): Path to be read data. - - Examples: - >>> file_client = FileClient(prefix='s3') - >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: - ... # do something here - - Yields: - Iterable[str]: Only yield one path. - """ - with self.client.get_local_path(str(filepath)) as local_path: - yield local_path - - def list_dir_or_file(self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False) -> Iterator[str]: - """Scan a directory to find the interested directories or files in - arbitrary order. - - Note: - :meth:`list_dir_or_file` returns the path relative to ``dir_path``. - - Args: - dir_path (str | Path): Path of the directory. - list_dir (bool): List the directories. Default: True. - list_file (bool): List the path of files. Default: True. - suffix (str or tuple[str], optional): File suffix - that we are interested in. Default: None. - recursive (bool): If set to True, recursively scan the - directory. Default: False. - - Yields: - Iterable[str]: A relative path to ``dir_path``. - """ - yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, - suffix, recursive) diff --git a/mmcv/fileio/handlers/__init__.py b/mmcv/fileio/handlers/__init__.py deleted file mode 100644 index aa24d91972..0000000000 --- a/mmcv/fileio/handlers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .base import BaseFileHandler -from .json_handler import JsonHandler -from .pickle_handler import PickleHandler -from .yaml_handler import YamlHandler - -__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/mmcv/fileio/handlers/base.py b/mmcv/fileio/handlers/base.py deleted file mode 100644 index 0c9cc15b67..0000000000 --- a/mmcv/fileio/handlers/base.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from abc import ABCMeta, abstractmethod - - -class BaseFileHandler(metaclass=ABCMeta): - # `str_like` is a flag to indicate whether the type of file object is - # str-like object or bytes-like object. Pickle only processes bytes-like - # objects but json only processes str-like object. If it is str-like - # object, `StringIO` will be used to process the buffer. - str_like = True - - @abstractmethod - def load_from_fileobj(self, file, **kwargs): - pass - - @abstractmethod - def dump_to_fileobj(self, obj, file, **kwargs): - pass - - @abstractmethod - def dump_to_str(self, obj, **kwargs): - pass - - def load_from_path(self, filepath: str, mode: str = 'r', **kwargs): - with open(filepath, mode) as f: - return self.load_from_fileobj(f, **kwargs) - - def dump_to_path(self, obj, filepath: str, mode: str = 'w', **kwargs): - with open(filepath, mode) as f: - self.dump_to_fileobj(obj, f, **kwargs) diff --git a/mmcv/fileio/handlers/json_handler.py b/mmcv/fileio/handlers/json_handler.py deleted file mode 100644 index 18d4f15f74..0000000000 --- a/mmcv/fileio/handlers/json_handler.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import json - -import numpy as np - -from .base import BaseFileHandler - - -def set_default(obj): - """Set default json values for non-serializable values. - - It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. - It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, - etc.) into plain numbers of plain python built-in types. - """ - if isinstance(obj, (set, range)): - return list(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - elif isinstance(obj, np.generic): - return obj.item() - raise TypeError(f'{type(obj)} is unsupported for json dump') - - -class JsonHandler(BaseFileHandler): - - def load_from_fileobj(self, file): - return json.load(file) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('default', set_default) - json.dump(obj, file, **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('default', set_default) - return json.dumps(obj, **kwargs) diff --git a/mmcv/fileio/handlers/pickle_handler.py b/mmcv/fileio/handlers/pickle_handler.py deleted file mode 100644 index 073856fd25..0000000000 --- a/mmcv/fileio/handlers/pickle_handler.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import pickle - -from .base import BaseFileHandler - - -class PickleHandler(BaseFileHandler): - - str_like = False - - def load_from_fileobj(self, file, **kwargs): - return pickle.load(file, **kwargs) - - def load_from_path(self, filepath, **kwargs): - return super().load_from_path(filepath, mode='rb', **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('protocol', 2) - return pickle.dumps(obj, **kwargs) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('protocol', 2) - pickle.dump(obj, file, **kwargs) - - def dump_to_path(self, obj, filepath, **kwargs): - super().dump_to_path(obj, filepath, mode='wb', **kwargs) diff --git a/mmcv/fileio/handlers/yaml_handler.py b/mmcv/fileio/handlers/yaml_handler.py deleted file mode 100644 index 1c1b077943..0000000000 --- a/mmcv/fileio/handlers/yaml_handler.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import yaml - -try: - from yaml import CDumper as Dumper - from yaml import CLoader as Loader -except ImportError: - from yaml import Loader, Dumper # type: ignore - -from .base import BaseFileHandler # isort:skip - - -class YamlHandler(BaseFileHandler): - - def load_from_fileobj(self, file, **kwargs): - kwargs.setdefault('Loader', Loader) - return yaml.load(file, **kwargs) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('Dumper', Dumper) - yaml.dump(obj, file, **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('Dumper', Dumper) - return yaml.dump(obj, **kwargs) diff --git a/mmcv/fileio/io.py b/mmcv/fileio/io.py deleted file mode 100644 index 91192103cf..0000000000 --- a/mmcv/fileio/io.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from io import BytesIO, StringIO -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, TextIO, Union - -from ..utils import is_list_of -from .file_client import FileClient -from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler - -FileLikeObject = Union[TextIO, StringIO, BytesIO] - -file_handlers = { - 'json': JsonHandler(), - 'yaml': YamlHandler(), - 'yml': YamlHandler(), - 'pickle': PickleHandler(), - 'pkl': PickleHandler() -} - - -def load(file: Union[str, Path, FileLikeObject], - file_format: Optional[str] = None, - file_client_args: Optional[Dict] = None, - **kwargs): - """Load data from json/yaml/pickle files. - - This method provides a unified api for loading data from serialized files. - - Note: - In v1.3.16 and later, ``load`` supports loading data from serialized - files those can be storaged in different backends. - - Args: - file (str or :obj:`Path` or file-like object): Filename or a file-like - object. - file_format (str, optional): If not specified, the file format will be - inferred from the file extension, otherwise use the specified one. - Currently supported formats include "json", "yaml/yml" and - "pickle/pkl". - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> load('/path/of/your/file') # file is storaged in disk - >>> load('https://path/of/your/file') # file is storaged in Internet - >>> load('s3://path/of/your/file') # file is storaged in petrel - - Returns: - The content from the file. - """ - if isinstance(file, Path): - file = str(file) - if file_format is None and isinstance(file, str): - file_format = file.split('.')[-1] - if file_format not in file_handlers: - raise TypeError(f'Unsupported format: {file_format}') - - handler = file_handlers[file_format] - f: FileLikeObject - if isinstance(file, str): - file_client = FileClient.infer_client(file_client_args, file) - if handler.str_like: - with StringIO(file_client.get_text(file)) as f: - obj = handler.load_from_fileobj(f, **kwargs) - else: - with BytesIO(file_client.get(file)) as f: - obj = handler.load_from_fileobj(f, **kwargs) - elif hasattr(file, 'read'): - obj = handler.load_from_fileobj(file, **kwargs) - else: - raise TypeError('"file" must be a filepath str or a file-object') - return obj - - -def dump(obj: Any, - file: Optional[Union[str, Path, FileLikeObject]] = None, - file_format: Optional[str] = None, - file_client_args: Optional[Dict] = None, - **kwargs): - """Dump data to json/yaml/pickle strings or files. - - This method provides a unified api for dumping data as strings or to files, - and also supports custom arguments for each file format. - - Note: - In v1.3.16 and later, ``dump`` supports dumping data as strings or to - files which is saved to different backends. - - Args: - obj (any): The python object to be dumped. - file (str or :obj:`Path` or file-like object, optional): If not - specified, then the object is dumped to a str, otherwise to a file - specified by the filename or file-like object. - file_format (str, optional): Same as :func:`load`. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> dump('hello world', '/path/of/your/file') # disk - >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel - - Returns: - bool: True for success, False otherwise. - """ - if isinstance(file, Path): - file = str(file) - if file_format is None: - if isinstance(file, str): - file_format = file.split('.')[-1] - elif file is None: - raise ValueError( - 'file_format must be specified since file is None') - if file_format not in file_handlers: - raise TypeError(f'Unsupported format: {file_format}') - f: FileLikeObject - handler = file_handlers[file_format] - if file is None: - return handler.dump_to_str(obj, **kwargs) - elif isinstance(file, str): - file_client = FileClient.infer_client(file_client_args, file) - if handler.str_like: - with StringIO() as f: - handler.dump_to_fileobj(obj, f, **kwargs) - file_client.put_text(f.getvalue(), file) - else: - with BytesIO() as f: - handler.dump_to_fileobj(obj, f, **kwargs) - file_client.put(f.getvalue(), file) - elif hasattr(file, 'write'): - handler.dump_to_fileobj(obj, file, **kwargs) - else: - raise TypeError('"file" must be a filename str or a file-object') - - -def _register_handler(handler: BaseFileHandler, - file_formats: Union[str, List[str]]) -> None: - """Register a handler for some file extensions. - - Args: - handler (:obj:`BaseFileHandler`): Handler to be registered. - file_formats (str or list[str]): File formats to be handled by this - handler. - """ - if not isinstance(handler, BaseFileHandler): - raise TypeError( - f'handler must be a child of BaseFileHandler, not {type(handler)}') - if isinstance(file_formats, str): - file_formats = [file_formats] - if not is_list_of(file_formats, str): - raise TypeError('file_formats must be a str or a list of str') - for ext in file_formats: - file_handlers[ext] = handler - - -def register_handler(file_formats: Union[str, list], **kwargs) -> Callable: - - def wrap(cls): - _register_handler(cls(**kwargs), file_formats) - return cls - - return wrap diff --git a/mmcv/fileio/parse.py b/mmcv/fileio/parse.py deleted file mode 100644 index f28e591193..0000000000 --- a/mmcv/fileio/parse.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. - -from io import StringIO -from pathlib import Path -from typing import Dict, List, Optional, Union - -from .file_client import FileClient - - -def list_from_file(filename: Union[str, Path], - prefix: str = '', - offset: int = 0, - max_num: int = 0, - encoding: str = 'utf-8', - file_client_args: Optional[Dict] = None) -> List: - """Load a text file and parse the content as a list of strings. - - Note: - In v1.3.16 and later, ``list_from_file`` supports loading a text file - which can be storaged in different backends and parsing the content as - a list for strings. - - Args: - filename (str): Filename. - prefix (str): The prefix to be inserted to the beginning of each item. - offset (int): The offset of lines. - max_num (int): The maximum number of lines to be read, - zeros and negatives mean no limitation. - encoding (str): Encoding used to open the file. Default utf-8. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> list_from_file('/path/of/your/file') # disk - ['hello', 'world'] - >>> list_from_file('s3://path/of/your/file') # ceph or petrel - ['hello', 'world'] - - Returns: - list[str]: A list of strings. - """ - cnt = 0 - item_list = [] - file_client = FileClient.infer_client(file_client_args, filename) - with StringIO(file_client.get_text(filename, encoding)) as f: - for _ in range(offset): - f.readline() - for line in f: - if 0 < max_num <= cnt: - break - item_list.append(prefix + line.rstrip('\n\r')) - cnt += 1 - return item_list - - -def dict_from_file(filename: Union[str, Path], - key_type: type = str, - encoding: str = 'utf-8', - file_client_args: Optional[Dict] = None) -> Dict: - """Load a text file and parse the content as a dict. - - Each line of the text file will be two or more columns split by - whitespaces or tabs. The first column will be parsed as dict keys, and - the following columns will be parsed as dict values. - - Note: - In v1.3.16 and later, ``dict_from_file`` supports loading a text file - which can be storaged in different backends and parsing the content as - a dict. - - Args: - filename(str): Filename. - key_type(type): Type of the dict keys. str is user by default and - type conversion will be performed if specified. - encoding (str): Encoding used to open the file. Default utf-8. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> dict_from_file('/path/of/your/file') # disk - {'key1': 'value1', 'key2': 'value2'} - >>> dict_from_file('s3://path/of/your/file') # ceph or petrel - {'key1': 'value1', 'key2': 'value2'} - - Returns: - dict: The parsed contents. - """ - mapping = {} - file_client = FileClient.infer_client(file_client_args, filename) - with StringIO(file_client.get_text(filename, encoding)) as f: - for line in f: - items = line.rstrip('\n').split() - assert len(items) >= 2 - key = key_type(items[0]) - val = items[1:] if len(items) > 2 else items[1] - mapping[key] = val - return mapping diff --git a/mmcv/image/io.py b/mmcv/image/io.py index ae81b561a8..b8f3a277c6 100644 --- a/mmcv/image/io.py +++ b/mmcv/image/io.py @@ -8,8 +8,8 @@ import numpy as np from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, IMREAD_UNCHANGED) +from mmengine.fileio import FileClient -from mmcv.fileio import FileClient from mmcv.utils import is_filepath, is_str try: @@ -167,7 +167,7 @@ def imread(img_or_path, If backend is None, the global imread_backend specified by ``mmcv.use_backend()`` will be used. Default: None. file_client_args (dict | None): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. + FileClient. See :class:`mmengine.fileio.FileClient` for details. Default: None. Returns: @@ -283,7 +283,7 @@ def imwrite(img, auto_mkdir (bool): If the parent folder of `file_path` does not exist, whether to create it automatically. It will be deprecated. file_client_args (dict | None): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. + FileClient. See :class:`mmengine.fileio.FileClient` for details. Default: None. Returns: diff --git a/mmcv/runner/checkpoint.py b/mmcv/runner/checkpoint.py index 0811856642..1e1d44dad8 100644 --- a/mmcv/runner/checkpoint.py +++ b/mmcv/runner/checkpoint.py @@ -12,14 +12,15 @@ from tempfile import TemporaryDirectory from typing import Callable, Dict, List, Optional, Tuple, Union +import mmengine import torch import torch.nn as nn import torchvision +from mmengine.fileio import FileClient +from mmengine.fileio import load as load_file from torch.optim import Optimizer import mmcv -from ..fileio import FileClient -from ..fileio import load as load_file from ..parallel import is_module_wrapper from ..utils import digit_version, load_url, mkdir_or_exist from .dist_utils import get_dist_info @@ -136,7 +137,7 @@ def get_torchvision_models(): # 'resnet50' or 'ResNet50_Weights.IMAGENET1K_V1' in the config. json_path = osp.join(mmcv.__path__[0], 'model_zoo/torchvision_0.12.json') - model_urls = mmcv.load(json_path) + model_urls = mmengine.load(json_path) for cls_name, cls in torchvision.models.__dict__.items(): # The name of torchvision model weights classes ends with # `_Weights` such as `ResNet18_Weights`. However, some model weight @@ -409,8 +410,8 @@ def load_from_ceph(filename: str, 'petrel'. Default: 'petrel'. .. warning:: - :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, - please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + :class:`mmengine.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmengine.fileio.file_client.PetrelBackend` instead. Returns: dict or OrderedDict: The loaded checkpoint. @@ -751,7 +752,7 @@ def save_checkpoint(model: torch.nn.Module, optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. meta (dict, optional): Metadata to be saved in checkpoint. file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. + FileClient. See :class:`mmengine.fileio.FileClient` for details. Default: None. `New in version 1.3.16.` """ diff --git a/mmcv/runner/hooks/checkpoint.py b/mmcv/runner/hooks/checkpoint.py index 5cc4f356d0..8a74c7229f 100644 --- a/mmcv/runner/hooks/checkpoint.py +++ b/mmcv/runner/hooks/checkpoint.py @@ -3,7 +3,8 @@ import warnings from typing import Optional -from mmcv.fileio import FileClient +from mmengine.fileio import FileClient + from ..dist_utils import allreduce_params, master_only from .hook import HOOKS, Hook @@ -35,7 +36,7 @@ class CheckpointHook(Hook): sync_buffer (bool, optional): Whether to synchronize buffers in different gpus. Default: False. file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. + FileClient. See :class:`mmengine.fileio.FileClient` for details. Default: None. `New in version 1.3.16.` diff --git a/mmcv/runner/hooks/evaluation.py b/mmcv/runner/hooks/evaluation.py index 181e03409f..3437cd40d7 100644 --- a/mmcv/runner/hooks/evaluation.py +++ b/mmcv/runner/hooks/evaluation.py @@ -5,10 +5,10 @@ from typing import Callable, List, Optional import torch.distributed as dist +from mmengine.fileio import FileClient from torch.nn.modules.batchnorm import _BatchNorm from torch.utils.data import DataLoader -from mmcv.fileio import FileClient from mmcv.utils import is_seq_of from .hook import Hook from .logger import LoggerHook @@ -61,7 +61,7 @@ class EvalHook(Hook): level directory of `runner.work_dir`. `New in version 1.3.16.` file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. Default: None. + See :class:`mmengine.fileio.FileClient` for details. Default: None. `New in version 1.3.16.` **eval_kwargs: Evaluation arguments fed into the evaluate function of the dataset. @@ -437,7 +437,7 @@ class DistEvalHook(EvalHook): the `out_dir` will be the concatenation of `out_dir` and the last level directory of `runner.work_dir`. file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. Default: None. + See :class:`mmengine.fileio.FileClient` for details. Default: None. **eval_kwargs: Evaluation arguments fed into the evaluate function of the dataset. """ diff --git a/mmcv/runner/hooks/logger/pavi.py b/mmcv/runner/hooks/logger/pavi.py index 2d2e12cb8d..3263b3cfa2 100644 --- a/mmcv/runner/hooks/logger/pavi.py +++ b/mmcv/runner/hooks/logger/pavi.py @@ -4,6 +4,7 @@ import os.path as osp from typing import Dict, Optional +import mmengine import torch import yaml @@ -96,9 +97,9 @@ def before_run(self, runner) -> None: config_dict = config_dict.copy() config_dict.setdefault('max_iter', runner.max_iters) # non-serializable values are first converted in - # mmcv.dump to json + # mmengine.dump to json config_dict = json.loads( - mmcv.dump(config_dict, file_format='json')) + mmengine.dump(config_dict, file_format='json')) session_text = yaml.dump(config_dict) self.init_kwargs.setdefault('session_text', session_text) self.writer = SummaryWriter(**self.init_kwargs) diff --git a/mmcv/runner/hooks/logger/text.py b/mmcv/runner/hooks/logger/text.py index fbfa208a62..33e32ffeab 100644 --- a/mmcv/runner/hooks/logger/text.py +++ b/mmcv/runner/hooks/logger/text.py @@ -5,11 +5,11 @@ from collections import OrderedDict from typing import Dict, Optional, Union +import mmengine import torch import torch.distributed as dist +from mmengine.fileio.file_client import FileClient -import mmcv -from mmcv.fileio.file_client import FileClient from mmcv.utils import is_tuple_of, scandir from ..hook import HOOKS from .base import LoggerHook @@ -48,7 +48,7 @@ class TextLoggerHook(LoggerHook): removed. Default: True. `New in version 1.3.16.` file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. + FileClient. See :class:`mmengine.fileio.FileClient` for details. Default: None. `New in version 1.3.16.` """ @@ -190,7 +190,7 @@ def _dump_log(self, log_dict: Dict, runner) -> None: # only append log at last line if runner.rank == 0: with open(self.json_log_path, 'a+') as f: - mmcv.dump(json_log, f, file_format='json') + mmengine.dump(json_log, f, file_format='json') f.write('\n') def _round_float(self, items): diff --git a/mmcv/transforms/loading.py b/mmcv/transforms/loading.py index f9ace0b1e9..42cd567f02 100644 --- a/mmcv/transforms/loading.py +++ b/mmcv/transforms/loading.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import Optional +import mmengine import numpy as np import mmcv @@ -33,7 +34,7 @@ class LoadImageFromFile(BaseTransform): See :func:``mmcv.imfrombytes`` for details. Defaults to 'cv2'. file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. + See :class:`mmengine.fileio.FileClient` for details. Defaults to ``dict(backend='disk')``. ignore_empty (bool): Whether to allow loading empty image or file path not existent. Defaults to False. @@ -50,7 +51,7 @@ def __init__(self, self.color_type = color_type self.imdecode_backend = imdecode_backend self.file_client_args = file_client_args.copy() - self.file_client = mmcv.FileClient(**self.file_client_args) + self.file_client = mmengine.FileClient(**self.file_client_args) def transform(self, results: dict) -> Optional[dict]: """Functions to load image. @@ -168,7 +169,7 @@ class LoadAnnotations(BaseTransform): See :fun:``mmcv.imfrombytes`` for details. Defaults to 'cv2'. file_client_args (dict): Arguments to instantiate a FileClient. - See :class:``mmcv.fileio.FileClient`` for details. + See :class:``mmengine.fileio.FileClient`` for details. Defaults to ``dict(backend='disk')``. """ @@ -188,7 +189,7 @@ def __init__( self.with_keypoints = with_keypoints self.imdecode_backend = imdecode_backend self.file_client_args = file_client_args.copy() - self.file_client = mmcv.FileClient(**self.file_client_args) + self.file_client = mmengine.FileClient(**self.file_client_args) def _load_bboxes(self, results: dict) -> None: """Private function to load bounding box annotations. diff --git a/mmcv/utils/config.py b/mmcv/utils/config.py index a76bc48724..f5e9f1d979 100644 --- a/mmcv/utils/config.py +++ b/mmcv/utils/config.py @@ -15,6 +15,7 @@ from importlib import import_module from pathlib import Path +import mmengine from addict import Dict from yapf.yapflib.yapf_api import FormatCode @@ -217,8 +218,7 @@ def _file2dict(filename, use_predefined_variables=True): # delete imported module del sys.modules[temp_module_name] elif filename.endswith(('.yml', '.yaml', '.json')): - import mmcv - cfg_dict = mmcv.load(temp_config_file.name) + cfg_dict = mmengine.load(temp_config_file.name) # close temp file temp_config_file.close() @@ -583,20 +583,19 @@ def dump(self, file=None): file (str, optional): Path of the output file where the config will be dumped. Defaults to None. """ - import mmcv cfg_dict = super().__getattribute__('_cfg_dict').to_dict() if file is None: if self.filename is None or self.filename.endswith('.py'): return self.pretty_text else: file_format = self.filename.split('.')[-1] - return mmcv.dump(cfg_dict, file_format=file_format) + return mmengine.dump(cfg_dict, file_format=file_format) elif file.endswith('.py'): with open(file, 'w', encoding='utf-8') as f: f.write(self.pretty_text) else: file_format = file.split('.')[-1] - return mmcv.dump(cfg_dict, file=file, file_format=file_format) + return mmengine.dump(cfg_dict, file=file, file_format=file_format) def merge_from_dict(self, options, allow_list_keys=True): """Merge list into cfg_dict. diff --git a/tests/test_fileclient.py b/tests/test_fileclient.py deleted file mode 100644 index 292779f36a..0000000000 --- a/tests/test_fileclient.py +++ /dev/null @@ -1,862 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import os.path as osp -import sys -import tempfile -from contextlib import contextmanager -from copy import deepcopy -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -import mmcv -from mmcv import BaseStorageBackend, FileClient -from mmcv.utils import has_method - -sys.modules['ceph'] = MagicMock() -sys.modules['petrel_client'] = MagicMock() -sys.modules['petrel_client.client'] = MagicMock() -sys.modules['mc'] = MagicMock() - - -@contextmanager -def build_temporary_directory(): - """Build a temporary directory containing many files to test - ``FileClient.list_dir_or_file``. - - . \n - | -- dir1 \n - | -- | -- text3.txt \n - | -- dir2 \n - | -- | -- dir3 \n - | -- | -- | -- text4.txt \n - | -- | -- img.jpg \n - | -- text1.txt \n - | -- text2.txt \n - """ - with tempfile.TemporaryDirectory() as tmp_dir: - text1 = Path(tmp_dir) / 'text1.txt' - text1.open('w').write('text1') - text2 = Path(tmp_dir) / 'text2.txt' - text2.open('w').write('text2') - dir1 = Path(tmp_dir) / 'dir1' - dir1.mkdir() - text3 = dir1 / 'text3.txt' - text3.open('w').write('text3') - dir2 = Path(tmp_dir) / 'dir2' - dir2.mkdir() - jpg1 = dir2 / 'img.jpg' - jpg1.open('wb').write(b'img') - dir3 = dir2 / 'dir3' - dir3.mkdir() - text4 = dir3 / 'text4.txt' - text4.open('w').write('text4') - yield tmp_dir - - -@contextmanager -def delete_and_reset_method(obj, method): - method_obj = deepcopy(getattr(type(obj), method)) - try: - delattr(type(obj), method) - yield - finally: - setattr(type(obj), method, method_obj) - - -class MockS3Client: - - def __init__(self, enable_mc=True): - self.enable_mc = enable_mc - - def Get(self, filepath): - with open(filepath, 'rb') as f: - content = f.read() - return content - - -class MockPetrelClient: - - def __init__(self, enable_mc=True, enable_multi_cluster=False): - self.enable_mc = enable_mc - self.enable_multi_cluster = enable_multi_cluster - - def Get(self, filepath): - with open(filepath, 'rb') as f: - content = f.read() - return content - - def put(self): - pass - - def delete(self): - pass - - def contains(self): - pass - - def isdir(self): - pass - - def list(self, dir_path): - for entry in os.scandir(dir_path): - if not entry.name.startswith('.') and entry.is_file(): - yield entry.name - elif osp.isdir(entry.path): - yield entry.name + '/' - - -class MockMemcachedClient: - - def __init__(self, server_list_cfg, client_cfg): - pass - - def Get(self, filepath, buffer): - with open(filepath, 'rb') as f: - buffer.content = f.read() - - -class TestFileClient: - - @classmethod - def setup_class(cls): - cls.test_data_dir = Path(__file__).parent / 'data' - cls.img_path = cls.test_data_dir / 'color.jpg' - cls.img_shape = (300, 400, 3) - cls.text_path = cls.test_data_dir / 'filelist.txt' - - def test_error(self): - with pytest.raises(ValueError): - FileClient('hadoop') - - def test_disk_backend(self): - disk_backend = FileClient('disk') - - # test `name` attribute - assert disk_backend.name == 'HardDiskBackend' - # test `allow_symlink` attribute - assert disk_backend.allow_symlink - # test `get` - # input path is Path object - img_bytes = disk_backend.get(self.img_path) - img = mmcv.imfrombytes(img_bytes) - assert self.img_path.open('rb').read() == img_bytes - assert img.shape == self.img_shape - # input path is str - img_bytes = disk_backend.get(str(self.img_path)) - img = mmcv.imfrombytes(img_bytes) - assert self.img_path.open('rb').read() == img_bytes - assert img.shape == self.img_shape - - # test `get_text` - # input path is Path object - value_buf = disk_backend.get_text(self.text_path) - assert self.text_path.open('r').read() == value_buf - # input path is str - value_buf = disk_backend.get_text(str(self.text_path)) - assert self.text_path.open('r').read() == value_buf - - with tempfile.TemporaryDirectory() as tmp_dir: - # test `put` - filepath1 = Path(tmp_dir) / 'test.jpg' - disk_backend.put(b'disk', filepath1) - assert filepath1.open('rb').read() == b'disk' - # test the `mkdir_or_exist` behavior in `put` - _filepath1 = Path(tmp_dir) / 'not_existed_dir1' / 'test.jpg' - disk_backend.put(b'disk', _filepath1) - assert _filepath1.open('rb').read() == b'disk' - - # test `put_text` - filepath2 = Path(tmp_dir) / 'test.txt' - disk_backend.put_text('disk', filepath2) - assert filepath2.open('r').read() == 'disk' - # test the `mkdir_or_exist` behavior in `put_text` - _filepath2 = Path(tmp_dir) / 'not_existed_dir2' / 'test.txt' - disk_backend.put_text('disk', _filepath2) - assert _filepath2.open('r').read() == 'disk' - - # test `isfile` - assert disk_backend.isfile(filepath2) - assert not disk_backend.isfile(Path(tmp_dir) / 'not/existed/path') - - # test `remove` - disk_backend.remove(filepath2) - - # test `exists` - assert not disk_backend.exists(filepath2) - - # test `get_local_path` - # if the backend is disk, `get_local_path` just return the input - with disk_backend.get_local_path(filepath1) as path: - assert str(filepath1) == path - assert osp.isfile(filepath1) - - # test `join_path` - disk_dir = '/path/of/your/directory' - assert disk_backend.join_path(disk_dir, 'file') == \ - osp.join(disk_dir, 'file') - assert disk_backend.join_path(disk_dir, 'dir', 'file') == \ - osp.join(disk_dir, 'dir', 'file') - - # test `list_dir_or_file` - with build_temporary_directory() as tmp_dir: - # 1. list directories and files - assert set(disk_backend.list_dir_or_file(tmp_dir)) == { - 'dir1', 'dir2', 'text1.txt', 'text2.txt' - } - # 2. list directories and files recursively - assert set(disk_backend.list_dir_or_file( - tmp_dir, recursive=True)) == { - 'dir1', - osp.join('dir1', 'text3.txt'), 'dir2', - osp.join('dir2', 'dir3'), - osp.join('dir2', 'dir3', 'text4.txt'), - osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt' - } - # 3. only list directories - assert set( - disk_backend.list_dir_or_file( - tmp_dir, list_file=False)) == {'dir1', 'dir2'} - with pytest.raises( - TypeError, - match='`suffix` should be None when `list_dir` is True'): - # Exception is raised among the `list_dir_or_file` of client, - # so we need to invode the client to trigger the exception - disk_backend.client.list_dir_or_file( - tmp_dir, list_file=False, suffix='.txt') - # 4. only list directories recursively - assert set( - disk_backend.list_dir_or_file( - tmp_dir, list_file=False, recursive=True)) == { - 'dir1', 'dir2', - osp.join('dir2', 'dir3') - } - # 5. only list files - assert set(disk_backend.list_dir_or_file( - tmp_dir, list_dir=False)) == {'text1.txt', 'text2.txt'} - # 6. only list files recursively - assert set( - disk_backend.list_dir_or_file( - tmp_dir, list_dir=False, recursive=True)) == { - osp.join('dir1', 'text3.txt'), - osp.join('dir2', 'dir3', 'text4.txt'), - osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt' - } - # 7. only list files ending with suffix - assert set( - disk_backend.list_dir_or_file( - tmp_dir, list_dir=False, - suffix='.txt')) == {'text1.txt', 'text2.txt'} - assert set( - disk_backend.list_dir_or_file( - tmp_dir, list_dir=False, - suffix=('.txt', '.jpg'))) == {'text1.txt', 'text2.txt'} - with pytest.raises( - TypeError, - match='`suffix` must be a string or tuple of strings'): - disk_backend.client.list_dir_or_file( - tmp_dir, list_dir=False, suffix=['.txt', '.jpg']) - # 8. only list files ending with suffix recursively - assert set( - disk_backend.list_dir_or_file( - tmp_dir, list_dir=False, suffix='.txt', - recursive=True)) == { - osp.join('dir1', 'text3.txt'), - osp.join('dir2', 'dir3', 'text4.txt'), 'text1.txt', - 'text2.txt' - } - # 7. only list files ending with suffix - assert set( - disk_backend.list_dir_or_file( - tmp_dir, - list_dir=False, - suffix=('.txt', '.jpg'), - recursive=True)) == { - osp.join('dir1', 'text3.txt'), - osp.join('dir2', 'dir3', 'text4.txt'), - osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt' - } - - @patch('ceph.S3Client', MockS3Client) - def test_ceph_backend(self): - ceph_backend = FileClient('ceph') - - # test `allow_symlink` attribute - assert not ceph_backend.allow_symlink - - # input path is Path object - with pytest.raises(NotImplementedError): - ceph_backend.get_text(self.text_path) - # input path is str - with pytest.raises(NotImplementedError): - ceph_backend.get_text(str(self.text_path)) - - # input path is Path object - img_bytes = ceph_backend.get(self.img_path) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - # input path is str - img_bytes = ceph_backend.get(str(self.img_path)) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - - # `path_mapping` is either None or dict - with pytest.raises(AssertionError): - FileClient('ceph', path_mapping=1) - # test `path_mapping` - ceph_path = 's3://user/data' - ceph_backend = FileClient( - 'ceph', path_mapping={str(self.test_data_dir): ceph_path}) - ceph_backend.client._client.Get = MagicMock( - return_value=ceph_backend.client._client.Get(self.img_path)) - img_bytes = ceph_backend.get(self.img_path) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - ceph_backend.client._client.Get.assert_called_with( - str(self.img_path).replace(str(self.test_data_dir), ceph_path)) - - @patch('petrel_client.client.Client', MockPetrelClient) - @pytest.mark.parametrize('backend,prefix', [('petrel', None), - (None, 's3')]) - def test_petrel_backend(self, backend, prefix): - petrel_backend = FileClient(backend=backend, prefix=prefix) - - # test `allow_symlink` attribute - assert not petrel_backend.allow_symlink - - # input path is Path object - img_bytes = petrel_backend.get(self.img_path) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - # input path is str - img_bytes = petrel_backend.get(str(self.img_path)) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - - # `path_mapping` is either None or dict - with pytest.raises(AssertionError): - FileClient('petrel', path_mapping=1) - - # test `_map_path` - petrel_dir = 's3://user/data' - petrel_backend = FileClient( - 'petrel', path_mapping={str(self.test_data_dir): petrel_dir}) - assert petrel_backend.client._map_path(str(self.img_path)) == \ - str(self.img_path).replace(str(self.test_data_dir), petrel_dir) - - petrel_path = f'{petrel_dir}/test.jpg' - petrel_backend = FileClient('petrel') - - # test `_format_path` - assert petrel_backend.client._format_path('s3://user\\data\\test.jpg')\ - == petrel_path - - # test `get` - with patch.object( - petrel_backend.client._client, 'Get', - return_value=b'petrel') as mock_get: - assert petrel_backend.get(petrel_path) == b'petrel' - mock_get.assert_called_once_with(petrel_path) - - # test `get_text` - with patch.object( - petrel_backend.client._client, 'Get', - return_value=b'petrel') as mock_get: - assert petrel_backend.get_text(petrel_path) == 'petrel' - mock_get.assert_called_once_with(petrel_path) - - # test `put` - with patch.object(petrel_backend.client._client, 'put') as mock_put: - petrel_backend.put(b'petrel', petrel_path) - mock_put.assert_called_once_with(petrel_path, b'petrel') - - # test `put_text` - with patch.object(petrel_backend.client._client, 'put') as mock_put: - petrel_backend.put_text('petrel', petrel_path) - mock_put.assert_called_once_with(petrel_path, b'petrel') - - # test `remove` - assert has_method(petrel_backend.client._client, 'delete') - # raise Exception if `delete` is not implemented - with delete_and_reset_method(petrel_backend.client._client, 'delete'): - assert not has_method(petrel_backend.client._client, 'delete') - with pytest.raises(NotImplementedError): - petrel_backend.remove(petrel_path) - - with patch.object(petrel_backend.client._client, - 'delete') as mock_delete: - petrel_backend.remove(petrel_path) - mock_delete.assert_called_once_with(petrel_path) - - # test `exists` - assert has_method(petrel_backend.client._client, 'contains') - assert has_method(petrel_backend.client._client, 'isdir') - # raise Exception if `delete` is not implemented - with delete_and_reset_method(petrel_backend.client._client, - 'contains'), delete_and_reset_method( - petrel_backend.client._client, - 'isdir'): - assert not has_method(petrel_backend.client._client, 'contains') - assert not has_method(petrel_backend.client._client, 'isdir') - with pytest.raises(NotImplementedError): - petrel_backend.exists(petrel_path) - - with patch.object( - petrel_backend.client._client, 'contains', - return_value=True) as mock_contains: - assert petrel_backend.exists(petrel_path) - mock_contains.assert_called_once_with(petrel_path) - - # test `isdir` - assert has_method(petrel_backend.client._client, 'isdir') - with delete_and_reset_method(petrel_backend.client._client, 'isdir'): - assert not has_method(petrel_backend.client._client, 'isdir') - with pytest.raises(NotImplementedError): - petrel_backend.isdir(petrel_path) - - with patch.object( - petrel_backend.client._client, 'isdir', - return_value=True) as mock_isdir: - assert petrel_backend.isdir(petrel_dir) - mock_isdir.assert_called_once_with(petrel_dir) - - # test `isfile` - assert has_method(petrel_backend.client._client, 'contains') - with delete_and_reset_method(petrel_backend.client._client, - 'contains'): - assert not has_method(petrel_backend.client._client, 'contains') - with pytest.raises(NotImplementedError): - petrel_backend.isfile(petrel_path) - - with patch.object( - petrel_backend.client._client, 'contains', - return_value=True) as mock_contains: - assert petrel_backend.isfile(petrel_path) - mock_contains.assert_called_once_with(petrel_path) - - # test `join_path` - assert petrel_backend.join_path(petrel_dir, 'file') == \ - f'{petrel_dir}/file' - assert petrel_backend.join_path(f'{petrel_dir}/', 'file') == \ - f'{petrel_dir}/file' - assert petrel_backend.join_path(petrel_dir, 'dir', 'file') == \ - f'{petrel_dir}/dir/file' - - # test `get_local_path` - with patch.object(petrel_backend.client._client, 'Get', - return_value=b'petrel') as mock_get, \ - patch.object(petrel_backend.client._client, 'contains', - return_value=True) as mock_contains: - with petrel_backend.get_local_path(petrel_path) as path: - assert Path(path).open('rb').read() == b'petrel' - # exist the with block and path will be released - assert not osp.isfile(path) - mock_get.assert_called_once_with(petrel_path) - mock_contains.assert_called_once_with(petrel_path) - - # test `list_dir_or_file` - assert has_method(petrel_backend.client._client, 'list') - with delete_and_reset_method(petrel_backend.client._client, 'list'): - assert not has_method(petrel_backend.client._client, 'list') - with pytest.raises(NotImplementedError): - list(petrel_backend.list_dir_or_file(petrel_dir)) - - with build_temporary_directory() as tmp_dir: - # 1. list directories and files - assert set(petrel_backend.list_dir_or_file(tmp_dir)) == { - 'dir1', 'dir2', 'text1.txt', 'text2.txt' - } - # 2. list directories and files recursively - assert set( - petrel_backend.list_dir_or_file(tmp_dir, recursive=True)) == { - 'dir1', '/'.join(('dir1', 'text3.txt')), 'dir2', '/'.join( - ('dir2', 'dir3')), '/'.join( - ('dir2', 'dir3', 'text4.txt')), '/'.join( - ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt' - } - # 3. only list directories - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_file=False)) == {'dir1', 'dir2'} - with pytest.raises( - TypeError, - match=('`list_dir` should be False when `suffix` is not ' - 'None')): - # Exception is raised among the `list_dir_or_file` of client, - # so we need to invode the client to trigger the exception - petrel_backend.client.list_dir_or_file( - tmp_dir, list_file=False, suffix='.txt') - # 4. only list directories recursively - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_file=False, recursive=True)) == { - 'dir1', 'dir2', '/'.join(('dir2', 'dir3')) - } - # 5. only list files - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_dir=False)) == {'text1.txt', 'text2.txt'} - # 6. only list files recursively - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_dir=False, recursive=True)) == { - '/'.join(('dir1', 'text3.txt')), '/'.join( - ('dir2', 'dir3', 'text4.txt')), '/'.join( - ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt' - } - # 7. only list files ending with suffix - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_dir=False, - suffix='.txt')) == {'text1.txt', 'text2.txt'} - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_dir=False, - suffix=('.txt', '.jpg'))) == {'text1.txt', 'text2.txt'} - with pytest.raises( - TypeError, - match='`suffix` must be a string or tuple of strings'): - petrel_backend.client.list_dir_or_file( - tmp_dir, list_dir=False, suffix=['.txt', '.jpg']) - # 8. only list files ending with suffix recursively - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, list_dir=False, suffix='.txt', - recursive=True)) == { - '/'.join(('dir1', 'text3.txt')), '/'.join( - ('dir2', 'dir3', 'text4.txt')), 'text1.txt', - 'text2.txt' - } - # 7. only list files ending with suffix - assert set( - petrel_backend.list_dir_or_file( - tmp_dir, - list_dir=False, - suffix=('.txt', '.jpg'), - recursive=True)) == { - '/'.join(('dir1', 'text3.txt')), '/'.join( - ('dir2', 'dir3', 'text4.txt')), '/'.join( - ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt' - } - - @patch('mc.MemcachedClient.GetInstance', MockMemcachedClient) - @patch('mc.pyvector', MagicMock) - @patch('mc.ConvertBuffer', lambda x: x.content) - def test_memcached_backend(self): - mc_cfg = dict(server_list_cfg='', client_cfg='', sys_path=None) - mc_backend = FileClient('memcached', **mc_cfg) - - # test `allow_symlink` attribute - assert not mc_backend.allow_symlink - - # input path is Path object - with pytest.raises(NotImplementedError): - mc_backend.get_text(self.text_path) - # input path is str - with pytest.raises(NotImplementedError): - mc_backend.get_text(str(self.text_path)) - - # input path is Path object - img_bytes = mc_backend.get(self.img_path) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - # input path is str - img_bytes = mc_backend.get(str(self.img_path)) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - - def test_lmdb_backend(self): - lmdb_path = self.test_data_dir / 'demo.lmdb' - - # db_path is Path object - lmdb_backend = FileClient('lmdb', db_path=lmdb_path) - - # test `allow_symlink` attribute - assert not lmdb_backend.allow_symlink - - with pytest.raises(NotImplementedError): - lmdb_backend.get_text(self.text_path) - - img_bytes = lmdb_backend.get('baboon') - img = mmcv.imfrombytes(img_bytes) - assert img.shape == (120, 125, 3) - - # db_path is str - lmdb_backend = FileClient('lmdb', db_path=str(lmdb_path)) - with pytest.raises(NotImplementedError): - lmdb_backend.get_text(str(self.text_path)) - img_bytes = lmdb_backend.get('baboon') - img = mmcv.imfrombytes(img_bytes) - assert img.shape == (120, 125, 3) - - @pytest.mark.parametrize('backend,prefix', [('http', None), - (None, 'http')]) - def test_http_backend(self, backend, prefix): - http_backend = FileClient(backend=backend, prefix=prefix) - img_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \ - 'master/tests/data/color.jpg' - text_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \ - 'master/tests/data/filelist.txt' - - # test `allow_symlink` attribute - assert not http_backend.allow_symlink - - # input is path or Path object - with pytest.raises(Exception): - http_backend.get(self.img_path) - with pytest.raises(Exception): - http_backend.get(str(self.img_path)) - with pytest.raises(Exception): - http_backend.get_text(self.text_path) - with pytest.raises(Exception): - http_backend.get_text(str(self.text_path)) - - # input url is http image - img_bytes = http_backend.get(img_url) - img = mmcv.imfrombytes(img_bytes) - assert img.shape == self.img_shape - - # input url is http text - value_buf = http_backend.get_text(text_url) - assert self.text_path.open('r').read() == value_buf - - # test `_get_local_path` - # exist the with block and path will be released - with http_backend.get_local_path(img_url) as path: - assert mmcv.imread(path).shape == self.img_shape - assert not osp.isfile(path) - - def test_new_magic_method(self): - - class DummyBackend1(BaseStorageBackend): - - def get(self, filepath): - return filepath - - def get_text(self, filepath, encoding='utf-8'): - return filepath - - FileClient.register_backend('dummy_backend', DummyBackend1) - client1 = FileClient(backend='dummy_backend') - client2 = FileClient(backend='dummy_backend') - assert client1 is client2 - - # if a backend is overwrote, it will disable the singleton pattern for - # the backend - class DummyBackend2(BaseStorageBackend): - - def get(self, filepath): - pass - - def get_text(self, filepath): - pass - - FileClient.register_backend('dummy_backend', DummyBackend2, force=True) - client3 = FileClient(backend='dummy_backend') - client4 = FileClient(backend='dummy_backend') - assert client2 is not client3 - assert client3 is client4 - - def test_parse_uri_prefix(self): - # input path is None - with pytest.raises(AssertionError): - FileClient.parse_uri_prefix(None) - # input path is list - with pytest.raises(AssertionError): - FileClient.parse_uri_prefix([]) - - # input path is Path object - assert FileClient.parse_uri_prefix(self.img_path) is None - # input path is str - assert FileClient.parse_uri_prefix(str(self.img_path)) is None - - # input path starts with https - img_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \ - 'master/tests/data/color.jpg' - assert FileClient.parse_uri_prefix(img_url) == 'https' - - # input path starts with s3 - img_url = 's3://your_bucket/img.png' - assert FileClient.parse_uri_prefix(img_url) == 's3' - - # input path starts with clusterName:s3 - img_url = 'clusterName:s3://your_bucket/img.png' - assert FileClient.parse_uri_prefix(img_url) == 's3' - - def test_infer_client(self): - # HardDiskBackend - file_client_args = {'backend': 'disk'} - client = FileClient.infer_client(file_client_args) - assert client.name == 'HardDiskBackend' - client = FileClient.infer_client(uri=self.img_path) - assert client.name == 'HardDiskBackend' - - # PetrelBackend - file_client_args = {'backend': 'petrel'} - client = FileClient.infer_client(file_client_args) - assert client.name == 'PetrelBackend' - uri = 's3://user_data' - client = FileClient.infer_client(uri=uri) - assert client.name == 'PetrelBackend' - - def test_register_backend(self): - - # name must be a string - with pytest.raises(TypeError): - - class TestClass1: - pass - - FileClient.register_backend(1, TestClass1) - - # module must be a class - with pytest.raises(TypeError): - FileClient.register_backend('int', 0) - - # module must be a subclass of BaseStorageBackend - with pytest.raises(TypeError): - - class TestClass1: - pass - - FileClient.register_backend('TestClass1', TestClass1) - - class ExampleBackend(BaseStorageBackend): - - def get(self, filepath): - return filepath - - def get_text(self, filepath, encoding='utf-8'): - return filepath - - FileClient.register_backend('example', ExampleBackend) - example_backend = FileClient('example') - assert example_backend.get(self.img_path) == self.img_path - assert example_backend.get_text(self.text_path) == self.text_path - assert 'example' in FileClient._backends - - class Example2Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes2' - - def get_text(self, filepath, encoding='utf-8'): - return 'text2' - - # force=False - with pytest.raises(KeyError): - FileClient.register_backend('example', Example2Backend) - - FileClient.register_backend('example', Example2Backend, force=True) - example_backend = FileClient('example') - assert example_backend.get(self.img_path) == b'bytes2' - assert example_backend.get_text(self.text_path) == 'text2' - - @FileClient.register_backend(name='example3') - class Example3Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes3' - - def get_text(self, filepath, encoding='utf-8'): - return 'text3' - - example_backend = FileClient('example3') - assert example_backend.get(self.img_path) == b'bytes3' - assert example_backend.get_text(self.text_path) == 'text3' - assert 'example3' in FileClient._backends - - # force=False - with pytest.raises(KeyError): - - @FileClient.register_backend(name='example3') - class Example4Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes4' - - def get_text(self, filepath, encoding='utf-8'): - return 'text4' - - @FileClient.register_backend(name='example3', force=True) - class Example5Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes5' - - def get_text(self, filepath, encoding='utf-8'): - return 'text5' - - example_backend = FileClient('example3') - assert example_backend.get(self.img_path) == b'bytes5' - assert example_backend.get_text(self.text_path) == 'text5' - - # prefixes is a str - class Example6Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes6' - - def get_text(self, filepath, encoding='utf-8'): - return 'text6' - - FileClient.register_backend( - 'example4', - Example6Backend, - force=True, - prefixes='example4_prefix') - example_backend = FileClient('example4') - assert example_backend.get(self.img_path) == b'bytes6' - assert example_backend.get_text(self.text_path) == 'text6' - example_backend = FileClient(prefix='example4_prefix') - assert example_backend.get(self.img_path) == b'bytes6' - assert example_backend.get_text(self.text_path) == 'text6' - example_backend = FileClient('example4', prefix='example4_prefix') - assert example_backend.get(self.img_path) == b'bytes6' - assert example_backend.get_text(self.text_path) == 'text6' - - # prefixes is a list of str - class Example7Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes7' - - def get_text(self, filepath, encoding='utf-8'): - return 'text7' - - FileClient.register_backend( - 'example5', - Example7Backend, - force=True, - prefixes=['example5_prefix1', 'example5_prefix2']) - example_backend = FileClient('example5') - assert example_backend.get(self.img_path) == b'bytes7' - assert example_backend.get_text(self.text_path) == 'text7' - example_backend = FileClient(prefix='example5_prefix1') - assert example_backend.get(self.img_path) == b'bytes7' - assert example_backend.get_text(self.text_path) == 'text7' - example_backend = FileClient(prefix='example5_prefix2') - assert example_backend.get(self.img_path) == b'bytes7' - assert example_backend.get_text(self.text_path) == 'text7' - - # backend has a higher priority than prefixes - class Example8Backend(BaseStorageBackend): - - def get(self, filepath): - return b'bytes8' - - def get_text(self, filepath, encoding='utf-8'): - return 'text8' - - FileClient.register_backend( - 'example6', - Example8Backend, - force=True, - prefixes='example6_prefix') - example_backend = FileClient('example6') - assert example_backend.get(self.img_path) == b'bytes8' - assert example_backend.get_text(self.text_path) == 'text8' - example_backend = FileClient('example6', prefix='example4_prefix') - assert example_backend.get(self.img_path) == b'bytes8' - assert example_backend.get_text(self.text_path) == 'text8' diff --git a/tests/test_fileio.py b/tests/test_fileio.py deleted file mode 100644 index f5e23bf7f4..0000000000 --- a/tests/test_fileio.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import os.path as osp -import sys -import tempfile -from unittest.mock import MagicMock, patch - -import pytest - -import mmcv -from mmcv.fileio.file_client import HTTPBackend, PetrelBackend - -sys.modules['petrel_client'] = MagicMock() -sys.modules['petrel_client.client'] = MagicMock() - - -def _test_handler(file_format, test_obj, str_checker, mode='r+'): - # dump to a string - dump_str = mmcv.dump(test_obj, file_format=file_format) - str_checker(dump_str) - - # load/dump with filenames from disk - tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test_dump') - mmcv.dump(test_obj, tmp_filename, file_format=file_format) - assert osp.isfile(tmp_filename) - load_obj = mmcv.load(tmp_filename, file_format=file_format) - assert load_obj == test_obj - os.remove(tmp_filename) - - # load/dump with filename from petrel - method = 'put' if 'b' in mode else 'put_text' - with patch.object(PetrelBackend, method, return_value=None) as mock_method: - filename = 's3://path/of/your/file' - mmcv.dump(test_obj, filename, file_format=file_format) - mock_method.assert_called() - - # json load/dump with a file-like object - with tempfile.NamedTemporaryFile(mode, delete=False) as f: - tmp_filename = f.name - mmcv.dump(test_obj, f, file_format=file_format) - assert osp.isfile(tmp_filename) - with open(tmp_filename, mode) as f: - load_obj = mmcv.load(f, file_format=file_format) - assert load_obj == test_obj - os.remove(tmp_filename) - - # automatically inference the file format from the given filename - tmp_filename = osp.join(tempfile.gettempdir(), - 'mmcv_test_dump.' + file_format) - mmcv.dump(test_obj, tmp_filename) - assert osp.isfile(tmp_filename) - load_obj = mmcv.load(tmp_filename) - assert load_obj == test_obj - os.remove(tmp_filename) - - -obj_for_test = [{'a': 'abc', 'b': 1}, 2, 'c'] - - -def test_json(): - - def json_checker(dump_str): - assert dump_str in [ - '[{"a": "abc", "b": 1}, 2, "c"]', '[{"b": 1, "a": "abc"}, 2, "c"]' - ] - - _test_handler('json', obj_for_test, json_checker) - - -def test_yaml(): - - def yaml_checker(dump_str): - assert dump_str in [ - '- {a: abc, b: 1}\n- 2\n- c\n', '- {b: 1, a: abc}\n- 2\n- c\n', - '- a: abc\n b: 1\n- 2\n- c\n', '- b: 1\n a: abc\n- 2\n- c\n' - ] - - _test_handler('yaml', obj_for_test, yaml_checker) - - -def test_pickle(): - - def pickle_checker(dump_str): - import pickle - assert pickle.loads(dump_str) == obj_for_test - - _test_handler('pickle', obj_for_test, pickle_checker, mode='rb+') - - -def test_exception(): - test_obj = [{'a': 'abc', 'b': 1}, 2, 'c'] - - with pytest.raises(ValueError): - mmcv.dump(test_obj) - - with pytest.raises(TypeError): - mmcv.dump(test_obj, 'tmp.txt') - - -def test_register_handler(): - - @mmcv.register_handler('txt') - class TxtHandler1(mmcv.BaseFileHandler): - - def load_from_fileobj(self, file): - return file.read() - - def dump_to_fileobj(self, obj, file): - file.write(str(obj)) - - def dump_to_str(self, obj, **kwargs): - return str(obj) - - @mmcv.register_handler(['txt1', 'txt2']) - class TxtHandler2(mmcv.BaseFileHandler): - - def load_from_fileobj(self, file): - return file.read() - - def dump_to_fileobj(self, obj, file): - file.write('\n') - file.write(str(obj)) - - def dump_to_str(self, obj, **kwargs): - return str(obj) - - content = mmcv.load(osp.join(osp.dirname(__file__), 'data/filelist.txt')) - assert content == '1.jpg\n2.jpg\n3.jpg\n4.jpg\n5.jpg' - tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test.txt2') - mmcv.dump(content, tmp_filename) - with open(tmp_filename) as f: - written = f.read() - os.remove(tmp_filename) - assert written == '\n' + content - - -def test_list_from_file(): - # get list from disk - filename = osp.join(osp.dirname(__file__), 'data/filelist.txt') - filelist = mmcv.list_from_file(filename) - assert filelist == ['1.jpg', '2.jpg', '3.jpg', '4.jpg', '5.jpg'] - filelist = mmcv.list_from_file(filename, prefix='a/') - assert filelist == ['a/1.jpg', 'a/2.jpg', 'a/3.jpg', 'a/4.jpg', 'a/5.jpg'] - filelist = mmcv.list_from_file(filename, offset=2) - assert filelist == ['3.jpg', '4.jpg', '5.jpg'] - filelist = mmcv.list_from_file(filename, max_num=2) - assert filelist == ['1.jpg', '2.jpg'] - filelist = mmcv.list_from_file(filename, offset=3, max_num=3) - assert filelist == ['4.jpg', '5.jpg'] - - # get list from http - with patch.object( - HTTPBackend, 'get_text', return_value='1.jpg\n2.jpg\n3.jpg'): - filename = 'http://path/of/your/file' - filelist = mmcv.list_from_file( - filename, file_client_args={'backend': 'http'}) - assert filelist == ['1.jpg', '2.jpg', '3.jpg'] - filelist = mmcv.list_from_file( - filename, file_client_args={'prefix': 'http'}) - assert filelist == ['1.jpg', '2.jpg', '3.jpg'] - filelist = mmcv.list_from_file(filename) - assert filelist == ['1.jpg', '2.jpg', '3.jpg'] - - # get list from petrel - with patch.object( - PetrelBackend, 'get_text', return_value='1.jpg\n2.jpg\n3.jpg'): - filename = 's3://path/of/your/file' - filelist = mmcv.list_from_file( - filename, file_client_args={'backend': 'petrel'}) - assert filelist == ['1.jpg', '2.jpg', '3.jpg'] - filelist = mmcv.list_from_file( - filename, file_client_args={'prefix': 's3'}) - assert filelist == ['1.jpg', '2.jpg', '3.jpg'] - filelist = mmcv.list_from_file(filename) - assert filelist == ['1.jpg', '2.jpg', '3.jpg'] - - -def test_dict_from_file(): - # get dict from disk - filename = osp.join(osp.dirname(__file__), 'data/mapping.txt') - mapping = mmcv.dict_from_file(filename) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} - mapping = mmcv.dict_from_file(filename, key_type=int) - assert mapping == {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} - - # get dict from http - with patch.object( - HTTPBackend, 'get_text', return_value='1 cat\n2 dog cow\n3 panda'): - filename = 'http://path/of/your/file' - mapping = mmcv.dict_from_file( - filename, file_client_args={'backend': 'http'}) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} - mapping = mmcv.dict_from_file( - filename, file_client_args={'prefix': 'http'}) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} - mapping = mmcv.dict_from_file(filename) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} - - # get dict from petrel - with patch.object( - PetrelBackend, 'get_text', - return_value='1 cat\n2 dog cow\n3 panda'): - filename = 's3://path/of/your/file' - mapping = mmcv.dict_from_file( - filename, file_client_args={'backend': 'petrel'}) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} - mapping = mmcv.dict_from_file( - filename, file_client_args={'prefix': 's3'}) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} - mapping = mmcv.dict_from_file(filename) - assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} diff --git a/tests/test_image/test_io.py b/tests/test_image/test_io.py index 7c1b4dd683..4fe1bc8af0 100644 --- a/tests/test_image/test_io.py +++ b/tests/test_image/test_io.py @@ -7,13 +7,14 @@ from unittest.mock import MagicMock, patch import cv2 +import mmengine import numpy as np import pytest import torch +from mmengine.fileio.file_client import HTTPBackend, PetrelBackend from numpy.testing import assert_allclose, assert_array_equal import mmcv -from mmcv.fileio.file_client import HTTPBackend, PetrelBackend if torch.__version__ == 'parrots': pytest.skip('not necessary in parrots test', allow_module_level=True) @@ -46,7 +47,7 @@ def setup_class(cls): @classmethod def teardown_class(cls): # clean instances avoid to influence other unittest - mmcv.FileClient._instances = {} + mmengine.FileClient._instances = {} def assert_img_equal(self, img, ref_img, ratio_thr=0.999): assert img.shape == ref_img.shape diff --git a/tests/test_load_model_zoo.py b/tests/test_load_model_zoo.py index 904cb94031..ae4495db72 100644 --- a/tests/test_load_model_zoo.py +++ b/tests/test_load_model_zoo.py @@ -3,6 +3,7 @@ import os.path as osp from unittest.mock import patch +import mmengine import pytest import torchvision @@ -30,7 +31,7 @@ def test_default_mmcv_home(): assert _get_mmcv_home() == os.path.expanduser( os.path.join(DEFAULT_CACHE_DIR, 'mmcv')) model_urls = get_external_models() - assert model_urls == mmcv.load( + assert model_urls == mmengine.load( osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json')) diff --git a/tests/test_ops/test_nms.py b/tests/test_ops/test_nms.py index aece8ad5e4..9f1ac65d61 100644 --- a/tests/test_ops/test_nms.py +++ b/tests/test_ops/test_nms.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import mmengine import numpy as np import pytest import torch @@ -144,9 +145,8 @@ def test_nms_match(self): nms_match(wrong_dets, iou_thr) def test_batched_nms(self): - import mmcv from mmcv.ops import batched_nms - results = mmcv.load('./tests/data/batched_nms_data.pkl') + results = mmengine.load('./tests/data/batched_nms_data.pkl') nms_max_num = 100 nms_cfg = dict( diff --git a/tests/test_ops/test_tensorrt.py b/tests/test_ops/test_tensorrt.py index f7a1057f89..61189c7681 100644 --- a/tests/test_ops/test_tensorrt.py +++ b/tests/test_ops/test_tensorrt.py @@ -3,6 +3,7 @@ from functools import partial from typing import Callable +import mmengine import numpy as np import onnx import pytest @@ -117,7 +118,6 @@ def test_roialign(): def test_nms(): try: - import mmcv from mmcv.ops import nms except (ImportError, ModuleNotFoundError): pytest.skip('test requires compilation') @@ -125,7 +125,7 @@ def test_nms(): # trt config fp16_mode = False max_workspace_size = 1 << 30 - data = mmcv.load('./tests/data/batched_nms_data.pkl') + data = mmengine.load('./tests/data/batched_nms_data.pkl') boxes = torch.from_numpy(data['boxes']).cuda() scores = torch.from_numpy(data['scores']).cuda() nms = partial( @@ -188,7 +188,6 @@ def test_nms(): def test_batched_nms(): try: - import mmcv from mmcv.ops import batched_nms except (ImportError, ModuleNotFoundError): pytest.skip('test requires compilation') @@ -197,7 +196,7 @@ def test_batched_nms(): os.environ['ONNX_BACKEND'] = 'MMCVTensorRT' fp16_mode = False max_workspace_size = 1 << 30 - data = mmcv.load('./tests/data/batched_nms_data.pkl') + data = mmengine.load('./tests/data/batched_nms_data.pkl') nms_cfg = dict(type='nms', iou_threshold=0.7, score_threshold=0.1) boxes = torch.from_numpy(data['boxes']).cuda() scores = torch.from_numpy(data['scores']).cuda() diff --git a/tests/test_runner/test_basemodule.py b/tests/test_runner/test_basemodule.py index 1f186bf828..6c2cbd8e6b 100644 --- a/tests/test_runner/test_basemodule.py +++ b/tests/test_runner/test_basemodule.py @@ -1,11 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. import tempfile +import mmengine import pytest import torch from torch import nn -import mmcv from mmcv.cnn.utils.weight_init import update_init_info from mmcv.runner import BaseModule, ModuleDict, ModuleList, Sequential from mmcv.utils import Registry, build_from_cfg @@ -135,7 +135,7 @@ def __init__(self, init_cfg=None): # assert initialization information has been dumped assert os.path.exists(log_file) - lines = mmcv.list_from_file(log_file) + lines = mmengine.list_from_file(log_file) # check initialization information is right for i, line in enumerate(lines): @@ -210,7 +210,7 @@ def __init__(self, init_cfg=None, checklog_init_cfg=None): # assert initialization information has been dumped assert os.path.exists(log_file) - lines = mmcv.list_from_file(log_file) + lines = mmengine.list_from_file(log_file) # check initialization information is right for i, line in enumerate(lines): if 'TopLevelModule' in line and 'init_cfg' not in line: diff --git a/tests/test_runner/test_checkpoint.py b/tests/test_runner/test_checkpoint.py index 95ab7bcaf7..6b842e0e61 100644 --- a/tests/test_runner/test_checkpoint.py +++ b/tests/test_runner/test_checkpoint.py @@ -8,9 +8,9 @@ import torch import torch.nn as nn import torch.optim as optim +from mmengine.fileio.file_client import PetrelBackend from torch.nn.parallel import DataParallel -from mmcv.fileio.file_client import PetrelBackend from mmcv.parallel.registry import MODULE_WRAPPERS from mmcv.runner.checkpoint import (_load_checkpoint_with_prefix, get_state_dict, load_checkpoint, diff --git a/tests/test_runner/test_eval_hook.py b/tests/test_runner/test_eval_hook.py index e03ce82db2..7cab166f83 100644 --- a/tests/test_runner/test_eval_hook.py +++ b/tests/test_runner/test_eval_hook.py @@ -11,9 +11,9 @@ import torch import torch.nn as nn import torch.optim as optim +from mmengine.fileio.file_client import PetrelBackend from torch.utils.data import DataLoader, Dataset -from mmcv.fileio.file_client import PetrelBackend from mmcv.runner import DistEvalHook as BaseDistEvalHook from mmcv.runner import EpochBasedRunner from mmcv.runner import EvalHook as BaseEvalHook diff --git a/tests/test_runner/test_hooks.py b/tests/test_runner/test_hooks.py index bdb93a9013..391a7865f0 100644 --- a/tests/test_runner/test_hooks.py +++ b/tests/test_runner/test_hooks.py @@ -18,10 +18,10 @@ import pytest import torch import torch.nn as nn +from mmengine.fileio.file_client import PetrelBackend from torch.nn.init import constant_ from torch.utils.data import DataLoader -from mmcv.fileio.file_client import PetrelBackend # yapf: disable from mmcv.runner import (CheckpointHook, ClearMLLoggerHook, DvcliveLoggerHook, EMAHook, Fp16OptimizerHook, diff --git a/tests/test_utils/test_config.py b/tests/test_utils/test_config.py index 96118e7109..4490a900d4 100644 --- a/tests/test_utils/test_config.py +++ b/tests/test_utils/test_config.py @@ -10,8 +10,9 @@ import pytest import yaml +from mmengine import dump, load -from mmcv import Config, ConfigDict, DictAction, dump, load +from mmcv import Config, ConfigDict, DictAction data_path = osp.join(osp.dirname(osp.dirname(__file__)), 'data')