Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite S3Boto3Storage.listdir() for efficiency #352

Merged: 1 commit, merged on Sep 1, 2018.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
storages/backends/s3boto3.py: 31 changes (14 additions, 17 deletions)
Note: this text rendering has lost the diff's +/- markers and the code's indentation, so removed and added lines appear interleaved below.
@@ -520,25 +520,22 @@ def exists(self, name):
return False

def listdir(self, name):
name = self._normalize_name(self._clean_name(name))
# for the bucket.objects.filter and logic below name needs to end in /
# But for the root path "" we leave it as an empty string
if name and not name.endswith('/'):
name += '/'
path = self._normalize_name(self._clean_name(name))
# The path needs to end with a slash, but if the root is empty, leave
# it.
if path and not path.endswith('/'):
path += '/'

directories = []
files = []
dirs = set()
base_parts = name.split("/")[:-1]
for item in self.bucket.objects.filter(Prefix=self._encode_name(name)):
parts = item.key.split("/")
parts = parts[len(base_parts):]
if len(parts) == 1:
# File
files.append(parts[0])
elif len(parts) > 1:
# Directory
dirs.add(parts[0])
return list(dirs), files
paginator = self.connection.meta.client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=self.bucket_name, Delimiter='/', Prefix=path)
for page in pages:
for entry in page.get('CommonPrefixes', ()):
directories.append(posixpath.relpath(entry['Prefix'], path))
for entry in page.get('Contents', ()):
files.append(posixpath.relpath(entry['Key'], path))
return directories, files

def size(self, name):
name = self._normalize_name(self._clean_name(name))
Expand Down
tests/test_s3boto3.py: 86 changes (47 additions, 39 deletions)
Note: this text rendering has lost the diff's +/- markers and the code's indentation, so removed and added lines appear interleaved below.
@@ -399,50 +399,58 @@ def test_storage_delete(self):
self.storage.bucket.Object.return_value.delete.assert_called_with()

def test_storage_listdir_base(self):
file_names = ["some/path/1.txt", "2.txt", "other/path/3.txt", "4.txt"]

result = []
for p in file_names:
obj = mock.MagicMock()
obj.key = p
result.append(obj)
self.storage.bucket.objects.filter.return_value = iter(result)

dirs, files = self.storage.listdir("")
self.storage.bucket.objects.filter.assert_called_with(Prefix="")

self.assertEqual(len(dirs), 2)
for directory in ["some", "other"]:
self.assertTrue(directory in dirs,
""" "%s" not in directory list "%s".""" % (
directory, dirs))

self.assertEqual(len(files), 2)
for filename in ["2.txt", "4.txt"]:
self.assertTrue(filename in files,
""" "%s" not in file list "%s".""" % (
filename, files))
# Files:
# some/path/1.txt
# 2.txt
# other/path/3.txt
# 4.txt
pages = [
{
'CommonPrefixes': [
{'Prefix': 'some'},
{'Prefix': 'other'},
],
'Contents': [
{'Key': '2.txt'},
{'Key': '4.txt'},
],
},
]

def test_storage_listdir_subdir(self):
file_names = ["some/path/1.txt", "some/2.txt"]
paginator = mock.MagicMock()
paginator.paginate.return_value = pages
self.storage._connections.connection.meta.client.get_paginator.return_value = paginator

dirs, files = self.storage.listdir('')
paginator.paginate.assert_called_with(Bucket=None, Delimiter='/', Prefix='')

result = []
for p in file_names:
obj = mock.MagicMock()
obj.key = p
result.append(obj)
self.storage.bucket.objects.filter.return_value = iter(result)
self.assertEqual(dirs, ['some', 'other'])
self.assertEqual(files, ['2.txt', '4.txt'])

def test_storage_listdir_subdir(self):
# Files:
# some/path/1.txt
# some/2.txt
pages = [
{
'CommonPrefixes': [
{'Prefix': 'some/path'},
],
'Contents': [
{'Key': 'some/2.txt'},
],
},
]

dirs, files = self.storage.listdir("some/")
self.storage.bucket.objects.filter.assert_called_with(Prefix="some/")
paginator = mock.MagicMock()
paginator.paginate.return_value = pages
self.storage._connections.connection.meta.client.get_paginator.return_value = paginator

self.assertEqual(len(dirs), 1)
self.assertTrue('path' in dirs,
""" "path" not in directory list "%s".""" % (dirs,))
dirs, files = self.storage.listdir('some/')
paginator.paginate.assert_called_with(Bucket=None, Delimiter='/', Prefix='some/')

self.assertEqual(len(files), 1)
self.assertTrue('2.txt' in files,
""" "2.txt" not in files list "%s".""" % (files,))
self.assertEqual(dirs, ['path'])
self.assertEqual(files, ['2.txt'])

def test_storage_size(self):
obj = self.storage.bucket.Object.return_value
Expand Down