From bb0dead9fdb6fcddab5e563ad7e718e17c3a0aa9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 13 Jun 2017 15:34:49 -0400 Subject: [PATCH 1/2] Ensure blob downloads w/ 'mediaLink' set include 'userProject' as needed. --- storage/google/cloud/storage/blob.py | 24 ++++++++++++++++++------ storage/tests/unit/test_blob.py | 15 ++++++++++++++- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/storage/google/cloud/storage/blob.py b/storage/google/cloud/storage/blob.py index aad2f47295aa..170447fd786f 100644 --- a/storage/google/cloud/storage/blob.py +++ b/storage/google/cloud/storage/blob.py @@ -35,7 +35,11 @@ import warnings import httplib2 +from six.moves.urllib.parse import parse_qsl from six.moves.urllib.parse import quote +from six.moves.urllib.parse import urlencode +from six.moves.urllib.parse import urlsplit +from six.moves.urllib.parse import urlunsplit import google.auth.transport.requests from google import resumable_media @@ -404,14 +408,22 @@ def _get_download_url(self): :returns: The download URL for the current blob. """ if self.media_link is None: - download_url = _DOWNLOAD_URL_TEMPLATE.format(path=self.path) + base_url = _DOWNLOAD_URL_TEMPLATE.format(path=self.path) + scheme, netloc, path, query, frag = urlsplit(base_url) + query = parse_qsl(query) + # Only add `generation' for synthesized URLs: 'mediaLink' from + # server presumably already encodes it. if self.generation is not None: - download_url += u'&generation={:d}'.format(self.generation) - if self.user_project is not None: - download_url += u'&userProject={}'.format(self.user_project) - return download_url + query.append(('generation', '{:d}'.format(self.generation))) else: - return self.media_link + scheme, netloc, path, query, frag = urlsplit(self.media_link) + query = parse_qsl(query) + + if self.user_project is not None: + query.append(('userProject', self.user_project)) + + query = urlencode(query) + return urlunsplit((scheme, netloc, path, query, frag)) def _do_download(self, transport, file_obj, download_url, headers): """Perform a download without any error handling. diff --git a/storage/tests/unit/test_blob.py b/storage/tests/unit/test_blob.py index 1c31e9ea1b0f..8ef793ae8674 100644 --- a/storage/tests/unit/test_blob.py +++ b/storage/tests/unit/test_blob.py @@ -366,7 +366,7 @@ def test__make_transport(self, fake_session_factory): def test__get_download_url_with_media_link(self): blob_name = 'something.txt' - bucket = mock.Mock(spec=[]) + bucket = _Bucket(name='IRRELEVANT') blob = self._make_one(blob_name, bucket=bucket) media_link = 'http://test.invalid' # Set the media link on the blob @@ -375,6 +375,19 @@ def test__get_download_url_with_media_link(self): download_url = blob._get_download_url() self.assertEqual(download_url, media_link) + def test__get_download_url_with_media_link_w_user_project(self): + blob_name = 'something.txt' + user_project = 'user-project-123' + bucket = _Bucket(name='IRRELEVANT', user_project=user_project) + blob = self._make_one(blob_name, bucket=bucket) + media_link = 'http://test.invalid' + # Set the media link on the blob + blob._properties['mediaLink'] = media_link + + download_url = blob._get_download_url() + self.assertEqual( + download_url, '{}?userProject={}'.format(media_link, user_project)) + def test__get_download_url_on_the_fly(self): blob_name = 'bzzz-fly.txt' bucket = _Bucket(name='buhkit') From bf94df8ff6f3707e8fecf67b0df127eeec5698c0 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 19 Jun 2017 16:12:46 -0400 Subject: [PATCH 2/2] Factor out query-string-appending logic. Addresses: https://github.com/GoogleCloudPlatform/google-cloud-python/pull/3500#discussion_r121788841 --- storage/google/cloud/storage/blob.py | 49 ++++++++++++++++++++-------- storage/tests/unit/test_blob.py | 31 ++++++++++++++++++ 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/storage/google/cloud/storage/blob.py b/storage/google/cloud/storage/blob.py index 170447fd786f..898d84d1b791 100644 --- a/storage/google/cloud/storage/blob.py +++ b/storage/google/cloud/storage/blob.py @@ -407,23 +407,19 @@ def _get_download_url(self): :rtype: str :returns: The download URL for the current blob. """ + name_value_pairs = [] if self.media_link is None: base_url = _DOWNLOAD_URL_TEMPLATE.format(path=self.path) - scheme, netloc, path, query, frag = urlsplit(base_url) - query = parse_qsl(query) - # Only add `generation' for synthesized URLs: 'mediaLink' from - # server presumably already encodes it. if self.generation is not None: - query.append(('generation', '{:d}'.format(self.generation))) + name_value_pairs.append( + ('generation', '{:d}'.format(self.generation))) else: - scheme, netloc, path, query, frag = urlsplit(self.media_link) - query = parse_qsl(query) + base_url = self.media_link if self.user_project is not None: - query.append(('userProject', self.user_project)) + name_value_pairs.append(('userProject', self.user_project)) - query = urlencode(query) - return urlunsplit((scheme, netloc, path, query, frag)) + return _add_query_parameters(base_url, name_value_pairs) def _do_download(self, transport, file_obj, download_url, headers): """Perform a download without any error handling. @@ -670,12 +666,14 @@ def _do_multipart_upload(self, client, stream, content_type, info = self._get_upload_arguments(content_type) headers, object_metadata, content_type = info - upload_url = _MULTIPART_URL_TEMPLATE.format( + base_url = _MULTIPART_URL_TEMPLATE.format( bucket_path=self.bucket.path) + name_value_pairs = [] if self.user_project is not None: - upload_url += '&userProject={}'.format(self.user_project) + name_value_pairs.append(('userProject', self.user_project)) + upload_url = _add_query_parameters(base_url, name_value_pairs) upload = MultipartUpload(upload_url, headers=headers) if num_retries is not None: @@ -746,12 +744,14 @@ def _initiate_resumable_upload(self, client, stream, content_type, if extra_headers is not None: headers.update(extra_headers) - upload_url = _RESUMABLE_URL_TEMPLATE.format( + base_url = _RESUMABLE_URL_TEMPLATE.format( bucket_path=self.bucket.path) + name_value_pairs = [] if self.user_project is not None: - upload_url += '&userProject={}'.format(self.user_project) + name_value_pairs.append(('userProject', self.user_project)) + upload_url = _add_query_parameters(base_url, name_value_pairs) upload = ResumableUpload(upload_url, chunk_size, headers=headers) if num_retries is not None: @@ -1688,3 +1688,24 @@ def _raise_from_invalid_response(error, error_info=None): faux_response = httplib2.Response({'status': response.status_code}) raise make_exception(faux_response, response.content, error_info=error_info, use_json=False) + + +def _add_query_parameters(base_url, name_value_pairs): + """Add one query parameter to a base URL. + + :type base_url: string + :param base_url: Base URL (may already contain query parameters) + + :type name_value_pairs: list of (string, string) tuples. + :param name_value_pairs: Names and values of the query parameters to add + + :rtype: string + :returns: URL with additional query strings appended. + """ + if len(name_value_pairs) == 0: + return base_url + + scheme, netloc, path, query, frag = urlsplit(base_url) + query = parse_qsl(query) + query.extend(name_value_pairs) + return urlunsplit((scheme, netloc, path, urlencode(query), frag)) diff --git a/storage/tests/unit/test_blob.py b/storage/tests/unit/test_blob.py index 8ef793ae8674..ad0f88b5129f 100644 --- a/storage/tests/unit/test_blob.py +++ b/storage/tests/unit/test_blob.py @@ -2443,6 +2443,37 @@ def test_with_error_info(self): self.assertEqual(exc_info.exception.errors, []) +class Test__add_query_parameters(unittest.TestCase): + + @staticmethod + def _call_fut(*args, **kwargs): + from google.cloud.storage.blob import _add_query_parameters + + return _add_query_parameters(*args, **kwargs) + + def test_w_empty_list(self): + BASE_URL = 'https://test.example.com/base' + self.assertEqual(self._call_fut(BASE_URL, []), BASE_URL) + + def test_wo_existing_qs(self): + BASE_URL = 'https://test.example.com/base' + NV_LIST = [('one', 'One'), ('two', 'Two')] + expected = '&'.join([ + '{}={}'.format(name, value) for name, value in NV_LIST]) + self.assertEqual( + self._call_fut(BASE_URL, NV_LIST), + '{}?{}'.format(BASE_URL, expected)) + + def test_w_existing_qs(self): + BASE_URL = 'https://test.example.com/base?one=Three' + NV_LIST = [('one', 'One'), ('two', 'Two')] + expected = '&'.join([ + '{}={}'.format(name, value) for name, value in NV_LIST]) + self.assertEqual( + self._call_fut(BASE_URL, NV_LIST), + '{}&{}'.format(BASE_URL, expected)) + + class _Connection(object): API_BASE_URL = 'http://example.com'