BQ: client.extract_table starts extract job (#3991)
* BQ: client.extract_table starts extract job

Add system tests for extract_table.

* BigQuery: client.extract_table uses `**kwargs` for Python 2.7 compatibility.

* BQ: extract_table: use dict.get for kwargs; use job_id instead of job_name.
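
In effect, the renamed method both constructs and starts the job. A minimal before/after sketch (the client, table, and bucket names are placeholders):

    # Before this commit: construct the job, then start it explicitly.
    job = client.extract_table_to_storage(
        'my-job-id', table, 'gs://my-bucket/my_table-*.csv')
    job.begin()

    # After: one call constructs and begins the job; job_id is optional
    # and defaults to a random UUID.
    job = client.extract_table(table, 'gs://my-bucket/my_table-*.csv')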
tswast committed Sep 19, 2017
1 parent bb4bc6c commit 3284a0d
Showing 8 changed files with 484 additions and 100 deletions.
bigquery/google/cloud/bigquery/__init__.py: 2 additions & 0 deletions

@@ -32,6 +32,7 @@
 from google.cloud.bigquery.client import Client
 from google.cloud.bigquery.dataset import AccessEntry
 from google.cloud.bigquery.dataset import Dataset
+from google.cloud.bigquery.job import ExtractJobConfig
 from google.cloud.bigquery.schema import SchemaField
 from google.cloud.bigquery.table import Table

@@ -41,6 +42,7 @@
     'ArrayQueryParameter',
     'Client',
     'Dataset',
+    'ExtractJobConfig',
     'ScalarQueryParameter',
     'SchemaField',
     'StructQueryParameter',
bigquery/google/cloud/bigquery/_helpers.py: 76 additions & 0 deletions

@@ -299,6 +299,82 @@ def _time_to_json(value):
 _SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter


+class _ApiResourceProperty(object):
+    """Base property implementation.
+
+    Values will be stored on a `_properties` helper attribute of the
+    property's job instance.
+
+    :type name: str
+    :param name: name of the property
+
+    :type resource_name: str
+    :param resource_name: name of the property in the resource dictionary
+    """
+
+    def __init__(self, name, resource_name):
+        self.name = name
+        self.resource_name = resource_name
+
+    def __get__(self, instance, owner):
+        """Descriptor protocol: accessor"""
+        if instance is None:
+            return self
+        return instance._properties.get(self.resource_name)
+
+    def _validate(self, value):
+        """Subclasses override to impose validation policy."""
+        pass
+
+    def __set__(self, instance, value):
+        """Descriptor protocol: mutator"""
+        self._validate(value)
+        instance._properties[self.resource_name] = value
+
+    def __delete__(self, instance):
+        """Descriptor protocol: deleter"""
+        del instance._properties[self.resource_name]
+
+
+class _TypedApiResourceProperty(_ApiResourceProperty):
+    """Property implementation: validates based on value type.
+
+    :type name: str
+    :param name: name of the property
+
+    :type resource_name: str
+    :param resource_name: name of the property in the resource dictionary
+
+    :type property_type: type or sequence of types
+    :param property_type: type to be validated
+    """
+
+    def __init__(self, name, resource_name, property_type):
+        super(_TypedApiResourceProperty, self).__init__(
+            name, resource_name)
+        self.property_type = property_type
+
+    def _validate(self, value):
+        """Ensure that 'value' is of the appropriate type.
+
+        :raises: ValueError on a type mismatch.
+        """
+        if value is None:
+            return
+        if not isinstance(value, self.property_type):
+            raise ValueError('Required type: %s' % (self.property_type,))
+
+
+class _EnumApiResourceProperty(_ApiResourceProperty):
+    """Pseudo-enumeration class.
+
+    :type name: str
+    :param name: name of the property.
+
+    :type resource_name: str
+    :param resource_name: name of the property in the resource dictionary
+    """
+
+
 class _ConfigurationProperty(object):
     """Base property implementation.
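
To make the pattern behind these helpers concrete, here is a condensed, runnable sketch of a typed descriptor backed by a _properties dictionary; DemoProperty and DemoConfig are hypothetical names for illustration, not part of the library:

    class DemoProperty(object):
        """Typed descriptor in the style of _TypedApiResourceProperty."""

        def __init__(self, name, resource_name, property_type):
            self.name = name
            self.resource_name = resource_name
            self.property_type = property_type

        def __get__(self, instance, owner):
            if instance is None:
                return self
            # Reads come straight from the instance's resource dictionary.
            return instance._properties.get(self.resource_name)

        def __set__(self, instance, value):
            # Writes are validated, then stored under the API field name.
            if value is not None and not isinstance(value, self.property_type):
                raise ValueError('Required type: %s' % (self.property_type,))
            instance._properties[self.resource_name] = value


    class DemoConfig(object):
        compression = DemoProperty('compression', 'compression', str)

        def __init__(self):
            self._properties = {}  # mirrors the JSON resource sent to the API


    config = DemoConfig()
    config.compression = 'GZIP'
    print(config._properties)  # prints {'compression': 'GZIP'}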
bigquery/google/cloud/bigquery/client.py: 29 additions & 10 deletions

@@ -16,6 +16,8 @@

 from __future__ import absolute_import

+import uuid
+
 from google.api.core import page_iterator
 from google.cloud.client import ClientWithProject
 from google.cloud.bigquery._http import Connection

@@ -385,27 +387,44 @@ def copy_table(self, job_id, destination, *sources):
"""
return CopyJob(job_id, destination, sources, client=self)

def extract_table_to_storage(self, job_id, source, *destination_uris):
"""Construct a job for extracting a table into Cloud Storage files.
def extract_table(self, source, *destination_uris, **kwargs):
"""Start a job to extract a table into Cloud Storage files.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
:type job_id: str
:param job_id: Name of the job.
:type source: :class:`google.cloud.bigquery.table.Table`
:type source: :class:`google.cloud.bigquery.table.TableReference`
:param source: table to be extracted.
:type destination_uris: sequence of string
:param destination_uris: URIs of CloudStorage file(s) into which
table data is to be extracted; in format
``gs://<bucket_name>/<object_name_or_glob>``.
:param destination_uris:
URIs of Cloud Storage file(s) into which table data is to be
extracted; in format ``gs://<bucket_name>/<object_name_or_glob>``.
:type kwargs: dict
:param kwargs: Additional keyword arguments.
:Keyword Arguments:
* *job_config*
(:class:`google.cloud.bigquery.job.ExtractJobConfig`) --
(Optional) Extra configuration options for the extract job.
* *job_id* (``str``) --
Additional content
(Optional) The ID of the job.
:rtype: :class:`google.cloud.bigquery.job.ExtractJob`
:returns: a new ``ExtractJob`` instance
"""
return ExtractJob(job_id, source, destination_uris, client=self)
job_config = kwargs.get('job_config')
job_id = kwargs.get('job_id')
if job_id is None:
job_id = str(uuid.uuid4())

job = ExtractJob(
job_id, source, list(destination_uris), client=self,
job_config=job_config)
job.begin()
return job

def run_async_query(self, job_id, query,
udf_resources=(), query_parameters=()):
Expand Down
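
Because Python 2.7 does not allow keyword-only arguments after *destination_uris, the new options travel in **kwargs and are read with dict.get. A hedged usage sketch of the configured call (the ExtractJobConfig property names below mirror the REST API's configuration.extract fields and are assumptions here, as are the dataset, table, and bucket names):

    from google.cloud import bigquery

    client = bigquery.Client()
    source = client.dataset('my_dataset').table('my_table')  # placeholders

    config = bigquery.ExtractJobConfig()
    config.destination_format = 'NEWLINE_DELIMITED_JSON'  # assumed property
    config.compression = 'GZIP'                           # assumed property

    # The returned ExtractJob has already been started; callers no longer
    # call job.begin() themselves.
    job = client.extract_table(
        source, 'gs://my-bucket/my_table-*.json.gz',
        job_config=config, job_id='my-explicit-job-id')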