From e2b555172cf51b61bc5643d612a4156f832bd478 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 May 2017 17:01:42 -0700 Subject: [PATCH 1/6] BUG: oauth2client deprecated, use google-auth instead. Remove the use of oauth2client and use google-auth library, instead. See GH#37. Rather than check for multiple versions of the libraries, use the setup.py to specify compatible versions. I believe this is safe since Pandas checks for the pandas_gbq package. Since google-auth does not use the argparse module to override user authentication flow settings, add a parameter to choose between the web and console flow. Addresses some eventual consistency issues in table/dataset listing in the integration tests. --- .gitignore | 4 + ci/requirements-2.7-0.19.2.pip | 10 +- ci/requirements-3.5-0.18.1.pip | 6 +- ci/requirements-3.6-0.20.1.pip | 6 +- ci/requirements-3.6-MASTER.pip | 6 +- docs/source/changelog.rst | 4 +- pandas_gbq/gbq.py | 271 +++++++++++++++++++++------------ pandas_gbq/tests/test_gbq.py | 174 ++++++++++++--------- requirements.txt | 4 +- setup.py | 11 +- 10 files changed, 309 insertions(+), 187 deletions(-) diff --git a/.gitignore b/.gitignore index eb19ab7b..deba4dd8 100644 --- a/.gitignore +++ b/.gitignore @@ -76,3 +76,7 @@ Thumbs.db # caches # .cache + +# Credentials # +############### +bigquery_credentials.dat diff --git a/ci/requirements-2.7-0.19.2.pip b/ci/requirements-2.7-0.19.2.pip index 103055ba..a5013b7b 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -1,5 +1,7 @@ -httplib2 -google-api-python-client==1.2 -python-gflags==2.0 -oauth2client==1.5.0 +google-api-python-client +google-auth +google-auth-httplib2 +google-auth-oauthlib PyCrypto +python-gflags==2.0 +mock diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip index 05c938ab..a1608720 100644 --- a/ci/requirements-3.5-0.18.1.pip +++ b/ci/requirements-3.5-0.18.1.pip @@ -1,3 +1,5 @@ -httplib2 google-api-python-client -oauth2client +google-auth +google-auth-httplib2 +google-auth-oauthlib +mock diff --git a/ci/requirements-3.6-0.20.1.pip b/ci/requirements-3.6-0.20.1.pip index 05c938ab..a1608720 100644 --- a/ci/requirements-3.6-0.20.1.pip +++ b/ci/requirements-3.6-0.20.1.pip @@ -1,3 +1,5 @@ -httplib2 google-api-python-client -oauth2client +google-auth +google-auth-httplib2 +google-auth-oauthlib +mock diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip index 05c938ab..a1608720 100644 --- a/ci/requirements-3.6-MASTER.pip +++ b/ci/requirements-3.6-MASTER.pip @@ -1,3 +1,5 @@ -httplib2 google-api-python-client -oauth2client +google-auth +google-auth-httplib2 +google-auth-oauthlib +mock diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 011a65a2..0b7bb437 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -4,10 +4,10 @@ Changelog 0.2.0 / 2017-??-?? ------------------ -- Resolve issue where the optional ``--noauth_local_webserver`` command line argument would not be propagated during the authentication process. (:issue:`35`) - Drop support for Python 3.4 (:issue:`40`) - The dataframe passed to ```.to_gbq(...., if_exists='append')``` needs to contain only a subset of the fields in the BigQuery schema. (:issue:`24`) - +- Use the `google-auth `__ library for authentication because oauth2client is deprecated. (:issue:`39`) +- ``read_gbq`` now has a ``auth_local_webserver`` boolean argument for controlling whether to use web server or console flow when getting user credentials. 0.1.6 / 2017-05-03 ------------------ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 0c34124c..5a7d7018 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -21,10 +21,9 @@ def _check_google_client_version(): except ImportError: raise ImportError('Could not import pkg_resources (setuptools).') - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - google_api_minimum_version = '1.2.0' + # Version 1.6.0 is the first version to support google-auth. + # https://github.com/google/google-api-python-client/blob/master/CHANGELOG + google_api_minimum_version = '1.6.0' _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( 'google-api-python-client').version @@ -42,21 +41,38 @@ def _test_google_api_imports(): try: import httplib2 # noqa - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - from oauth2client.client import AccessTokenRefreshError # noqa - from oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow, argparser # noqa + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa + import google.auth # noqa + from google_auth_oauthlib.flow import InstalledAppFlow # noqa + import google_auth_httplib2 # noqa except ImportError as e: raise ImportError("Missing module required for Google BigQuery " "support: {0}".format(str(e))) +def _try_credentials(project_id, credentials): + import httplib2 + from googleapiclient.discovery import build + import googleapiclient.errors + from google_auth_httplib2 import AuthorizedHttp + + if credentials is None: + return None + + http = httplib2.Http() + try: + http = AuthorizedHttp(credentials, http=http) + bigquery_service = build('bigquery', 'v2', http=http) + # Check if the application has rights to the BigQuery project + jobs = bigquery_service.jobs() + job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} + jobs.insert(projectId=project_id, body=job_data).execute() + return credentials + except googleapiclient.errors.Error: + return None + + class InvalidPrivateKeyFormat(ValueError): """ Raised when provided private key has invalid format. @@ -147,13 +163,13 @@ class GbqConnector(object): scope = 'https://www.googleapis.com/auth/bigquery' def __init__(self, project_id, reauth=False, verbose=False, - private_key=None, dialect='legacy'): - _check_google_client_version() - _test_google_api_imports() + private_key=None, auth_local_webserver=False, + dialect='legacy'): self.project_id = project_id self.reauth = reauth self.verbose = verbose self.private_key = private_key + self.auth_local_webserver = auth_local_webserver self.dialect = dialect self.credentials = self.get_credentials() self.service = self.get_service() @@ -188,78 +204,130 @@ def get_application_default_credentials(self): from the environment. Or, the retrieved credentials do not have access to the project (self.project_id) on BigQuery. """ - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build + import google.auth + from google.auth.exceptions import DefaultCredentialsError + try: - from oauth2client.client import GoogleCredentials - except ImportError: + credentials, _ = google.auth.default(scopes=[self.scope]) + except (DefaultCredentialsError, IOError): return None + return _try_credentials(self.project_id, credentials) + + def load_user_account_credentials(self): + """ + Loads user account credentials from a local file. + + Parameters + ---------- + None + + Returns + ------- + - GoogleCredentials, + If the credentials can loaded. The retrieved credentials should + also have access to the project (self.project_id) on BigQuery. + - OR None, + If credentials can not be loaded from a file. Or, the retrieved + credentials do not have access to the project (self.project_id) + on BigQuery. + """ + import httplib2 + from google_auth_httplib2 import Request + from google.oauth2.credentials import Credentials + try: - credentials = GoogleCredentials.get_application_default() - except: + with open('bigquery_credentials.dat') as credentials_file: + credentials_json = json.load(credentials_file) + except (IOError, ValueError): return None + credentials = Credentials( + token=credentials_json.get('access_token'), + refresh_token=credentials_json.get('refresh_token'), + id_token=credentials_json.get('id_token'), + token_uri=credentials_json.get('token_uri'), + client_id=credentials_json.get('client_id'), + client_secret=credentials_json.get('client_secret'), + scopes=credentials_json.get('scopes')) + + # Refresh the token before trying to use it. http = httplib2.Http() + request = Request(http) + credentials.refresh(request) + + return _try_credentials(self.project_id, credentials) + + def save_user_account_credentials(self, credentials): + """ + Saves user account credentials to a local file. + """ try: - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - # Check if the application has rights to the BigQuery project - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=self.project_id, body=job_data).execute() - return credentials - except: - return None + with open('bigquery_credentials.dat', 'w') as credentials_file: + credentials_json = { + 'refresh_token': credentials.refresh_token, + 'id_token': credentials.id_token, + 'token_uri': credentials.token_uri, + 'client_id': credentials.client_id, + 'client_secret': credentials.client_secret, + 'scopes': credentials.scopes, + } + json.dump(credentials_json, credentials_file) + except IOError: + self._print('Unable to save credentials.') def get_user_account_credentials(self): - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.file import Storage - from oauth2client.tools import run_flow, argparser - - flow = OAuth2WebServerFlow( - client_id=('495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd' - '.apps.googleusercontent.com'), - client_secret='kOc9wMptUtxkcIFbtZCcrEAc', - scope=self.scope, - redirect_uri='urn:ietf:wg:oauth:2.0:oob') + """Gets user account credentials. - storage = Storage('bigquery_credentials.dat') - credentials = storage.get() + This method authenticates using user credentials, either loading saved + credentials from a file or by going through the OAuth flow. - if credentials is None or credentials.invalid or self.reauth: - credentials = run_flow(flow, storage, argparser.parse_args()) + Parameters + ---------- + None - return credentials + Returns + ------- + GoogleCredentials : credentials + Credentials for the user with BigQuery access. + """ + from google_auth_oauthlib.flow import InstalledAppFlow + from oauthlib.oauth2.rfc6749.errors import OAuth2Error + + credentials = self.load_user_account_credentials() + + client_config = { + 'installed': { + 'client_id': ('495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd' + '.apps.googleusercontent.com'), + 'client_secret': 'kOc9wMptUtxkcIFbtZCcrEAc', + 'redirect_uris': ['urn:ietf:wg:oauth:2.0:oob'], + 'auth_uri': 'https://accounts.google.com/o/oauth2/auth', + 'token_uri': 'https://accounts.google.com/o/oauth2/token', + } + } - def get_service_account_credentials(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True + if credentials is None or self.reauth: + app_flow = InstalledAppFlow.from_client_config( + client_config, scopes=[self.scope]) - try: - from oauth2client.client import SignedJwtAssertionCredentials - except ImportError: - oauth2client_v1 = False + try: + if self.auth_local_webserver: + credentials = app_flow.run_local_server() + else: + credentials = app_flow.run_console() + except OAuth2Error as ex: + raise AccessDenied( + "Unable to get valid credentials: {0}".format(ex)) - try: - from oauth2client.service_account import ServiceAccountCredentials - except ImportError: - oauth2client_v2 = False + self.save_user_account_credentials(credentials) - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") + return credentials + def get_service_account_credentials(self): + import httplib2 + from google_auth_httplib2 import Request + from google.oauth2.service_account import Credentials from os.path import isfile try: @@ -277,16 +345,15 @@ def get_service_account_credentials(self): json_key['private_key'] = bytes( json_key['private_key'], 'UTF-8') - if oauth2client_v1: - return SignedJwtAssertionCredentials( - json_key['client_email'], - json_key['private_key'], - self.scope, - ) - else: - return ServiceAccountCredentials.from_json_keyfile_dict( - json_key, - self.scope) + credentials = Credentials.from_service_account_info(json_key) + credentials = credentials.with_scopes([self.scope]) + + # Refresh the token before trying to use it. + http = httplib2.Http() + request = Request(http) + credentials.refresh(request) + + return credentials except (KeyError, ValueError, TypeError, AttributeError): raise InvalidPrivateKeyFormat( "Private key is missing or invalid. It should be service " @@ -324,13 +391,12 @@ def sizeof_fmt(num, suffix='B'): def get_service(self): import httplib2 - try: - from googleapiclient.discovery import build - except: - from apiclient.discovery import build + from google_auth_httplib2 import AuthorizedHttp + from googleapiclient.discovery import build http = httplib2.Http() - http = self.credentials.authorize(http) + http = AuthorizedHttp( + self.credentials, http=http) bigquery_service = build('bigquery', 'v2', http=http) return bigquery_service @@ -380,9 +446,7 @@ def run_query(self, query, **kwargs): from googleapiclient.errors import HttpError except: from apiclient.errors import HttpError - from oauth2client.client import AccessTokenRefreshError - - _check_google_client_version() + from google.auth.exceptions import RefreshError job_collection = self.service.jobs() @@ -419,7 +483,7 @@ def run_query(self, query, **kwargs): query_reply = job_collection.insert( projectId=self.project_id, body=job_data).execute() self._print('ok.\nQuery running...') - except (AccessTokenRefreshError, ValueError): + except (RefreshError, ValueError): if self.private_key: raise AccessDenied( "The service account credentials are not valid") @@ -711,8 +775,8 @@ def _parse_entry(field_value, field_type): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy', - **kwargs): + reauth=False, verbose=True, private_key=None, + auth_local_webserver=False, dialect='legacy', **kwargs): r"""Load data from Google BigQuery. The main method a user calls to execute a Query in Google BigQuery @@ -756,6 +820,9 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) + auth_local_webserver : boolean (default False) + Use a local webserver when getting user credentials to handle + OAuth authorization flow redirects. dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. @@ -786,9 +853,9 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, if dialect not in ('legacy', 'standard'): raise ValueError("'{0}' is not valid for dialect".format(dialect)) - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect) + connector = GbqConnector( + project_id, reauth=reauth, verbose=verbose, private_key=private_key, + dialect=dialect, auth_local_webserver=auth_local_webserver) schema, pages = connector.run_query(query, **kwargs) dataframe_list = [] while len(pages) > 0: @@ -838,7 +905,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, def to_gbq(dataframe, destination_table, project_id, chunksize=10000, - verbose=True, reauth=False, if_exists='fail', private_key=None): + verbose=True, reauth=False, if_exists='fail', private_key=None, + auth_local_webserver=False): """Write a DataFrame to a Google BigQuery table. The main method a user calls to export pandas DataFrame contents to @@ -887,6 +955,9 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) + auth_local_webserver : boolean (default False) + Use a local webserver when getting user credentials to handle + OAuth authorization flow redirects. """ if if_exists not in ('fail', 'replace', 'append'): @@ -896,8 +967,9 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000, raise NotFoundException( "Invalid Table Name. Should be of the form 'datasetId.tableId' ") - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key) + connector = GbqConnector( + project_id, reauth=reauth, verbose=verbose, private_key=private_key, + auth_local_webserver=auth_local_webserver) dataset_id, table_id = destination_table.rsplit('.', 1) table = _Table(project_id, dataset_id, reauth=reauth, @@ -1127,6 +1199,9 @@ def datasets(self): pageToken=next_page_token).execute() dataset_response = list_dataset_response.get('datasets') + if dataset_response is None: + dataset_response = [] + next_page_token = list_dataset_response.get('nextPageToken') if dataset_response is None: diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 069bc7ee..ba2923b9 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -27,6 +27,9 @@ _GOOGLE_API_CLIENT_INSTALLED = False _GOOGLE_API_CLIENT_VALID_VERSION = False _HTTPLIB2_INSTALLED = False +_GOOGLE_AUTH_INSTALLED = False +_GOOGLE_AUTH_HTTPLIB2_INSTALLED = False +_GOOGLE_AUTH_OAUTHLIB_INSTALLED = False _SETUPTOOLS_INSTALLED = False @@ -85,7 +88,9 @@ def _get_private_key_contents(): def _test_imports(): global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED + _GOOGLE_AUTH_INSTALLED, _GOOGLE_AUTH_HTTPLIB2_INSTALLED, \ + _GOOGLE_AUTH_OAUTHLIB_INSTALLED, _HTTPLIB2_INSTALLED, \ + _SETUPTOOLS_INSTALLED try: import pkg_resources @@ -93,25 +98,13 @@ def _test_imports(): except ImportError: _SETUPTOOLS_INSTALLED = False - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - google_api_minimum_version = '1.2.0' + google_api_minimum_version = '1.6.0' if _SETUPTOOLS_INSTALLED: try: - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - - from oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.client import AccessTokenRefreshError # noqa + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow # noqa _GOOGLE_API_CLIENT_INSTALLED = True _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( 'google-api-python-client').version @@ -123,6 +116,29 @@ def _test_imports(): except ImportError: _GOOGLE_API_CLIENT_INSTALLED = False + try: + from google.auth import default # noqa + from google.auth.exceptions import DefaultCredentialsError # noqa + from google.oauth2.credentials import Credentials # noqa + from google.oauth2.service_account import Credentials # noqa + _GOOGLE_AUTH_INSTALLED = True + except ImportError: + _GOOGLE_AUTH_INSTALLED = False + + try: + from google_auth_httplib2 import AuthorizedHttp # noqa + from google_auth_httplib2 import Request # noqa + _GOOGLE_AUTH_HTTPLIB2_INSTALLED = True + except ImportError: + _GOOGLE_AUTH_HTTPLIB2_INSTALLED = False + + try: + from google_auth_oauthlib.flow import InstalledAppFlow # noqa + from oauthlib.oauth2.rfc6749.errors import OAuth2Error # noqa + _GOOGLE_AUTH_OAUTHLIB_INSTALLED = True + except ImportError: + _GOOGLE_AUTH_OAUTHLIB_INSTALLED = False + try: import httplib2 # noqa _HTTPLIB2_INSTALLED = True @@ -136,46 +152,34 @@ def _test_imports(): raise ImportError('Could not import Google API Client.') if not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" + raise ImportError('pandas requires google-api-python-client >= {0} ' + 'for Google BigQuery support, ' + 'current version {1}' .format(google_api_minimum_version, _GOOGLE_API_CLIENT_VERSION)) - if not _HTTPLIB2_INSTALLED: + if not _GOOGLE_AUTH_INSTALLED: raise ImportError( - "pandas requires httplib2 for Google BigQuery support") - - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True + 'pandas requires google-auth for Google BigQuery support') - try: - from oauth2client.client import SignedJwtAssertionCredentials # noqa - except ImportError: - oauth2client_v1 = False + if not _GOOGLE_AUTH_HTTPLIB2_INSTALLED: + raise ImportError( + 'pandas requires google-auth-httplib2 for Google BigQuery support') - try: - from oauth2client.service_account import ServiceAccountCredentials # noqa - except ImportError: - oauth2client_v2 = False + if not _GOOGLE_AUTH_OAUTHLIB_INSTALLED: + raise ImportError( + 'pandas requires google-auth-oauthlib for Google BigQuery support') - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") + if not _HTTPLIB2_INSTALLED: + raise ImportError( + 'pandas requires httplib2 for Google BigQuery support') def _setup_common(): try: _test_imports() except (ImportError, NotImplementedError) as import_exception: - pytest.skip(import_exception) + pytest.skip(str(import_exception)) if _in_travis_environment(): logging.getLogger('oauth2client').setLevel(logging.ERROR) @@ -185,26 +189,18 @@ def _setup_common(): def _check_if_can_get_correct_default_credentials(): # Checks if "Application Default Credentials" can be fetched # from the environment the tests are running in. - # See Issue #13577 + # See https://github.com/pandas-dev/pandas/issues/13577 + + import google.auth + from google.auth.exceptions import DefaultCredentialsError - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build try: - from oauth2client.client import GoogleCredentials - credentials = GoogleCredentials.get_application_default() - http = httplib2.Http() - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=_get_project_id(), body=job_data).execute() - return True - except: + credentials, _ = google.auth.default(scopes=[gbq.GbqConnector.scope]) + except (DefaultCredentialsError, IOError): return False + return gbq._try_credentials(_get_project_id(), credentials) is not None + def clean_gbq_environment(dataset_prefix, private_key=None): dataset = gbq._Dataset(_get_project_id(), private_key=private_key) @@ -219,17 +215,31 @@ def clean_gbq_environment(dataset_prefix, private_key=None): if dataset_id in all_datasets: table = gbq._Table(_get_project_id(), dataset_id, private_key=private_key) + + # Table listing is eventually consistent, so loop until + # all tables no longer appear (max 30 seconds). + table_retry = 30 all_tables = dataset.tables(dataset_id) - for table_id in all_tables: - table.delete(table_id) + while all_tables and table_retry > 0: + for table_id in all_tables: + try: + table.delete(table_id) + except gbq.NotFoundException as e: + pass + sleep(1) + table_retry = table_retry - 1 + all_tables = dataset.tables(dataset_id) dataset.delete(dataset_id) retry = 0 except gbq.GenericGBQException as ex: - # Build in retry logic to work around the following error : + # Build in retry logic to work around the following errors : # An internal error occurred and the request could not be... - if 'An internal error occurred' in ex.message and retry > 0: - pass + # Dataset ... is still in use + error_message = str(ex).lower() + if ('an internal error occurred' in error_message or + 'still in use' in error_message) and retry > 0: + sleep(30) else: raise ex @@ -264,14 +274,15 @@ def setup_method(self, method): _skip_if_no_project_id() _skip_local_auth_if_in_travis_env() - self.sut = gbq.GbqConnector(_get_project_id()) + self.sut = gbq.GbqConnector( + _get_project_id(), auth_local_webserver=True) def test_should_be_able_to_make_a_connector(self): assert self.sut is not None, 'Could not create a GbqConnector' def test_should_be_able_to_get_valid_credentials(self): credentials = self.sut.get_credentials() - assert credentials.invalid != 'Returned credentials invalid' + assert credentials.valid def test_should_be_able_to_get_a_bigquery_service(self): bigquery_service = self.sut.get_service() @@ -287,18 +298,35 @@ def test_should_be_able_to_get_results_from_query(self): def test_get_application_default_credentials_does_not_throw_error(self): if _check_if_can_get_correct_default_credentials(): - pytest.skip("Can get default_credentials " - "from the environment!") - credentials = self.sut.get_application_default_credentials() + # Can get real credentials, so mock it out to fail. + import mock + from google.auth.exceptions import DefaultCredentialsError + with mock.patch('google.auth.default', + side_effect=DefaultCredentialsError()): + credentials = self.sut.get_application_default_credentials() + else: + credentials = self.sut.get_application_default_credentials() assert credentials is None def test_get_application_default_credentials_returns_credentials(self): if not _check_if_can_get_correct_default_credentials(): pytest.skip("Cannot get default_credentials " "from the environment!") - from oauth2client.client import GoogleCredentials + from google.auth.credentials import Credentials credentials = self.sut.get_application_default_credentials() - assert isinstance(credentials, GoogleCredentials) + assert isinstance(credentials, Credentials) + + def test_get_user_account_credentials_bad_file_returns_credentials(self): + import mock + from google.auth.credentials import Credentials + with mock.patch('__main__.open', side_effect=IOError()): + credentials = self.sut.get_user_account_credentials() + assert isinstance(credentials, Credentials) + + def test_get_user_account_credentials_returns_credentials(self): + from google.auth.credentials import Credentials + credentials = self.sut.get_user_account_credentials() + assert isinstance(credentials, Credentials) class TestGBQConnectorIntegrationWithServiceAccountKeyPath(object): @@ -317,7 +345,7 @@ def test_should_be_able_to_make_a_connector(self): def test_should_be_able_to_get_valid_credentials(self): credentials = self.sut.get_credentials() - assert not credentials.invalid + assert credentials.valid def test_should_be_able_to_get_a_bigquery_service(self): bigquery_service = self.sut.get_service() @@ -348,7 +376,7 @@ def test_should_be_able_to_make_a_connector(self): def test_should_be_able_to_get_valid_credentials(self): credentials = self.sut.get_credentials() - assert not credentials.invalid + assert credentials.valid def test_should_be_able_to_get_a_bigquery_service(self): bigquery_service = self.sut.get_service() diff --git a/requirements.txt b/requirements.txt index 11bb6018..c72b5a5a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ pandas httplib2 google-api-python-client -oauth2client +google-auth +google-auth-httplib2 +google-auth-oauthlib diff --git a/setup.py b/setup.py index a3b8f06f..818a1572 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,14 @@ def readme(): return f.read() -INSTALL_REQUIRES = ( - ['pandas', 'httplib2', 'google-api-python-client', 'oauth2client'] -) +INSTALL_REQUIRES = [ + 'pandas', + 'httplib2>=0.9.2', + 'google-api-python-client>=1.6.0', + 'google-auth>=1.0.0', + 'google-auth-httplib2>=0.0.2', + 'google-auth-oauthlib>=0.1.0', +] setup( From b20934ce709e388a50955ef88167f65dd81812c9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 May 2017 12:44:14 -0700 Subject: [PATCH 2/6] MAINT: pandas.util.testing.assertRaises removed This method was removed in https://github.com/pandas-dev/pandas/pull/16089 in favor of pytest.raises. --- pandas_gbq/tests/test_gbq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index ba2923b9..c595b564 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -9,6 +9,7 @@ import logging import numpy as np +import pytest from distutils.version import StrictVersion from pandas import compat From 7aed58959698fb93fe3dce124a6f7fd089c077ab Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 May 2017 12:51:53 -0700 Subject: [PATCH 3/6] MAINT: pandas.util.testing.assert_equals removed This method was removed in https://github.com/pandas-dev/pandas/pull/16017 in favor of pytest.raises. --- pandas_gbq/tests/test_gbq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index c595b564..ba2923b9 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -9,7 +9,6 @@ import logging import numpy as np -import pytest from distutils.version import StrictVersion from pandas import compat From da092d7de444b447c80341bc256ffc01a80cf827 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 May 2017 14:49:09 -0700 Subject: [PATCH 4/6] DOC: add version tags for new auth_local_webserver params. --- ci/requirements-2.7-0.19.2.pip | 8 ++--- docs/source/changelog.rst | 2 +- pandas_gbq/gbq.py | 56 +++++++++++++++++++++++++++------- setup.py | 4 +-- 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/ci/requirements-2.7-0.19.2.pip b/ci/requirements-2.7-0.19.2.pip index a5013b7b..2a098ca2 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -1,7 +1,7 @@ -google-api-python-client -google-auth -google-auth-httplib2 -google-auth-oauthlib +google-api-python-client==1.6.0 +google-auth==1.0.0 +google-auth-httplib2==0.0.1 +google-auth-oauthlib==0.0.1 PyCrypto python-gflags==2.0 mock diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 0b7bb437..05981843 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -7,7 +7,7 @@ Changelog - Drop support for Python 3.4 (:issue:`40`) - The dataframe passed to ```.to_gbq(...., if_exists='append')``` needs to contain only a subset of the fields in the BigQuery schema. (:issue:`24`) - Use the `google-auth `__ library for authentication because oauth2client is deprecated. (:issue:`39`) -- ``read_gbq`` now has a ``auth_local_webserver`` boolean argument for controlling whether to use web server or console flow when getting user credentials. +- ``read_gbq`` now has a ``auth_local_webserver`` boolean argument for controlling whether to use web server or console flow when getting user credentials. Replaces `--noauth_local_webserver` command line argument (:issue:`35`) 0.1.6 / 2017-05-03 ------------------ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 5a7d7018..6156533c 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -41,14 +41,28 @@ def _test_google_api_imports(): try: import httplib2 # noqa - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - import google.auth # noqa from google_auth_oauthlib.flow import InstalledAppFlow # noqa import google_auth_httplib2 # noqa - except ImportError as e: + except ImportError as ex: raise ImportError("Missing module required for Google BigQuery " - "support: {0}".format(str(e))) + "support: {0}".format(str(ex))) + + try: + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa + except ImportError as ex: + raise ImportError( + "pandas requires google-api-python-client for Google BigQuery " + "support: {0}".format(str(ex))) + + try: + import google.auth # noqa + except ImportError as ex: + raise ImportError( + "pandas requires google-auth for Google BigQuery support: " + "{0}".format(str(ex))) + + _check_google_client_version() def _try_credentials(project_id, credentials): @@ -218,6 +232,8 @@ def load_user_account_credentials(self): """ Loads user account credentials from a local file. + .. versionadded 0.2.0 + Parameters ---------- None @@ -261,6 +277,8 @@ def load_user_account_credentials(self): def save_user_account_credentials(self, credentials): """ Saves user account credentials to a local file. + + .. versionadded 0.2.0 """ try: with open('bigquery_credentials.dat', 'w') as credentials_file: @@ -820,9 +838,15 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) - auth_local_webserver : boolean (default False) - Use a local webserver when getting user credentials to handle - OAuth authorization flow redirects. + auth_local_webserver : boolean, default False + Use the [local webserver flow] instead of the [console flow] when + getting user credentials. + + .. [local webserver flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. [console flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + .. versionadded:: 0.2.0 dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. @@ -847,6 +871,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, """ + _test_google_api_imports() + if not project_id: raise TypeError("Missing required parameter: project_id") @@ -955,11 +981,19 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) - auth_local_webserver : boolean (default False) - Use a local webserver when getting user credentials to handle - OAuth authorization flow redirects. + auth_local_webserver : boolean, default False + Use the [local webserver flow] instead of the [console flow] when + getting user credentials. + + .. [local webserver flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. [console flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + .. versionadded:: 0.2.0 """ + _test_google_api_imports() + if if_exists not in ('fail', 'replace', 'append'): raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) diff --git a/setup.py b/setup.py index 818a1572..df3cd85d 100644 --- a/setup.py +++ b/setup.py @@ -22,8 +22,8 @@ def readme(): 'httplib2>=0.9.2', 'google-api-python-client>=1.6.0', 'google-auth>=1.0.0', - 'google-auth-httplib2>=0.0.2', - 'google-auth-oauthlib>=0.1.0', + 'google-auth-httplib2>=0.0.1', + 'google-auth-oauthlib>=0.0.1', ] From f9ec6c74683471ca26b718bd6a2cd0f50b2eceaf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 May 2017 10:09:49 -0700 Subject: [PATCH 5/6] CLN: share _test_imports between main module and tests --- pandas_gbq/gbq.py | 42 ++++++++++------ pandas_gbq/tests/test_gbq.py | 97 ++---------------------------------- 2 files changed, 31 insertions(+), 108 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6156533c..b9bb9498 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -30,9 +30,9 @@ def _check_google_client_version(): if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) < StrictVersion(google_api_minimum_version)): - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" + raise ImportError('pandas requires google-api-python-client >= {0} ' + 'for Google BigQuery support, ' + 'current version {1}' .format(google_api_minimum_version, _GOOGLE_API_CLIENT_VERSION)) @@ -41,26 +41,40 @@ def _test_google_api_imports(): try: import httplib2 # noqa + except ImportError as ex: + raise ImportError( + 'pandas requires httplib2 for Google BigQuery support: ' + '{0}'.format(ex)) + + try: from google_auth_oauthlib.flow import InstalledAppFlow # noqa - import google_auth_httplib2 # noqa except ImportError as ex: - raise ImportError("Missing module required for Google BigQuery " - "support: {0}".format(str(ex))) + raise ImportError( + 'pandas requires google-auth-oauthlib for Google BigQuery ' + 'support: {0}'.format(ex)) + + try: + from google_auth_httplib2 import AuthorizedHttp # noqa + from google_auth_httplib2 import Request # noqa + except ImportError as ex: + raise ImportError( + 'pandas requires google-auth-httplib2 for Google BigQuery ' + 'support: {0}'.format(ex)) try: from googleapiclient.discovery import build # noqa from googleapiclient.errors import HttpError # noqa except ImportError as ex: raise ImportError( - "pandas requires google-api-python-client for Google BigQuery " - "support: {0}".format(str(ex))) + "pandas requires google-api-python-client for Google BigQuery " + "support: {0}".format(ex)) try: import google.auth # noqa except ImportError as ex: raise ImportError( - "pandas requires google-auth for Google BigQuery support: " - "{0}".format(str(ex))) + "pandas requires google-auth for Google BigQuery support: " + "{0}".format(ex)) _check_google_client_version() @@ -76,8 +90,8 @@ def _try_credentials(project_id, credentials): http = httplib2.Http() try: - http = AuthorizedHttp(credentials, http=http) - bigquery_service = build('bigquery', 'v2', http=http) + authed_http = AuthorizedHttp(credentials, http=http) + bigquery_service = build('bigquery', 'v2', http=authed_http) # Check if the application has rights to the BigQuery project jobs = bigquery_service.jobs() job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} @@ -413,9 +427,9 @@ def get_service(self): from googleapiclient.discovery import build http = httplib2.Http() - http = AuthorizedHttp( + authed_http = AuthorizedHttp( self.credentials, http=http) - bigquery_service = build('bigquery', 'v2', http=http) + bigquery_service = build('bigquery', 'v2', http=authed_http) return bigquery_service diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index ba2923b9..e8eda1d3 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -10,7 +10,6 @@ import numpy as np -from distutils.version import StrictVersion from pandas import compat from pandas.compat import u, range @@ -23,16 +22,6 @@ TABLE_ID = 'new_test' -_IMPORTS = False -_GOOGLE_API_CLIENT_INSTALLED = False -_GOOGLE_API_CLIENT_VALID_VERSION = False -_HTTPLIB2_INSTALLED = False -_GOOGLE_AUTH_INSTALLED = False -_GOOGLE_AUTH_HTTPLIB2_INSTALLED = False -_GOOGLE_AUTH_OAUTHLIB_INSTALLED = False -_SETUPTOOLS_INSTALLED = False - - def _skip_if_no_project_id(): if not _get_project_id(): pytest.skip( @@ -87,92 +76,12 @@ def _get_private_key_contents(): def _test_imports(): - global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _GOOGLE_AUTH_INSTALLED, _GOOGLE_AUTH_HTTPLIB2_INSTALLED, \ - _GOOGLE_AUTH_OAUTHLIB_INSTALLED, _HTTPLIB2_INSTALLED, \ - _SETUPTOOLS_INSTALLED - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True + import pkg_resources # noqa except ImportError: - _SETUPTOOLS_INSTALLED = False - - google_api_minimum_version = '1.6.0' - - if _SETUPTOOLS_INSTALLED: - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - - _GOOGLE_API_CLIENT_INSTALLED = True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-api-python-client').version - - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) >= - StrictVersion(google_api_minimum_version)): - _GOOGLE_API_CLIENT_VALID_VERSION = True - - except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False - - try: - from google.auth import default # noqa - from google.auth.exceptions import DefaultCredentialsError # noqa - from google.oauth2.credentials import Credentials # noqa - from google.oauth2.service_account import Credentials # noqa - _GOOGLE_AUTH_INSTALLED = True - except ImportError: - _GOOGLE_AUTH_INSTALLED = False - - try: - from google_auth_httplib2 import AuthorizedHttp # noqa - from google_auth_httplib2 import Request # noqa - _GOOGLE_AUTH_HTTPLIB2_INSTALLED = True - except ImportError: - _GOOGLE_AUTH_HTTPLIB2_INSTALLED = False - - try: - from google_auth_oauthlib.flow import InstalledAppFlow # noqa - from oauthlib.oauth2.rfc6749.errors import OAuth2Error # noqa - _GOOGLE_AUTH_OAUTHLIB_INSTALLED = True - except ImportError: - _GOOGLE_AUTH_OAUTHLIB_INSTALLED = False - - try: - import httplib2 # noqa - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False - - if not _SETUPTOOLS_INSTALLED: raise ImportError('Could not import pkg_resources (setuptools).') - if not _GOOGLE_API_CLIENT_INSTALLED: - raise ImportError('Could not import Google API Client.') - - if not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError('pandas requires google-api-python-client >= {0} ' - 'for Google BigQuery support, ' - 'current version {1}' - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) - - if not _GOOGLE_AUTH_INSTALLED: - raise ImportError( - 'pandas requires google-auth for Google BigQuery support') - - if not _GOOGLE_AUTH_HTTPLIB2_INSTALLED: - raise ImportError( - 'pandas requires google-auth-httplib2 for Google BigQuery support') - - if not _GOOGLE_AUTH_OAUTHLIB_INSTALLED: - raise ImportError( - 'pandas requires google-auth-oauthlib for Google BigQuery support') - - if not _HTTPLIB2_INSTALLED: - raise ImportError( - 'pandas requires httplib2 for Google BigQuery support') + gbq._test_google_api_imports() def _setup_common(): @@ -224,7 +133,7 @@ def clean_gbq_environment(dataset_prefix, private_key=None): for table_id in all_tables: try: table.delete(table_id) - except gbq.NotFoundException as e: + except gbq.NotFoundException: pass sleep(1) table_retry = table_retry - 1 From d554d67db0976558ee1001a21bf2c6ca4eb48036 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Jun 2017 09:19:09 -0700 Subject: [PATCH 6/6] TST: pin versions on 3.5 rather than 2.7. --- ci/requirements-2.7-0.19.2.pip | 10 +++++----- ci/requirements-3.5-0.18.1.pip | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/requirements-2.7-0.19.2.pip b/ci/requirements-2.7-0.19.2.pip index 2a098ca2..852dc153 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -1,7 +1,7 @@ -google-api-python-client==1.6.0 -google-auth==1.0.0 -google-auth-httplib2==0.0.1 -google-auth-oauthlib==0.0.1 +google-api-python-client +google-auth +google-auth-httplib2 +google-auth-oauthlib PyCrypto -python-gflags==2.0 +python-gflags mock diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip index a1608720..6fb8a03d 100644 --- a/ci/requirements-3.5-0.18.1.pip +++ b/ci/requirements-3.5-0.18.1.pip @@ -1,5 +1,5 @@ -google-api-python-client -google-auth -google-auth-httplib2 -google-auth-oauthlib +google-api-python-client==1.6.0 +google-auth==1.0.0 +google-auth-httplib2==0.0.1 +google-auth-oauthlib==0.0.1 mock