From 42ccab294c223e24cbcd63e9edcade650c356189 Mon Sep 17 00:00:00 2001 From: bo Date: Fri, 22 Feb 2019 09:19:19 +0100 Subject: [PATCH 1/9] resolve divide by 0 error when uploading empty dataframe --- pandas_gbq/gbq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 948fd980..493b4937 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -519,8 +519,9 @@ def load_data( chunks = tqdm.tqdm(chunks) for remaining_rows in chunks: logger.info( - "\rLoad is {0}% Complete".format( - ((total_rows - remaining_rows) * 100) / total_rows + "\r{} out of {} rows loaded.".format( + total_rows - remaining_rows, + total_rows ) ) except self.http_error as ex: From 3864d46dc46a19d49ec295cee3c298a77c81078b Mon Sep 17 00:00:00 2001 From: bo Date: Fri, 22 Feb 2019 09:40:38 +0100 Subject: [PATCH 2/9] reformat with black --- pandas_gbq/gbq.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 493b4937..2abfca9a 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -520,8 +520,7 @@ def load_data( for remaining_rows in chunks: logger.info( "\r{} out of {} rows loaded.".format( - total_rows - remaining_rows, - total_rows + total_rows - remaining_rows, total_rows ) ) except self.http_error as ex: From 272aa7bebef5dc99869e7c44adf8011258c8d7c9 Mon Sep 17 00:00:00 2001 From: bo Date: Fri, 22 Feb 2019 16:31:29 +0100 Subject: [PATCH 3/9] add unit test when uploading empty dataframe --- tests/unit/test_gbq.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 4f1d18ad..9b69c10f 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -246,6 +246,28 @@ def test_to_gbq_doesnt_run_query( mock_bigquery_client.query.assert_not_called() +def test_to_gbq_uploading_empty_dataframe( + recwarn, min_bq_version, monkeypatch +): + import pkg_resources + + pandas_version = 
pkg_resources.parse_version("0.23.0") + with pytest.warns(FutureWarning), mock.patch( + "pkg_resources.Distribution.parsed_version", + new_callable=mock.PropertyMock, + ) as mock_version: + mock_version.side_effect = [min_bq_version, pandas_version] + try: + gbq.to_gbq( + DataFrame(), + "dataset.tablename", + project_id="my-project", + verbose=True, + ) + except gbq.TableCreationError: + pass + + def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): import pydata_google_auth From 8bfde502c919052bd4f4a7a7a4d6cdae2578965d Mon Sep 17 00:00:00 2001 From: wb <519369865@qq.com> Date: Sat, 23 Feb 2019 11:44:09 +0100 Subject: [PATCH 4/9] add empty data upload system test --- tests/system/test_gbq.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index dde34cb1..ff59878b 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -894,6 +894,28 @@ def test_upload_data(self, project_id): ) assert result["num_rows"][0] == test_size + def test_upload_empty_data(self, project_id): + test_id = "data_with_0_rows" + test_size = 0 + df = pd.DataFrame() + + gbq.to_gbq( + df, + self.destination_table + test_id, + project_id, + credentials=self.credentials, + ) + + result = gbq.read_gbq( + "SELECT COUNT(*) AS num_rows FROM {0}".format( + self.destination_table + test_id + ), + project_id=project_id, + credentials=self.credentials, + dialect="legacy", + ) + assert result["num_rows"][0] == test_size + def test_upload_data_if_table_exists_fail(self, project_id): test_id = "2" test_size = 10 From 59a79509f45121282c1108c06e4649041f820819 Mon Sep 17 00:00:00 2001 From: wb <519369865@qq.com> Date: Sat, 23 Feb 2019 11:45:00 +0100 Subject: [PATCH 5/9] remove empty df unit test --- tests/unit/test_gbq.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 9b69c10f..4f1d18ad 100644 --- a/tests/unit/test_gbq.py +++ 
b/tests/unit/test_gbq.py @@ -246,28 +246,6 @@ def test_to_gbq_doesnt_run_query( mock_bigquery_client.query.assert_not_called() -def test_to_gbq_uploading_empty_dataframe( - recwarn, min_bq_version, monkeypatch -): - import pkg_resources - - pandas_version = pkg_resources.parse_version("0.23.0") - with pytest.warns(FutureWarning), mock.patch( - "pkg_resources.Distribution.parsed_version", - new_callable=mock.PropertyMock, - ) as mock_version: - mock_version.side_effect = [min_bq_version, pandas_version] - try: - gbq.to_gbq( - DataFrame(), - "dataset.tablename", - project_id="my-project", - verbose=True, - ) - except gbq.TableCreationError: - pass - - def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): import pydata_google_auth From 87ae1259932c745c1c3dda3d45afb197c903dbb1 Mon Sep 17 00:00:00 2001 From: wb <519369865@qq.com> Date: Sat, 23 Feb 2019 11:48:04 +0100 Subject: [PATCH 6/9] update empty df --- tests/system/test_gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index ff59878b..9dad3965 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -897,7 +897,7 @@ def test_upload_data(self, project_id): def test_upload_empty_data(self, project_id): test_id = "data_with_0_rows" test_size = 0 - df = pd.DataFrame() + df = DataFrame() gbq.to_gbq( df, From 02c897a4fab65de13ca01196daf8828fc408dc7f Mon Sep 17 00:00:00 2001 From: bo Date: Mon, 25 Feb 2019 16:28:32 +0100 Subject: [PATCH 7/9] add 0.10.0 release note --- docs/source/changelog.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 6f3aa5cd..102a6f5b 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,6 +1,13 @@ Changelog ========= +.. _changelog-0.10.0: + +0.10.0 / 2019-02-25 +------------------ + +- This fixes a bug where pandas-gbq could not upload an empty database. (:issue:`237`) + .. 
_changelog-0.9.0: 0.9.0 / 2019-01-11 From 1fe7c13c265048071b344142ebe2ca170f24f3c3 Mon Sep 17 00:00:00 2001 From: bo Date: Mon, 25 Feb 2019 16:37:59 +0100 Subject: [PATCH 8/9] update release note version number to 0.11.0 --- docs/source/changelog.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index b77b7fb1..1eebd040 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,14 +1,16 @@ Changelog ========= -.. _changelog-0.10.0: +.. _changelog-0.11.0: -0.10.0 / 2019-02-25 +0.11.0 / 2019-02-25 ------------------ - This fixes a bug where pandas-gbq could not upload an empty database. (:issue:`237`) +.. _changelog-0.10.0: + 0.10.0 / TBD ------------ From d62c36e15de64b3c6e69485dbd3937b7dbfd8cd4 Mon Sep 17 00:00:00 2001 From: bo Date: Tue, 26 Feb 2019 16:03:39 +0100 Subject: [PATCH 9/9] update empty dataframe bug fix in change log --- docs/source/changelog.rst | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 1eebd040..3b43ccd3 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,19 +1,13 @@ Changelog ========= -.. _changelog-0.11.0: - -0.11.0 / 2019-02-25 ------------------- - -- This fixes a bug where pandas-gbq could not upload an empty database. (:issue:`237`) - - .. _changelog-0.10.0: 0.10.0 / TBD ------------ +- This fixes a bug where pandas-gbq could not upload an empty DataFrame. (:issue:`237`) + Dependency updates ~~~~~~~~~~~~~~~~~~ @@ -243,4 +237,4 @@ Initial release of transfered code from `pandas `__ -- :func:`read_gbq` now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53.
Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss `pandas-GH#14064 `__, and `pandas-GH#14305 `__ +- :func:`read_gbq` now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer cast to ``int64`` which also caused precision loss `pandas-GH#14064 `__, and `pandas-GH#14305 `__ \ No newline at end of file