From 4f8ee07ed46c1c1781e73573deeb43704e04eda0 Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Sun, 30 Jun 2024 16:11:54 -0400 Subject: [PATCH 1/7] move py.typed to correct places https://peps.python.org/pep-0561/ says 'For namespace packages (see PEP 420), the py.typed file should be in the submodules of the namespace, to avoid conflicts and for clarity.'. Previously, when I added the py.typed file to this project, https://github.com/databricks/databricks-sql-python/pull/382 , I was unaware this was a namespace package (although, curiously, it seems I had done it right initially and then changed to the wrong way). As PEP 561 warns us, this does create conflicts; other libraries in the databricks namespace package (such as, in my case, databricks-vectorsearch) are then treated as though they are typed, which they are not. This commit moves the py.typed file to the correct places, the submodule folders, fixing that problem. Signed-off-by: wyattscarpenter --- src/databricks/{ => sql}/py.typed | 0 src/databricks/sqlalchemy/py.typed | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/databricks/{ => sql}/py.typed (100%) create mode 100755 src/databricks/sqlalchemy/py.typed diff --git a/src/databricks/py.typed b/src/databricks/sql/py.typed similarity index 100% rename from src/databricks/py.typed rename to src/databricks/sql/py.typed diff --git a/src/databricks/sqlalchemy/py.typed b/src/databricks/sqlalchemy/py.typed new file mode 100755 index 00000000..e69de29b From 0d1c076675bb67b05c0edf96f28741006ac5e0a1 Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Sun, 30 Jun 2024 17:29:54 -0400 Subject: [PATCH 2/7] change target of mypy to src/databricks instead of src. I think this might fix the CI code-quality checks failure, but unfortunately I can't replicate that failure locally and the error message is unhelpful Signed-off-by: wyattscarpenter --- .github/workflows/code-quality-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index 03c3991d..1df9cd59 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -160,4 +160,4 @@ jobs: # mypy the code #---------------------------------------------- - name: Mypy - run: poetry run mypy --install-types --non-interactive src + run: poetry run mypy --install-types --non-interactive src/databricks From d148d64c72253f71718820dbe9340ffda12a5eab Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Sun, 30 Jun 2024 17:31:26 -0400 Subject: [PATCH 3/7] Possible workaround for bad error message 'error: --install-types failed (no mypy cache directory)'; see https://github.com/python/mypy/issues/10768#issuecomment-2178450153 Signed-off-by: wyattscarpenter --- .github/workflows/code-quality-checks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index 1df9cd59..c97e4f6b 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -160,4 +160,5 @@ jobs: # mypy the code #---------------------------------------------- - name: Mypy + run: mkdir .mypy_cache # Workaround for bad error message "error: --install-types failed (no mypy cache directory)"; see https://github.com/python/mypy/issues/10768#issuecomment-2178450153 run: poetry run mypy --install-types --non-interactive src/databricks From f1ee456f4456bbf970774b01f4100779a189562f Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Sun, 30 Jun 2024 17:45:50 -0400 Subject: [PATCH 4/7] fix invalid yaml syntax Signed-off-by: wyattscarpenter --- .github/workflows/code-quality-checks.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index c97e4f6b..6ecb49bd 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -160,5 +160,6 @@ jobs: # mypy the code #---------------------------------------------- - name: Mypy - run: mkdir .mypy_cache # Workaround for bad error message "error: --install-types failed (no mypy cache directory)"; see https://github.com/python/mypy/issues/10768#issuecomment-2178450153 - run: poetry run mypy --install-types --non-interactive src/databricks + run: | + mkdir .mypy_cache # Workaround for bad error message "error: --install-types failed (no mypy cache directory)"; see https://github.com/python/mypy/issues/10768#issuecomment-2178450153 + poetry run mypy --install-types --non-interactive src/databricks From ae1942f8da88a12eeb74ec23dbd9acacc1009cb5 Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Mon, 1 Jul 2024 03:57:28 -0400 Subject: [PATCH 5/7] Best fix (#3) Fixes the problem by cding and supplying a flag to mypy (that mypy needs this flag is seemingly fixed/changed in later versions of mypy; but that's another pr altogether...). Also fixes a type error that was somehow in the arguments of the program (?!) (I guess this is because you guys are still using implicit optional) --------- Signed-off-by: wyattscarpenter --- .github/workflows/code-quality-checks.yml | 3 ++- src/databricks/sql/utils.py | 13 ++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) mode change 100644 => 100755 src/databricks/sql/utils.py diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index 6ecb49bd..2dd55b77 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -161,5 +161,6 @@ jobs: #---------------------------------------------- - name: Mypy run: | + cd src # Need to be in the actual databricks/ folder or mypy does the wrong thing. mkdir .mypy_cache # Workaround for bad error message "error: --install-types failed (no mypy cache directory)"; see https://github.com/python/mypy/issues/10768#issuecomment-2178450153 - poetry run mypy --install-types --non-interactive src/databricks + poetry run mypy --config-file ../pyproject.toml --install-types --non-interactive --namespace-packages databricks diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py old mode 100644 new mode 100755 index 9f21a8eb..3af473b1 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -134,7 +134,7 @@ def __init__( schema_bytes, max_download_threads: int, start_row_offset: int = 0, - result_links: List[TSparkArrowResultLink] = None, + result_links: List[TSparkArrowResultLink] = [], lz4_compressed: bool = True, description: List[List[Any]] = None, ): @@ -161,13 +161,12 @@ def __init__( start_row_offset ) ) - if result_links is not None: - for result_link in result_links: - logger.debug( - "- start row offset: {}, row count: {}".format( - result_link.startRowOffset, result_link.rowCount - ) + for result_link in result_links: + logger.debug( + "- start row offset: {}, row count: {}".format( + result_link.startRowOffset, result_link.rowCount ) + ) self.download_manager = ResultFileDownloadManager( self.max_download_threads, self.lz4_compressed From cb812721c7d278633769524d3325b3f43b2e37e2 Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Mon, 1 Jul 2024 10:42:50 -0400 Subject: [PATCH 6/7] return the old result_links default (#5) Return the old result_links default, make the type optional, & I'm pretty sure the original problem is that add_file_links can't take a None, so these statements should be in the body of the if-statement that ensures it is not None Signed-off-by: wyattscarpenter --- src/databricks/sql/utils.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) mode change 100755 => 100644 src/databricks/sql/utils.py diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py old mode 100755 new mode 100644 index 3af473b1..cd57f338 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -7,7 +7,7 @@ from collections.abc import Iterable from decimal import Decimal from enum import Enum -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import re import lz4.frame @@ -134,7 +134,7 @@ def __init__( schema_bytes, max_download_threads: int, start_row_offset: int = 0, - result_links: List[TSparkArrowResultLink] = [], + result_links: Optional[List[TSparkArrowResultLink]] = None, lz4_compressed: bool = True, description: List[List[Any]] = None, ): @@ -161,17 +161,17 @@ def __init__( start_row_offset ) ) - for result_link in result_links: - logger.debug( - "- start row offset: {}, row count: {}".format( - result_link.startRowOffset, result_link.rowCount + if result_links is not None: + for result_link in result_links: + logger.debug( + "- start row offset: {}, row count: {}".format( + result_link.startRowOffset, result_link.rowCount + ) ) + self.download_manager = ResultFileDownloadManager( + self.max_download_threads, self.lz4_compressed ) - - self.download_manager = ResultFileDownloadManager( - self.max_download_threads, self.lz4_compressed - ) - self.download_manager.add_file_links(result_links) + self.download_manager.add_file_links(result_links) self.table = self._create_next_table() self.table_row_index = 0 From b678e9b898e796411d901010c7ea2cdbff446a2e Mon Sep 17 00:00:00 2001 From: wyattscarpenter Date: Tue, 2 Jul 2024 02:01:22 -0400 Subject: [PATCH 7/7] Update src/databricks/sql/utils.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "self.download_manager is unconditionally used later, so must be created. Looks this part of code is totally not covered with tests 🤔" Co-authored-by: Levko Kravets Signed-off-by: wyattscarpenter --- src/databricks/sql/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py index cd57f338..6063eca1 100644 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -168,10 +168,10 @@ def __init__( result_link.startRowOffset, result_link.rowCount ) ) - self.download_manager = ResultFileDownloadManager( - self.max_download_threads, self.lz4_compressed - ) - self.download_manager.add_file_links(result_links) + self.download_manager = ResultFileDownloadManager( + self.max_download_threads, self.lz4_compressed + ) + self.download_manager.add_file_links(result_links or []) self.table = self._create_next_table() self.table_row_index = 0