From 2606ac4bcc652075311b962be92536f1e149e3bb Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Wed, 24 May 2023 17:01:53 -0700 Subject: [PATCH 01/10] close tmp file handle --- src/snowflake/connector/cache.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/snowflake/connector/cache.py b/src/snowflake/connector/cache.py index b15ae132b..6415e34bd 100644 --- a/src/snowflake/connector/cache.py +++ b/src/snowflake/connector/cache.py @@ -524,8 +524,10 @@ def _save(self, load_first: bool = True) -> bool: prefix=fname, dir=_dir, ) - with open(tmp_file, "wb") as w_file: - pickle.dump(self, w_file) + os.write(tmp_file, pickle.dumps(self)) + os.close( + tmp_file + ) # tmp_file is already an open handle, we close it after writing # We write to a tmp file and then move it to have atomic write os.replace(tmp_file_path, self.file_path) self.last_loaded = datetime.datetime.fromtimestamp( From eed93b1a250163fea2f44ff13cd0cc779df906c6 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Wed, 31 May 2023 23:44:29 -0700 Subject: [PATCH 02/10] update fix --- src/snowflake/connector/cache.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/snowflake/connector/cache.py b/src/snowflake/connector/cache.py index 6415e34bd..58a1a4c83 100644 --- a/src/snowflake/connector/cache.py +++ b/src/snowflake/connector/cache.py @@ -524,10 +524,16 @@ def _save(self, load_first: bool = True) -> bool: prefix=fname, dir=_dir, ) - os.write(tmp_file, pickle.dumps(self)) - os.close( - tmp_file - ) # tmp_file is already an open handle, we close it after writing + # tmp_file is an opened OS level handle, to avoid opening twice which will further + # hold the file descriptor from unlinking the file, thus we use the low level os.close + # to close the file descriptor first. + os.close(tmp_file) + # we do not use os.write here, because this _save method will be called + # during garbage collection, and calling os.write will cause seg fault. + # instead, we call a normal open here, replying on the error behavior that open + # will be garbage collected and raise a normal error to exit the _save method call. + with open(tmp_file_path, "wb") as w_file: + pickle.dump(self, w_file) # We write to a tmp file and then move it to have atomic write os.replace(tmp_file_path, self.file_path) self.last_loaded = datetime.datetime.fromtimestamp( From edc632527b8c75f14f027d3a5e2d95f2f772c8dc Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 1 Jun 2023 13:48:52 -0700 Subject: [PATCH 03/10] add test --- src/snowflake/connector/cache.py | 15 ++++++++------- test/unit/subprocess_cache.py | 17 +++++++++++++++++ test/unit/test_cache.py | 14 ++++++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 test/unit/subprocess_cache.py diff --git a/src/snowflake/connector/cache.py b/src/snowflake/connector/cache.py index 58a1a4c83..b712aff94 100644 --- a/src/snowflake/connector/cache.py +++ b/src/snowflake/connector/cache.py @@ -524,15 +524,16 @@ def _save(self, load_first: bool = True) -> bool: prefix=fname, dir=_dir, ) - # tmp_file is an opened OS level handle, to avoid opening twice which will further - # hold the file descriptor from unlinking the file, thus we use the low level os.close - # to close the file descriptor first. + # tmp_file is an opened OS level handle, which means we need to close it manually. + # https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp + # ideally we shall just use the tmp_file fd to write, + # however, using os.write(tmp_file, bytes) causes seg fault during garbage collection when exiting + # python program. + # thus we fall back to the approach using the normal open() method to open a file and write. os.close(tmp_file) - # we do not use os.write here, because this _save method will be called - # during garbage collection, and calling os.write will cause seg fault. - # instead, we call a normal open here, replying on the error behavior that open - # will be garbage collected and raise a normal error to exit the _save method call. with open(tmp_file_path, "wb") as w_file: + # note: during garbage collection when exiting python program, open will raise not defined + # error as it has been released, we depend on this behavior to exit the program gracefully pickle.dump(self, w_file) # We write to a tmp file and then move it to have atomic write os.replace(tmp_file_path, self.file_path) diff --git a/test/unit/subprocess_cache.py b/test/unit/subprocess_cache.py new file mode 100644 index 000000000..3506b574b --- /dev/null +++ b/test/unit/subprocess_cache.py @@ -0,0 +1,17 @@ +# +# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. +# + +import os +import sys + +import snowflake.connector.cache as cache + +tmpdir = sys.argv[1] + +cache_path = os.path.join(tmpdir, "cache.txt") +assert os.listdir(tmpdir) == [] +c1 = cache.SFDictFileCache(file_path=cache_path) +c1["key"] = "value" +c1._save() +assert os.listdir(tmpdir) == ["cache.txt", "cache.txt.lock"] diff --git a/test/unit/test_cache.py b/test/unit/test_cache.py index 22199db00..f44422c5e 100644 --- a/test/unit/test_cache.py +++ b/test/unit/test_cache.py @@ -8,6 +8,8 @@ import os.path import pickle import stat +import subprocess +import sys from unittest import mock import pytest @@ -479,3 +481,15 @@ def test_broken_pickle_objects(self, tmpdir): c2 = cache.SFDictFileCache(file_path=cache_path) assert not c2._load() # load should return false due to no file + + def test_tmp_file_removed(self, tmpdir): + assert os.listdir(tmpdir) == [] + sub_process = subprocess.run( + [ + sys.executable if sys.executable else "python", + f"{os.getcwd()}/subprocess_cache.py", + tmpdir.strpath, + ] + ) + sub_process.check_returncode() + assert os.listdir(tmpdir) == ["cache.txt", "cache.txt.lock"] From 51667aecb31b3120552d484ffb28bfed1466eb68 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 1 Jun 2023 15:47:36 -0700 Subject: [PATCH 04/10] fix tests --- test/unit/test_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/test_cache.py b/test/unit/test_cache.py index f44422c5e..c0cfcba86 100644 --- a/test/unit/test_cache.py +++ b/test/unit/test_cache.py @@ -487,7 +487,8 @@ def test_tmp_file_removed(self, tmpdir): sub_process = subprocess.run( [ sys.executable if sys.executable else "python", - f"{os.getcwd()}/subprocess_cache.py", + "-m", + "subprocess_cache", tmpdir.strpath, ] ) From 01d0e529618fe445931c3ed0ce5d2d79ceef8b72 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 1 Jun 2023 20:21:47 -0700 Subject: [PATCH 05/10] add test to check ocsp cache dir --- test/extras/run.py | 17 ++++++++++++++++- test/unit/subprocess_cache.py | 17 ----------------- test/unit/test_cache.py | 15 --------------- 3 files changed, 16 insertions(+), 33 deletions(-) delete mode 100644 test/unit/subprocess_cache.py diff --git a/test/extras/run.py b/test/extras/run.py index c16a944db..56551f7e6 100644 --- a/test/extras/run.py +++ b/test/extras/run.py @@ -1,11 +1,13 @@ # # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. # - +import os import pathlib import subprocess import sys +import snowflake.connector.ocsp_snowflake + # This script run every Python file in this directory other than this # one in a subprocess and checks their exit codes @@ -23,3 +25,16 @@ ] ) sub_process.check_returncode() + ocsp_cache_dir_path = pathlib.Path( + snowflake.connector.ocsp_snowflake.OCSP_RESPONSE_VALIDATION_CACHE.file_path + ).parent + cache_files = os.listdir(ocsp_cache_dir_path) + # Windows does not have ocsp_response_validation_cache.lock + assert cache_files == [ + "ocsp_response_validation_cache.lock", + "ocsp_response_validation_cache", + "ocsp_response_cache.json", + ] or cache_files == [ + "ocsp_response_validation_cache", + "ocsp_response_cache.json", + ] diff --git a/test/unit/subprocess_cache.py b/test/unit/subprocess_cache.py deleted file mode 100644 index 3506b574b..000000000 --- a/test/unit/subprocess_cache.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. -# - -import os -import sys - -import snowflake.connector.cache as cache - -tmpdir = sys.argv[1] - -cache_path = os.path.join(tmpdir, "cache.txt") -assert os.listdir(tmpdir) == [] -c1 = cache.SFDictFileCache(file_path=cache_path) -c1["key"] = "value" -c1._save() -assert os.listdir(tmpdir) == ["cache.txt", "cache.txt.lock"] diff --git a/test/unit/test_cache.py b/test/unit/test_cache.py index c0cfcba86..22199db00 100644 --- a/test/unit/test_cache.py +++ b/test/unit/test_cache.py @@ -8,8 +8,6 @@ import os.path import pickle import stat -import subprocess -import sys from unittest import mock import pytest @@ -481,16 +479,3 @@ def test_broken_pickle_objects(self, tmpdir): c2 = cache.SFDictFileCache(file_path=cache_path) assert not c2._load() # load should return false due to no file - - def test_tmp_file_removed(self, tmpdir): - assert os.listdir(tmpdir) == [] - sub_process = subprocess.run( - [ - sys.executable if sys.executable else "python", - "-m", - "subprocess_cache", - tmpdir.strpath, - ] - ) - sub_process.check_returncode() - assert os.listdir(tmpdir) == ["cache.txt", "cache.txt.lock"] From 15766fa3d7fd42aeaf636d5ea16868b8094aadd7 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 1 Jun 2023 20:25:32 -0700 Subject: [PATCH 06/10] add comment --- test/extras/run.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/extras/run.py b/test/extras/run.py index 56551f7e6..523f06feb 100644 --- a/test/extras/run.py +++ b/test/extras/run.py @@ -28,13 +28,14 @@ ocsp_cache_dir_path = pathlib.Path( snowflake.connector.ocsp_snowflake.OCSP_RESPONSE_VALIDATION_CACHE.file_path ).parent - cache_files = os.listdir(ocsp_cache_dir_path) + cache_files = set(os.listdir(ocsp_cache_dir_path)) + # This is to test SNOW-79940, making sure tmp files are removed # Windows does not have ocsp_response_validation_cache.lock - assert cache_files == [ + assert cache_files == { "ocsp_response_validation_cache.lock", "ocsp_response_validation_cache", "ocsp_response_cache.json", - ] or cache_files == [ + } or cache_files == { "ocsp_response_validation_cache", "ocsp_response_cache.json", - ] + } From 416433f2e97e2f6cb3dd7215341bfe44747add4e Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 1 Jun 2023 20:27:20 -0700 Subject: [PATCH 07/10] add changelog --- DESCRIPTION.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION.md b/DESCRIPTION.md index 477baedb5..c3b90d67e 100644 --- a/DESCRIPTION.md +++ b/DESCRIPTION.md @@ -10,6 +10,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne - v3.0.5(TBD) - Added the ability to read Snowflake's central configuration file. + - Improved OCSP response caching to remove tmp cache files on Windows. - v3.0.4(May 23,2023) - Fixed a bug in which `cursor.execute()` could modify the argument statement_params dictionary object when executing a multistatement query. From 7e871050978e2b083fef4fe2eb3381aec4822969 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 6 Jun 2023 09:57:57 -0700 Subject: [PATCH 08/10] add comments and test --- src/snowflake/connector/cache.py | 7 +++++-- test/extras/run.py | 25 +++++++++++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/snowflake/connector/cache.py b/src/snowflake/connector/cache.py index b712aff94..22d075dc4 100644 --- a/src/snowflake/connector/cache.py +++ b/src/snowflake/connector/cache.py @@ -532,8 +532,11 @@ def _save(self, load_first: bool = True) -> bool: # thus we fall back to the approach using the normal open() method to open a file and write. os.close(tmp_file) with open(tmp_file_path, "wb") as w_file: - # note: during garbage collection when exiting python program, open will raise not defined - # error as it has been released, we depend on this behavior to exit the program gracefully + # note: when exiting python program, garbage collection will kick in + # leading to `open` being garbage collected, + # calling open raises NameError, the `finally` logic will clean up the created tmp files. + # this is also the reason why we need os.close(tmp_file) to close the tmp fd as `open` is + # not reliable during garbage collection pickle.dump(self, w_file) # We write to a tmp file and then move it to have atomic write os.replace(tmp_file_path, self.file_path) diff --git a/test/extras/run.py b/test/extras/run.py index 523f06feb..5477b46f5 100644 --- a/test/extras/run.py +++ b/test/extras/run.py @@ -6,6 +6,7 @@ import subprocess import sys +import snowflake.connector.compat import snowflake.connector.ocsp_snowflake # This script run every Python file in this directory other than this @@ -31,11 +32,19 @@ cache_files = set(os.listdir(ocsp_cache_dir_path)) # This is to test SNOW-79940, making sure tmp files are removed # Windows does not have ocsp_response_validation_cache.lock - assert cache_files == { - "ocsp_response_validation_cache.lock", - "ocsp_response_validation_cache", - "ocsp_response_cache.json", - } or cache_files == { - "ocsp_response_validation_cache", - "ocsp_response_cache.json", - } + assert ( + cache_files + == { + "ocsp_response_validation_cache.lock", + "ocsp_response_validation_cache", + "ocsp_response_cache.json", + } + and not snowflake.connector.compat.IS_WINDOWS + ) or ( + cache_files + == { + "ocsp_response_validation_cache", + "ocsp_response_cache.json", + } + and snowflake.connector.compat.IS_WINDOWS + ) From 766758d91800c7b8fe5d09d81f7d99db7c8ccd03 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 6 Jun 2023 11:54:50 -0700 Subject: [PATCH 09/10] pr feedback --- src/snowflake/connector/cache.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/snowflake/connector/cache.py b/src/snowflake/connector/cache.py index 22d075dc4..2cc069af2 100644 --- a/src/snowflake/connector/cache.py +++ b/src/snowflake/connector/cache.py @@ -530,13 +530,7 @@ def _save(self, load_first: bool = True) -> bool: # however, using os.write(tmp_file, bytes) causes seg fault during garbage collection when exiting # python program. # thus we fall back to the approach using the normal open() method to open a file and write. - os.close(tmp_file) - with open(tmp_file_path, "wb") as w_file: - # note: when exiting python program, garbage collection will kick in - # leading to `open` being garbage collected, - # calling open raises NameError, the `finally` logic will clean up the created tmp files. - # this is also the reason why we need os.close(tmp_file) to close the tmp fd as `open` is - # not reliable during garbage collection + with open(tmp_file, "wb") as w_file: pickle.dump(self, w_file) # We write to a tmp file and then move it to have atomic write os.replace(tmp_file_path, self.file_path) @@ -544,6 +538,14 @@ def _save(self, load_first: bool = True) -> bool: getmtime(self.file_path), ) return True + except NameError: + # note: when exiting python program, garbage collection will kick in + # leading to `open` being garbage collected, + # calling `open` raises NameError, we close the tmp file fd here to release the tmp file fd + try: + os.close(tmp_file) + except OSError: + pass except OSError as o_err: raise PermissionError( o_err.errno, From c2d4eda91c9b4b56b7e5f146827e08eefad0e7bb Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 6 Jun 2023 13:17:28 -0700 Subject: [PATCH 10/10] remove compat import --- test/extras/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/extras/run.py b/test/extras/run.py index 5477b46f5..856677552 100644 --- a/test/extras/run.py +++ b/test/extras/run.py @@ -3,10 +3,10 @@ # import os import pathlib +import platform import subprocess import sys -import snowflake.connector.compat import snowflake.connector.ocsp_snowflake # This script run every Python file in this directory other than this @@ -39,12 +39,12 @@ "ocsp_response_validation_cache", "ocsp_response_cache.json", } - and not snowflake.connector.compat.IS_WINDOWS + and not platform.system() == "Windows" ) or ( cache_files == { "ocsp_response_validation_cache", "ocsp_response_cache.json", } - and snowflake.connector.compat.IS_WINDOWS + and platform.system() == "Windows" )