Skip to content

Commit

Permalink
Move CustomFileOpen class to its own python file and overhaul it, all…
Browse files Browse the repository at this point in the history
…owing io-like objects and fully document
  • Loading branch information
krzywon committed Jan 30, 2024
1 parent d71d553 commit 4f688a5
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 29 deletions.
131 changes: 131 additions & 0 deletions sasdata/data_io/open.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""
A custom context manager to handle file io.
This creates a simple file handle that ensures files are opened and closed properly.
"""
from io import BytesIO, FileIO, StringIO
from os import PathLike
from os import path
from pathlib import Path
from typing import Union, Optional
from urllib.request import urlopen

# Anything accepted as a filesystem location: a plain string, a pathlib.Path, or any other object
# implementing the os.PathLike protocol.
PATHLIKE = Union[str, Path, PathLike]


class CustomFileOpen:
    """A custom context manager to handle files, regardless of where the file is located.

    The manager accepts a local path, a URL (any string containing ``://``), or an already-open io-like
    object, and exposes the resulting handle through the ``fd`` attribute.

    Attributes
    ----------
    file : Path
        A pathlib.Path object that points to the file location. For URLs this is only used to derive
        `filename`; the untouched URL string is what gets fetched.
    filename : str
        The name of the file, with no path information associated.
    mode : str
        The file io mode, see https://docs.python.org/3/library/functions.html#open (default `rb`)
    fd : Union[BytesIO, FileIO, StringIO]
        An open file handle, or None when no handle is currently held.
    errors : list[Exception]
        A list of exceptions caught during file handling that should be handled post file-handling.

    Methods
    -------
    __enter__()
        Called when creating a context manager from this class. Should never be called directly.
        Opens the file, if necessary, and returns the instance with the given name from the manager.
        Usage:
        ``` f_open = CustomFileOpen(file, mode, full_path)
            with f_open as fd:
                contents = fd.fd.read()
        ```
    __exit__()
        Called when the context manager exits. Should never be called directly.
        Closes any active file handles.
    """

    def __init__(self, file: Union[PATHLIKE, BytesIO, FileIO, StringIO], mode: Optional[str] = 'rb',
                 full_path: Optional[PATHLIKE] = None):
        """Create an instance of the file handler.

        Parameters
        ----------
        file (Union[PATHLIKE, BytesIO, FileIO, StringIO]): A string representation of a file path or URL, a
            Python path object, or a file io object.
        mode (Optional[str]): The file open mode (if needed). None is treated as the default. (default `rb`)
        full_path (Optional[PATHLIKE]): A complete filepath to the file, used to name io-like objects.
            When omitted, the `name` attribute of the io-like object is used if it is path-like. (default: None)
        """
        # The original URL string, kept verbatim because pathlib collapses the '//' of a URL.
        self._url = None
        if hasattr(file, 'read'):
            # io style objects
            self.fd = file
            if full_path is None:
                # Handles may have no name (BytesIO) or a non-path name (an integer file descriptor).
                handle_name = getattr(file, 'name', None)
                full_path = handle_name if isinstance(handle_name, (str, PathLike)) else ''
            self.file = Path(full_path)
        else:
            if isinstance(file, str) and '://' in file:
                self._url = file
            self.file = Path(file)
            self.fd = None
        self.filename = self.file.name
        self.mode = 'rb' if mode is None else mode
        self.errors = []

    def __enter__(self):
        """A context method that either fetches a file from a URL, opens a local file, or keeps the existing file open.

        Returns
        -------
        self : The CustomFileOpen instance.
        """
        if self.fd is not None:
            # Likely an IO-like object was passed to the manager so the file is already opened.
            pass
        elif self._url is not None:
            # Use urllib.request package to access remote files
            with urlopen(self._url) as req:
                content = req.read()
            self.fd = BytesIO(content)
            self.fd.name = self.filename
        else:
            # Use native open to access local files
            self.fd = open(self.file, self.mode)
        # Return the instance to allow access to the filename, and any open file handles.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> Optional[bool]:
        """Close all open file handles when exiting the context manager.

        Parameters
        ----------
        exc_type (Optional[type]): The class of the exception raised inside the `with` block, if any.
        exc_val (Optional[BaseException]): The exception instance raised inside the `with` block, if any.
        exc_tb (Optional[traceback]): The traceback associated with the exception, if any.

        Returns
        -------
        bool : Should the context manager suppress errors thrown during execution?
        """
        if self.fd is not None:
            self.fd.close()
            self.fd = None
        return self._check_error(exc_type, exc_val, exc_tb)

    def _check_error(self, exc_type, exc_val, exc_tb) -> bool:
        """Record an exception raised inside the `with` block and decide whether to suppress it.

        Parameters
        ----------
        exc_type (Optional[type]): The class of the exception raised inside the `with` block, if any.
        exc_val (Optional[BaseException]): The exception instance raised inside the `with` block, if any.
        exc_tb (Optional[traceback]): The traceback associated with the exception, if any.

        Returns
        -------
        bool : Should the error be suppressed?
        """
        # TODO: Nothing is suppressed at present. Decide which specific exception types, if any, should be.
        suppress = False
        if exc_type:
            # Keep the full exception triple so it can be inspected once file handling has finished.
            self.errors.append(Exception(exc_type, exc_val, exc_tb))
        return suppress
30 changes: 1 addition & 29 deletions sasdata/data_io/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
and registers the built-in file extensions.
"""
import os
from urllib.request import urlopen
from io import BytesIO
from typing import Optional, List, Union, TYPE_CHECKING
from collections import defaultdict
from pathlib import Path

from sasdata.data_util.loader_exceptions import NoKnownLoaderException
from sasdata.data_util.util import unique_preserve_order
from sasdata.dataloader import readers as all_readers
from sasdata.data_io.open import CustomFileOpen

# TYPE_CHECKING hides imports at runtime: https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING
# Imports used here are only used for type checking, but would create a circular import, otherwise.
Expand All @@ -35,33 +34,6 @@ def create_empty_data_with_errors(path: Union[str, Path], errors: List[Exception
return [data_object]


class CustomFileOpen:
    """Context manager that exposes a readable handle for either a URL or a local file.

    The instance itself is what the `with` statement yields; the handle lives on its `fd` attribute
    and the location it was created from on `filename`.
    """

    def __init__(self, filename, mode='rb'):
        # No handle is held until the context is entered.
        self.fd = None
        self.mode = mode
        self.filename = filename

    def __enter__(self):
        """Open the local file, or download the remote one into memory, and return this instance."""
        if '://' not in self.filename:
            # Local files go through the builtin open with the requested mode.
            self.fd = open(self.filename, self.mode)
            return self
        # Remote files are read in full via urllib.request and served from an in-memory buffer.
        with urlopen(self.filename) as response:
            payload = response.read()
        remote = BytesIO(payload)
        remote.name = self.filename
        self.fd = remote
        # The instance is returned so callers can reach both the filename and the handle.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the handle, if one was opened, when leaving the context."""
        handle = self.fd
        if handle is None:
            return
        handle.close()


class ExtensionRegistry:
"""
Associate a file loader with an extension.
Expand Down

0 comments on commit 4f688a5

Please sign in to comment.