Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

openpyxl: Type usages of PIL and zipfile #10706

Merged
merged 7 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions stubs/openpyxl/@tests/stubtest_allowlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ openpyxl\.descriptors\.(base\.)?MinMax\.allow_none
openpyxl\.descriptors\.(base\.)?String\.allow_none
openpyxl\.descriptors\.(base\.)?Typed\.allow_none

# "has a default value but stub argument does not"
# Runtime has default arguments that would fail
# Inconsistent methods because
# - using the default value results in an error because of the runtime type-guards
# - or, keyword arguments are explicitly specified
openpyxl.cell.text.PhoneticProperties.__init__
openpyxl.cell.text.PhoneticText.__init__
openpyxl.chart.axis._BaseAxis.__init__
Expand Down Expand Up @@ -109,6 +110,7 @@ openpyxl.drawing.text.GeomGuide.__init__
openpyxl.drawing.text.PresetTextShape.__init__
openpyxl.drawing.text.TextField.__init__
openpyxl.drawing.text.TextNormalAutofit.__init__
openpyxl.packaging.relationship.get_rel
openpyxl.packaging.relationship.Relationship.__init__
openpyxl.packaging.workbook.ChildSheet.__init__
openpyxl.packaging.workbook.PivotCache.__init__
Expand Down
21 changes: 16 additions & 5 deletions stubs/openpyxl/openpyxl/drawing/image.pyi
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
from _typeshed import Incomplete
from _typeshed import SupportsRead
from pathlib import Path
from types import ModuleType
from typing import Any
from typing_extensions import Literal, TypeAlias

# Is actually PIL.Image.Image
_PILImageImage: TypeAlias = Any
# same as first parameter of PIL.Image.open
_PILImageFilePath: TypeAlias = str | bytes | Path | SupportsRead[bytes]

PILImage: ModuleType | Literal[False]

class Image:
anchor: str
ref: Incomplete
format: Incomplete
def __init__(self, img) -> None: ...
ref: _PILImageImage | _PILImageFilePath
format: str
def __init__(self, img: _PILImageImage | _PILImageFilePath) -> None: ...
@property
def path(self): ...
def path(self) -> str: ...
28 changes: 25 additions & 3 deletions stubs/openpyxl/openpyxl/packaging/relationship.pyi
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from _typeshed import Incomplete, Unused
from collections.abc import Generator
from typing import ClassVar, overload
from typing import ClassVar, TypeVar, overload
from typing_extensions import Literal
from zipfile import ZipFile

from openpyxl.descriptors.base import Alias, String
from openpyxl.descriptors.serialisable import Serialisable
from openpyxl.pivot.cache import CacheDefinition
from openpyxl.pivot.record import RecordList
from openpyxl.pivot.table import TableDefinition

_SerialisableT = TypeVar("_SerialisableT", bound=Serialisable)
_SerialisableRelTypeT = TypeVar("_SerialisableRelTypeT", bound=CacheDefinition | RecordList | TableDefinition)

class Relationship(Serialisable):
tagname: ClassVar[str]
Expand Down Expand Up @@ -37,5 +44,20 @@ class RelationshipList(Serialisable):
def to_tree(self): ...

def get_rels_path(path): ...
def get_dependents(archive, filename): ...
def get_rel(archive, deps, id: Incomplete | None = None, cls: Incomplete | None = None): ...
def get_dependents(archive: ZipFile, filename: str) -> RelationshipList: ...

# If `id` is None, `cls` needs to have ClassVar `rel_type`.
# The `deps` attribute used at runtime is for internal use immediatly after the return.
# `cls` cannot be None
@overload
def get_rel(
archive: ZipFile, deps: RelationshipList, id: None = None, *, cls: type[_SerialisableRelTypeT]
) -> _SerialisableRelTypeT | None: ...
@overload
def get_rel(
archive: ZipFile, deps: RelationshipList, id: None, cls: type[_SerialisableRelTypeT]
) -> _SerialisableRelTypeT | None: ...
@overload
def get_rel(archive: ZipFile, deps: RelationshipList, id: str, *, cls: type[_SerialisableT]) -> _SerialisableT: ...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this redundant with the next overload?

Copy link
Collaborator Author

@Avasam Avasam Sep 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanna say you're right? Probably a leftover from having id being optional.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how doable it is, but I suggested catching this at mypy: python/mypy#16164

@overload
def get_rel(archive: ZipFile, deps: RelationshipList, id: str, cls: type[_SerialisableT]) -> _SerialisableT: ...
7 changes: 6 additions & 1 deletion stubs/openpyxl/openpyxl/reader/drawings.pyi
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
def find_images(archive, path): ...
from zipfile import ZipFile

from openpyxl.chart._chart import ChartBase
from openpyxl.drawing.image import Image

def find_images(archive: ZipFile, path: str) -> tuple[list[ChartBase], list[Image]]: ...
7 changes: 4 additions & 3 deletions stubs/openpyxl/openpyxl/reader/excel.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from _typeshed import Incomplete, StrPath, SupportsRead
from _typeshed import Incomplete, StrPath
from typing import IO
from typing_extensions import Final, Literal, TypeAlias
from zipfile import ZipFile

Expand Down Expand Up @@ -26,7 +27,7 @@ class ExcelReader:

def __init__(
self,
fn: SupportsRead[bytes] | str,
fn: StrPath | IO[bytes],
read_only: bool = False,
keep_vba: bool = False,
data_only: bool = False,
Expand All @@ -44,7 +45,7 @@ class ExcelReader:
def read(self) -> None: ...

def load_workbook(
filename: SupportsRead[bytes] | StrPath,
filename: StrPath | IO[bytes],
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/python/typeshed/pull/9511/files#r1113278179 Avasam

This, and ExcelReader.__init__ is ultimately passed to ZipFile in _validate_archive.
ZipFile.__init__ is typed as ZipFile(file: StrPath | IO[bytes], ...)

Even in read mode it still needs a name variable, seek and tell methods.

AlexWaygood

I'd be okay with switching both of the conflicting annotations to IO[bytes]. We could probably come up with a more precise protocol, but for now consistency with the stdlib zipfile stub is probably the most important thing

Copy link
Collaborator Author

@Avasam Avasam Sep 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is responsible for the mypy_primer diff.
Affected code in Pandas: https://github.com/pandas-dev/pandas/blob/705d4312cf1d94ef2497bcd8091e0eabd1085f4a/pandas/io/excel/_openpyxl.py#L566
If this change is incorrect, then zipfile should probably be updated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the primer hit I think we should bite the bullet, introduce a protocol in zipfile, and use an equivalent protocol here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind holding off on this PR until we have a proper Protocol defined for zipfile.

I'll link #5835 as this is another use-case for it. Otherwise we'll have to duplicate the protocol in openpyxl for a while.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think #5835 is happening anytime soon so for now we'll just have to duplicate it.

read_only: bool = False,
keep_vba: bool = False,
data_only: bool = False,
Expand Down
5 changes: 3 additions & 2 deletions stubs/openpyxl/openpyxl/reader/workbook.pyi
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from _typeshed import Incomplete
from collections.abc import Generator
from zipfile import ZipFile

from openpyxl.workbook import Workbook

class WorkbookParser:
archive: Incomplete
archive: ZipFile
workbook_part_name: Incomplete
wb: Workbook
keep_links: Incomplete
sheets: Incomplete
def __init__(self, archive, workbook_part_name, keep_links: bool = True) -> None: ...
def __init__(self, archive: ZipFile, workbook_part_name, keep_links: bool = True) -> None: ...
@property
def rels(self): ...
caches: Incomplete
Expand Down
8 changes: 6 additions & 2 deletions stubs/openpyxl/openpyxl/styles/stylesheet.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from _typeshed import Incomplete, Unused
from typing import ClassVar
from typing import ClassVar, TypeVar
from typing_extensions import Literal, Self
from zipfile import ZipFile

from openpyxl.descriptors.base import Typed
from openpyxl.descriptors.excel import ExtensionList
Expand All @@ -10,6 +11,9 @@ from openpyxl.styles.colors import ColorList
from openpyxl.styles.named_styles import _NamedCellStyleList
from openpyxl.styles.numbers import NumberFormatList
from openpyxl.styles.table import TableStyleList
from openpyxl.workbook.workbook import Workbook

_WorkbookT = TypeVar("_WorkbookT", bound=Workbook)

class Stylesheet(Serialisable):
tagname: ClassVar[str]
Expand Down Expand Up @@ -50,5 +54,5 @@ class Stylesheet(Serialisable):
def custom_formats(self): ...
def to_tree(self, tagname: str | None = None, idx: Incomplete | None = None, namespace: str | None = None): ...

def apply_stylesheet(archive, wb): ...
def apply_stylesheet(archive: ZipFile, wb: _WorkbookT) -> _WorkbookT | None: ...
def write_stylesheet(wb): ...
3 changes: 2 additions & 1 deletion stubs/openpyxl/openpyxl/workbook/external_link/external.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from _typeshed import Incomplete, Unused
from typing import ClassVar
from typing_extensions import Literal, TypeAlias
from zipfile import ZipFile

from openpyxl.descriptors.base import Bool, Integer, NoneSet, String, Typed, _ConvertibleToBool, _ConvertibleToInt
from openpyxl.descriptors.nested import NestedText
Expand Down Expand Up @@ -76,4 +77,4 @@ class ExternalLink(Serialisable):
@property
def path(self): ...

def read_external_link(archive, book_path): ...
def read_external_link(archive: ZipFile, book_path: str) -> ExternalLink: ...
3 changes: 2 additions & 1 deletion stubs/openpyxl/openpyxl/workbook/workbook.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ from collections.abc import Iterator
from datetime import datetime
from typing import IO
from typing_extensions import Final
from zipfile import ZipFile

from openpyxl import _Decodable
from openpyxl.chartsheet.chartsheet import Chartsheet
Expand All @@ -21,7 +22,7 @@ class Workbook:
security: Incomplete
shared_strings: Incomplete
loaded_theme: Incomplete
vba_archive: Incomplete
vba_archive: ZipFile | None
is_template: bool
code_name: Incomplete
encoding: str
Expand Down
18 changes: 12 additions & 6 deletions stubs/openpyxl/openpyxl/writer/excel.pyi
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from _typeshed import Incomplete
from _typeshed import StrPath
from typing import IO
from typing_extensions import Literal
from zipfile import ZipFile

from openpyxl.packaging.manifest import Manifest
from openpyxl.workbook.workbook import Workbook

class ExcelWriter:
workbook: Incomplete
manifest: Incomplete
vba_modified: Incomplete
def __init__(self, workbook, archive) -> None: ...
workbook: Workbook
manifest: Manifest
vba_modified: set[str | None]
def __init__(self, workbook: Workbook, archive: ZipFile) -> None: ...
def write_data(self) -> None: ...
def write_worksheet(self, ws) -> None: ...
def save(self) -> None: ...

def save_workbook(workbook, filename): ...
def save_workbook(workbook: Workbook, filename: StrPath | IO[bytes]) -> Literal[True]: ...