diff --git a/stubs/html5lib/html5lib/_inputstream.pyi b/stubs/html5lib/html5lib/_inputstream.pyi index 992aaa2f07a6..02bb378e77ed 100644 --- a/stubs/html5lib/html5lib/_inputstream.pyi +++ b/stubs/html5lib/html5lib/_inputstream.pyi @@ -1,5 +1,10 @@ -from _typeshed import Incomplete -from typing import Any +from _typeshed import Incomplete, SupportsRead +from typing import Any, overload +from typing_extensions import TypeAlias + +_UnicodeInputStream: TypeAlias = str | SupportsRead[str] +_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes] +_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream # noqa: Y047 # used in other files spaceCharactersBytes: Any asciiLettersBytes: Any @@ -20,14 +25,26 @@ class BufferedStream: def seek(self, pos) -> None: ... def read(self, bytes): ... -def HTMLInputStream(source, **kwargs): ... +@overload +def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ... +@overload +def HTMLInputStream( + source: _BinaryInputStream, + *, + override_encoding: str | bytes | None = None, + transport_encoding: str | bytes | None = None, + same_origin_parent_encoding: str | bytes | None = None, + likely_encoding: str | bytes | None = None, + default_encoding: str = "windows-1252", + useChardet: bool = True, +) -> HTMLBinaryInputStream: ... class HTMLUnicodeInputStream: reportCharacterErrors: Any newLines: Any charEncoding: Any dataStream: Any - def __init__(self, source) -> None: ... + def __init__(self, source: _UnicodeInputStream) -> None: ... 
chunk: str chunkSize: int chunkOffset: int @@ -56,11 +73,11 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): charEncoding: Any def __init__( self, - source, - override_encoding: Incomplete | None = None, - transport_encoding: Incomplete | None = None, - same_origin_parent_encoding: Incomplete | None = None, - likely_encoding: Incomplete | None = None, + source: _BinaryInputStream, + override_encoding: str | bytes | None = None, + transport_encoding: str | bytes | None = None, + same_origin_parent_encoding: str | bytes | None = None, + likely_encoding: str | bytes | None = None, default_encoding: str = "windows-1252", useChardet: bool = True, ) -> None: ... @@ -108,4 +125,4 @@ class ContentAttrParser: def __init__(self, data) -> None: ... def parse(self): ... -def lookupEncoding(encoding): ... +def lookupEncoding(encoding: str | bytes | None) -> str | None: ... diff --git a/stubs/html5lib/html5lib/_tokenizer.pyi b/stubs/html5lib/html5lib/_tokenizer.pyi index f9685a0dd112..72b701c37491 100644 --- a/stubs/html5lib/html5lib/_tokenizer.pyi +++ b/stubs/html5lib/html5lib/_tokenizer.pyi @@ -1,6 +1,8 @@ from _typeshed import Incomplete from typing import Any +from ._inputstream import _InputStream + entitiesTrie: Any attributeMap = dict @@ -12,7 +14,7 @@ class HTMLTokenizer: state: Any escape: bool currentToken: Any - def __init__(self, stream, parser: Incomplete | None = None, **kwargs) -> None: ... + def __init__(self, stream: _InputStream, parser: Incomplete | None = None, **kwargs) -> None: ... tokenQueue: Any def __iter__(self): ... def consumeNumberEntity(self, isHex): ... @@ -36,23 +38,23 @@ class HTMLTokenizer: def rawtextLessThanSignState(self): ... def rawtextEndTagOpenState(self): ... def rawtextEndTagNameState(self): ... - def scriptDataLessThanSignState(self): ... - def scriptDataEndTagOpenState(self): ... - def scriptDataEndTagNameState(self): ... - def scriptDataEscapeStartState(self): ... - def scriptDataEscapeStartDashState(self): ... 
- def scriptDataEscapedState(self): ... - def scriptDataEscapedDashState(self): ... - def scriptDataEscapedDashDashState(self): ... - def scriptDataEscapedLessThanSignState(self): ... - def scriptDataEscapedEndTagOpenState(self): ... - def scriptDataEscapedEndTagNameState(self): ... - def scriptDataDoubleEscapeStartState(self): ... - def scriptDataDoubleEscapedState(self): ... - def scriptDataDoubleEscapedDashState(self): ... - def scriptDataDoubleEscapedDashDashState(self): ... - def scriptDataDoubleEscapedLessThanSignState(self): ... - def scriptDataDoubleEscapeEndState(self): ... + def scriptDataLessThanSignState(self) -> bool: ... + def scriptDataEndTagOpenState(self) -> bool: ... + def scriptDataEndTagNameState(self) -> bool: ... + def scriptDataEscapeStartState(self) -> bool: ... + def scriptDataEscapeStartDashState(self) -> bool: ... + def scriptDataEscapedState(self) -> bool: ... + def scriptDataEscapedDashState(self) -> bool: ... + def scriptDataEscapedDashDashState(self) -> bool: ... + def scriptDataEscapedLessThanSignState(self) -> bool: ... + def scriptDataEscapedEndTagOpenState(self) -> bool: ... + def scriptDataEscapedEndTagNameState(self) -> bool: ... + def scriptDataDoubleEscapeStartState(self) -> bool: ... + def scriptDataDoubleEscapedState(self) -> bool: ... + def scriptDataDoubleEscapedDashState(self) -> bool: ... + def scriptDataDoubleEscapedDashDashState(self) -> bool: ... + def scriptDataDoubleEscapedLessThanSignState(self) -> bool: ... + def scriptDataDoubleEscapeEndState(self) -> bool: ... def beforeAttributeNameState(self): ... def attributeNameState(self): ... def afterAttributeNameState(self): ... @@ -64,17 +66,17 @@ class HTMLTokenizer: def selfClosingStartTagState(self): ... def bogusCommentState(self): ... def markupDeclarationOpenState(self): ... - def commentStartState(self): ... - def commentStartDashState(self): ... - def commentState(self): ... - def commentEndDashState(self): ... - def commentEndState(self): ... 
- def commentEndBangState(self): ... - def doctypeState(self): ... - def beforeDoctypeNameState(self): ... - def doctypeNameState(self): ... - def afterDoctypeNameState(self): ... - def afterDoctypePublicKeywordState(self): ... + def commentStartState(self) -> bool: ... + def commentStartDashState(self) -> bool: ... + def commentState(self) -> bool: ... + def commentEndDashState(self) -> bool: ... + def commentEndState(self) -> bool: ... + def commentEndBangState(self) -> bool: ... + def doctypeState(self) -> bool: ... + def beforeDoctypeNameState(self) -> bool: ... + def doctypeNameState(self) -> bool: ... + def afterDoctypeNameState(self) -> bool: ... + def afterDoctypePublicKeywordState(self) -> bool: ... def beforeDoctypePublicIdentifierState(self): ... def doctypePublicIdentifierDoubleQuotedState(self): ... def doctypePublicIdentifierSingleQuotedState(self): ... diff --git a/stubs/html5lib/html5lib/html5parser.pyi b/stubs/html5lib/html5lib/html5parser.pyi index e946f7d99485..3f2fa19db7a1 100644 --- a/stubs/html5lib/html5lib/html5parser.pyi +++ b/stubs/html5lib/html5lib/html5parser.pyi @@ -1,25 +1,25 @@ -from _typeshed import Incomplete, SupportsRead +from _typeshed import Incomplete from typing import Any, Literal, overload from xml.etree.ElementTree import Element +from ._inputstream import _InputStream +from ._tokenizer import HTMLTokenizer + @overload def parse( - doc: str | bytes | SupportsRead[str] | SupportsRead[bytes], - treebuilder: Literal["etree"] = "etree", - namespaceHTMLElements: bool = True, - **kwargs, + doc: _InputStream, treebuilder: Literal["etree"] = "etree", namespaceHTMLElements: bool = True, **kwargs ) -> Element: ... @overload -def parse( - doc: str | bytes | SupportsRead[str] | SupportsRead[bytes], treebuilder: str, namespaceHTMLElements: bool = True, **kwargs +def parse(doc: _InputStream, treebuilder: str, namespaceHTMLElements: bool = True, **kwargs): ... 
+def parseFragment( + doc: _InputStream, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs ): ... -def parseFragment(doc, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs): ... def method_decorator_metaclass(function): ... class HTMLParser: - strict: Any + strict: bool tree: Any - errors: Any + errors: list[Incomplete] phases: Any def __init__( self, tree: Incomplete | None = None, strict: bool = False, namespaceHTMLElements: bool = True, debug: bool = False @@ -27,20 +27,21 @@ class HTMLParser: firstStartTag: bool log: Any compatMode: str + container: str innerHTML: Any phase: Any lastPhase: Any beforeRCDataPhase: Any framesetOK: bool - tokenizer: Any + tokenizer: HTMLTokenizer def reset(self) -> None: ... @property def documentEncoding(self) -> str | None: ... - def isHTMLIntegrationPoint(self, element) -> bool: ... - def isMathMLTextIntegrationPoint(self, element) -> bool: ... + def isHTMLIntegrationPoint(self, element: Element) -> bool: ... + def isMathMLTextIntegrationPoint(self, element: Element) -> bool: ... def mainLoop(self) -> None: ... - def parse(self, stream, scripting: bool = ..., **kwargs): ... - def parseFragment(self, stream, *args, **kwargs): ... + def parse(self, stream: _InputStream, scripting: bool = ..., **kwargs): ... + def parseFragment(self, stream: _InputStream, *args, **kwargs): ... def parseError(self, errorcode: str = "XXX-undefined-error", datavars: Incomplete | None = None) -> None: ... def adjustMathMLAttributes(self, token) -> None: ... def adjustSVGAttributes(self, token) -> None: ...