diff --git a/stubs/html5lib/html5lib/_inputstream.pyi b/stubs/html5lib/html5lib/_inputstream.pyi
index 992aaa2f07a6..02bb378e77ed 100644
--- a/stubs/html5lib/html5lib/_inputstream.pyi
+++ b/stubs/html5lib/html5lib/_inputstream.pyi
@@ -1,5 +1,10 @@
-from _typeshed import Incomplete
-from typing import Any
+from _typeshed import Incomplete, SupportsRead
+from typing import Any, overload
+from typing_extensions import TypeAlias
+
+_UnicodeInputStream: TypeAlias = str | SupportsRead[str]  # text, or a reader whose read() returns str
+_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]  # bytes, or a reader whose read() returns bytes
+_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream  # noqa: Y047  # used in other files
spaceCharactersBytes: Any
asciiLettersBytes: Any
@@ -20,14 +25,26 @@ class BufferedStream:
def seek(self, pos) -> None: ...
def read(self, bytes): ...
-def HTMLInputStream(source, **kwargs): ...
+@overload  # text source: no encoding keywords accepted, result is HTMLUnicodeInputStream
+def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
+@overload  # binary source: optional encoding hints, result is HTMLBinaryInputStream
+def HTMLInputStream(
+ source: _BinaryInputStream,
+ *,  # every option below must be passed by keyword
+ override_encoding: str | bytes | None = None,
+ transport_encoding: str | bytes | None = None,
+ same_origin_parent_encoding: str | bytes | None = None,
+ likely_encoding: str | bytes | None = None,
+ default_encoding: str = "windows-1252",
+ useChardet: bool = True,
+) -> HTMLBinaryInputStream: ...
class HTMLUnicodeInputStream:
reportCharacterErrors: Any
newLines: Any
charEncoding: Any
dataStream: Any
- def __init__(self, source) -> None: ...
+ def __init__(self, source: _UnicodeInputStream) -> None: ...  # str, or a reader whose read() returns str
chunk: str
chunkSize: int
chunkOffset: int
@@ -56,11 +73,11 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
charEncoding: Any
def __init__(
self,
- source,
- override_encoding: Incomplete | None = None,
- transport_encoding: Incomplete | None = None,
- same_origin_parent_encoding: Incomplete | None = None,
- likely_encoding: Incomplete | None = None,
+ source: _BinaryInputStream,  # bytes, or a reader whose read() returns bytes
+ override_encoding: str | bytes | None = None,  # each *_encoding hint is str, bytes, or None (the default)
+ transport_encoding: str | bytes | None = None,
+ same_origin_parent_encoding: str | bytes | None = None,
+ likely_encoding: str | bytes | None = None,
default_encoding: str = "windows-1252",
useChardet: bool = True,
) -> None: ...
@@ -108,4 +125,4 @@ class ContentAttrParser:
def __init__(self, data) -> None: ...
def parse(self): ...
-def lookupEncoding(encoding): ...
+def lookupEncoding(encoding: str | bytes | None) -> str | None: ...  # NOTE(review): confirm the non-None result is a plain str, not an encoding object
diff --git a/stubs/html5lib/html5lib/_tokenizer.pyi b/stubs/html5lib/html5lib/_tokenizer.pyi
index f9685a0dd112..72b701c37491 100644
--- a/stubs/html5lib/html5lib/_tokenizer.pyi
+++ b/stubs/html5lib/html5lib/_tokenizer.pyi
@@ -1,6 +1,8 @@
from _typeshed import Incomplete
from typing import Any
+from ._inputstream import _InputStream
+
entitiesTrie: Any
attributeMap = dict
@@ -12,7 +14,7 @@ class HTMLTokenizer:
state: Any
escape: bool
currentToken: Any
- def __init__(self, stream, parser: Incomplete | None = None, **kwargs) -> None: ...
+ def __init__(self, stream: _InputStream, parser: Incomplete | None = None, **kwargs) -> None: ...  # stream uses the alias imported from ._inputstream; parser stays Incomplete
tokenQueue: Any
def __iter__(self): ...
def consumeNumberEntity(self, isHex): ...
@@ -36,23 +38,23 @@ class HTMLTokenizer:
def rawtextLessThanSignState(self): ...
def rawtextEndTagOpenState(self): ...
def rawtextEndTagNameState(self): ...
- def scriptDataLessThanSignState(self): ...
- def scriptDataEndTagOpenState(self): ...
- def scriptDataEndTagNameState(self): ...
- def scriptDataEscapeStartState(self): ...
- def scriptDataEscapeStartDashState(self): ...
- def scriptDataEscapedState(self): ...
- def scriptDataEscapedDashState(self): ...
- def scriptDataEscapedDashDashState(self): ...
- def scriptDataEscapedLessThanSignState(self): ...
- def scriptDataEscapedEndTagOpenState(self): ...
- def scriptDataEscapedEndTagNameState(self): ...
- def scriptDataDoubleEscapeStartState(self): ...
- def scriptDataDoubleEscapedState(self): ...
- def scriptDataDoubleEscapedDashState(self): ...
- def scriptDataDoubleEscapedDashDashState(self): ...
- def scriptDataDoubleEscapedLessThanSignState(self): ...
- def scriptDataDoubleEscapeEndState(self): ...
+ def scriptDataLessThanSignState(self) -> bool: ...  # this and the scriptData* handlers below are annotated as returning bool
+ def scriptDataEndTagOpenState(self) -> bool: ...
+ def scriptDataEndTagNameState(self) -> bool: ...
+ def scriptDataEscapeStartState(self) -> bool: ...
+ def scriptDataEscapeStartDashState(self) -> bool: ...
+ def scriptDataEscapedState(self) -> bool: ...
+ def scriptDataEscapedDashState(self) -> bool: ...
+ def scriptDataEscapedDashDashState(self) -> bool: ...
+ def scriptDataEscapedLessThanSignState(self) -> bool: ...
+ def scriptDataEscapedEndTagOpenState(self) -> bool: ...
+ def scriptDataEscapedEndTagNameState(self) -> bool: ...
+ def scriptDataDoubleEscapeStartState(self) -> bool: ...
+ def scriptDataDoubleEscapedState(self) -> bool: ...
+ def scriptDataDoubleEscapedDashState(self) -> bool: ...
+ def scriptDataDoubleEscapedDashDashState(self) -> bool: ...
+ def scriptDataDoubleEscapedLessThanSignState(self) -> bool: ...
+ def scriptDataDoubleEscapeEndState(self) -> bool: ...
def beforeAttributeNameState(self): ...
def attributeNameState(self): ...
def afterAttributeNameState(self): ...
@@ -64,17 +66,17 @@ class HTMLTokenizer:
def selfClosingStartTagState(self): ...
def bogusCommentState(self): ...
def markupDeclarationOpenState(self): ...
- def commentStartState(self): ...
- def commentStartDashState(self): ...
- def commentState(self): ...
- def commentEndDashState(self): ...
- def commentEndState(self): ...
- def commentEndBangState(self): ...
- def doctypeState(self): ...
- def beforeDoctypeNameState(self): ...
- def doctypeNameState(self): ...
- def afterDoctypeNameState(self): ...
- def afterDoctypePublicKeywordState(self): ...
+ def commentStartState(self) -> bool: ...  # this and the comment*/doctype* handlers below are annotated as returning bool
+ def commentStartDashState(self) -> bool: ...
+ def commentState(self) -> bool: ...
+ def commentEndDashState(self) -> bool: ...
+ def commentEndState(self) -> bool: ...
+ def commentEndBangState(self) -> bool: ...
+ def doctypeState(self) -> bool: ...
+ def beforeDoctypeNameState(self) -> bool: ...
+ def doctypeNameState(self) -> bool: ...
+ def afterDoctypeNameState(self) -> bool: ...
+ def afterDoctypePublicKeywordState(self) -> bool: ...
def beforeDoctypePublicIdentifierState(self): ...
def doctypePublicIdentifierDoubleQuotedState(self): ...
def doctypePublicIdentifierSingleQuotedState(self): ...
diff --git a/stubs/html5lib/html5lib/html5parser.pyi b/stubs/html5lib/html5lib/html5parser.pyi
index e946f7d99485..3f2fa19db7a1 100644
--- a/stubs/html5lib/html5lib/html5parser.pyi
+++ b/stubs/html5lib/html5lib/html5parser.pyi
@@ -1,25 +1,25 @@
-from _typeshed import Incomplete, SupportsRead
+from _typeshed import Incomplete
from typing import Any, Literal, overload
from xml.etree.ElementTree import Element
+from ._inputstream import _InputStream
+from ._tokenizer import HTMLTokenizer
+
@overload
def parse(
- doc: str | bytes | SupportsRead[str] | SupportsRead[bytes],
- treebuilder: Literal["etree"] = "etree",
- namespaceHTMLElements: bool = True,
- **kwargs,
+ doc: _InputStream, treebuilder: Literal["etree"] = "etree", namespaceHTMLElements: bool = True, **kwargs  # "etree" builder (the default): result is Element
) -> Element: ...
@overload
-def parse(
- doc: str | bytes | SupportsRead[str] | SupportsRead[bytes], treebuilder: str, namespaceHTMLElements: bool = True, **kwargs
+def parse(doc: _InputStream, treebuilder: str, namespaceHTMLElements: bool = True, **kwargs): ...  # any other builder name: return type left unannotated
+def parseFragment(
+ doc: _InputStream, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs  # doc now typed; return type still unannotated
): ...
-def parseFragment(doc, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs): ...
def method_decorator_metaclass(function): ...
class HTMLParser:
- strict: Any
+ strict: bool  # same type as the strict parameter of __init__
tree: Any
- errors: Any
+ errors: list[Incomplete]  # a list; element type not yet specified
phases: Any
def __init__(
self, tree: Incomplete | None = None, strict: bool = False, namespaceHTMLElements: bool = True, debug: bool = False
@@ -27,20 +27,21 @@ class HTMLParser:
firstStartTag: bool
log: Any
compatMode: str
+ container: str  # presumably the fragment container name (cf. parseFragment's container argument) — confirm
innerHTML: Any
phase: Any
lastPhase: Any
beforeRCDataPhase: Any
framesetOK: bool
- tokenizer: Any
+ tokenizer: HTMLTokenizer  # class imported from ._tokenizer
def reset(self) -> None: ...
@property
def documentEncoding(self) -> str | None: ...
- def isHTMLIntegrationPoint(self, element) -> bool: ...
- def isMathMLTextIntegrationPoint(self, element) -> bool: ...
+ def isHTMLIntegrationPoint(self, element: Element) -> bool: ...  # Element is xml.etree.ElementTree.Element, imported at the top of this stub
+ def isMathMLTextIntegrationPoint(self, element: Element) -> bool: ...
def mainLoop(self) -> None: ...
- def parse(self, stream, scripting: bool = ..., **kwargs): ...
- def parseFragment(self, stream, *args, **kwargs): ...
+ def parse(self, stream: _InputStream, scripting: bool = ..., **kwargs): ...  # return type still unannotated
+ def parseFragment(self, stream: _InputStream, *args, **kwargs): ...  # extra positional/keyword arguments remain untyped
def parseError(self, errorcode: str = "XXX-undefined-error", datavars: Incomplete | None = None) -> None: ...
def adjustMathMLAttributes(self, token) -> None: ...
def adjustSVGAttributes(self, token) -> None: ...