Skip to content

Commit

Permalink
html5lib: Add various types (#11429)
Browse files Browse the repository at this point in the history
I started out investigating comments in #11411 and ended up adding a few other
types that were reasonably obvious from the source code. For reference:
https://github.com/html5lib/html5lib-python/tree/master/html5lib
  • Loading branch information
JelleZijlstra authored Feb 20, 2024
1 parent 601587e commit 78b7dc6
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 54 deletions.
37 changes: 27 additions & 10 deletions stubs/html5lib/html5lib/_inputstream.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from _typeshed import Incomplete
from typing import Any
from _typeshed import Incomplete, SupportsRead
from typing import Any, overload
from typing_extensions import TypeAlias

# Text input: an already-decoded string, or an object whose read() returns str.
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
# Byte input: raw bytes, or an object whose read() returns bytes.
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
# Any input accepted by the parser/tokenizer entry points. This must cover both
# the text and the byte forms: the signatures that use this alias previously
# accepted `str | bytes | SupportsRead[str] | SupportsRead[bytes]`, and
# HTMLInputStream has an overload for each form.
_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream  # noqa: Y047  # used in other files

spaceCharactersBytes: Any
asciiLettersBytes: Any
Expand All @@ -20,14 +25,26 @@ class BufferedStream:
def seek(self, pos) -> None: ...
def read(self, bytes): ...

def HTMLInputStream(source, **kwargs): ...
# Factory overloads: the return type depends on whether `source` is text or bytes.
# Text source (str or a reader yielding str): no encoding options are accepted,
# and the result is an HTMLUnicodeInputStream.
@overload
def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
# Byte source (bytes or a reader yielding bytes): the keyword-only encoding
# options mirror the parameters of HTMLBinaryInputStream.__init__, and the
# result is an HTMLBinaryInputStream.
@overload
def HTMLInputStream(
source: _BinaryInputStream,
*,
override_encoding: str | bytes | None = None,
transport_encoding: str | bytes | None = None,
same_origin_parent_encoding: str | bytes | None = None,
likely_encoding: str | bytes | None = None,
default_encoding: str = "windows-1252",
useChardet: bool = True,
) -> HTMLBinaryInputStream: ...

class HTMLUnicodeInputStream:
reportCharacterErrors: Any
newLines: Any
charEncoding: Any
dataStream: Any
def __init__(self, source) -> None: ...
def __init__(self, source: _UnicodeInputStream) -> None: ...
chunk: str
chunkSize: int
chunkOffset: int
Expand Down Expand Up @@ -56,11 +73,11 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
charEncoding: Any
def __init__(
self,
source,
override_encoding: Incomplete | None = None,
transport_encoding: Incomplete | None = None,
same_origin_parent_encoding: Incomplete | None = None,
likely_encoding: Incomplete | None = None,
source: _BinaryInputStream,
override_encoding: str | bytes | None = None,
transport_encoding: str | bytes | None = None,
same_origin_parent_encoding: str | bytes | None = None,
likely_encoding: str | bytes | None = None,
default_encoding: str = "windows-1252",
useChardet: bool = True,
) -> None: ...
Expand Down Expand Up @@ -108,4 +125,4 @@ class ContentAttrParser:
def __init__(self, data) -> None: ...
def parse(self): ...

def lookupEncoding(encoding): ...
def lookupEncoding(encoding: str | bytes | None) -> str | None: ...
60 changes: 31 additions & 29 deletions stubs/html5lib/html5lib/_tokenizer.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from _typeshed import Incomplete
from typing import Any

from ._inputstream import _InputStream

entitiesTrie: Any
attributeMap = dict

Expand All @@ -12,7 +14,7 @@ class HTMLTokenizer:
state: Any
escape: bool
currentToken: Any
def __init__(self, stream, parser: Incomplete | None = None, **kwargs) -> None: ...
def __init__(self, stream: _InputStream, parser: Incomplete | None = None, **kwargs) -> None: ...
tokenQueue: Any
def __iter__(self): ...
def consumeNumberEntity(self, isHex): ...
Expand All @@ -36,23 +38,23 @@ class HTMLTokenizer:
def rawtextLessThanSignState(self): ...
def rawtextEndTagOpenState(self): ...
def rawtextEndTagNameState(self): ...
def scriptDataLessThanSignState(self): ...
def scriptDataEndTagOpenState(self): ...
def scriptDataEndTagNameState(self): ...
def scriptDataEscapeStartState(self): ...
def scriptDataEscapeStartDashState(self): ...
def scriptDataEscapedState(self): ...
def scriptDataEscapedDashState(self): ...
def scriptDataEscapedDashDashState(self): ...
def scriptDataEscapedLessThanSignState(self): ...
def scriptDataEscapedEndTagOpenState(self): ...
def scriptDataEscapedEndTagNameState(self): ...
def scriptDataDoubleEscapeStartState(self): ...
def scriptDataDoubleEscapedState(self): ...
def scriptDataDoubleEscapedDashState(self): ...
def scriptDataDoubleEscapedDashDashState(self): ...
def scriptDataDoubleEscapedLessThanSignState(self): ...
def scriptDataDoubleEscapeEndState(self): ...
def scriptDataLessThanSignState(self) -> bool: ...
def scriptDataEndTagOpenState(self) -> bool: ...
def scriptDataEndTagNameState(self) -> bool: ...
def scriptDataEscapeStartState(self) -> bool: ...
def scriptDataEscapeStartDashState(self) -> bool: ...
def scriptDataEscapedState(self) -> bool: ...
def scriptDataEscapedDashState(self) -> bool: ...
def scriptDataEscapedDashDashState(self) -> bool: ...
def scriptDataEscapedLessThanSignState(self) -> bool: ...
def scriptDataEscapedEndTagOpenState(self) -> bool: ...
def scriptDataEscapedEndTagNameState(self) -> bool: ...
def scriptDataDoubleEscapeStartState(self) -> bool: ...
def scriptDataDoubleEscapedState(self) -> bool: ...
def scriptDataDoubleEscapedDashState(self) -> bool: ...
def scriptDataDoubleEscapedDashDashState(self) -> bool: ...
def scriptDataDoubleEscapedLessThanSignState(self) -> bool: ...
def scriptDataDoubleEscapeEndState(self) -> bool: ...
def beforeAttributeNameState(self): ...
def attributeNameState(self): ...
def afterAttributeNameState(self): ...
Expand All @@ -64,17 +66,17 @@ class HTMLTokenizer:
def selfClosingStartTagState(self): ...
def bogusCommentState(self): ...
def markupDeclarationOpenState(self): ...
def commentStartState(self): ...
def commentStartDashState(self): ...
def commentState(self): ...
def commentEndDashState(self): ...
def commentEndState(self): ...
def commentEndBangState(self): ...
def doctypeState(self): ...
def beforeDoctypeNameState(self): ...
def doctypeNameState(self): ...
def afterDoctypeNameState(self): ...
def afterDoctypePublicKeywordState(self): ...
def commentStartState(self) -> bool: ...
def commentStartDashState(self) -> bool: ...
def commentState(self) -> bool: ...
def commentEndDashState(self) -> bool: ...
def commentEndState(self) -> bool: ...
def commentEndBangState(self) -> bool: ...
def doctypeState(self) -> bool: ...
def beforeDoctypeNameState(self) -> bool: ...
def doctypeNameState(self) -> bool: ...
def afterDoctypeNameState(self) -> bool: ...
def afterDoctypePublicKeywordState(self) -> bool: ...
def beforeDoctypePublicIdentifierState(self): ...
def doctypePublicIdentifierDoubleQuotedState(self): ...
def doctypePublicIdentifierSingleQuotedState(self): ...
Expand Down
31 changes: 16 additions & 15 deletions stubs/html5lib/html5lib/html5parser.pyi
Original file line number Diff line number Diff line change
@@ -1,46 +1,47 @@
from _typeshed import Incomplete, SupportsRead
from _typeshed import Incomplete
from typing import Any, Literal, overload
from xml.etree.ElementTree import Element

from ._inputstream import _InputStream
from ._tokenizer import HTMLTokenizer

@overload
def parse(
doc: str | bytes | SupportsRead[str] | SupportsRead[bytes],
treebuilder: Literal["etree"] = "etree",
namespaceHTMLElements: bool = True,
**kwargs,
doc: _InputStream, treebuilder: Literal["etree"] = "etree", namespaceHTMLElements: bool = True, **kwargs
) -> Element: ...
@overload
def parse(
doc: str | bytes | SupportsRead[str] | SupportsRead[bytes], treebuilder: str, namespaceHTMLElements: bool = True, **kwargs
def parse(doc: _InputStream, treebuilder: str, namespaceHTMLElements: bool = True, **kwargs): ...
def parseFragment(
doc: _InputStream, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs
): ...
def parseFragment(doc, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs): ...
def method_decorator_metaclass(function): ...

class HTMLParser:
strict: Any
strict: bool
tree: Any
errors: Any
errors: list[Incomplete]
phases: Any
def __init__(
self, tree: Incomplete | None = None, strict: bool = False, namespaceHTMLElements: bool = True, debug: bool = False
) -> None: ...
firstStartTag: bool
log: Any
compatMode: str
container: str
innerHTML: Any
phase: Any
lastPhase: Any
beforeRCDataPhase: Any
framesetOK: bool
tokenizer: Any
tokenizer: HTMLTokenizer
def reset(self) -> None: ...
@property
def documentEncoding(self) -> str | None: ...
def isHTMLIntegrationPoint(self, element) -> bool: ...
def isMathMLTextIntegrationPoint(self, element) -> bool: ...
def isHTMLIntegrationPoint(self, element: Element) -> bool: ...
def isMathMLTextIntegrationPoint(self, element: Element) -> bool: ...
def mainLoop(self) -> None: ...
def parse(self, stream, scripting: bool = ..., **kwargs): ...
def parseFragment(self, stream, *args, **kwargs): ...
def parse(self, stream: _InputStream, scripting: bool = ..., **kwargs): ...
def parseFragment(self, stream: _InputStream, *args, **kwargs): ...
def parseError(self, errorcode: str = "XXX-undefined-error", datavars: Incomplete | None = None) -> None: ...
def adjustMathMLAttributes(self, token) -> None: ...
def adjustSVGAttributes(self, token) -> None: ...
Expand Down

0 comments on commit 78b7dc6

Please sign in to comment.