Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TYPING: more type hints for io.formats.printing #27765

Merged
merged 5 commits into from
Aug 23, 2019
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 16 additions & 24 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
"""

import sys
from typing import Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union

from pandas._config import get_option

from pandas.core.dtypes.inference import is_sequence

EscapeChars = Union[Dict[str, str], Iterable[str]]


def adjoin(space: int, *lists: List[str], **kwargs) -> str:
"""
Expand Down Expand Up @@ -148,19 +150,16 @@ def _pprint_dict(


def pprint_thing(
thing,
thing: Any,
_nest_lvl: int = 0,
escape_chars: Optional[Union[Dict[str, str], Iterable[str]]] = None,
escape_chars: Optional[EscapeChars] = None,
default_escapes: bool = False,
quote_strings: bool = False,
max_seq_items: Optional[int] = None,
) -> str:
"""
This function is the sanctioned way of converting objects
to a unicode representation.

properly handles nested sequences containing unicode strings
(unicode(object) does not)
to a string representation and properly handles nested sequences.

Parameters
----------
Expand All @@ -178,21 +177,13 @@ def pprint_thing(

Returns
-------
result - unicode str
str

"""

def as_escaped_unicode(thing, escape_chars=escape_chars):
# Unicode is fine, else we try to decode using utf-8 and 'replace'
# if that's not it either, we have no way of knowing and the user
# should deal with it himself.

try:
result = str(thing) # we should try this first
except UnicodeDecodeError:
# either utf-8 or we replace errors
result = str(thing).decode("utf-8", "replace")

def as_escaped_string(
thing: Any, escape_chars: Optional[EscapeChars] = escape_chars
) -> str:
translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
if isinstance(escape_chars, dict):
if default_escapes:
Expand All @@ -202,10 +193,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars):
escape_chars = list(escape_chars.keys())
else:
escape_chars = escape_chars or tuple()

result = str(thing)
for c in escape_chars:
result = result.replace(c, translate[c])

return str(result)
return result

if hasattr(thing, "__next__"):
return str(thing)
Expand All @@ -224,11 +216,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars):
max_seq_items=max_seq_items,
)
elif isinstance(thing, str) and quote_strings:
result = "'{thing}'".format(thing=as_escaped_unicode(thing))
result = "'{thing}'".format(thing=as_escaped_string(thing))
else:
result = as_escaped_unicode(thing)
result = as_escaped_string(thing)

return str(result) # always unicode
return result


def pprint_thing_encoded(
Expand Down