Skip to content

Commit

Permalink
fix: add proper css style parsing for colors
Browse files Browse the repository at this point in the history
  • Loading branch information
vzhd1701 committed Dec 5, 2021
1 parent ed2716d commit ffbbd69
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 36 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,4 @@ Uploading 'Test note' |#### | 40/304
- [progress](http://github.com/verigak/progress/)
- [requests](https://github.com/psf/requests)
- [w3lib](https://github.com/scrapy/w3lib)
- [tinycss2](https://github.com/Kozea/tinycss2)
96 changes: 63 additions & 33 deletions enex2notion/colors.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import contextlib
import re
from functools import partial
from math import sqrt
from types import MappingProxyType

from tinycss2 import parse_declaration_list
from tinycss2.color3 import parse_color

HEX_BASE = 16

base16 = partial(int, base=HEX_BASE)
Expand Down Expand Up @@ -60,6 +64,8 @@

COLORS_BG = MappingProxyType(
{
"black_background": (0, 0, 0),
"white_background": (255, 255, 255),
"gray_background": (241, 241, 239),
"brown_background": (244, 238, 238),
"orange_background": (255, 209, 176),
Expand All @@ -74,27 +80,54 @@
)


def extract_color(style):
style = style.replace("inversion-type-color", "")

def extract_color(style): # noqa: WPS210
color_map = {
".*en-highlight:(.*?);": _extract_background_text,
r".*background-color:\s*(.*?);": _extract_background_rgb,
r".*color:\s*(.*?);": _extract_foreground_rgb,
".*en-highlight$": _extract_background_text,
"^background-color$": _extract_background_rgb,
"^color$": _extract_foreground_rgb,
}

for regex, color_extract_func in color_map.items():
match = re.match(regex, style)
if match:
color = color_extract_func(match.group(1))
if color:
return color
for s_name, s_value in _parse_style(style).items():
for regex, color_extract_func in color_map.items():
if re.match(regex, s_name):
color = color_extract_func(s_value)
if color:
return color

return None


def _extract_background_text(color):
color = f"{color}_background"
def _parse_style(style):
    """Map each CSS property in *style* to the first token of its value.

    The inline style string is parsed as a declaration list.  Nodes whose
    type is not ``"declaration"`` are ignored.  For every declaration the
    lower-cased property name is mapped to the first value token that is
    neither whitespace nor a comment; declarations without such a token are
    left out.  A later declaration of the same property overwrites the
    entry stored for an earlier one.
    """
    ignored_token_types = {"whitespace", "comment"}
    first_tokens = {}

    parsed_nodes = parse_declaration_list(
        style, skip_comments=True, skip_whitespace=True
    )

    for node in parsed_nodes:
        if node.type != "declaration":
            continue

        for token in node.value:
            if token.type not in ignored_token_types:
                # Only the first meaningful token is kept for each property.
                first_tokens[node.lower_name] = token
                break

    return first_tokens


def _parse_css_color(color_token):
    """Convert a CSS color token into an ``(r, g, b)`` tuple of 0-255 ints.

    Args:
        color_token: CSS component value holding the color, as stored by
            ``_parse_style`` for a declaration.

    Returns:
        A 3-tuple of integers, or ``None`` when the token cannot be
        expressed as concrete RGB channels (invalid color, or the
        ``currentColor`` keyword).
    """
    rgba = parse_color(color_token)

    # parse_color yields None for invalid input and the *string*
    # "currentColor" for that keyword; slicing the string would feed
    # characters into the arithmetic below and raise ValueError.
    if rgba is None or isinstance(rgba, str):
        return None

    float_to_int_rgb = 255

    # Channels are floats; round instead of truncating so floating-point
    # error (e.g. 28.999999999999996) cannot shift a channel down by one.
    # Clamp defensively in case a channel falls outside the 0..1 range.
    return tuple(
        min(max(round(channel * float_to_int_rgb), 0), float_to_int_rgb)
        for channel in rgba[:3]
    )


def _extract_background_text(color_token):
color = f"{color_token.value}_background"

if color == "green_background":
color = "teal_background"
Expand All @@ -105,17 +138,28 @@ def _extract_background_text(color):
return None


def _extract_background_rgb(rgb_str):
rbg_bg = _parse_rgb(rgb_str)
def _extract_background_rgb(color_token):
rbg_bg = _parse_css_color(color_token)

if rbg_bg is None:
return None

if EVERNOTE_STANDARD_BG.get(rbg_bg):
return EVERNOTE_STANDARD_BG[rbg_bg]
else:
color = _closest_color(COLORS_BG, rbg_bg)

if color not in {"black_background", "white_background"}:
return color

return None

return _closest_color(COLORS_BG, rbg_bg)

def _extract_foreground_rgb(color_token):
rbg_fg = _parse_css_color(color_token)

def _extract_foreground_rgb(rgb_str):
rbg_fg = _parse_rgb(rgb_str)
if rbg_fg is None:
return None

if EVERNOTE_STANDARD_FG.get(rbg_fg):
color = EVERNOTE_STANDARD_FG[rbg_fg]
Expand All @@ -128,15 +172,6 @@ def _extract_foreground_rgb(rgb_str):
return None


def _parse_rgb(color: str):
if "rgb" in color:
color_str = re.match(r"^rgb\((.*?)\)$", color).group(1)
return tuple(map(int, color_str.split(",")))

color_str = re.match("^#(.*?)$", color).group(1)
return tuple(map(base16, _chunk_string(color_str, 2)))


def _closest_color(colors, rgb): # noqa: WPS210
r, g, b = rgb
color_diffs = []
Expand All @@ -147,8 +182,3 @@ def _closest_color(colors, rgb): # noqa: WPS210
)
color_diffs.append((color_diff, color_name))
return min(color_diffs)[1]


def _chunk_string(string: str, max_length: int):
chunk_steps = range(0, len(string), max_length)
return [string[i : i + max_length] for i in chunk_steps]
33 changes: 32 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ python-dateutil = "^2.8.2"
progress = "^1.6"
requests = "^2.26.0"
w3lib = "^1.22.0"
tinycss2 = "^1.1.1"

[tool.poetry.dev-dependencies]
pytest = "^6.2.3"
Expand Down
33 changes: 31 additions & 2 deletions tests/test_string_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,43 @@ def test_extract_text_color_black():
test_note = parse_html(
'<div><span style="color:rgb(51, 51, 51);">black</span></div>'
'<div><span style="color:rgb(255, 255, 255);">white</span></div>'
'<div><span style="background-color:rgb(51, 51, 51);">black</span></div>'
'<div><span style="background-color:rgb(255, 255, 255);">white</span></div>'
)

assert extract_string(test_note) == TextProp(
text="black\nwhite",
properties=[["black\nwhite"]],
text="black\nwhite\nblack\nwhite",
properties=[["black\nwhite\nblack\nwhite"]],
)


def test_extract_text_color_strange():
    """Unrecognized color names yield plain text without extra properties."""
    html = (
        '<div><span style="color:magentific;">strange</span></div>'
        '<div><span style="background-color:magentific;">strange</span></div>'
    )

    extracted = extract_string(parse_html(html))

    expected = TextProp(
        text="strange\nstrange",
        properties=[["strange\nstrange"]],
    )
    assert extracted == expected


def test_extract_text_color_empty():
    """A ``color`` declaration with no value yields plain, unstyled text."""
    html = '<div><span style="color:;--boop">empty</span></div>'

    extracted = extract_string(parse_html(html))

    expected = TextProp(
        text="empty",
        properties=[["empty"]],
    )
    assert extracted == expected


def test_extract_text_bad_css():
    """A style attribute that is not a valid declaration is ignored."""
    html = '<div><span style="--boop">bad</span></div>'

    extracted = extract_string(parse_html(html))

    expected = TextProp(text="bad", properties=[["bad"]])
    assert extracted == expected


def test_extract_text_color_near():
test_note = parse_html(
'<div><span style="color:rgb(200, 158, 37);">yellow</span></div>'
Expand Down

0 comments on commit ffbbd69

Please sign in to comment.