Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TypeError: FacetedEncoding.init() got multiple values for argument 'self' #3634

Closed
gaspardc-met opened this issue Oct 7, 2024 · 11 comments · Fixed by #3637
Closed

TypeError: FacetedEncoding.init() got multiple values for argument 'self' #3634

gaspardc-met opened this issue Oct 7, 2024 · 11 comments · Fixed by #3637
Labels

Comments

@gaspardc-met
Copy link

gaspardc-met commented Oct 7, 2024

What happened?

Using a complex altair chart in streamlit, worked in 5.3.0, and broke with 5.4.0 and then 5.4.1.
It seems to be an altair issue: TypeError: FacetedEncoding.init() got multiple values for argument 'self'

I tried to reproduce the error with the plot code and anonymized example data.
The error didn't show up neither with altair alone nor streamlit+altair.

However, it might give you an idea of the plot I use. The error happens in the first call to .encode in the fonction .mark_bar.encode()
Image

Long Code Block

import functools

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

# Mocked dependencies and constants
CMAP_BINS = [0, 0.25, 0.5, 0.75, 1.0]
CMAP_COLORS = ["#f7fbff", "#c6dbef", "#6baed6", "#2171b5", "#08306b"]


def custom_blues():
    bins = [0, 0.25, 0.5, 0.75, 1.0]
    colors = ["#f7fbff", "#c6dbef", "#6baed6", "#2171b5", "#08306b"]
    return bins, colors


def chronogram_legend(target="load", pump_toggle=False):
    legend = "Load Legend"
    short_legend = "Load"
    extra = None
    return legend, short_legend, extra


def chronogram_processing(chronogram, timedelta="60min", filter_load=True):
    data = chronogram.copy()
    data["start_time"] = data.index
    data["end_time"] = data.index + pd.Timedelta(timedelta)
    return data.reset_index(drop=True)


def get_machines_starts_and_stops(chronogram, timedelta="60min", separator_dt=None):
    starts_and_stops = alt.Chart(pd.DataFrame({"x": [], "y": []}))
    starts_and_stops_texts = alt.Chart(pd.DataFrame({"x": [], "y": []}))
    return starts_and_stops, starts_and_stops_texts


def get_vertical_separator(separator_dt, labels_y="", y_field="machine"):
    separator = alt.Chart(pd.DataFrame({"x": [], "y": []}))
    separator_labels = alt.Chart(pd.DataFrame({"x": [], "y": []}))
    return separator, separator_labels


@st.cache_data(show_spinner=False, ttl=60 * 60)  # TTL in seconds
def plot_chronogram(
    chronogram: pd.DataFrame,
    formatted=".0f",
    target="load",
    timedelta="60min",
    filter_load=True,
    expand: bool = False,
    pump_toggle: bool = False,
    display_starts_and_stops: bool = False,
    separator_dt: pd.Timestamp = None,
):
    legend, short_legend, _ = chronogram_legend(target=target, pump_toggle=pump_toggle)
    data = chronogram_processing(chronogram=chronogram, timedelta=timedelta, filter_load=filter_load)
    if expand:
        data = data.set_index("start_time").sort_index().reset_index()
        data.loc[28:, "end_time"] = data.loc[28:, "end_time"] + pd.Timedelta("45T")

    if target == "pressure":
        bins, colors = custom_blues()
        scale = alt.Scale(domain=bins, range=colors, type="ordinal")
    elif target == "load":
        scale = alt.Scale(domain=CMAP_BINS, range=CMAP_COLORS, type="threshold")
    else:
        scale = alt.Scale(scheme="blues")

    sort_order = [""] + data["machine"].sort_values().unique().tolist()

    chart = (
        alt.Chart(data)
        .mark_bar()
        .encode(
            x=alt.X("start_time:T", title="Horizon Temporel"),
            x2=alt.X2("end_time:T"),
            y=alt.Y("machine:N", title="Utilisation: Groupes ou AFC", sort=sort_order),
            color=alt.Color(
                "load:Q",
                title=short_legend,
                scale=scale,
                legend=alt.Legend(title=legend),
            ),
            stroke=alt.value("white"),
            strokeWidth=alt.value(2),
            tooltip=[
                alt.Tooltip("start_time:T", format="%Y-%m-%d", title="Date"),
                alt.Tooltip("start_time:T", format="%H:%M", title="Heure"),
                alt.Tooltip("load:Q", format=formatted, title=legend),
            ],
        )
    ).properties(
        title=f"Chronogramme d'opération: {legend}",
        width=800,
        height=350,
    )

    text = (
        alt.Chart(data)
        .mark_text(dx=0, dy=0, color="white", fontSize=10)
        .encode(
            x=alt.X("mid_time:T"),
            y=alt.Y("machine:N", sort=None),
            text=alt.Text("load:Q", format=formatted),
            tooltip=[
                alt.Tooltip("start_time:T", format="%Y-%m-%d", title="Date"),
                alt.Tooltip("start_time:T", format="%H:%M", title="Heure"),
                alt.Tooltip("load:Q", format=formatted, title=legend),
            ],
        )
        .transform_calculate(mid_time="datum.start_time + (datum.end_time - datum.start_time)/2")
        .transform_filter((alt.datum.load >= 0.0) & (alt.datum.load != 0.01))
    )

    text_hot = (
        alt.Chart(data)
        .mark_text(dx=0, dy=0, color="white", fontSize=10)
        .encode(
            x=alt.X("mid_time:T"),
            y=alt.Y("machine:N", sort=None),
            text=alt.value("Chaud"),
            tooltip=alt.value(None),
        )
        .transform_calculate(mid_time="datum.start_time + (datum.end_time - datum.start_time)/2")
        .transform_filter(alt.datum.load == 0.01)
    )

    all_charts = [chart, text, text_hot]

    if display_starts_and_stops:
        starts_and_stops, starts_and_stops_texts = get_machines_starts_and_stops(
            chronogram=chronogram,
            timedelta=timedelta,
            separator_dt=separator_dt,
        )
        all_charts += [starts_and_stops, starts_and_stops_texts]

    display_separator = separator_dt is not None and separator_dt > chronogram.index.min()
    if display_separator:
        separator, separator_labels = get_vertical_separator(separator_dt=separator_dt, labels_y="", y_field="machine")
        all_charts += [separator, separator_labels]

    composed = (
        functools.reduce(lambda a, b: a + b, all_charts)
        .configure_legend(orient="right", titleOrient="right")
        .configure_axis(labelFontSize=12, titleFontSize=12)
    )

    return composed


# Main block to generate and display the chart
if __name__ == "__main__":
    # Generate fake data
    date_range = pd.date_range(start="2022-01-01", periods=48, freq="30min")
    data = pd.DataFrame(index=date_range)
    data["machine"] = np.random.choice(["A", "B", "C"], size=len(data))
    data["load"] = np.random.rand(len(data))

    # Call the plotting function
    chart = plot_chronogram(chronogram=data)

    st.altair_chart(chart)

What would you like to happen instead?

No response

Which version of Altair are you using?

altair: 5.4.1
python: 3.11

@dangotbanned
Copy link
Member

dangotbanned commented Oct 7, 2024

Using a complex altair chart in streamlit, worked in 5.3.0, and broke with 5.4.0 and then 5.4.1. It seems to be an altair issue: TypeError: FacetedEncoding.init() got multiple values for argument 'self'

I tried to reproduce the error with the plot code and anonymized example data. The error didn't show up neither with altair alone nor streamlit+altair.

@gaspardc-met could you provide the traceback please?

I'm unsure what FacetedEncoding.init() is referring to, as that isn't a method on alt.FacetedEncoding:

Repro

import altair as alt

>>> alt.FacetedEncoding().init()

Traceback

AttributeError                            Traceback (most recent call last)
Cell In[17], line 3
      1 import altair as alt
----> 3 alt.FacetedEncoding().init()

File c:/../altair/altair/utils/schemapi.py:1063, in SchemaBase.__getattr__(self, attr)
   1061 except AttributeError:
   1062     _getattr = super().__getattribute__
-> 1063 return _getattr(attr)

AttributeError: 'FacetedEncoding' object has no attribute 'init'

@gaspardc-met
Copy link
Author

gaspardc-met commented Oct 8, 2024

Hey @dangotbanned ,

Sorry I clipped some of the error, it was the core.FacetedEncoding init at https://github.com/vega/altair/blob/02ad17d0f1f71ea5125a7c6c9b43a61fd80c4567/altair/vegalite/v5/schema/channels.py#L25276 :

Traceback (most recent call last):
    File "<project_root>/decorators.py", line 68, in wrapper
        result = main_func(*args, **kwargs)
    File "<project_root>/pages_stm/predictions.py", line 79, in main
        display_predictions_tab(...)
    File "<project_root>/pages_stm/predictions.py", line 229, in display_predictions_tab
        display_chronograms(...)
    File "<project_root>/predictions/plots.py", line 516, in display_chronograms
        plot_chronogram(...)
    File "<project_root>/venv/lib/python3.11/site-packages/streamlit/runtime/caching/cache_utils.py", line 212, in __call__
        return self._get_or_create_cached_value(args, kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/streamlit/runtime/caching/cache_utils.py", line 235, in _get_or_create_cached_value
        return self._handle_cache_miss(cache, value_key, func_args, func_kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/streamlit/runtime/caching/cache_utils.py", line 292, in _handle_cache_miss
        computed_value = self._info.func(*func_args, **func_kwargs)
    File "<project_root>/predictions/plots.py", line 331, in plot_chronogram
        .encode(...)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/vegalite/v5/schema/channels.py", line 31242, in encode
        copy.encoding = core.FacetedEncoding(**encoding)

Error: TypeError: FacetedEncoding.init() got multiple values for argument 'self'

@gaspardc-met
Copy link
Author

Removing streamlit caching on the plotting function (plot_chronogram, last one called) reveals another error I encounter often:

Traceback (most recent call last):
    File "<project_root>/decorators.py", line 68, in wrapper
        result = main_func(*args, **kwargs)
    File "<project_root>/pages_stm/predictions.py", line 79, in main
        display_predictions_tab(...)
    File "<project_root>/pages_stm/predictions.py", line 229, in display_predictions_tab
        display_chronograms(...)
    File "<project_root>/predictions/plots.py", line 515, in display_chronograms
        plot_chronogram(...)
    File "<project_root>/predictions/plots.py", line 330, in plot_chronogram
        .encode(...)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/vegalite/v5/schema/channels.py", line 31233, in encode
        kwargs = _infer_encoding_types(args, kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/utils/core.py", line 964, in infer_encoding_types
        return cache.infer_encoding_types(kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/utils/core.py", line 870, in infer_encoding_types
        return { ... }
    File "<project_root>/venv/lib/python3.11/site-packages/altair/utils/core.py", line 870, in <dictcomp>
        return { ... }

Error: RuntimeError: dictionary changed size during iteration

@dangotbanned
Copy link
Member

@gaspardc-met

Thanks for the additional traceback.

Issues

I'm trying to piece together all the issues you've raised related to altair in streamlit.

As you've seen in comment, removing streamlit caching has revealed this is actually the same issue as #3554.

Versions

Issue Working Broken
streamlit#8409 ??? 5.2.0
#3554 5.3.0 5.4.0
#3634 5.3.0 5.4.*

From this I can only conclude that #3444 is probably unrelated, since that was introduced in 5.4.0 but you had the same problem in 5.2.0

Looking at the previous functionality - you can see that this code has not changed in a meaningful way in any 5.* release.
Therefore I suspect the underlying problem is something else.

Possible Fix

As you have not provided a reproducible example, I can only guess what might be the cause.

It could be possible that alt.Chart().encode() is not thread-safe, and this could be addressed as part of #3589.

Reading PEP 667, made me think the use of locals() might be related.

I'm able to make the change below without causing a regression in versions we currently support (python>=3.8,<3.13):

diff --git a/altair/vegalite/v5/schema/channels.py

diff --git a/altair/vegalite/v5/schema/channels.py b/altair/vegalite/v5/schema/channels.py
index c978330a..327f5cc9 100644
--- a/altair/vegalite/v5/schema/channels.py
+++ b/altair/vegalite/v5/schema/channels.py
@@ -11,6 +11,7 @@ from __future__ import annotations
 # However, we need these overloads due to how the propertysetter works
 # mypy: disable-error-code="no-overload-impl, empty-body, misc"
 import sys
+import threading
 from typing import TYPE_CHECKING, Any, Literal, Sequence, TypedDict, Union, overload

 if sys.version_info >= (3, 10):
@@ -25257,14 +25258,15 @@ class _EncodingMixin:
             Offset of y-position of the marks
         """
         # Compat prep for `infer_encoding_types` signature
-        kwargs = locals()
-        kwargs.pop("self")
-        args = kwargs.pop("args")
-        if args:
-            kwargs = {k: v for k, v in kwargs.items() if v is not Undefined}
-
-        # Convert args to kwargs based on their types.
-        kwargs = _infer_encoding_types(args, kwargs)
+        with threading.Lock():
+            kwargs = locals().copy()
+            kwargs.pop("self")
+            args = kwargs.pop("args")
+            if args:
+                kwargs = {k: v for k, v in kwargs.items() if v is not Undefined}
+
+            # Convert args to kwargs based on their types.
+            kwargs = _infer_encoding_types(args, kwargs)
         # get a copy of the dict representation of the previous encoding
         # ignore type as copy method comes from SchemaBase
         copy = self.copy(deep=["encoding"])  # type: ignore[attr-defined]

You would need to test this locally and report back if it fixes the issue.

However, I do not see this change getting merged into altair without at least one supporting test.
For that, only you can help move this forward by taking the time to write a minimal repro.
I cannot see much of an effort to do so since I last requested this in comment

Example

@gaspardc-met in description

However, it might give you an idea of the plot I use.
The error happens in the first call to .encode in the function .mark_bar.encode()

I can't know exactly how much of the original code was related.

This is what a more minimal version would look like to me:

Code block

NOTE: Still not a minimal repro, since we need to be able to reproduce the error

import numpy as np
import pandas as pd
import streamlit as st  # type: ignore

import altair as alt

# Mocked dependencies and constants
SCALE = alt.Scale(
    domain=[0, 0.25, 0.5, 0.75, 1.0],
    range=["#f7fbff", "#c6dbef", "#6baed6", "#2171b5", "#08306b"],
    type="threshold",
)


@st.cache_data(show_spinner=False, ttl=60 * 60)  # TTL in seconds
def plot_chronogram(data: pd.DataFrame) -> alt.Chart:
    legend = "Load Legend"
    y = alt.Y(
        "machine:N", title="Utilisation: Groupes ou AFC", sort=["", "A", "B", "C"]
    )
    color = alt.Color(
        "load:Q", title="Load", scale=SCALE, legend=alt.Legend(title=legend)
    )
    tooltip = (
        alt.Tooltip("start_time:T", format="%Y-%m-%d", title="Date"),
        alt.Tooltip("start_time:T", format="%H:%M", title="Heure"),
        alt.Tooltip("load:Q", format=".0f", title=legend),
    )
    chart = (
        alt.Chart(data)
        .mark_bar()
        .encode(
            x=alt.X("start_time:T", title="Horizon Temporel"),
            x2=alt.X2("end_time:T"),
            y=y,
            color=color,
            stroke=alt.value("white"),
            strokeWidth=alt.value(2),
            tooltip=tooltip,
        )
    )
    return chart


# Main block to generate and display the chart
if __name__ == "__main__":
    # Generate fake data
    start = pd.date_range(start="2022-01-01", periods=48, freq="30min")
    data = pd.DataFrame(
        {
            "machine": np.random.choice(["A", "B", "C"], 48),  # noqa: NPY002
            "load": np.random.rand(48),  # noqa: NPY002
            "start_time": start,
            "end_time": start + pd.Timedelta("60min"),
        }
    )
    chart = plot_chronogram(data)
    st.altair_chart(chart)

Disregarding the streamlit parts, you can see the output here

Image

@gaspardc-met
Copy link
Author

gaspardc-met commented Oct 10, 2024

Hi @dangotbanned ,

Thank you for your incredibly detailed feedback.
I did my homework more seriously this time, and I think I have a minimal repro of the original bug, corrected by your thread locking.

The issue is not between altair and streamlit, but between altair and pyinstrument that I use to profile the app loading time.
I have tested this with the code below, with a minimal streamlit app and with the original streamlit app.
All work with the fix you provided, even with pyinstrument.

This solves both FacetedEncoding.init() and dictionnary changed size during iteration bugs that were one and the same

# altair_repro.py

import altair as alt
import numpy as np
import pandas as pd
from pyinstrument import Profiler


def plot_chart():
    start = pd.date_range(start="2022-01-01", periods=48, freq="30min")
    data = pd.DataFrame(
        {
            "machine": np.random.choice(["A", "B", "C"], 48),  # noqa: NPY002
            "load": np.random.rand(48),  # noqa: NPY002
            "start_time": start,
            "end_time": start + pd.Timedelta("60min"),
        }
    )

    chart = (
        alt.Chart(data)
        .mark_bar()
        .encode(
            x=alt.X("start_time:T", title=""),
            x2=alt.X2("end_time:T", title=""),
            y=alt.Y("machine:N", title=""),
            strokeWidth=alt.value(2),
        )
    )

    chart.display()


if __name__ == "__main__":
    with Profiler() as profiler:
        plot_chart()

Thank you for your valuable time and patience.

note: this does not fix this issue streamlit/streamlit#9616

@gaspardc-met
Copy link
Author

Just realized the bug doesn't seem to show up on python 3.10.

Steps to reproduce (on my side at least):

  • uv venv venv --python 3.12
  • source venv/bin/activate
  • uv pip install -U altair streamlit pyinstrument
  • python altair_repro.py

@dangotbanned
Copy link
Member

@gaspardc-met thanks for #3634 (comment), really helpful in narrowing things down

Just realized the bug doesn't seem to show up on python 3.10.

Steps to reproduce (on my side at least):

* `uv venv venv --python 3.12`

* `source venv/bin/activate`

* `uv pip install -U altair streamlit pyinstrument`

* `python altair_repro.py`

@gaspardc-met what version of pyinstrument are you using?

v4.7.3 may fix the issue and would explain the difference with python>=3.12

Another thing to test would be reducing the change in comment-Possible Fix to just locals().copy() on 3.12.

Using a Lock is probably sensible, but if there is a simpler option it would be good to know

@gaspardc-met
Copy link
Author

I used the latest pyinstrument on all python versions for the repro

  • pyinstrument==4.7.3

Good thing to note that that "just" setting locals().copy() seems to work 👍

@dangotbanned
Copy link
Member

I used the latest pyinstrument on all python versions for the repro

* `pyinstrument==4.7.3`

Good thing to note that that "just" setting locals().copy() seems to work 👍

Great thanks @gaspardc-met

One last alternative I wanna check before figuring out a repro using only:

[dev] dependencies

altair/pyproject.toml

Lines 67 to 83 in 02ad17d

dev = [
"hatch",
"ruff>=0.6.0",
"ibis-framework[polars]",
"ipython[kernel]",
"pandas>=0.25.3",
"pytest",
"pytest-cov",
"pytest-xdist[psutil]~=3.5",
"mistune",
"mypy",
"pandas-stubs",
"types-jsonschema",
"types-setuptools",
"geopandas",
"polars>=0.20.3",
]

I can add this in the generated code, which removes the need for using locals() entirely.
I'd prefer this since it is less "magic" and in theory should have the same semantics for all versions.

Does this work on 3.12?

diff --git a/altair/vegalite/v5/schema/channels.py b/altair/vegalite/v5/schema/channels.py
index c978330a..9972a54e 100644
--- a/altair/vegalite/v5/schema/channels.py
+++ b/altair/vegalite/v5/schema/channels.py
@@ -25257,9 +25257,48 @@ class _EncodingMixin:
             Offset of y-position of the marks
         """
         # Compat prep for `infer_encoding_types` signature
-        kwargs = locals()
-        kwargs.pop("self")
-        args = kwargs.pop("args")
+        kwargs = dict(  # noqa: C408
+            angle=angle,
+            color=color,
+            column=column,
+            description=description,
+            detail=detail,
+            facet=facet,
+            fill=fill,
+            fillOpacity=fillOpacity,
+            href=href,
+            key=key,
+            latitude=latitude,
+            latitude2=latitude2,
+            longitude=longitude,
+            longitude2=longitude2,
+            opacity=opacity,
+            order=order,
+            radius=radius,
+            radius2=radius2,
+            row=row,
+            shape=shape,
+            size=size,
+            stroke=stroke,
+            strokeDash=strokeDash,
+            strokeOpacity=strokeOpacity,
+            strokeWidth=strokeWidth,
+            text=text,
+            theta=theta,
+            theta2=theta2,
+            tooltip=tooltip,
+            url=url,
+            x=x,
+            x2=x2,
+            xError=xError,
+            xError2=xError2,
+            xOffset=xOffset,
+            y=y,
+            y2=y2,
+            yError=yError,
+            yError2=yError2,
+            yOffset=yOffset,
+        )
         if args:
             kwargs = {k: v for k, v in kwargs.items() if v is not Undefined}
 

@gaspardc-met
Copy link
Author

gaspardc-met commented Oct 11, 2024

Works like a charm (tested with 3.11 and 3.12) 👍

EDIT: so you have to repro the bug without pyinstrument ? So just "manually" messing with the locals ?

@dangotbanned
Copy link
Member

Works like a charm (tested with 3.11 and 3.12) 👍

Perfect

EDIT: so you have to repro the bug without pyinstrument ? So just "manually" messing with the locals ?

@gaspardc-met ideally I'd like to add a test to the altair test suite that:

It is definitely helpful knowing pyinstrument can trigger this issue, but reproducing the exact source would lower the chances of a future regression.

For example, if pyinstrument>4.7.3 doesn't produce the same bug - then we might not be able to catch a regression in the test suite.


I will say though that I'm personally fine with simply pushing the fix alone.

@mattijn @jonmmease does this sound reasonable to you?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants