Skip to content

Commit

Permalink
Make Scarf usage reporting in major+minor versions and counters in bu…
Browse files Browse the repository at this point in the history
…ckets
  • Loading branch information
jscheffl committed Aug 30, 2024
1 parent baadce9 commit 5a04519
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 12 deletions.
18 changes: 15 additions & 3 deletions airflow/utils/usage_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def get_database_version() -> str:
return "None"

version_info = settings.engine.dialect.server_version_info
# Example: (1, 2, 3) -> "1.2.3"
return ".".join(map(str, version_info)) if version_info else "None"
# Example: (1, 2, 3) -> "1.2" (cut only major+minor w/o patch)
return ".".join(map(str, version_info[0:2])) if version_info else "None"


def get_database_name() -> str:
Expand All @@ -95,7 +95,8 @@ def get_executor() -> str:


def get_python_version() -> str:
return platform.python_version()
# Cut only major+minor from the python version string (e.g. 3.10.12 --> 3.10)
return ".".join(platform.python_version().split(".")[0:2])


def get_plugin_counts() -> dict[str, int]:
Expand All @@ -108,3 +109,14 @@ def get_plugin_counts() -> dict[str, int]:
"appbuilder_menu_items": sum(len(x["appbuilder_menu_items"]) for x in plugin_info),
"timetables": sum(len(x["timetables"]) for x in plugin_info),
}


def to_bucket(counter: int) -> str:
"""As we don't want to have preceise numbers, make number into a bucket."""
if counter == 0:
return "0"
buckets = [0, 5, 10, 20, 50, 100, 200, 500, 1000, 2000]
for idx, val in enumerate(buckets[1:]):
if buckets[idx] < counter and counter <= val:
return f"{buckets[idx] + 1}-{val}"
return f"{buckets[-1]}+"
7 changes: 5 additions & 2 deletions airflow/www/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,9 @@ def build_scarf_url(dags_count: int) -> str:
appbuilder_views_count = plugin_counts["appbuilder_views"]
appbuilder_menu_items_count = plugin_counts["appbuilder_menu_items"]
timetables_count = plugin_counts["timetables"]
dag_bucket = usage_data_collection.to_bucket(dags_count)
plugins_bucket = usage_data_collection.to_bucket(plugins_count)
timetable_bucket = usage_data_collection.to_bucket(timetables_count)

# Path Format:
# /{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}/{plugin_count}/{flask_blueprint_count}/{appbuilder_view_count}/{appbuilder_menu_item_count}/{timetables}
Expand All @@ -248,8 +251,8 @@ def build_scarf_url(dags_count: int) -> str:
scarf_url = (
f"{scarf_domain}/webserver"
f"/{version}/{python_version}"
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dags_count}"
f"/{plugins_count}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetables_count}"
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dag_bucket}"
f"/{plugins_bucket}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetable_bucket}"
)

return scarf_url
Expand Down
43 changes: 39 additions & 4 deletions tests/utils/test_usage_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@

from airflow import __version__ as airflow_version
from airflow.configuration import conf
from airflow.utils.usage_data_collection import get_database_version, usage_data_collection
from airflow.utils.usage_data_collection import (
get_database_version,
get_python_version,
to_bucket,
usage_data_collection,
)


@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, True)])
Expand All @@ -51,7 +56,7 @@ def test_scarf_analytics(
):
platform_sys = platform.system()
platform_machine = platform.machine()
python_version = platform.python_version()
python_version = get_python_version()
executor = conf.get("core", "EXECUTOR")
scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
usage_data_collection()
Expand All @@ -74,12 +79,42 @@ def test_scarf_analytics(
@pytest.mark.parametrize(
"version_info, expected_version",
[
((1, 2, 3), "1.2.3"), # Normal version tuple
((1, 2, 3), "1.2"), # Normal version tuple
(None, "None"), # No version info available
((1,), "1"), # Single element version tuple
((1, 2, 3, "beta", 4), "1.2.3.beta.4"), # Complex version tuple with strings
((1, 2, 3, "beta", 4), "1.2"), # Complex version tuple with strings
],
)
def test_get_database_version(version_info, expected_version):
with mock.patch("airflow.settings.engine.dialect.server_version_info", new=version_info):
assert get_database_version() == expected_version


@pytest.mark.parametrize(
"version_info, expected_version",
[
("1.2.3", "1.2"), # Normal version
("4", "4"), # Single element version
("1.2.3.beta4", "1.2"), # Complex version tuple with strings
],
)
def test_get_python_version(version_info, expected_version):
with mock.patch("platform.python_version", return_value=version_info):
assert get_python_version() == expected_version


@pytest.mark.parametrize(
"counter, expected_bucket",
[
(0, "0"),
(1, "1-5"),
(5, "1-5"),
(6, "6-10"),
(11, "11-20"),
(20, "11-20"),
(21, "21-50"),
(10000, "2000+"),
],
)
def test_to_bucket(counter, expected_bucket):
assert to_bucket(counter) == expected_bucket
6 changes: 3 additions & 3 deletions tests/www/views/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def test_invalid_dates(app, admin_client, url, content):
@patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
@patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
@patch("airflow.utils.usage_data_collection.get_executor", return_value="SequentialExecutor")
@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.8.5")
@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.8")
@patch("airflow.utils.usage_data_collection.get_plugin_counts")
def test_build_scarf_url(
get_plugin_counts,
Expand All @@ -626,8 +626,8 @@ def test_build_scarf_url(
result = build_scarf_url(5)
expected_url = (
"https://apacheairflow.gateway.scarf.sh/webserver/"
f"{airflow_version}/3.8.5/Linux/x86_64/postgres/12.3/SequentialExecutor/5"
f"/10/15/20/25/30"
f"{airflow_version}/3.8/Linux/x86_64/postgres/12.3/SequentialExecutor/1-5"
f"/6-10/15/20/25/21-50"
)
if enabled:
assert result == expected_url
Expand Down

0 comments on commit 5a04519

Please sign in to comment.