Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a custom configured resource for Strava API #5

Merged
merged 4 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
logs
.env
dagster_proj.egg-info
.vscode
.vscode
*.pyc
__pyache__
2 changes: 1 addition & 1 deletion analytics_dbt/target/manifest.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion dagster_proj/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dagster import Definitions, load_assets_from_modules

from .assets import activities, dbt, energy_prediction
from .resources import database_resource, dbt_resource
from .resources import database_resource, dbt_resource, strava_api_resouce
from .jobs import activities_update_job
from .schedules import activities_update_schedule

Expand All @@ -17,6 +17,7 @@
resources={
"database": database_resource,
"dbt": dbt_resource,
'strava': strava_api_resouce,
},
jobs=all_jobs,
schedules=all_schedules,
Expand Down
Binary file removed dagster_proj/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file removed dagster_proj/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed dagster_proj/__pycache__/project.cpython-310.pyc
Binary file not shown.
Binary file removed dagster_proj/__pycache__/project.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed dagster_proj/assets/__pycache__/dbt.cpython-310.pyc
Binary file not shown.
Binary file removed dagster_proj/assets/__pycache__/dbt.cpython-312.pyc
Binary file not shown.
Binary file not shown.
30 changes: 5 additions & 25 deletions dagster_proj/assets/activities.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,14 @@
import pendulum
import sys

logger = get_dagster_logger()

CLIENT_ID = EnvVar("CLIENT_ID").get_value()
CLIENT_SECRET = EnvVar("CLIENT_SECRET").get_value()
REFRESH_TOEKEN = EnvVar("REFRESH_TOKEN").get_value()

from ..resources import StravaAPIResource, strava_api_resouce

def strava_access_token(refresh_token, client_id, client_secret):
"""Return the access_token for Authorization bearer"""
auth_url = "https://www.strava.com/oauth/token"
payload = {
'client_id': client_id,
'client_secret': client_secret,
'refresh_token': refresh_token,
'grant_type': 'refresh_token'
}
response = requests.post(auth_url, data=payload)
access_token = response.json()['access_token']
return access_token
logger = get_dagster_logger()

@dlt.source
def strava_rest_api_config(client_id, client_secret, refresh_token):
def strava_rest_api_config(strava_resource: StravaAPIResource):
logger.info("Extracting Strava data source")
access_token = strava_access_token(refresh_token, client_id, client_secret)
access_token = strava_api_resouce.get_access_token()

config: RESTAPIConfig = {
"client": {
Expand Down Expand Up @@ -85,11 +69,7 @@ def load_strava_activities():
dataset_name="activities",
progress="log")

source = strava_rest_api_config(
client_id=CLIENT_ID,
client_secret=CLIENT_SECRET,
refresh_token=REFRESH_TOEKEN
)
source = strava_rest_api_config(strava_api_resouce)

load_info = pipeline.run(source)
logger.info(load_info)
Binary file removed dagster_proj/jobs/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
10 changes: 9 additions & 1 deletion dagster_proj/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,19 @@
from dagster_dbt import DbtCliResource
from ..project import dbt_project

from .configured_resources import StravaAPIResource

dbt_resource = DbtCliResource(
project_dir=dbt_project,
profiles_dir='analytics_dbt',
)

database_resource = DuckDBResource(
database=EnvVar("DUCKDB_DATABASE"),
)
)

strava_api_resouce = StravaAPIResource(
client_id=EnvVar("CLIENT_ID").get_value(),
client_secret=EnvVar("CLIENT_SECRET").get_value(),
refresh_token=EnvVar("REFRESH_TOKEN").get_value(),
)
Binary file not shown.
Binary file not shown.
27 changes: 27 additions & 0 deletions dagster_proj/resources/configured_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from dagster import ConfigurableResource, asset, Definitions
import requests
from typing import Dict, Any

class StravaAPIResource(ConfigurableResource):
client_id: str
client_secret: str
refresh_token: str
_access_token: str = None

def get_access_token(self) -> str:
if not self._access_token:
self._access_token = self._refresh_access_token()
return self._access_token

def _refresh_access_token(self) -> str:
auth_url = "https://www.strava.com/oauth/token"
payload = {
'client_id': self.client_id,
'client_secret': self.client_secret,
'refresh_token': self.refresh_token,
'grant_type': 'refresh_token'
}
response = requests.post(auth_url, data=payload, timeout=100)
response.raise_for_status()
return response.json()['access_token']

Binary file not shown.
Binary file modified data/staging/strava.duckdb
Binary file not shown.
7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"dagster-dbt>=0.25.3",
"numpy>=2.0.2",
"scikit-learn>=1.6.0",
"dagster-cloud>=1.9.3",
]

[project.optional-dependencies]
Expand All @@ -30,7 +31,5 @@ build-backend = "setuptools.build_meta"
module_name = "dagster_proj"
code_location_name = "dagster_proj"

[dependency-groups]
dev = [
"ipykernel>=6.29.5",
]
[tool.setuptools]
packages = ["dagster_proj"]
87 changes: 85 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ cachetools==5.5.0
# via streamlit
certifi==2024.12.14
# via requests
cffi==1.17.1
# via cryptography
charset-normalizer==3.4.0
# via requests
click==8.1.7
Expand All @@ -50,15 +52,23 @@ coloredlogs==14.0
# via dagster
croniter==3.0.4
# via dagster
cryptography==44.0.0
# via pyjwt
daff==1.3.46
# via dbt-core
dagster==1.9.5
# via
# dagster-proj (pyproject.toml)
# dagster-cloud
# dagster-cloud-cli
# dagster-dbt
# dagster-duckdb
# dagster-graphql
# dagster-webserver
dagster-cloud==1.9.5
# via dagster-proj (pyproject.toml)
dagster-cloud-cli==1.9.5
# via dagster-cloud
dagster-dbt==0.25.5
# via dagster-proj (pyproject.toml)
dagster-duckdb==0.25.5
Expand Down Expand Up @@ -90,6 +100,12 @@ dbt-semantic-interfaces==0.5.1
# via dbt-core
deepdiff==7.0.1
# via dbt-common
deprecated==1.2.15
# via
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-semantic-conventions
dlt==1.4.1
# via dagster-proj (pyproject.toml)
docstring-parser==0.16
Expand All @@ -107,12 +123,18 @@ fsspec==2024.10.0
# universal-pathlib
gitdb==4.0.11
# via gitpython
github3-py==4.0.1
# via dagster-cloud-cli
gitpython==3.1.43
# via
# dlt
# streamlit
giturlparse==0.12.0
# via dlt
googleapis-common-protos==1.66.0
# via
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-exporter-otlp-proto-http
gql==3.5.0
# via dagster-graphql
graphene==3.4.3
Expand All @@ -128,6 +150,7 @@ grpcio==1.68.1
# via
# dagster
# grpcio-health-checking
# opentelemetry-exporter-otlp-proto-grpc
grpcio-health-checking==1.68.1
# via dagster
h11==0.14.0
Expand All @@ -146,7 +169,9 @@ idna==3.10
# requests
# yarl
importlib-metadata==6.11.0
# via dbt-semantic-interfaces
# via
# dbt-semantic-interfaces
# opentelemetry-api
isodate==0.6.1
# via
# agate
Expand Down Expand Up @@ -214,6 +239,33 @@ numpy==2.2.0
# scikit-learn
# scipy
# streamlit
opentelemetry-api==1.29.0
# via
# dagster-cloud
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-exporter-otlp-proto-http
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.29.0
# via
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-grpc==1.29.0
# via dagster-cloud
opentelemetry-exporter-otlp-proto-http==1.29.0
# via dagster-cloud
opentelemetry-proto==1.29.0
# via
# opentelemetry-exporter-otlp-proto-common
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.29.0
# via
# dagster-cloud
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-exporter-otlp-proto-http
opentelemetry-semantic-conventions==0.50b0
# via opentelemetry-sdk
ordered-set==4.1.0
# via deepdiff
orjson==3.10.12
Expand All @@ -224,6 +276,7 @@ packaging==24.2
# via
# altair
# dagster
# dagster-cloud-cli
# dagster-dbt
# dbt-core
# dlt
Expand All @@ -241,12 +294,16 @@ pathvalidate==3.2.1
# via dlt
pendulum==3.0.0
# via dlt
pex==2.27.1
# via dagster-cloud
pillow==11.0.0
# via streamlit
pluggy==1.5.0
# via dlt
ply==3.11
# via jsonpath-ng
prompt-toolkit==3.0.36
# via questionary
propcache==0.2.1
# via yarl
protobuf==5.29.1
Expand All @@ -255,10 +312,14 @@ protobuf==5.29.1
# dbt-adapters
# dbt-common
# dbt-core
# googleapis-common-protos
# grpcio-health-checking
# opentelemetry-proto
# streamlit
pyarrow==18.1.0
# via streamlit
pycparser==2.22
# via cffi
pydantic==2.10.3
# via
# dagster
Expand All @@ -269,11 +330,14 @@ pydeck==0.9.1
# via streamlit
pygments==2.18.0
# via rich
pyjwt==2.10.1
# via github3-py
python-dateutil==2.9.0.post0
# via
# croniter
# dbt-common
# dbt-semantic-interfaces
# github3-py
# graphene
# pandas
# pendulum
Expand All @@ -297,24 +361,33 @@ pytz==2024.2
pyyaml==6.0.2
# via
# dagster
# dagster-cloud-cli
# dbt-core
# dbt-semantic-interfaces
# dlt
# uvicorn
questionary==2.0.1
# via
# dagster-cloud
# dagster-cloud-cli
referencing==0.35.1
# via
# jsonschema
# jsonschema-specifications
requests==2.32.3
# via
# dagster
# dagster-cloud
# dagster-cloud-cli
# dagster-dbt
# dagster-graphql
# dbt-common
# dbt-core
# dlt
# github3-py
# gql
# minimal-snowplow-tracker
# opentelemetry-exporter-otlp-proto-http
# requests-toolbelt
# streamlit
requests-toolbelt==1.0.0
Expand Down Expand Up @@ -397,7 +470,10 @@ tornado==6.4.2
tqdm==4.67.1
# via dagster
typer==0.15.1
# via dagster-dbt
# via
# dagster-cloud
# dagster-cloud-cli
# dagster-dbt
types-setuptools==75.6.0.20241126
# via requirements-parser
typing-extensions==4.12.2
Expand All @@ -413,6 +489,7 @@ typing-extensions==4.12.2
# dlt
# graphene
# mashumaro
# opentelemetry-sdk
# pydantic
# pydantic-core
# sqlalchemy
Expand All @@ -425,6 +502,8 @@ tzdata==2024.2
# pendulum
universal-pathlib==0.2.6
# via dagster
uritemplate==4.1.1
# via github3-py
urllib3==2.2.3
# via requests
uvicorn==0.34.0
Expand All @@ -435,8 +514,12 @@ watchdog==5.0.3
# via dagster
watchfiles==1.0.3
# via uvicorn
wcwidth==0.2.13
# via prompt-toolkit
websockets==14.1
# via uvicorn
wrapt==1.17.0
# via deprecated
yarl==1.18.3
# via gql
zipp==3.21.0
Expand Down
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

Loading
Loading