chore: Enable more Ruff rules
edgarrmondragon committed Jan 7, 2025
1 parent d769f65 commit 0fa8ac6
Showing 11 changed files with 98 additions and 64 deletions.
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -78,7 +78,13 @@ select = [
    "E",    # pycodestyle (errors)
    "W",    # pycodestyle (warnings)
    "I",    # isort
    "N",    # pep8-naming
    "UP",   # pyupgrade
    "YTT",  # flake8-2020
    "ANN",  # flake8-annotations
    "B",    # flake8-bugbear
    "A",    # flake8-builtins
    "C4",   # flake8-comprehensions
    "DTZ",  # flake8-datetimez
    "FA",   # flake8-future-annotations
    "SIM",  # flake8-simplify
@@ -87,3 +93,6 @@ select = [
    "FURB", # refurb
    "RUF",  # Ruff-specific rules
]
+
+[tool.ruff.lint.per-file-ignores]
+"tap_github/tests/*" = ["ANN"]
21 changes: 13 additions & 8 deletions tap_github/authenticator.py
@@ -33,8 +33,8 @@ def __init__(
        self,
        token: str | None,
        rate_limit_buffer: int | None = None,
-        logger: Any | None = None,
-    ):
+        logger: Any | None = None,  # noqa: ANN401
+    ) -> None:
        """Init TokenManager info."""
        self.token = token
        self.logger = logger
@@ -48,7 +48,7 @@ def __init__(
            else self.DEFAULT_RATE_LIMIT_BUFFER
        )

-    def update_rate_limit(self, response_headers: Any) -> None:
+    def update_rate_limit(self, response_headers: Any) -> None:  # noqa: ANN401
        self.rate_limit = int(response_headers["X-RateLimit-Limit"])
        self.rate_limit_remaining = int(response_headers["X-RateLimit-Remaining"])
        self.rate_limit_reset = datetime.fromtimestamp(
@@ -97,7 +97,12 @@ def has_calls_remaining(self) -> bool:
class PersonalTokenManager(TokenManager):
    """A class to store token rate limiting information."""

-    def __init__(self, token: str, rate_limit_buffer: int | None = None, **kwargs):
+    def __init__(
+        self,
+        token: str,
+        rate_limit_buffer: int | None = None,
+        **kwargs,  # noqa: ANN003
+    ) -> None:
        """Init PersonalTokenRateLimit info."""
        super().__init__(token, rate_limit_buffer=rate_limit_buffer, **kwargs)

@@ -166,8 +171,8 @@ def __init__(
        env_key: str,
        rate_limit_buffer: int | None = None,
        expiry_time_buffer: int | None = None,
-        **kwargs,
-    ):
+        **kwargs,  # noqa: ANN003
+    ) -> None:
        if rate_limit_buffer is None:
            rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER
        super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs)
@@ -184,7 +189,7 @@ def __init__(
        self.token_expires_at: datetime | None = None
        self.claim_token()

-    def claim_token(self):
+    def claim_token(self) -> None:
        """Updates the TokenManager's token and token_expires_at attributes.

        The outcome will be _either_ that self.token is updated to a newly claimed valid token and
@@ -242,7 +247,7 @@ class GitHubTokenAuthenticator(APIAuthenticatorBase):
    """Base class for offloading API auth."""

    @staticmethod
-    def get_env():
+    def get_env():  # noqa: ANN205
        return dict(environ)

    def prepare_tokens(self) -> list[TokenManager]:
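
The targeted suppressions in this file map to specific flake8-annotations codes: ANN401 allows an intentional typing.Any (the logger and raw response headers are duck-typed on purpose), ANN003 skips annotating **kwargs that are only forwarded, and ANN205 tolerates the unannotated get_env staticmethod; the new -> None return types satisfy ANN204 on __init__. A condensed, hypothetical sketch of the pattern (not the real class):

from typing import Any


class ExampleTokenManager:
    """Hypothetical class condensing the annotation pattern above."""

    def __init__(
        self,
        token: str | None,
        # Any is deliberate here: the logger is duck-typed.
        logger: Any | None = None,  # noqa: ANN401
        # kwargs are forwarded verbatim, so they are left untyped.
        **kwargs,  # noqa: ANN003
    ) -> None:  # ANN204 wants an explicit return type on __init__
        self.token = token
        self.logger = logger
        self.extra = kwargs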
24 changes: 16 additions & 8 deletions tap_github/client.py
@@ -65,8 +65,10 @@ def http_headers(self) -> dict[str, str]:
        return headers

    def get_next_page_token(
-        self, response: requests.Response, previous_token: Any | None
-    ) -> Any | None:
+        self,
+        response: requests.Response,
+        previous_token: Any | None,  # noqa: ANN401
+    ) -> Any | None:  # noqa: ANN401
        """Return a token for identifying next page or None if no more pages."""
        if (
            previous_token
@@ -136,7 +138,9 @@ def get_next_page_token(
        return (previous_token or 1) + 1

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        params: dict = {"per_page": self.MAX_PER_PAGE}
@@ -328,8 +332,10 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
        yield from extract_jsonpath(self.query_jsonpath, input=resp_json)

    def get_next_page_token(
-        self, response: requests.Response, previous_token: Any | None
-    ) -> Any | None:
+        self,
+        response: requests.Response,
+        previous_token: Any | None,  # noqa: ANN401
+    ) -> Any | None:  # noqa: ANN401
        """
        Return a dict of cursors for identifying next page or None if no more pages.
@@ -370,7 +376,7 @@ def get_next_page_token(

        # We leverage previous_token to remember the pagination cursors
        # for indices below max_pagination_index.
-        next_page_cursors: dict[str, str] = dict()
+        next_page_cursors: dict[str, str] = {}
        for key, value in (previous_token or {}).items():
            # Only keep pagination info for indices below max_pagination_index.
            pagination_index = int(str(key).split("_")[1])
@@ -392,10 +398,12 @@ def get_next_page_token(
        return next_page_cursors

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
-        params = context.copy() if context else dict()
+        params = context.copy() if context else {}
        params["per_page"] = self.MAX_PER_PAGE
        if next_page_token:
            params.update(next_page_token)
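
Two things happen in this file: the one-line signatures are reflowed so each noqa: ANN401 comment attaches to the exact line it suppresses (noqa is line-scoped), and dict() calls become {} literals per C408, a rule ruff can fix automatically. A standalone rendering of the updated helper, assuming MAX_PER_PAGE as a module constant for illustration:

from typing import Any

MAX_PER_PAGE = 100  # assumed here; a class attribute in the real stream


def get_url_params(
    context: dict | None,
    next_page_token: Any | None,  # noqa: ANN401
) -> dict[str, Any]:
    # C408: build from the {} literal rather than a dict() call.
    params: dict[str, Any] = context.copy() if context else {}
    params["per_page"] = MAX_PER_PAGE
    if next_page_token:
        params.update(next_page_token)
    return params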
50 changes: 31 additions & 19 deletions tap_github/repository_streams.py
@@ -35,7 +35,9 @@ class RepositoryStream(GitHubRestStream):
    replication_key = "updated_at"

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -84,13 +86,13 @@ class TempStream(GitHubGraphqlStream):
        th.Property("databaseId", th.IntegerType),
    ).to_dict()

-    def __init__(self, tap, repo_list) -> None:
+    def __init__(self, tap, repo_list) -> None:  # noqa: ANN001
        super().__init__(tap)
        self.repo_list = repo_list

    @property
    def query(self) -> str:
-        chunks = list()
+        chunks = []
        for i, repo in enumerate(self.repo_list):
            chunks.append(
                f'repo{i}: repository(name: "{repo[1]}", owner: "{repo[0]}") '
@@ -114,7 +116,7 @@ def validate_response(self, response: requests.Response) -> None:
        if len(repo_list) < 1:
            return []

-        repos_with_ids: list = list()
+        repos_with_ids: list = []
        temp_stream = TempStream(self._tap, list(repo_list))
        # replace manually provided org/repo values by the ones obtained
        # from github api. This guarantees that case is correct in the output data.
@@ -166,10 +168,8 @@ def partitions(self) -> list[dict[str, str]] | None:
            ]

        if "repositories" in self.config:
-            split_repo_names = list(
-                map(lambda s: s.split("/"), self.config["repositories"])
-            )
-            augmented_repo_list = list()
+            split_repo_names = [s.split("/") for s in self.config["repositories"]]
+            augmented_repo_list = []
            # chunk requests to the graphql endpoint to avoid timeouts and other
            # obscure errors that the api doesn't say much about. The actual limit
            # seems closer to 1000, use half that to stay safe.
@@ -640,7 +640,9 @@ class MilestonesStream(GitHubRestStream):
    ignore_parent_replication_key = True

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -840,7 +842,9 @@ class IssuesStream(GitHubRestStream):
    state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"]

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -1198,7 +1202,9 @@ class PullRequestsStream(GitHubRestStream):
    use_fake_since_parameter = True

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -1587,7 +1593,9 @@ class AnonymousContributorsStream(GitHubRestStream):
    tolerated_http_errors: ClassVar[list[int]] = [204]

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -1626,7 +1634,7 @@ class StargazersStream(GitHubRestStream):
    # GitHub is missing the "since" parameter on this endpoint.
    use_fake_since_parameter = True

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        super().__init__(*args, **kwargs)
        # TODO - remove warning with next release.
        self.logger.warning(
@@ -1676,7 +1684,7 @@ class StargazersGraphqlStream(GitHubGraphqlStream):
    # The parent repository object changes if the number of stargazers changes.
    ignore_parent_replication_key = False

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        super().__init__(*args, **kwargs)
        # TODO - remove warning with next release.
        self.logger.warning(
@@ -1693,8 +1701,10 @@ def post_process(self, row: dict, context: dict | None = None) -> dict:
        return row

    def get_next_page_token(
-        self, response: requests.Response, previous_token: Any | None
-    ) -> Any | None:
+        self,
+        response: requests.Response,
+        previous_token: Any | None,  # noqa: ANN401
+    ) -> Any | None:  # noqa: ANN401
        """
        Exit early if a since parameter is provided.
        """
@@ -2092,7 +2102,7 @@ class WorkflowRunJobsStream(GitHubRestStream):
        th.Property("runner_group_name", th.StringType),
    ).to_dict()

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        super().__init__(*args, **kwargs)
        self._schema_emitted = False

@@ -2101,13 +2111,15 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
        yield from extract_jsonpath(self.records_jsonpath, input=response.json())

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        params = super().get_url_params(context, next_page_token)
        params["filter"] = "all"
        return params

-    def _write_schema_message(self):
+    def _write_schema_message(self) -> None:
        """Write out a SCHEMA message with the stream schema."""
        if not self._schema_emitted:
            super()._write_schema_message()
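
Most hunks in this file are the same ANN401 signature reflow; the more interesting rewrites swap list() for [] (C408) and replace a list(map(lambda ...)) chain with a comprehension (C417). A small self-contained example of the C417 rewrite, with sample repository names:

repositories = ["MeltanoLabs/tap-github", "meltano/meltano"]

# C417: the comprehension replaces list(map(lambda s: s.split("/"), ...)),
# avoiding a function call per element and reading left to right.
split_repo_names = [s.split("/") for s in repositories]

assert split_repo_names[0] == ["MeltanoLabs", "tap-github"]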
8 changes: 4 additions & 4 deletions tap_github/scraping.py
@@ -112,10 +112,10 @@ def parse_counter(tag: Tag | NavigableString | None) -> int:
        else:
            title_string = cast(str, title[0])
        return int(title_string.strip().replace(",", "").replace("+", ""))
-    except (KeyError, ValueError):
+    except (KeyError, ValueError) as e:
        raise IndexError(
            f"Could not parse counter {tag}. Maybe the GitHub page format has changed?"
-        )
+        ) from e


def scrape_metrics(
@@ -130,11 +130,11 @@ def scrape_metrics(
    try:
        issues = parse_counter(soup.find("span", id="issues-repo-tab-count"))
        prs = parse_counter(soup.find("span", id="pull-requests-repo-tab-count"))
-    except IndexError:
+    except IndexError as e:
        # These two items should exist. We raise an error if we could not find them.
        raise IndexError(
            "Could not find issues or prs info. Maybe the GitHub page format has changed?"  # noqa: E501
-        )
+        ) from e

    dependents_node = soup.find(string=used_by_regex)
    # verify that we didn't hit some random text in the page.
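
Both except blocks now re-raise with an explicit cause, which is flake8-bugbear's B904: without from e, the original KeyError or ValueError surfaces only as "during handling of the above exception, another exception occurred", hiding the root cause. A minimal sketch with a hypothetical helper:

def parse_count(text: str) -> int:
    """Hypothetical helper mirroring the raise-from pattern above."""
    try:
        return int(text.replace(",", "").replace("+", ""))
    except ValueError as e:
        # B904: chain the cause so the traceback shows the root failure.
        raise IndexError(f"Could not parse counter {text!r}") from e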
4 changes: 2 additions & 2 deletions tap_github/streams.py
@@ -63,7 +63,7 @@ class Streams(Enum):
    valid_queries: set[str]
    streams: list[type[Stream]]

-    def __init__(self, valid_queries: set[str], streams: list[type[Stream]]):
+    def __init__(self, valid_queries: set[str], streams: list[type[Stream]]) -> None:
        self.valid_queries = valid_queries
        self.streams = streams

@@ -124,5 +124,5 @@ def __init__(self, valid_queries: set[str], streams: list[type[Stream]]):
    )

    @classmethod
-    def all_valid_queries(cls):
+    def all_valid_queries(cls) -> set[str]:
        return set.union(*[stream.valid_queries for stream in Streams])
6 changes: 3 additions & 3 deletions tap_github/tap.py
@@ -13,20 +13,20 @@


class TapGitHub(Tap):
-    """GitHub tap class."""
+    """Singer tap for the GitHub API."""

    name = "tap-github"
    package_name = "meltanolabs-tap-github"

    @classproperty
-    def logger(cls) -> logging.Logger:
+    def logger(cls: type[TapGitHub]) -> logging.Logger:  # noqa: N805
        """Get logger.

        Returns:
            Logger with local LOGLEVEL. LOGLEVEL from env takes priority.
        """

-        LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()
+        LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()  # noqa: N806
        assert (
            LOGLEVEL in logging._levelToName.values()
        ), f"Invalid LOGLEVEL configuration: {LOGLEVEL}"
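
The two pep8-naming suppressions differ: N805 fires because ruff treats logger as an ordinary method whose first argument should be self (it does not know that classproperty makes it class-level), while N806 flags the uppercase LOGLEVEL local, kept deliberately for its environment-variable flavor. A standalone sketch of the N806 case:

import logging
import os


def get_log_level() -> str:
    """Hypothetical standalone version of the LOGLEVEL lookup above."""
    LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()  # noqa: N806
    assert LOGLEVEL in logging._levelToName.values(), LOGLEVEL
    return LOGLEVEL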
4 changes: 2 additions & 2 deletions tap_github/tests/fixtures.py
@@ -105,15 +105,15 @@ def alternative_sync_chidren(self, child_context: dict, no_sync: bool = True) ->
    for child_stream in self.child_streams:
        # Use org:write access level credentials for collaborators stream
        if child_stream.name in ["collaborators"]:
-            ORG_LEVEL_TOKEN = os.environ.get("ORG_LEVEL_TOKEN")
+            ORG_LEVEL_TOKEN = os.environ.get("ORG_LEVEL_TOKEN")  # noqa: N806
            # TODO - Fix collaborators tests, likely by mocking API responses directly.
            # Currently we have to bypass them as they are failing frequently.
            if not ORG_LEVEL_TOKEN or no_sync:
                logging.warning(
                    'No "ORG_LEVEL_TOKEN" found. Skipping collaborators stream sync.'
                )
                continue
-            SAVED_GTHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
+            SAVED_GTHUB_TOKEN = os.environ.get("GITHUB_TOKEN")  # noqa: N806
            os.environ["GITHUB_TOKEN"] = ORG_LEVEL_TOKEN
            child_stream.sync(context=child_context)
            os.environ["GITHUB_TOKEN"] = SAVED_GTHUB_TOKEN or ""
6 changes: 2 additions & 4 deletions tap_github/tests/test_tap.py
@@ -155,10 +155,8 @@ def test_last_state_message_is_valid(capsys, repo_list_config):  # noqa: F811
        ]
    )
    latest_updated_at = max(
-        map(
-            lambda record: isoparse(json.loads(record)["record"]["updated_at"]),
-            issue_comments_records,
-        )
+        isoparse(json.loads(record)["record"]["updated_at"])
+        for record in issue_comments_records
    )
    assert last_state_updated_at == latest_updated_at

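Passing a generator expression straight to max() is the comprehension-style cleanup again: the lambda and the intermediate list both disappear, and max() consumes the values lazily. For example, using the stdlib parser instead of dateutil's isoparse to stay self-contained:

import json
from datetime import datetime

records = [
    '{"record": {"updated_at": "2025-01-06T12:00:00+00:00"}}',
    '{"record": {"updated_at": "2025-01-07T09:30:00+00:00"}}',
]

# max() pulls one parsed timestamp at a time from the generator.
latest = max(
    datetime.fromisoformat(json.loads(r)["record"]["updated_at"]) for r in records
)

assert latest.isoformat() == "2025-01-07T09:30:00+00:00"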