chore: Enable more Ruff rules
edgarrmondragon committed Jan 7, 2025
1 parent d769f65 commit 0fa8ac6
Showing 11 changed files with 98 additions and 64 deletions.
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -78,7 +78,13 @@ select = [
    "E",    # pycodestyle (errors)
    "W",    # pycodestyle (warnings)
    "I",    # isort
    "N",    # pep8-naming
    "UP",   # pyupgrade
    "YTT",  # flake8-2020
    "ANN",  # flake8-annotations
    "B",    # flake8-bugbear
    "A",    # flake8-builtins
    "C4",   # flake8-comprehensions
    "DTZ",  # flake8-datetimez
    "FA",   # flake8-future-annotations
    "SIM",  # flake8-simplify
@@ -87,3 +93,6 @@ select = [
    "FURB", # refurb
    "RUF",  # Ruff-specific rules
]
+
+[tool.ruff.lint.per-file-ignores]
+"tap_github/tests/*" = ["ANN"]
21 changes: 13 additions & 8 deletions tap_github/authenticator.py
@@ -33,8 +33,8 @@ def __init__(
        self,
        token: str | None,
        rate_limit_buffer: int | None = None,
-        logger: Any | None = None,
-    ):
+        logger: Any | None = None,  # noqa: ANN401
+    ) -> None:
        """Init TokenManager info."""
        self.token = token
        self.logger = logger
@@ -48,7 +48,7 @@ def __init__(
            else self.DEFAULT_RATE_LIMIT_BUFFER
        )

-    def update_rate_limit(self, response_headers: Any) -> None:
+    def update_rate_limit(self, response_headers: Any) -> None:  # noqa: ANN401
        self.rate_limit = int(response_headers["X-RateLimit-Limit"])
        self.rate_limit_remaining = int(response_headers["X-RateLimit-Remaining"])
        self.rate_limit_reset = datetime.fromtimestamp(
@@ -97,7 +97,12 @@ def has_calls_remaining(self) -> bool:
class PersonalTokenManager(TokenManager):
    """A class to store token rate limiting information."""

-    def __init__(self, token: str, rate_limit_buffer: int | None = None, **kwargs):
+    def __init__(
+        self,
+        token: str,
+        rate_limit_buffer: int | None = None,
+        **kwargs,  # noqa: ANN003
+    ) -> None:
        """Init PersonalTokenRateLimit info."""
        super().__init__(token, rate_limit_buffer=rate_limit_buffer, **kwargs)

@@ -166,8 +171,8 @@ def __init__(
        env_key: str,
        rate_limit_buffer: int | None = None,
        expiry_time_buffer: int | None = None,
-        **kwargs,
-    ):
+        **kwargs,  # noqa: ANN003
+    ) -> None:
        if rate_limit_buffer is None:
            rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER
        super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs)
@@ -184,7 +189,7 @@ def __init__(
        self.token_expires_at: datetime | None = None
        self.claim_token()

-    def claim_token(self):
+    def claim_token(self) -> None:
        """Updates the TokenManager's token and token_expires_at attributes.

        The outcome will be _either_ that self.token is updated to a newly claimed valid token and
@@ -242,7 +247,7 @@ class GitHubTokenAuthenticator(APIAuthenticatorBase):
    """Base class for offloading API auth."""

    @staticmethod
-    def get_env():
+    def get_env():  # noqa: ANN205
        return dict(environ)

    def prepare_tokens(self) -> list[TokenManager]:
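
The targeted suppressions in this file map to specific flake8-annotations codes: ANN401 allows an intentional typing.Any (the logger and raw response headers are duck-typed on purpose), ANN003 skips annotating **kwargs that are only forwarded, and ANN205 tolerates the unannotated get_env staticmethod; the new -> None return types satisfy ANN204 on __init__. A condensed, hypothetical sketch of the pattern (not the real class):

from typing import Any


class ExampleTokenManager:
    """Hypothetical class condensing the annotation pattern above."""

    def __init__(
        self,
        token: str | None,
        # Any is deliberate here: the logger is duck-typed.
        logger: Any | None = None,  # noqa: ANN401
        # kwargs are forwarded verbatim, so they are left untyped.
        **kwargs,  # noqa: ANN003
    ) -> None:  # ANN204 wants an explicit return type on __init__
        self.token = token
        self.logger = logger
        self.extra = kwargs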
24 changes: 16 additions & 8 deletions tap_github/client.py
@@ -65,8 +65,10 @@ def http_headers(self) -> dict[str, str]:
        return headers

    def get_next_page_token(
-        self, response: requests.Response, previous_token: Any | None
-    ) -> Any | None:
+        self,
+        response: requests.Response,
+        previous_token: Any | None,  # noqa: ANN401
+    ) -> Any | None:  # noqa: ANN401
        """Return a token for identifying next page or None if no more pages."""
        if (
            previous_token
@@ -136,7 +138,9 @@ def get_next_page_token(
        return (previous_token or 1) + 1

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        params: dict = {"per_page": self.MAX_PER_PAGE}
@@ -328,8 +332,10 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
        yield from extract_jsonpath(self.query_jsonpath, input=resp_json)

    def get_next_page_token(
-        self, response: requests.Response, previous_token: Any | None
-    ) -> Any | None:
+        self,
+        response: requests.Response,
+        previous_token: Any | None,  # noqa: ANN401
+    ) -> Any | None:  # noqa: ANN401
        """
        Return a dict of cursors for identifying next page or None if no more pages.
@@ -370,7 +376,7 @@ def get_next_page_token(

        # We leverage previous_token to remember the pagination cursors
        # for indices below max_pagination_index.
-        next_page_cursors: dict[str, str] = dict()
+        next_page_cursors: dict[str, str] = {}
        for key, value in (previous_token or {}).items():
            # Only keep pagination info for indices below max_pagination_index.
            pagination_index = int(str(key).split("_")[1])
@@ -392,10 +398,12 @@ def get_next_page_token(
        return next_page_cursors

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
-        params = context.copy() if context else dict()
+        params = context.copy() if context else {}
        params["per_page"] = self.MAX_PER_PAGE
        if next_page_token:
            params.update(next_page_token)
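
Two things happen in this file: the one-line signatures are reflowed so each noqa: ANN401 comment attaches to the exact line it suppresses (noqa is line-scoped), and dict() calls become {} literals per C408, a rule ruff can fix automatically. A standalone rendering of the updated helper, assuming MAX_PER_PAGE as a module constant for illustration:

from typing import Any

MAX_PER_PAGE = 100  # assumed here; a class attribute in the real stream


def get_url_params(
    context: dict | None,
    next_page_token: Any | None,  # noqa: ANN401
) -> dict[str, Any]:
    # C408: build from the {} literal rather than a dict() call.
    params: dict[str, Any] = context.copy() if context else {}
    params["per_page"] = MAX_PER_PAGE
    if next_page_token:
        params.update(next_page_token)
    return params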
50 changes: 31 additions & 19 deletions tap_github/repository_streams.py
@@ -35,7 +35,9 @@ class RepositoryStream(GitHubRestStream):
    replication_key = "updated_at"

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -84,13 +86,13 @@ class TempStream(GitHubGraphqlStream):
        th.Property("databaseId", th.IntegerType),
    ).to_dict()

-    def __init__(self, tap, repo_list) -> None:
+    def __init__(self, tap, repo_list) -> None:  # noqa: ANN001
        super().__init__(tap)
        self.repo_list = repo_list

    @property
    def query(self) -> str:
-        chunks = list()
+        chunks = []
        for i, repo in enumerate(self.repo_list):
            chunks.append(
                f'repo{i}: repository(name: "{repo[1]}", owner: "{repo[0]}") '
@@ -114,7 +116,7 @@ def validate_response(self, response: requests.Response) -> None:
        if len(repo_list) < 1:
            return []

-        repos_with_ids: list = list()
+        repos_with_ids: list = []
        temp_stream = TempStream(self._tap, list(repo_list))
        # replace manually provided org/repo values by the ones obtained
        # from github api. This guarantees that case is correct in the output data.
@@ -166,10 +168,8 @@ def partitions(self) -> list[dict[str, str]] | None:
            ]

        if "repositories" in self.config:
-            split_repo_names = list(
-                map(lambda s: s.split("/"), self.config["repositories"])
-            )
-            augmented_repo_list = list()
+            split_repo_names = [s.split("/") for s in self.config["repositories"]]
+            augmented_repo_list = []
            # chunk requests to the graphql endpoint to avoid timeouts and other
            # obscure errors that the api doesn't say much about. The actual limit
            # seems closer to 1000, use half that to stay safe.
@@ -640,7 +640,9 @@ class MilestonesStream(GitHubRestStream):
    ignore_parent_replication_key = True

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -840,7 +842,9 @@ class IssuesStream(GitHubRestStream):
    state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"]

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -1198,7 +1202,9 @@ class PullRequestsStream(GitHubRestStream):
    use_fake_since_parameter = True

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -1587,7 +1593,9 @@ class AnonymousContributorsStream(GitHubRestStream):
    tolerated_http_errors: ClassVar[list[int]] = [204]

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Return a dictionary of values to be used in URL parameterization."""
        assert context is not None, f"Context cannot be empty for '{self.name}' stream."
@@ -1626,7 +1634,7 @@ class StargazersStream(GitHubRestStream):
    # GitHub is missing the "since" parameter on this endpoint.
    use_fake_since_parameter = True

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        super().__init__(*args, **kwargs)
        # TODO - remove warning with next release.
        self.logger.warning(
@@ -1676,7 +1684,7 @@ class StargazersGraphqlStream(GitHubGraphqlStream):
    # The parent repository object changes if the number of stargazers changes.
    ignore_parent_replication_key = False

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        super().__init__(*args, **kwargs)
        # TODO - remove warning with next release.
        self.logger.warning(
@@ -1693,8 +1701,10 @@ def post_process(self, row: dict, context: dict | None = None) -> dict:
        return row

    def get_next_page_token(
-        self, response: requests.Response, previous_token: Any | None
-    ) -> Any | None:
+        self,
+        response: requests.Response,
+        previous_token: Any | None,  # noqa: ANN401
+    ) -> Any | None:  # noqa: ANN401
        """
        Exit early if a since parameter is provided.
        """
@@ -2092,7 +2102,7 @@ class WorkflowRunJobsStream(GitHubRestStream):
        th.Property("runner_group_name", th.StringType),
    ).to_dict()

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        super().__init__(*args, **kwargs)
        self._schema_emitted = False

@@ -2101,13 +2111,15 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
        yield from extract_jsonpath(self.records_jsonpath, input=response.json())

    def get_url_params(
-        self, context: dict | None, next_page_token: Any | None
+        self,
+        context: dict | None,
+        next_page_token: Any | None,  # noqa: ANN401
    ) -> dict[str, Any]:
        params = super().get_url_params(context, next_page_token)
        params["filter"] = "all"
        return params

-    def _write_schema_message(self):
+    def _write_schema_message(self) -> None:
        """Write out a SCHEMA message with the stream schema."""
        if not self._schema_emitted:
            super()._write_schema_message()
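
Most hunks in this file are the same ANN401 signature reflow; the more interesting rewrites swap list() for [] (C408) and replace a list(map(lambda ...)) chain with a comprehension (C417). A small self-contained example of the C417 rewrite, with sample repository names:

repositories = ["MeltanoLabs/tap-github", "meltano/meltano"]

# C417: the comprehension replaces list(map(lambda s: s.split("/"), ...)),
# avoiding a function call per element and reading left to right.
split_repo_names = [s.split("/") for s in repositories]

assert split_repo_names[0] == ["MeltanoLabs", "tap-github"]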
8 changes: 4 additions & 4 deletions tap_github/scraping.py
@@ -112,10 +112,10 @@ def parse_counter(tag: Tag | NavigableString | None) -> int:
        else:
            title_string = cast(str, title[0])
        return int(title_string.strip().replace(",", "").replace("+", ""))
-    except (KeyError, ValueError):
+    except (KeyError, ValueError) as e:
        raise IndexError(
            f"Could not parse counter {tag}. Maybe the GitHub page format has changed?"
-        )
+        ) from e


def scrape_metrics(
@@ -130,11 +130,11 @@ def scrape_metrics(
    try:
        issues = parse_counter(soup.find("span", id="issues-repo-tab-count"))
        prs = parse_counter(soup.find("span", id="pull-requests-repo-tab-count"))
-    except IndexError:
+    except IndexError as e:
        # These two items should exist. We raise an error if we could not find them.
        raise IndexError(
            "Could not find issues or prs info. Maybe the GitHub page format has changed?"  # noqa: E501
-        )
+        ) from e

    dependents_node = soup.find(string=used_by_regex)
    # verify that we didn't hit some random text in the page.
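
Both except blocks now re-raise with an explicit cause, which is flake8-bugbear's B904: without from e, the original KeyError or ValueError surfaces only as "during handling of the above exception, another exception occurred", hiding the root cause. A minimal sketch with a hypothetical helper:

def parse_count(text: str) -> int:
    """Hypothetical helper mirroring the raise-from pattern above."""
    try:
        return int(text.replace(",", "").replace("+", ""))
    except ValueError as e:
        # B904: chain the cause so the traceback shows the root failure.
        raise IndexError(f"Could not parse counter {text!r}") from e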
4 changes: 2 additions & 2 deletions tap_github/streams.py
@@ -63,7 +63,7 @@ class Streams(Enum):
    valid_queries: set[str]
    streams: list[type[Stream]]

-    def __init__(self, valid_queries: set[str], streams: list[type[Stream]]):
+    def __init__(self, valid_queries: set[str], streams: list[type[Stream]]) -> None:
        self.valid_queries = valid_queries
        self.streams = streams

@@ -124,5 +124,5 @@ def __init__(self, valid_queries: set[str], streams: list[type[Stream]]):
    )

    @classmethod
-    def all_valid_queries(cls):
+    def all_valid_queries(cls) -> set[str]:
        return set.union(*[stream.valid_queries for stream in Streams])
6 changes: 3 additions & 3 deletions tap_github/tap.py
@@ -13,20 +13,20 @@


class TapGitHub(Tap):
-    """GitHub tap class."""
+    """Singer tap for the GitHub API."""

    name = "tap-github"
    package_name = "meltanolabs-tap-github"

    @classproperty
-    def logger(cls) -> logging.Logger:
+    def logger(cls: type[TapGitHub]) -> logging.Logger:  # noqa: N805
        """Get logger.

        Returns:
            Logger with local LOGLEVEL. LOGLEVEL from env takes priority.
        """

-        LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()
+        LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()  # noqa: N806
        assert (
            LOGLEVEL in logging._levelToName.values()
        ), f"Invalid LOGLEVEL configuration: {LOGLEVEL}"
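
The two pep8-naming suppressions differ: N805 fires because ruff treats logger as an ordinary method whose first argument should be self (it does not know that classproperty makes it class-level), while N806 flags the uppercase LOGLEVEL local, kept deliberately for its environment-variable flavor. A standalone sketch of the N806 case:

import logging
import os


def get_log_level() -> str:
    """Hypothetical standalone version of the LOGLEVEL lookup above."""
    LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()  # noqa: N806
    assert LOGLEVEL in logging._levelToName.values(), LOGLEVEL
    return LOGLEVEL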
4 changes: 2 additions & 2 deletions tap_github/tests/fixtures.py
@@ -105,15 +105,15 @@ def alternative_sync_chidren(self, child_context: dict, no_sync: bool = True) ->
    for child_stream in self.child_streams:
        # Use org:write access level credentials for collaborators stream
        if child_stream.name in ["collaborators"]:
-            ORG_LEVEL_TOKEN = os.environ.get("ORG_LEVEL_TOKEN")
+            ORG_LEVEL_TOKEN = os.environ.get("ORG_LEVEL_TOKEN")  # noqa: N806
            # TODO - Fix collaborators tests, likely by mocking API responses directly.
            # Currently we have to bypass them as they are failing frequently.
            if not ORG_LEVEL_TOKEN or no_sync:
                logging.warning(
                    'No "ORG_LEVEL_TOKEN" found. Skipping collaborators stream sync.'
                )
                continue
-            SAVED_GTHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
+            SAVED_GTHUB_TOKEN = os.environ.get("GITHUB_TOKEN")  # noqa: N806
            os.environ["GITHUB_TOKEN"] = ORG_LEVEL_TOKEN
            child_stream.sync(context=child_context)
            os.environ["GITHUB_TOKEN"] = SAVED_GTHUB_TOKEN or ""
6 changes: 2 additions & 4 deletions tap_github/tests/test_tap.py
@@ -155,10 +155,8 @@ def test_last_state_message_is_valid(capsys, repo_list_config):  # noqa: F811
        ]
    )
    latest_updated_at = max(
-        map(
-            lambda record: isoparse(json.loads(record)["record"]["updated_at"]),
-            issue_comments_records,
-        )
+        isoparse(json.loads(record)["record"]["updated_at"])
+        for record in issue_comments_records
    )
    assert last_state_updated_at == latest_updated_at

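Passing a generator expression straight to max() is the comprehension-style cleanup again: the lambda and the intermediate list both disappear, and max() consumes the values lazily. For example, using the stdlib parser instead of dateutil's isoparse to stay self-contained:

import json
from datetime import datetime

records = [
    '{"record": {"updated_at": "2025-01-06T12:00:00+00:00"}}',
    '{"record": {"updated_at": "2025-01-07T09:30:00+00:00"}}',
]

# max() pulls one parsed timestamp at a time from the generator.
latest = max(
    datetime.fromisoformat(json.loads(r)["record"]["updated_at"]) for r in records
)

assert latest.isoformat() == "2025-01-07T09:30:00+00:00"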