From 9556b220117960050e8339780d048509ff6b3078 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Tue, 2 Jan 2024 13:41:10 +0100 Subject: [PATCH] enable mypy ignore-without-code --- mltb2/arangodb.py | 36 ++++++++++++++++++------------------ mltb2/somajo_transformers.py | 4 ++-- mltb2/text.py | 10 +++++----- pyproject.toml | 1 + 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/mltb2/arangodb.py b/mltb2/arangodb.py index 3be32d0..7af8292 100644 --- a/mltb2/arangodb.py +++ b/mltb2/arangodb.py @@ -107,13 +107,13 @@ def from_config_file(cls, config_file_name, aql_overwrite: Optional[str] = None) _check_config_keys(arango_config, expected_config_file_keys) return cls( - hosts=arango_config["hosts"], # type: ignore - db_name=arango_config["db_name"], # type: ignore - username=arango_config["username"], # type: ignore - password=arango_config["password"], # type: ignore - collection_name=arango_config["collection_name"], # type: ignore - attribute_name=arango_config["attribute_name"], # type: ignore - batch_size=int(arango_config["batch_size"]), # type: ignore + hosts=arango_config["hosts"], # type: ignore[arg-type] + db_name=arango_config["db_name"], # type: ignore[arg-type] + username=arango_config["username"], # type: ignore[arg-type] + password=arango_config["password"], # type: ignore[arg-type] + collection_name=arango_config["collection_name"], # type: ignore[arg-type] + attribute_name=arango_config["attribute_name"], # type: ignore[arg-type] + batch_size=int(arango_config["batch_size"]), # type: ignore[arg-type] aql_overwrite=aql_overwrite, ) @@ -150,12 +150,12 @@ def load_batch(self) -> Sequence: aql = self.aql_overwrite cursor = connection.aql.execute( aql, - bind_vars=bind_vars, # type: ignore + bind_vars=bind_vars, # type: ignore[arg-type] batch_size=self.batch_size, ) - with closing(cursor) as closing_cursor: # type: ignore - batch = closing_cursor.batch() # type: ignore - return batch # type: ignore + with closing(cursor) as closing_cursor: # type: ignore[type-var] + batch = closing_cursor.batch() # type: ignore[union-attr] + return batch # type: ignore[return-value] def save_batch(self, batch: Sequence) -> None: """Save a batch of data to the ArangoDB database. @@ -195,24 +195,24 @@ def arango_collection_backup() -> None: output_file_name = f"./{args.col}_backup.jsonl.gz" print(f"Writing backup to '{output_file_name}'...") - with closing(ArangoClient(hosts=arango_config["hosts"])) as arango_client, gzip.open( # type: ignore + with closing(ArangoClient(hosts=arango_config["hosts"])) as arango_client, gzip.open( # type: ignore[arg-type] output_file_name, "w" ) as gzip_out: connection = arango_client.db( - arango_config["db_name"], # type: ignore - arango_config["username"], # type: ignore - arango_config["password"], # type: ignore + arango_config["db_name"], # type: ignore[arg-type] + arango_config["username"], # type: ignore[arg-type] + arango_config["password"], # type: ignore[arg-type] ) - jsonlines_writer = jsonlines.Writer(gzip_out) # type: ignore + jsonlines_writer = jsonlines.Writer(gzip_out) # type: ignore[arg-type] try: cursor = connection.aql.execute( "FOR doc IN @@coll RETURN doc", bind_vars={"@coll": args.col}, batch_size=100, - max_runtime=60 * 60, # type: ignore # 1 hour + max_runtime=60 * 60, # type: ignore[arg-type] # 1 hour stream=True, ) for doc in tqdm(cursor): jsonlines_writer.write(doc) finally: - cursor.close(ignore_missing=True) # type: ignore + cursor.close(ignore_missing=True) # type: ignore[union-attr] diff --git a/mltb2/somajo_transformers.py b/mltb2/somajo_transformers.py index 82a4b3c..18f835a 100644 --- a/mltb2/somajo_transformers.py +++ b/mltb2/somajo_transformers.py @@ -57,12 +57,12 @@ def __call__(self, text: str) -> List[str]: sentences = self.somajo_sentence_splitter(text) counts = self.transformers_token_counter(sentences) - assert len(sentences) == len(counts) # type: ignore # noqa: S101 + assert len(sentences) == len(counts) # type: ignore[arg-type] result_splits: List[str] = [] current_sentences: List[str] = [] current_count: int = 0 - for sentence, count in zip(tqdm(sentences, disable=not self.show_progress_bar), counts): # type: ignore + for sentence, count in zip(tqdm(sentences, disable=not self.show_progress_bar), counts): # type: ignore[arg-type] if count > self.max_token: if self.ignore_overly_long_sentences: continue diff --git a/mltb2/text.py b/mltb2/text.py index 5c6fb23..d998c4e 100644 --- a/mltb2/text.py +++ b/mltb2/text.py @@ -225,10 +225,10 @@ def fit(self, text: Union[str, Iterable[str]]) -> None: raise ValueError("fit must not be called after distance calculation!") if isinstance(text, str): - self._char_counter.update(text) # type: ignore + self._char_counter.update(text) # type: ignore[union-attr] else: for t in tqdm(text, disable=not self.show_progress_bar): - self._char_counter.update(t) # type: ignore + self._char_counter.update(t) # type: ignore[union-attr] self._fit_called = True @@ -238,7 +238,7 @@ def _normalize_char_counter(self) -> None: This supports lazy postprocessing of the char counter. """ if not self._distance_called: - self._normalized_char_counts = _normalize_counter_to_defaultdict(self._char_counter, self.max_dimensions) # type: ignore + self._normalized_char_counts = _normalize_counter_to_defaultdict(self._char_counter, self.max_dimensions) # type: ignore[arg-type] self._char_counter = None self._counted_char_set = set(self._normalized_char_counts) self._distance_called = True @@ -260,9 +260,9 @@ def distance(self, text) -> float: text_vector = [] text_count = Counter(text) text_count_defaultdict = _normalize_counter_to_defaultdict(text_count, self.max_dimensions) - for c in self._counted_char_set.union(text_count_defaultdict): # type: ignore + for c in self._counted_char_set.union(text_count_defaultdict): # type: ignore[union-attr] all_vector.append( - self._normalized_char_counts[c] # type: ignore + self._normalized_char_counts[c] # type: ignore[index] ) # if c is not in defaultdict, it will return 0 text_vector.append(text_count_defaultdict[c]) # if c is not in defaultdict, it will return 0 return cityblock(all_vector, text_vector) diff --git a/pyproject.toml b/pyproject.toml index f4d6caa..bee07ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -168,3 +168,4 @@ notice-rgx = "(# Copyright \\(c\\) \\d{4}.*\\n)+# This software is distributed u [tool.mypy] ignore_missing_imports = true +enable_error_code=["ignore-without-code"]