Skip to content

Commit

Permalink
enable mypy ignore-without-code
Browse files: browse the repository at this point in the history
  • Loading branch information
PhilipMay committed Jan 2, 2024
1 parent 1c7a8b6 commit 9556b22
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 25 deletions.
36 changes: 18 additions & 18 deletions mltb2/arangodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,13 @@ def from_config_file(cls, config_file_name, aql_overwrite: Optional[str] = None)
_check_config_keys(arango_config, expected_config_file_keys)

return cls(
hosts=arango_config["hosts"], # type: ignore
db_name=arango_config["db_name"], # type: ignore
username=arango_config["username"], # type: ignore
password=arango_config["password"], # type: ignore
collection_name=arango_config["collection_name"], # type: ignore
attribute_name=arango_config["attribute_name"], # type: ignore
batch_size=int(arango_config["batch_size"]), # type: ignore
hosts=arango_config["hosts"], # type: ignore[arg-type]
db_name=arango_config["db_name"], # type: ignore[arg-type]
username=arango_config["username"], # type: ignore[arg-type]
password=arango_config["password"], # type: ignore[arg-type]
collection_name=arango_config["collection_name"], # type: ignore[arg-type]
attribute_name=arango_config["attribute_name"], # type: ignore[arg-type]
batch_size=int(arango_config["batch_size"]), # type: ignore[arg-type]
aql_overwrite=aql_overwrite,
)

Expand Down Expand Up @@ -150,12 +150,12 @@ def load_batch(self) -> Sequence:
aql = self.aql_overwrite
cursor = connection.aql.execute(
aql,
bind_vars=bind_vars, # type: ignore
bind_vars=bind_vars, # type: ignore[arg-type]
batch_size=self.batch_size,
)
with closing(cursor) as closing_cursor: # type: ignore
batch = closing_cursor.batch() # type: ignore
return batch # type: ignore
with closing(cursor) as closing_cursor: # type: ignore[type-var]
batch = closing_cursor.batch() # type: ignore[union-attr]
return batch # type: ignore[return-value]

def save_batch(self, batch: Sequence) -> None:
"""Save a batch of data to the ArangoDB database.
Expand Down Expand Up @@ -195,24 +195,24 @@ def arango_collection_backup() -> None:
output_file_name = f"./{args.col}_backup.jsonl.gz"
print(f"Writing backup to '{output_file_name}'...")

with closing(ArangoClient(hosts=arango_config["hosts"])) as arango_client, gzip.open( # type: ignore
with closing(ArangoClient(hosts=arango_config["hosts"])) as arango_client, gzip.open( # type: ignore[arg-type]
output_file_name, "w"
) as gzip_out:
connection = arango_client.db(
arango_config["db_name"], # type: ignore
arango_config["username"], # type: ignore
arango_config["password"], # type: ignore
arango_config["db_name"], # type: ignore[arg-type]
arango_config["username"], # type: ignore[arg-type]
arango_config["password"], # type: ignore[arg-type]
)
jsonlines_writer = jsonlines.Writer(gzip_out) # type: ignore
jsonlines_writer = jsonlines.Writer(gzip_out) # type: ignore[arg-type]
try:
cursor = connection.aql.execute(
"FOR doc IN @@coll RETURN doc",
bind_vars={"@coll": args.col},
batch_size=100,
max_runtime=60 * 60, # type: ignore # 1 hour
max_runtime=60 * 60, # type: ignore[arg-type] # 1 hour
stream=True,
)
for doc in tqdm(cursor):
jsonlines_writer.write(doc)
finally:
cursor.close(ignore_missing=True) # type: ignore
cursor.close(ignore_missing=True) # type: ignore[union-attr]
4 changes: 2 additions & 2 deletions mltb2/somajo_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ def __call__(self, text: str) -> List[str]:
sentences = self.somajo_sentence_splitter(text)
counts = self.transformers_token_counter(sentences)

assert len(sentences) == len(counts) # type: ignore # noqa: S101
assert len(sentences) == len(counts) # type: ignore[arg-type]

result_splits: List[str] = []
current_sentences: List[str] = []
current_count: int = 0
for sentence, count in zip(tqdm(sentences, disable=not self.show_progress_bar), counts): # type: ignore
for sentence, count in zip(tqdm(sentences, disable=not self.show_progress_bar), counts): # type: ignore[arg-type]
if count > self.max_token:
if self.ignore_overly_long_sentences:
continue
Expand Down
10 changes: 5 additions & 5 deletions mltb2/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,10 @@ def fit(self, text: Union[str, Iterable[str]]) -> None:
raise ValueError("fit must not be called after distance calculation!")

if isinstance(text, str):
self._char_counter.update(text) # type: ignore
self._char_counter.update(text) # type: ignore[union-attr]
else:
for t in tqdm(text, disable=not self.show_progress_bar):
self._char_counter.update(t) # type: ignore
self._char_counter.update(t) # type: ignore[union-attr]

self._fit_called = True

Expand All @@ -238,7 +238,7 @@ def _normalize_char_counter(self) -> None:
This supports lazy postprocessing of the char counter.
"""
if not self._distance_called:
self._normalized_char_counts = _normalize_counter_to_defaultdict(self._char_counter, self.max_dimensions) # type: ignore
self._normalized_char_counts = _normalize_counter_to_defaultdict(self._char_counter, self.max_dimensions) # type: ignore[arg-type]
self._char_counter = None
self._counted_char_set = set(self._normalized_char_counts)
self._distance_called = True
Expand All @@ -260,9 +260,9 @@ def distance(self, text) -> float:
text_vector = []
text_count = Counter(text)
text_count_defaultdict = _normalize_counter_to_defaultdict(text_count, self.max_dimensions)
for c in self._counted_char_set.union(text_count_defaultdict): # type: ignore
for c in self._counted_char_set.union(text_count_defaultdict): # type: ignore[union-attr]
all_vector.append(
self._normalized_char_counts[c] # type: ignore
self._normalized_char_counts[c] # type: ignore[index]
) # if c is not in defaultdict, it will return 0
text_vector.append(text_count_defaultdict[c]) # if c is not in defaultdict, it will return 0
return cityblock(all_vector, text_vector)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,4 @@ notice-rgx = "(# Copyright \\(c\\) \\d{4}.*\\n)+# This software is distributed u

[tool.mypy]
ignore_missing_imports = true
enable_error_code=["ignore-without-code"]

0 comments on commit 9556b22

Please sign in to comment.