Skip to content

Commit

Permalink
🔥 Remove single_byte_hard|soft_failure counters
Browse files: browse the repository at this point in the history
These counters were part of an old experiment that was discontinued.
  • Loading branch information
Ousret committed Sep 23, 2021
1 parent 1b087b3 commit 59e48eb
Showing 1 changed file with 7 additions and 18 deletions.
25 changes: 7 additions & 18 deletions charset_normalizer/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,6 @@ def from_bytes(
fallback_u8 = None # type: Optional[CharsetMatch]
fallback_specified = None # type: Optional[CharsetMatch]

single_byte_hard_failure_count = 0 # type: int
single_byte_soft_failure_count = 0 # type: int

results = CharsetMatches() # type: CharsetMatches

sig_encoding, sig_payload = identify_sig_or_bom(sequences)
Expand Down Expand Up @@ -218,20 +215,14 @@ def from_bytes(
else sequences[len(sig_payload) :],
encoding=encoding_iana,
)
except UnicodeDecodeError as e:
logger.warning(
"Code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
tested_but_hard_failure.append(encoding_iana)
if not is_multi_byte_decoder:
single_byte_hard_failure_count += 1
continue
except LookupError:
except (UnicodeDecodeError, LookupError) as e:
if not isinstance(e, LookupError):
logger.warning(
"Code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
tested_but_hard_failure.append(encoding_iana)
if not is_multi_byte_decoder:
single_byte_hard_failure_count += 1
continue

similar_soft_failure_test = False # type: bool
Expand Down Expand Up @@ -328,8 +319,6 @@ def from_bytes(

if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
tested_but_soft_failure.append(encoding_iana)
if not is_multi_byte_decoder:
single_byte_soft_failure_count += 1
logger.warning(
"%s was excluded because of initial chaos probing. Gave up %i time(s). "
"Computed mean chaos is %f %%.",
Expand Down

0 comments on commit 59e48eb

Please sign in to comment.