Commit

reduce unused code
micmarty-deepsense committed May 9, 2024
1 parent 681e14c commit a6fac5e
Showing 1 changed file with 0 additions and 91 deletions.
91 changes: 0 additions & 91 deletions unstructured/metrics/evaluate.py
@@ -432,97 +432,6 @@ def _generate_dataframes(self, rows):
        return df, agg_df


def measure_text_extraction_accuracy(
    output_dir: str,
    source_dir: str,
    output_list: Optional[List[str]] = None,
    source_list: Optional[List[str]] = None,
    export_dir: str = "metrics",
    group_by: Optional[str] = None,
    weights: Tuple[int, int, int] = (1, 1, 1),
    visualize: bool = False,
    output_type: str = "json",
) -> None:
    """
    Loops through the structured outputs, either all files under `output_dir` or the
    selected files in `output_list`, and compares each with the gold standard of the
    same file name under `source_dir` (or the selected files in `source_list`).
    Calculates text accuracy and percent missing per file, then writes the results
    to TSV, including the aggregated accuracy and percent missing.
    """
    TextExtractionMetricsCalculator(
        documents_dir=output_dir,
        ground_truths_dir=source_dir,
        group_by=group_by,
        weights=weights,
        document_type=output_type,
    ).on_files(document_paths=output_list, ground_truth_paths=source_list).calculate(
        export_dir=export_dir, visualize_progress=visualize, display_agg_df=True
    )
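
With the wrapper gone, a caller would presumably chain the calculator API directly. A minimal sketch, assuming `TextExtractionMetricsCalculator` stays importable from `unstructured.metrics.evaluate`; both directory paths are hypothetical placeholders:

from unstructured.metrics.evaluate import TextExtractionMetricsCalculator

# Equivalent of the removed wrapper called with its defaults;
# "structured_outputs/" and "gold_standards/" are hypothetical paths.
TextExtractionMetricsCalculator(
    documents_dir="structured_outputs/",
    ground_truths_dir="gold_standards/",
    group_by=None,
    weights=(1, 1, 1),
    document_type="json",
).on_files(document_paths=None, ground_truth_paths=None).calculate(
    export_dir="metrics", visualize_progress=False, display_agg_df=True
)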


def measure_element_type_accuracy(
    output_dir: str,
    source_dir: str,
    output_list: Optional[List[str]] = None,
    source_list: Optional[List[str]] = None,
    export_dir: str = "metrics",
    group_by: Optional[str] = None,
    visualize: bool = False,
) -> None:
    """
    Loops through the structured outputs, either all files under `output_dir` or the
    selected files in `output_list`, and compares each with the gold standard of the
    same file name under `source_dir` (or the selected files in `source_list`).
    Calculates element type frequency accuracy and percent missing per file, then
    writes the results to TSV, including the aggregated accuracy.
    """
    ElementTypeMetricsCalculator(
        documents_dir=output_dir,
        ground_truths_dir=source_dir,
        group_by=group_by,
    ).on_files(document_paths=output_list, ground_truth_paths=source_list).calculate(
        export_dir=export_dir, visualize_progress=visualize, display_agg_df=True
    )
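
The same direct call replaces this wrapper as well; a sketch under the same import assumption, with hypothetical paths:

from unstructured.metrics.evaluate import ElementTypeMetricsCalculator

# Mirrors the removed wrapper's defaults; the paths are hypothetical.
ElementTypeMetricsCalculator(
    documents_dir="structured_outputs/",
    ground_truths_dir="gold_standards/",
    group_by=None,
).on_files(document_paths=None, ground_truth_paths=None).calculate(
    export_dir="metrics", visualize_progress=False, display_agg_df=True
)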


def measure_table_structure_accuracy(
    output_dir: str,
    source_dir: str,
    output_list: Optional[List[str]] = None,
    source_list: Optional[List[str]] = None,
    export_dir: str = "metrics",
    visualize: bool = False,
    cutoff: Optional[float] = None,
) -> None:
    """
    Loops through the structured outputs, either all files under `output_dir` or the
    selected files in `output_list`, and compares each with the gold standard of the
    same file name under `source_dir` (or the selected files in `source_list`). Also
    accepts a JSON file with filenames as keys and structured gold-standard output
    as values.
    Calculates:
    - table found accuracy
    - table level accuracy
    - element in column index accuracy
    - element in row index accuracy
    - element's column content accuracy
    - element's row content accuracy
    After looping through the whole list, writes the results to TSV, including the
    aggregated accuracy.
    """
    TableStructureMetricsCalculator(
        documents_dir=output_dir,
        ground_truths_dir=source_dir,
        cutoff=cutoff,
    ).on_files(document_paths=output_list, ground_truth_paths=source_list).calculate(
        export_dir=export_dir, visualize_progress=visualize, display_agg_df=True
    )
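
And likewise for table structure metrics; a sketch under the same assumptions:

from unstructured.metrics.evaluate import TableStructureMetricsCalculator

# Mirrors the removed wrapper's defaults; the paths are hypothetical.
TableStructureMetricsCalculator(
    documents_dir="structured_outputs/",
    ground_truths_dir="gold_standards/",
    cutoff=None,
).on_files(document_paths=None, ground_truth_paths=None).calculate(
    export_dir="metrics", visualize_progress=False, display_agg_df=True
)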


def get_mean_grouping(
    group_by: str,
    data_input: Union[pd.DataFrame, str],
