Skip to content

Commit

Permalink
Fix file handling bug
Browse files Browse the repository at this point in the history
  • Loading branch information
hogru committed Mar 20, 2023
1 parent 298de6d commit cd56fb8
Showing 1 changed file with 20 additions and 11 deletions.
31 changes: 20 additions & 11 deletions src/molreactgen/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@ def _cleanse_and_copy_data(input_file_path: Path, output_file_path: Path) -> Non
def _download_pooched_dataset(
dataset: str, raw_dir: Path, enforce_download: bool
) -> None:
assert raw_dir.samefile(POOCHES[dataset].path)
# assert raw_dir.samefile(POOCHES[dataset].path)
assert raw_dir.as_posix() == str(
POOCHES[dataset].path
) # pooch.path should be a string, but isn't
if enforce_download:
for file in POOCHES[dataset].registry:
if (raw_dir / file).exists():
Expand All @@ -139,7 +142,10 @@ def _download_pooched_dataset(


def _prepare_pooched_dataset(dataset: str, raw_dir: Path, prep_dir: Path) -> None:
assert raw_dir.samefile(POOCHES[dataset].path)
# assert raw_dir.samefile(POOCHES[dataset].path)
assert raw_dir.as_posix() == str(
POOCHES[dataset].path
) # pooch.path should be a string, but isn't
prep_dir.mkdir(parents=True, exist_ok=True)
for file in POOCHES[dataset].registry:
# sub_dir = Path(file).parent
Expand Down Expand Up @@ -174,6 +180,7 @@ def _prepare_uspto_50k_dataset(raw_dir: Path, prep_dir: Path) -> None:
raise FileNotFoundError(
f"File {raw_file} not found. The raw file name seems to have changed."
)
prep_dir.mkdir(parents=True, exist_ok=True)
files: dict[str, str] = {
"known": "USPTO_50k_known.csv", # "known" means in either the validation or the test set (but not the train set)
"train": "USPTO_50k_train.csv",
Expand Down Expand Up @@ -256,17 +263,19 @@ def _prepare_uspto_50k_dataset(raw_dir: Path, prep_dir: Path) -> None:
), "Train set includes reactions from validation and/or test set!"


def _prepare_uspto_full_dataset(raw_dir: Path, prep_dir: Path) -> None:
raise NotImplementedError(
"Preparation of the full USPTO dataset is not yet implemented."
)


def _download_uspto_full_dataset(raw_dir: Path, enforce_download: bool) -> None:
# _download_pooched_dataset("uspto50k", raw_dir, enforce_download)
raise NotImplementedError(
"Download of the full USPTO dataset is not yet implemented."
)
# raise NotImplementedError(
# "Download of the full USPTO dataset is not yet implemented."
# )
logger.warning("Dataset usptofull download is not yet implemented, skipping...")


def _prepare_uspto_full_dataset(raw_dir: Path, prep_dir: Path) -> None:
# raise NotImplementedError(
# "Preparation of the full USPTO dataset is not yet implemented."
# )
logger.warning("Dataset usptofull preparation is not yet implemented, skipping...")


def _download_zinc_dataset(raw_dir: Path, enforce_download: bool) -> None:
Expand Down

0 comments on commit cd56fb8

Please sign in to comment.