Skip to content

Commit

Permalink
Allow specifying versions in matching methods
Browse files (browse the repository at this point in the history)
  • Loading branch information
alrichardbollans committed Oct 5, 2023
1 parent 4618fb3 commit 7cfb0ee
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
6 changes: 3 additions & 3 deletions wcvp_name_matching/get_accepted_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def get_accepted_info_from_names_in_column(in_df: pd.DataFrame, name_col: str,
families_of_interest: List[str] = None,
family_column: str = None,
manual_resolution_csv: str = None,
match_level: str = 'full') -> pd.DataFrame:
match_level: str = 'full', wcvp_version: str = None) -> pd.DataFrame:
"""
First tries to match names in df to wcvp directly to obtain accepted info and then
matches names in df using knms and gets corresponding accepted info from wcvp
Expand Down Expand Up @@ -343,7 +343,7 @@ def get_accepted_info_from_names_in_column(in_df: pd.DataFrame, name_col: str,

# Check families of interest and in family column are in wcvp, and remove if not
if families_of_interest is not None or family_column is not None:
wcvp_families_df = get_all_taxa()
wcvp_families_df = get_all_taxa(version=wcvp_version)
wcvp_families = list(wcvp_families_df[wcvp_columns['family']].unique())
wcvp_acc_families = list(wcvp_families_df[wcvp_accepted_columns['family']].unique())
wcvp_all_families = wcvp_families + wcvp_acc_families
Expand Down Expand Up @@ -379,7 +379,7 @@ def get_accepted_info_from_names_in_column(in_df: pd.DataFrame, name_col: str,
in_df[unique_submission_index_col] = in_df[unique_submission_index_col].astype(str)
df = df.drop_duplicates(subset=[unique_submission_index_col])

all_taxa = get_all_taxa(families_of_interest=families_of_interest)
all_taxa = get_all_taxa(families_of_interest=families_of_interest, version=wcvp_version)
# First get manual matches using given ipni ids
if manual_resolution_csv is not None:
manual_match_df = pd.read_csv(manual_resolution_csv)
Expand Down
4 changes: 2 additions & 2 deletions wcvp_name_matching/wcvp_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def match_name_to_concatenated_columns(df: pd.DataFrame, matching_name_col: str,


def get_wcvp_info_for_names_in_column(df: pd.DataFrame, matching_name_col: str, unique_submission_id_col: str,
all_taxa: pd.DataFrame = None, family_column: str = None):
all_taxa: pd.DataFrame = None, family_column: str = None, wcvp_version:str = None):
"""
Appends accepted info columns to df from list of taxa, based on names in matching_name_col
:param df:
Expand All @@ -101,7 +101,7 @@ def get_wcvp_info_for_names_in_column(df: pd.DataFrame, matching_name_col: str,
:return:
"""
if all_taxa is None:
all_taxa = get_all_taxa()
all_taxa = get_all_taxa(version=wcvp_version)

# First try with author info i.e. taxon name + taxon_authors and then
# taxon name + parenthetical_author + primary_author
Expand Down

0 comments on commit 7cfb0ee

Please sign in to comment.