Skip to content

Commit

Permalink
Merge pull request #564 from padix-key/pubchem
Browse files Browse the repository at this point in the history
Adapt tests to changed database response
  • Loading branch information
padix-key authored May 19, 2024
2 parents c125e15 + da88c39 commit a12f3ce
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 30 deletions.
46 changes: 23 additions & 23 deletions src/biotite/database/pubchem/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
throttle_threshold=0.5, return_throttle_status=False):
"""
Download structure files from *PubChem* in various formats.
This function requires an internet connection.
Parameters
----------
cids : int or iterable object of int
Expand Down Expand Up @@ -62,7 +62,7 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
return_throttle_status : bool, optional
If set to true, the :class:`ThrottleStatus` of the final request
is also returned.
Returns
-------
files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
Expand All @@ -78,10 +78,10 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
of the final response is returned.
This can be used for custom request throttling, for example.
Only returned, if `return_throttle_status` is set to true.
Examples
--------
>>> import os.path
>>> file = fetch(2244, "sdf", path_to_directory)
>>> print(os.path.basename(file))
Expand All @@ -100,7 +100,7 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
# Create the target folder, if not existing
if target_path is not None and not isdir(target_path):
os.makedirs(target_path)

files = []
for i, cid in enumerate(cids):
# Prevent IDs as strings, this could be a common error, as other
Expand All @@ -111,14 +111,14 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
if verbose:
print(f"Fetching file {i+1:d} / {len(cids):d} ({cid})...",
end="\r")

# Fetch file from database
if target_path is not None:
file = join(target_path, str(cid) + "." + format)
else:
# 'file = None' -> store content in a file-like object
file = None

if file is None \
or not isfile(file) \
or getsize(file) == 0 \
Expand All @@ -130,12 +130,12 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
)
if not r.ok:
raise RequestError(parse_error_details(r.text))

if format.lower() in _binary_formats:
content = r.content
else:
content = r.text

if file is None:
if format in _binary_formats:
file = io.BytesIO(content)
Expand All @@ -145,11 +145,11 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
mode = "wb+" if format in _binary_formats else "w+"
with open(file, mode) as f:
f.write(content)

throttle_status = ThrottleStatus.from_response(r)
if throttle_threshold is not None:
throttle_status.wait_if_busy(throttle_threshold)

files.append(file)
if verbose:
print("\nDone")
Expand All @@ -168,9 +168,9 @@ def fetch_property(cids, name,
throttle_threshold=0.5, return_throttle_status=False):
"""
Download the given property for the given CID(s).
This function requires an internet connection.
Parameters
----------
cids : int or iterable object of int
Expand All @@ -189,7 +189,7 @@ def fetch_property(cids, name,
return_throttle_status : bool, optional
If set to true, the :class:`ThrottleStatus` of the final request
is also returned.
Returns
-------
property : str or list of str
Expand All @@ -202,23 +202,23 @@ def fetch_property(cids, name,
The :class:`ThrottleStatus` obtained from the server response.
This can be used for custom request throttling, for example.
Only returned, if `return_throttle_status` is set to true.
Examples
--------
>>> butane_cids = np.array(search(FormulaQuery("C4H10")))
>>> # Filter natural isotopes...
>>> n_iso = np.array(fetch_property(butane_cids, "IsotopeAtomCount"), dtype=int)
>>> # ...and neutral compounds
>>> charge = np.array(fetch_property(butane_cids, "Charge"), dtype=int)
>>> butane_cids = butane_cids[(n_iso == 0) & (charge == 0)]
>>> print(butane_cids.tolist())
[7843, 6360, 161897780, 161295599, 158934736, 158271732, 157632982, 19048342, 19029854, 18402699]
>>> print(sorted(butane_cids.tolist()))
[6360, 7843, 18402699, 19029854, 19048342, 157632982, 158271732, 158934736, 161295599, 161897780]
>>> # Get the IUPAC names for each compound
>>> iupac_names = fetch_property(butane_cids, "IUPACName")
>>> # Compounds with multiple molecules use ';' as separator
>>> print(iupac_names)
['butane', '2-methylpropane', 'methylcyclopropane;molecular hydrogen', 'carbanylium;propane', 'carbanide;propane', 'acetylene;methane', 'cyclobutane;molecular hydrogen', 'cyclopropane;methane', 'ethane;ethene', 'methane;prop-1-ene']
['butane', '2-methylpropane', 'methane;prop-1-ene', 'ethane;ethene', 'cyclopropane;methane', 'cyclobutane;molecular hydrogen', 'acetylene;methane', 'carbanide;propane', 'carbanylium;propane', 'methylcyclopropane;molecular hydrogen']
"""
# If only a single CID is present,
# put it into a single element list
Expand All @@ -227,13 +227,13 @@ def fetch_property(cids, name,
single_element = True
else:
single_element = False

# Property names may only contain letters and numbers
if not name.isalnum():
raise ValueError(
f"Property '{name}' contains invalid characters"
)

# Use TXT format instead of CSV to avoid issues with ',' characters
# within table elements
r = requests.post(
Expand All @@ -245,7 +245,7 @@ def fetch_property(cids, name,
throttle_status = ThrottleStatus.from_response(r)
if throttle_threshold is not None:
throttle_status.wait_if_busy(throttle_threshold)

# Each line contains the property for one CID
properties = r.text.splitlines()

Expand Down
12 changes: 6 additions & 6 deletions src/biotite/database/pubchem/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class NameQuery(Query):
--------
>>> print(search(NameQuery("Alanine")))
[5950, ..., ..., ...]
[5950, ..., ...]
"""

def __init__(self, name):
Expand Down Expand Up @@ -204,10 +204,10 @@ class FormulaQuery(Query):
--------
>>> print(search(FormulaQuery("C4H10", number=5)))
[7843, ..., ..., ..., ...]
[..., ..., ..., ..., ...]
>>> atom_array = residue("ALA")
>>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
[5950, ..., ..., ..., ...]
[..., ..., ..., ..., ...]
"""

def __init__(self, formula, allow_other_elements=False, number=None):
Expand Down Expand Up @@ -555,11 +555,11 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
>>> # CID of alanine
>>> print(search(SuperstructureQuery(cid=5950, number=5)))
[1032, ..., ..., ..., ...]
[..., ..., ..., ..., ...]
>>> # AtomArray of alanine
>>> atom_array = residue("ALA")
>>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
[1032, ..., ..., ..., ...]
[..., ..., ..., ..., ...]
"""

def search_type(self):
Expand Down Expand Up @@ -801,7 +801,7 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
--------
>>> print(search(NameQuery("Alanine")))
[5950, ..., ..., ...]
[5950, ..., ...]
"""
# Use POST to be compatible with the larger payloads
# of structure searches
Expand Down
2 changes: 1 addition & 1 deletion tests/database/test_pubchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_fetch_invalid():
@pytest.mark.parametrize(
"query, ref_ids",
[
(pubchem.NameQuery("Alanine"), [155817681, 449619, 7311724, 5950]),
(pubchem.NameQuery("Alanine"), [602, 5950, 71080]),
(pubchem.SmilesQuery("CCCC"), [7843]),
(pubchem.InchiQuery("InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3"), [7843]),
(pubchem.InchiKeyQuery("IJDNQMDRQITEOD-UHFFFAOYSA-N"), [7843]),
Expand Down

0 comments on commit a12f3ce

Please sign in to comment.