Merge pull request deepchem#3896 from shreyasvinaya/ci-fixes

CI Fixes: patch fix for `tfp`, docker build
GreatRSingh · Mar 19, 2024 · 3eccafe · 3eccafe
2 parents eef32b0 + 095d642
commit 3eccafe
Show file tree

Hide file tree

Showing 43 changed files with 163 additions and 156 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -30,6 +30,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
     - name: Cache pip modules for Linux
       if: runner.os == 'Linux'

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -21,6 +21,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
     - name: Cache pip packages for Linux
       uses: actions/cache@v4

diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml
@@ -20,6 +20,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0
@@ -44,7 +45,7 @@ jobs:
           ${{ runner.os }}-pip-
 
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.9'
 

diff --git a/.github/workflows/jax_setup.yml b/.github/workflows/jax_setup.yml
@@ -51,6 +51,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0

diff --git a/.github/workflows/mini_build.yml b/.github/workflows/mini_build.yml
@@ -62,6 +62,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0
@@ -222,7 +223,7 @@ jobs:
         password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
     - name: Build and push
       id: docker_build
-      uses: docker/build-push-action@v2
+      uses: docker/build-push-action@v5
       with:
         builder: ${{ steps.buildx.outputs.name }}
         context: ./docker/nightly

diff --git a/.github/workflows/tensorflow_setup.yml b/.github/workflows/tensorflow_setup.yml
@@ -58,6 +58,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -65,6 +65,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0

diff --git a/.github/workflows/torch_setup.yml b/.github/workflows/torch_setup.yml
@@ -58,6 +58,7 @@ jobs:
         sudo rm -rf /opt/ghc
         sudo rm -rf "/usr/local/share/boost"
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+        sudo rm -rf /usr/local/lib/android
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0

diff --git a/deepchem/data/__init__.py b/deepchem/data/__init__.py
@@ -2,6 +2,9 @@
 Gathers all datasets in one place for convenient imports
 """
 # flake8: noqa
+import logging
+
+logger = logging.getLogger(__name__)
 
 # TODO(rbharath): Get rid of * import
 from deepchem.data.datasets import pad_features
@@ -24,13 +27,9 @@
 from deepchem.data.data_loader import InMemoryLoader
 try:
     from deepchem.data.data_loader import SAMLoader
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
-try:
     from deepchem.data.data_loader import BAMLoader
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
-try:
     from deepchem.data.data_loader import CRAMLoader
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
+except ImportError as e:
+    logger.warning(
+        f'Skipped loading biological sequence loaders, missing a dependency. {e}'
+    )
diff --git a/deepchem/data/data_loader.py b/deepchem/data/data_loader.py
@@ -1903,10 +1903,10 @@ class SAMLoader(DataLoader):
     """Handles loading of SAM files.
     Sequence Alignment Map (SAM) is a text-based format used for storing biological sequences
     aligned to a reference sequence. It is generally used for storing nucleotide sequences,
-    generated by next generation sequencing technologies, and unmapped sequences. 
+    generated by next generation sequencing technologies, and unmapped sequences.
     SAM files have a header section and an alignment section. Alignment sections have
     11 mandatory fields, as well as a variable number of optional fields. Here, we
-    extract Query Name, Query Sequence, Query Length, Reference Name, 
+    extract Query Name, Query Sequence, Query Length, Reference Name,
     Reference Start, CIGAR and Mapping Quality of each read in the SAM file.
     This class provides methods to load and featurize data from SAM files.
 
@@ -1931,7 +1931,7 @@ def __init__(self, featurizer: Optional[Featurizer] = None):
         ----------
         featurizer: Featurizer (default: None)
             The Featurizer to be used for the loaded SAM data.
-    
+
        """
 
         # Set attributes

diff --git a/deepchem/data/tests/test_bam_loader.py b/deepchem/data/tests/test_bam_loader.py
@@ -1,11 +1,16 @@
 import os
 import unittest
 import deepchem as dc
+import logging
+
+logger = logging.getLogger(__name__)
+
 try:
     import pysam
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
-import numpy as np
+except ImportError as e:
+    logger.warning(
+        f'Skipped loading biological sequence featurized, missing a dependency. {e}'
+    )
 
 
 class TestBAMLoader(unittest.TestCase):

diff --git a/deepchem/data/tests/test_cram_loader.py b/deepchem/data/tests/test_cram_loader.py
@@ -1,11 +1,16 @@
 import os
 import unittest
 import deepchem as dc
+import logging
+
+logger = logging.getLogger(__name__)
+
 try:
     import pysam
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
-import numpy as np
+except ImportError as e:
+    logger.warning(
+        f'Skipped loading biological sequence featurized, missing a dependency. {e}'
+    )
 
 
 class TestCRAMLoader(unittest.TestCase):

diff --git a/deepchem/data/tests/test_sam_loader.py b/deepchem/data/tests/test_sam_loader.py
@@ -1,11 +1,16 @@
 import os
 import unittest
 import deepchem as dc
+import logging
+
+logger = logging.getLogger(__name__)
+
 try:
     import pysam
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
-import numpy as np
+except ImportError as e:
+    logger.warning(
+        f'Skipped loading biological sequence featurized, missing a dependency. {e}'
+    )
 
 
 class TestSAMLoader(unittest.TestCase):

diff --git a/deepchem/feat/__init__.py b/deepchem/feat/__init__.py
@@ -79,16 +79,12 @@
 try:
     from deepchem.feat.bio_seq_featurizer import SAMFeaturizer
     from deepchem.feat.bio_seq_featurizer import BAMFeaturizer
+    from deepchem.feat.bio_seq_featurizer import CRAMFeaturizer
 except ImportError as e:
     logger.warning(
         f'Skipped loading biological sequence featurized, missing a dependency. {e}'
     )
 
-try:
-    from deepchem.feat.bio_seq_featurizer import CRAMFeaturizer
-except ImportError:
-    print("Error: Unable to import pysam. Please make sure it is installed.")
-
 # tokenizers
 try:
     from deepchem.feat.smiles_tokenizer import SmilesTokenizer

diff --git a/deepchem/feat/bio_seq_featurizer.py b/deepchem/feat/bio_seq_featurizer.py
@@ -1,9 +1,4 @@
 import numpy as np
-import deepchem as dc
-try:
-    import pysam
-except ImportError:
-    pass
 from deepchem.feat import Featurizer
 
 
@@ -109,7 +104,7 @@ class BAMFeaturizer(Featurizer):
     (Sequence Alignment Map) files. This class extracts Query Name, Query
     Sequence, Query Length, Reference Name, Reference Start, CIGAR and Mapping
     Quality of the alignment in the BAM file.
-    
+
     This is the default featurizer used by BAMLoader, and it extracts the following
     fields from each read in each BAM file in the given order:-
     - Column 0: Query Name
@@ -134,7 +129,7 @@ class BAMFeaturizer(Featurizer):
     ----
     This class requires pysam to be installed. Pysam can be used with Linux or MacOS X.
     To use Pysam on Windows, use Windows Subsystem for Linux(WSL).
-    
+
     """
 
     def __init__(self, max_records=None):
@@ -199,7 +194,7 @@ class CRAMFeaturizer(Featurizer):
     biological sequences aligned to a reference sequence. This class extracts Query Name, Query
     Sequence, Query Length, Reference Name, Reference Start, CIGAR and Mapping
     Quality of the alignment in the CRAM file.
-    
+
     This is the default featurizer used by CRAMLoader, and it extracts the following
     fields from each read in each CRAM file in the given order:-
     - Column 0: Query Name
@@ -224,7 +219,7 @@ class CRAMFeaturizer(Featurizer):
     ----
     This class requires pysam to be installed. Pysam can be used with Linux or MacOS X.
     To use Pysam on Windows, use Windows Subsystem for Linux(WSL).
-    
+
     """
 
     def __init__(self, max_records=None):

diff --git a/deepchem/models/torch_models/acnn.py b/deepchem/models/torch_models/acnn.py
@@ -85,7 +85,7 @@ def __init__(self,
                  **kwargs) -> None:
         """TorchModel wrapper for ACNN
 
-         Parameters
+        Parameters
         ----------
         n_tasks: int
             number of tasks

diff --git a/deepchem/models/torch_models/hf_models.py b/deepchem/models/torch_models/hf_models.py
@@ -51,6 +51,7 @@ class HuggingFaceModel(TorchModel):
          - `mtr` - multitask regression - a task used for both pretraining base models and finetuning
          - `regression` - use it for regression tasks, like property prediction
          - `classification` - use it for classification tasks
+
         When the task is not specified or None, the wrapper returns raw output of the HuggingFaceModel.
         In cases where the HuggingFaceModel is a model without a task specific head, this output will be
         the last hidden states.

diff --git a/deepchem/models/torch_models/molgan.py b/deepchem/models/torch_models/molgan.py
@@ -169,6 +169,7 @@ def create_generator(self):
         The model has two outputs:
             1. edges
             2. nodes
+
         The format differs depending on intended use (training or sample generation).
         For sample generation use flag, sample_generation=True while calling generator
         i.e. gan.generators[0](noise_input, training=False, sample_generation=True).
@@ -188,6 +189,7 @@ def create_discriminator(self,
         Takes two inputs:
             1. adjacency tensor, containing bond information
             2. nodes tensor, containing atom information
+
         The input vectors need to be in one-hot encoding format.
         Use MolGAN featurizer for that purpose. It will be simplified
         in the future release.

diff --git a/deepchem/splits/splitters.py b/deepchem/splits/splitters.py
@@ -1511,7 +1511,6 @@ class ScaffoldSplitter(Splitter):
     Notes
     -----
     - This class requires RDKit to be installed.
-
     - When a SMILES representation of a molecule is invalid, the splitter skips processing
     the datapoint, i.e. it will not include the molecule in any splits.