# west.cfg

# The master WEST configuration file for a simulation.
# vi: set filetype=yaml :
---
west:
  ddwe:
    # Output directory for the run.
    output_path: ddwe-logs
    # Run machine learning; set to false for ablation.
    do_machine_learning: true
    # File containing the point for targeting. Set to None for no target.
    # NOTE(review): a plain None loads as the string "None" in YAML, not as
    # null - confirm which form the driver expects.
    target_point_path: common_files/reference.pdb
    # Interval for plotting the latent space.
    plot_interval: 10
    # Drive resampling with the target in latent space (lst) or the pcoord (pcoord).
    sort_by: pcoord
    # Pcoord approaches zero as the target is approached.
    pcoord_approaches_zero: true
    # Maximum number of target seeking resamples. Effectively how many to
    # split for lof. Merge twice as many.
    max_resamples: 4
    # Lower limit on walker weight. If all of the walkers are below the limit,
    # split/merge is skipped that iteration.
    # Keep the decimal point in the mantissa: YAML 1.1 loaders such as PyYAML
    # resolve a dot-less 1e-40 as a string rather than a float.
    split_weight_limit: 1.0e-40
    # Upper limit on walker weight. If all of the walkers exceed the limit,
    # split/merge is skipped that iteration.
    merge_weight_limit: 0.1
    # Negated switch for RMSD sorting.
    # NOTE(review): presumably true disables sorting by RMSD - confirm
    # against the driver.
    not_sort_by_rmsd: false
    # Settings for the machine learning model.
    machine_learning:
      # Model mode; accepted values: static, train.
      ml_mode: static
      # Contact map distance cutoff
      contact_cutoff: 8.0
      # How often to update the model. Set to 0 for static.
      # NOTE(review): ml_mode above is static while this is non-zero - confirm
      # whether the interval is ignored in static mode.
      update_interval: 10
      # Number of lagging iterations to use for training data.
      lag_iterations: 50
      # Path to the base training data (Optional).
      base_training_data_path: common_files/train.npy
      # Checkpoint file for static mode (Optional).
      static_chk_path: static_model/model/checkpoints/checkpoint-epoch-100.pt
    objective:
      # Which objective method to apply: lof or clustering.
      objective_method: lof
      # Algorithm used for cluster resampling: simplified or complex.
      cluster_resample_method: complex
      # Distance function between latent space points (cosine or euclidean).
      # NOTE(review): euclidean_cosine is neither of the two listed options -
      # confirm that the consumer accepts it.
      distance_metric: euclidean_cosine
      lof:
        # Cap on the total number of past latent space points kept for the
        # lof scheme.
        max_past_points: 1000
        # Neighbor count for LOF.
        lof_n_neighbors: 20
        # Number of walkers considered for splitting and merging in lof.
        lof_consider_for_resample: 12
      cluster:
        # Clustering method: kmeans, knani, dbscan or gmm.
        cluster_method: gmm
        # Upper bound on the contact maps saved from each cluster.
        max_save_per_cluster: 40
        # "Cluster" count used by the ablation version.
        ablation_clusters: 8
        # KMeans cluster count.
        kmeans_clusters: 8
        # dbscan epsilon setting.
        dbscan_epsilon: 0.1
        # dbscan minimum number of points.
        dbscan_min_samples: 25
        # GMM maximum number of components.
        gmm_max_components: 8
        # GMM outlier threshold.
        gmm_threshold: 0.8
      mdance:
        # Cluster counts to scan, inclusive, written as "low, high" (e.g. 4,6).
        # Set to an int for a single number.
        # Quoted so it is visibly a string: YAML does not load it as a list.
        knani_clusters: '3, 10'
        # Whether to use the second derivative of the Davies–Bouldin index to
        # determine the minimum.
        db_second: true
        # Loading stride: every sieve-th frame of the trajectory is taken for
        # analysis.
        sieve: 1
        # Number of frames extracted from each cluster.
        n_structures: 11
        # Metric used to compare frames. MSD is Mean-Square-Distance; other
        # accepted values are not listed here.
        metric: MSD
        # Cluster initialization method; comp_sim is the k-NANI initialization.
        init_type: comp_sim
        # Subset of data for diversity selection, in percent. This share of
        # the input structures (here 20%) needs to be >= the number of
        # clusters you request (defaults to 10).
        percentage: 20


  drivers:
    # we_driver is a dotted path to the custom WE driver; module_path is
    # presumably where it is imported from - confirm against the WESTPA docs.
    module_path: $WEST_SIM_ROOT
    we_driver: deepdrive_westpa.ddmd_driver.CustomDriver
    # These two paths also appear under plugins (dmatrix_map) and
    # propagation.parameters (synd_model); keep the copies in sync.
    dmatrix_map: common_files/dmatrix_map.pkl
    synd_model: common_files/ntl9_folding.synd
  # Progress coordinate layout and binning for the simulation.
  system:
    # NOTE(review): dotted path, presumably the system class WESTPA
    # instantiates - confirm.
    driver: westpa.core.systems.WESTSystem
    system_options:
      # Dimensionality of your progress coordinate
      pcoord_ndim: 1
      # Number of data points per iteration
      pcoord_len: 2
      # Data type for your progress coordinate.
      # The !!python/name tag is Python-specific: it only resolves with a
      # loader that permits such tags (safe loaders reject it).
      pcoord_dtype: !!python/name:numpy.float32
      bins:
        type: RectilinearBinMapper
        # The edges of the bins.
        # 'inf' is a quoted string, so the loader yields the text "inf" rather
        # than a float - presumably converted by the consumer; verify.
        boundaries:
          - [0, 1, 'inf']
      # Number of walkers per bin
      bin_target_counts: 36
  propagation:
    max_total_iterations: 2000
    # Do not quote this value without checking the consumer: YAML 1.1 loaders
    # such as PyYAML read the bare 48:00:00 as a base-60 integer (172800),
    # whereas a quoted form would load as a string.
    # NOTE(review): presumably consumed as a number of seconds - confirm.
    max_run_wallclock: 48:00:00
    propagator: synd.westpa.propagator.SynMDPropagator
    # Inputs handed to the propagator named above.
    parameters:
      synd_model: common_files/ntl9_folding.synd
      topology: common_files/reference.pdb
    gen_istates: false
  data:
    # Name of the WEST data file.
    west_data_file: west.h5
    # Per-dataset options; both entries set scaleoffset to 4.
    datasets:
      - name: pcoord
        scaleoffset: 4
      - name: dmatrix
        scaleoffset: 4
  plugins:
    # Single plugin entry. Its dmatrix_map path is the same file that is
    # listed under drivers.dmatrix_map.
    - plugin: common_files.augmentation_driver.SynDAugmentationDriver
      module_path: $WEST_SIM_ROOT
      coord_map: common_files/coord_map.pkl
      dmatrix_map: common_files/dmatrix_map.pkl

  # Settings for w_ipa, an interactive analysis program that can also
  # automate analysis.
  analysis:
    # Directory in which all analysis files should exist.
    directory: ANALYSIS
    # General options for both kinetics routines.
    kinetics:
      step_iter: 1
      evolution: cumulative
      extra: ['disable-correl']
    # Analysis schemes. Required for each: name (TEST here), states, and bins.
    analysis_schemes:
      TEST:
        enabled: true
        bins:
          - type: RectilinearBinMapper
            boundaries:
              - [0, 1, 9.6, 'inf']
        # Named states, each given by a coordinate.
        states:
          - label: folded
            coords:
              - [0.5]
          - label: unfolded
            coords:
              - [10]