# Source code for haddock.modules.analysis.seletopclusts

"""Select models from the top clusters.

This module selects a number of models from a number of clusters. The
selection is based on the score of the models within the clusters.

In the standard HADDOCK analysis, the top 4 models of the top 10 clusters
are shown. In case seletopclusts is run after a sampling module, we can
keep a few models from all the clusters to have more diversity at the
refinement stage(s).
"""

from pathlib import Path

from haddock import log
from haddock.core.typing import Any, FilePath
from haddock.modules import BaseHaddockModule
from haddock.modules.analysis.seletopclusts.seletopclusts import (
    select_top_clusts_models,
    write_selected_models,
    )


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, "defaults.yaml")


class HaddockModule(BaseHaddockModule):
    """Haddock Module for 'seletopclusts'.

    The module checks its ``top_models`` and ``top_cluster`` parameters,
    retrieves the models produced by the previous step, selects models
    from the clusters with ``select_top_clusts_models``, writes the
    selection to disk with ``write_selected_models`` and exports the
    renamed models as its own output.
    """

    name = RECIPE_PATH.name

    def __init__(
            self,
            order: int,
            path: Path,
            *ignore: Any,
            init_params: FilePath = DEFAULT_CONFIG,
            **everything: Any,
            ) -> None:
        """Initialize the module.

        Parameters
        ----------
        order : int
            Forwarded as-is to ``BaseHaddockModule.__init__``.
        path : Path
            Forwarded as-is to ``BaseHaddockModule.__init__``; later used
            as the destination given to ``write_selected_models``.
        *ignore : Any
            Extra positional arguments; accepted and discarded.
        init_params : FilePath
            Parameter file handed to the base class. Defaults to the
            ``defaults.yaml`` located next to this module.
        **everything : Any
            Extra keyword arguments; accepted and discarded.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if module is installed."""
        # Nothing is checked here: the method returns unconditionally.
        return

    def _run(self) -> None:
        """Execute the module's protocol.

        Invalid parameters or missing cluster information are reported
        through ``self.finish_with_error``.
        NOTE(review): the checks below are written as if
        ``finish_with_error`` stops execution - confirm in the base class.
        """
        # Check parameters validity
        # A NaN value compares False against `<= 0`, so it passes this
        # check, in line with the "or nan" wording of the message.
        if self.params["top_models"] <= 0:
            _msg = "top_models must be either > 0 or nan."
            self.finish_with_error(_msg)

        if not isinstance(self.params["top_cluster"], int):
            _msg = "top_cluster must be an integer."
            self.finish_with_error(_msg)

        # Retrieve list of previous models
        models_to_select = self.previous_io.retrieve_models()

        # Check if cluster info is accessible
        # Generator expression: stops at the first model without a
        # cluster rank instead of building an intermediate list.
        if any(mdl.clt_rank is None for mdl in models_to_select):
            _msg = (
                "Impossible to obtain cluster information. Please consider "
                "running a clustering method prior to this module."
                )
            self.finish_with_error(_msg)

        # Make model selection
        selected_models, _notes = select_top_clusts_models(
            self.params["sortby"],
            models_to_select,
            self.params["top_cluster"],
            self.params["top_models"],
            )

        # Log notes
        for note in _notes:
            log.info(note)

        # dump the models to disk and change their attributes
        renamed_models = write_selected_models(
            "seletopclusts.txt",
            selected_models,
            self.path,
            )

        # Make these new models the output of this module
        self.output_models = renamed_models

        # Export outputs
        self.export_io_models()