"""Select models from the top clusters.
This module selects a number of models from a number of clusters. The
selection is based on the score of the models within the clusters.
In the standard HADDOCK analysis, the top 4 models of the top 10 clusters
are shown. In case seletopclusts is run after a sampling module, we can
keep a few models from all the clusters to have more diversity at the
refinement stage(s).
"""
from pathlib import Path
from haddock import log
from haddock.core.typing import Any, FilePath
from haddock.modules import BaseHaddockModule
from haddock.modules.analysis.seletopclusts.seletopclusts import (
select_top_clusts_models,
write_selected_models,
)
# Absolute directory holding this module; its final component is reused as
# the module's registered name.
RECIPE_PATH = Path(__file__).resolve().parents[0]
# Default parameter file, expected to sit next to this module.
DEFAULT_CONFIG = RECIPE_PATH / "defaults.yaml"
class HaddockModule(BaseHaddockModule):
    """Haddock Module for 'seletopclusts'.

    Picks models from the clusters found among the previous step's models
    and exposes the selection as this module's output.
    """

    # The module name is the name of the directory containing this file.
    name = RECIPE_PATH.name

    def __init__(self,
                 order: int,
                 path: Path,
                 *ignore: Any,
                 init_params: FilePath = DEFAULT_CONFIG,
                 **everything: Any) -> None:
        """Initialise the module.

        Parameters
        ----------
        order : int
            Forwarded unchanged to the base class initialiser.
        path : Path
            Forwarded unchanged to the base class initialiser.
        *ignore : Any
            Extra positional arguments; accepted and discarded.
        init_params : FilePath
            Parameter file forwarded to the base class. Defaults to the
            ``defaults.yaml`` file located next to this module.
        **everything : Any
            Extra keyword arguments; accepted and discarded.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if module is installed.

        Nothing is checked here: the method returns immediately.
        """
        return

    def _run(self) -> None:
        """Execute the module's protocol.

        Steps, in order:

        1. validate the ``top_models`` and ``top_cluster`` parameters;
        2. retrieve the models produced by the previous step;
        3. verify that every model carries a cluster rank;
        4. select the models with ``select_top_clusts_models``;
        5. write the selection with ``write_selected_models`` and export
           the resulting models as this module's output.

        Any failed check is reported through ``finish_with_error``.
        """
        # Check parameters validity.
        # NaN is deliberately let through: ``nan <= 0`` evaluates to False.
        if self.params["top_models"] <= 0:
            _msg = "top_models must be either > 0 or nan."
            self.finish_with_error(_msg)

        if not isinstance(self.params["top_cluster"], int):
            _msg = "top_cluster must be an integer."
            self.finish_with_error(_msg)

        # Retrieve list of previous models
        models_to_select = self.previous_io.retrieve_models()

        # Check if cluster info is accessible: a missing cluster rank on any
        # model means the selection below cannot be performed.
        if any(mdl.clt_rank is None for mdl in models_to_select):
            _msg = (
                "Impossible to obtain cluster information. Please consider "
                "running a clustering method prior to this module."
            )
            self.finish_with_error(_msg)

        # Make model selection
        selected_models, _notes = select_top_clusts_models(
            self.params["sortby"],
            models_to_select,
            self.params["top_cluster"],
            self.params["top_models"],
        )

        # Log the notes returned alongside the selection
        for note in _notes:
            log.info(note)

        # Dump the models to disk and change their attributes
        renamed_models = write_selected_models(
            "seletopclusts.txt",
            selected_models,
            self.path,
        )

        # Make these new models the output of this module
        self.output_models = renamed_models

        # Export outputs
        self.export_io_models()