Source code for haddock.modules.analysis.filter

"""Filtering module.

The ``[filter]`` module filters the input models based on their score using a
``threshold`` value.
Models with a score higher than the threshold value are filtered out.

The number of models to be selected is unknown, and is the set of models that
have a **score below the defined threshold**.

**Important**:
For this module to be functional, a score must be first computed.
This can be performed by running a CNS module or a scoring module.

**Program termination cases**:

* If scores are not accessible, the workflow will terminate with an error
  message.
* If the threshold value is too stringent, resulting in no models passed to the
  next module, the workflow will stop with an error message.

For more details about this module, please `refer to the haddock3 user manual
<https://www.bonvinlab.org/haddock3-user-manual/modules/analysis.html#filter-module>`_
"""

from pathlib import Path

from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Any, FilePath
from haddock.libs.libontology import Format, PDBFile
from haddock.modules import BaseHaddockModule


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)


class HaddockModule(BaseHaddockModule):
    """HADDOCK3 module to filter models based on a score threshold."""

    name = RECIPE_PATH.name

    def __init__(
        self,
        order: int,
        path: Path,
        *ignore: Any,
        init_params: FilePath = DEFAULT_CONFIG,
        **everything: Any,
    ) -> None:
        """Initialise the module.

        Only ``order``, ``path`` and ``init_params`` are forwarded to the
        base class; any extra positional or keyword arguments are accepted
        and ignored.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if module is installed."""
        return

    def _run(self) -> None:
        """Execute module.

        Keep the PDB models of the previous step whose ``score`` attribute is
        lower than or equal to the ``threshold`` parameter, and export them as
        this module's output.

        ``finish_with_error`` is called when the previous step provided no PDB
        models, when none of the models carries a score, or when no model
        passes the threshold.
        """
        # Make sure we have access to complexes
        # NOTE(review): `iter` is the builtin function, not a type, so this
        # comparison is presumably never true -- confirm the intended check.
        if type(self.previous_io) == iter:
            self.finish_with_error(
                "[filter] This module cannot come after one"
                " that produced an iterable."
            )
            return

        # Get the models generated in previous step
        models: list[PDBFile] = [
            p for p in self.previous_io.output if p.file_type == Format.PDB
        ]

        # Without any model the ratios computed below would divide by zero.
        if not models:
            self.finish_with_error(
                "[filter] No PDB models were provided by the previous module."
            )
            return

        # Get the filter by parameter
        filter_by = "score"
        threshold = self.params["threshold"]

        # Make sure we can access this attribute on models
        models_with_attributes: list[PDBFile] = [
            m for m in models if getattr(m, filter_by, None) is not None
        ]

        # Report the share of input models that do carry a score
        ratio_models_with_attr = len(models_with_attributes) / len(models)
        self.log(
            f"{100 * ratio_models_with_attr:6.2f} % "
            "of the input models have accessible scores."
        )
        if not models_with_attributes:
            self.finish_with_error(
                "Input models do not have scores. "
                "Please consider running a scoring module before!"
            )
            return

        # Proceed to the actual filtering step
        filtered_models: list[PDBFile] = [
            m
            for m in models_with_attributes
            if getattr(m, filter_by) <= threshold
        ]

        # Final evaluation of the outcome of the filtering.
        # The percentage is relative to all input models, so models without
        # a score count as filtered out.
        percent_filtered = (1 - (len(filtered_models) / len(models))) * 100
        if not filtered_models:
            self.finish_with_error(
                f"With the currently set 'threshold' value of {threshold}, "
                "ALL models were filtered out."
            )
        else:
            self.log(
                f"With currently set 'threshold' value of {threshold}, "
                f"{percent_filtered:6.2f}% of the models were filtered out."
            )
            # select the models based on the parameter
            self.output_models = filtered_models
            self.export_io_models()