"""Filtering module.
The ``[filter]`` module filters the input models based on their score using a
``threshold`` value.
Models having higher score than the threshold value are filtered out.
The number of models to be selected is unknown, and is the set of models that
have a **score below the defined threshold**.
**Important**:
For this module to be functional, a score must be first computed.
This can be performed by running a CNS module or a scoring module.
**Program termination cases**:
* If scores are not
accessible, the workflow will terminate with an error message.
* If the threshold value is too stringent, resulting in no models passed to the
next module, the workflow will stop with an error message.
For more details about this module, please `refer to the haddock3 user manual
<https://www.bonvinlab.org/haddock3-user-manual/modules/analysis.html#filter-module>`_
"""
from pathlib import Path
from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Any, FilePath
from haddock.libs.libontology import Format, PDBFile
from haddock.modules import BaseHaddockModule
RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
[docs]
class HaddockModule(BaseHaddockModule):
"""HADDOCK3 module to select top cluster/model."""
name = RECIPE_PATH.name
def __init__(self,
order: int,
path: Path,
*ignore: Any,
init_params: FilePath = DEFAULT_CONFIG,
**everything: Any) -> None:
super().__init__(order, path, init_params)
[docs]
@classmethod
def confirm_installation(cls) -> None:
"""Confirm if module is installed."""
return
def _run(self) -> None:
"""Execute module."""
# Make sure we have access to complexes
if type(self.previous_io) == iter:
self.finish_with_error(
"[filter] This module cannot come after one"
" that produced an iterable."
)
# Get the models generated in previous step
models: list[PDBFile] = [
p
for p in self.previous_io.output
if p.file_type == Format.PDB
]
# Get the filter by parameter
filter_by = "score"
threshold = self.params["threshold"]
# Make sure we can access this attribute on models
models_with_attributes: list[PDBFile] = [
m for m in models
if getattr(m, filter_by, None) != None
]
# Check how many of them are available
ratio_models_with_attr = len(models_with_attributes) / len(models)
self.log(
f"{100 * (1 - ratio_models_with_attr):6.2f} % "
"of the input models have accessible scores."
)
if len(models_with_attributes) == 0:
self.finish_with_error(
"Input models do not have scores. "
"Please consider running a scoring module before!"
)
# Process to the actual filtering step
filtered_models: list[PDBFile] = [
m for m in models_with_attributes
if getattr(m, filter_by) <= threshold
]
# Final evaluation of the outcome of the filtering
percent_filtered = (1 - (len(filtered_models) / len(models))) * 100
if len(filtered_models) == 0:
self.finish_with_error(
f"With the currently set 'threshold' value of {threshold}, "
"ALL models were filtered out."
)
else:
self.log(
f"With currently set 'threshold' value of {threshold}, "
f"{percent_filtered:6.2f}% of the models were filtered out."
)
# select the models based on the parameter
self.output_models = filtered_models
self.export_io_models()