Source code for haddock.modules.analysis.caprieval

"""Calculate CAPRI metrics for the input models.

By default the following metrics are calculated:

- FNAT (fraction of native contacts), namely the fraction of
    intermolecular contacts in the docked complex that are also
    present in the reference complex.
- IRMSD (interface root mean square deviation), namely the RMSD
    of the interface of the docked complex with respect
    to the reference complex.
- LRMSD (ligand root mean square deviation), namely the RMSD of the
    ligand of the docked complex with respect to the
    reference complex upon superposition of the receptor.
- DOCKQ, a measure of the quality of the docked model obtained
    by combining FNAT, IRMSD and LRMSD (see
    Basu and Wallner 2016, PLoS ONE 11 (8), e0161879).
- ILRMSD (interface ligand root mean square deviation), the RMSD of the
    ligand of the docked complex with respect to the reference complex
    upon superposition of the interface of the receptor.
- GLOBAL_RMSD, the full RMSD between the reference and the model.

The following files are generated:

- **capri_ss.tsv**: a table with the CAPRI metrics for each model.
- **capri_clt.tsv**: a table with the CAPRI metrics for each cluster of models (if clustering information is available).

For more details about this module, please `refer to the haddock3 user manual
<https://www.bonvinlab.org/haddock3-user-manual/modules/analysis.html#caprieval-module>`_
"""

from pathlib import Path

from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import FilePath, Union
from haddock.libs.libontology import PDBFile
from haddock.libs.libparallel import Scheduler
from haddock.libs.libaa2cg import martinize
from haddock.libs.libstructure import find_ff
from haddock.libs.libpdb import handle_input_reference
from haddock.modules import BaseHaddockModule
from haddock.libs.libcapri import (
    CAPRI,
    capri_cluster_analysis,
    extract_data_from_capri_class,
    extract_models_best_references,
)
from haddock.modules.analysis.caprieval.capri import dump_weights


# Directory containing this module's source file.
RECIPE_PATH = Path(__file__).resolve().parent
# Path of the default parameters file, looked up next to this module.
DEFAULT_CONFIG = RECIPE_PATH / MODULE_DEFAULT_YAML


class HaddockModule(BaseHaddockModule):
    """HADDOCK3 module to calculate the CAPRI metrics."""

    # The module name is the name of the directory holding this file.
    name = RECIPE_PATH.name

    def __init__(
        self,
        order: int,
        path: Path,
        init_params: FilePath = DEFAULT_CONFIG,
    ) -> None:
        """Initialise the module, forwarding all arguments to the base class.

        Parameters
        ----------
        order : int
            Forwarded unchanged to `BaseHaddockModule.__init__`.
        path : Path
            Forwarded unchanged to `BaseHaddockModule.__init__`.
        init_params : FilePath
            Parameters file; defaults to `DEFAULT_CONFIG`.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm the module installation.

        No check is performed here: the method returns immediately.
        """
        return

    @staticmethod
    def is_nested(models: list[Union[PDBFile, list[PDBFile]]]) -> bool:
        """Tell whether at least one entry of `models` is itself a list.

        Parameters
        ----------
        models : list[Union[PDBFile, list[PDBFile]]]
            Models to inspect; only the top level is examined.

        Returns
        -------
        bool
            True if any top-level entry is a `list`, False otherwise
            (including when `models` is empty).
        """
        return any(isinstance(model, list) for model in models)

    def get_reference(self, models: list[PDBFile]) -> list[Path]:
        """Obtain the reference structure(s) to be used downstream.

        Parameters
        ----------
        models : list[PDBFile]
            List of input models to be evaluated, among which the best can
            serve as reference structure if none is provided.

        Returns
        -------
        references : list[Path]
            List of paths to the reference structure(s) to be used
            downstream.

        Notes
        -----
        When the `reference_fname` parameter is not set, `models` is sorted
        **in place** and the first model after sorting is used as reference.
        Callers therefore see their list reordered.
        """
        if self.params["reference_fname"]:
            user_reference = Path(self.params["reference_fname"])
            return handle_input_reference(user_reference)

        self.log(
            "No reference structure provided. "
            "Using the structure with the lowest score from previous step"
        )
        # Sort by score to find the "best"; this relies on the ordering
        # defined by `PDBFile` and intentionally reorders the caller's list.
        models.sort()
        best_model = models[0]
        assert isinstance(best_model, PDBFile), "Best model is not a PDBFile"
        return [best_model.rel_path]

    def _run(self) -> None:
        """Execute module.

        Retrieves the models of the previous step, evaluates every model
        against every reference with one `CAPRI` job per (model, reference)
        pair, and writes:

        - ``capri_ss.tsv``: per-model metrics (best reference per model);
        - ``capri_clt.tsv``: output of the cluster analysis;
        - ``capri_ss_multiref.tsv``: per-model, per-reference metrics,
          only when more than one reference is used.

        The input models are then exported unchanged to the next step.

        Raises
        ------
        ValueError
            If the previous step produced a nested list of models.
        """
        # Get the models generated in previous step
        models = self.previous_io.retrieve_models(individualize=True)
        if self.is_nested(models):
            raise ValueError(
                "CAPRI module cannot be executed after "
                "modules that produce a nested list of models."
            )

        # Dump previously used weights
        dump_weights(self.order)

        # Find force-field
        ff = find_ff(models)

        # Get reference file(s); with the martini2 force-field each
        # reference is first passed through `martinize`.
        references = self.get_reference(models)
        if ff == "martini2":
            references = [
                Path(martinize(ref_aa, self.path.resolve().parent, False))
                for ref_aa in references
            ]
        multiple_references = len(references) > 1

        # Each model is a job; this is not the most efficient way
        # but by assigning each model to an individual job
        # we can handle scenarios in which the models are heterogeneous,
        # for example during CAPRI scoring.
        jobs: list[CAPRI] = []
        for i, model_to_be_evaluated in enumerate(models, start=1):
            # `model_to_be_evaluated` cannot be a list, the `CAPRI` class is
            # expecting a single model. Nested input was already rejected
            # above; this check is kept as a safeguard and type narrowing.
            if isinstance(model_to_be_evaluated, list):
                raise ValueError(
                    "CAPRI module cannot handle a list of `model_to_be_evaluated`"
                )
            # One job per (model, reference) pair
            for ref_id, reference in enumerate(references, start=1):
                jobs.append(
                    CAPRI(
                        identificator=i,
                        model=model_to_be_evaluated,
                        path=Path("."),
                        reference=reference,
                        params=self.params,
                        ref_id=ref_id,
                        ff=ff,
                    )
                )

        engine = Scheduler(
            tasks=jobs,
            ncores=self.params["ncores"],
            max_cpus=self.params["max_cpus"],
        )
        engine.run()
        # Order the results by model index
        jobs = sorted(engine.results, key=lambda capri: capri.identificator)

        # Extract best references per input model
        best_ref_jobs = extract_models_best_references(jobs)

        # Write standard capri_ss file
        extract_data_from_capri_class(
            capri_objects=best_ref_jobs,
            output_fname=Path(".", "capri_ss.tsv"),
            sort_key=self.params["sortby"],
            sort_ascending=self.params["sort_ascending"],
            add_reference_id=multiple_references,
        )

        # Perform cluster analysis.
        # The `type: ignore` below is acceptable only because the output of
        # `retrieve_models` was checked above not to be nested.
        capri_cluster_analysis(
            capri_list=best_ref_jobs,
            model_list=models,  # type: ignore
            output_fname="capri_clt.tsv",
            clt_threshold=self.params["clt_threshold"],
            sort_key=self.params["sortby"],
            sort_ascending=self.params["sort_ascending"],
            path=Path("."),
        )

        # In case multiple references are provided, generate an additional
        # file containing the information to trace back the metrics related
        # to each reference.
        if multiple_references:
            extract_data_from_capri_class(
                capri_objects=jobs,
                output_fname=Path(".", "capri_ss_multiref.tsv"),
                sort_key=self.params["sortby"],
                sort_ascending=self.params["sort_ascending"],
                add_reference_id=True,
            )

        # Send models to the next step, no operation is done on them.
        # Same `type: ignore` rationale as above (non-nested models).
        self.output_models = models  # type: ignore
        self.export_io_models()