Source code for haddock.libs.libcns
"""CNS scripts util functions."""
import itertools
import math
from functools import partial
from os import linesep
from pathlib import Path
from haddock import EmptyPath, log
from haddock.core import cns_paths
from haddock.core.typing import Any, FilePath, FilePathT, Optional, Union
from haddock.libs import libpdb
from haddock.libs.libfunc import false, true
from haddock.libs.libmath import RandomNumberGenerator
from haddock.libs.libontology import PDBFile
from haddock.libs.libpdb import check_combination_chains
from haddock.libs.libutil import transform_to_list
RND = RandomNumberGenerator()
[docs]
def generate_default_header(
path: Optional[FilePath] = None,
) -> tuple[str, str, str, str, str, str]:
"""Generate CNS default header."""
if path is not None:
axis = load_axis(**cns_paths.get_axis(path))
link = load_link(Path(path, cns_paths.LINK_FILE))
scatter = load_scatter(Path(path, cns_paths.SCATTER_LIB))
tensor = load_tensor(**cns_paths.get_tensors(path))
trans_vec = load_trans_vectors(
**cns_paths.get_translation_vectors(path)
)
water_box = load_boxtyp20(cns_paths.get_water_box(path)["boxtyp20"])
else:
axis = load_axis(**cns_paths.axis)
link = load_link(cns_paths.link_file)
scatter = load_scatter(cns_paths.scatter_lib)
tensor = load_tensor(**cns_paths.tensors)
trans_vec = load_trans_vectors(**cns_paths.translation_vectors)
water_box = load_boxtyp20(cns_paths.water_box["boxtyp20"])
return (
link,
trans_vec,
tensor,
scatter,
axis,
water_box,
)
[docs]
def find_desired_linkfiles(
charged_nter: bool = False,
charged_cter: bool = False,
phosphate_5: bool = False,
path: Optional[FilePath] = None,
) -> dict[str, Path]:
"""Find appropriate link files to use depending on terminis states.
Parameters
----------
charged_nter : bool, optional
Must the Nter be charged ?, by default False
charged_cter : bool, optional
Must the Cter be charged ?, by default False
phosphate_5 : bool, optional
Must 5' be a phosphate ?, by default False
path : Optional[FilePath], optional
Path to where CNS topology/parameters are, by default None
Returns
-------
linkfiles : dict[str, Path]
Dict of CNS parameters/arguments/variable as keys
and Path to link files to be used during topology
generation.
"""
# Set output variable
linkfiles = {}
# Logic to find appropriate link for proteins
if charged_nter and charged_cter:
prot_link_key = "NH3+,COO-"
elif not charged_nter and charged_cter:
prot_link_key = "NH,COO-"
elif charged_nter and not charged_cter:
prot_link_key = "NH3+,CO"
elif not charged_nter and not charged_cter:
prot_link_key = "NH,CO"
# Point to corresponding file
linkfiles["prot_link_infile"] = cns_paths.PROTEIN_LINK_FILES[prot_link_key]
# Logic to find linkfile for DNA/RNA
nucl_link_key = "5'Phosphate" if phosphate_5 else "5'OH"
# Point to corresponding file
linkfiles["nucl_link_infile"] = cns_paths.NUCL_LINK_FILES[nucl_link_key]
# Converts to real paths
if path is not None:
linkfiles = {key: Path(path, p) for key, p in linkfiles.items()}
return linkfiles
def _is_nan(x: Any) -> bool:
"""Inspect if is nan."""
try:
return math.isnan(x)
except (ValueError, TypeError):
return False
[docs]
def filter_empty_vars(v: Any) -> bool:
"""
Filter empty variables.
See: https://github.com/haddocking/haddock3/issues/162
Returns
-------
bool
Returns `True` if the variable is not empty, and `False` if
the variable is empty. That is, `False` reflects those variables
that should not be written in CNS.
Raises
------
TypeError
If the type of `value` is not supported by CNS.
"""
cases = (
(lambda x: _is_nan(x), false),
(lambda x: isinstance(x, str) and bool(x), true),
(lambda x: isinstance(x, str) and not bool(x), false),
(lambda x: isinstance(x, bool), true), # it should return True
(lambda x: isinstance(x, (EmptyPath, Path)), true),
(lambda x: type(x) in (int, float), true),
(lambda x: x is None, false),
)
for detect, give in cases:
if detect(v):
return give(v)
else:
emsg = f"Value {v!r} has a unknown type for CNS: {type(v)}."
log.error(emsg)
raise TypeError(emsg)
[docs]
def load_workflow_params(
param_header: str = f"{linesep}! Parameters{linesep}",
**params: Any,
) -> str:
"""
Write the values at the header section.
"Empty variables" are ignored. These are defined accoring to
:func:`filter_empty_vars`.
Parameters
----------
params : dict
Dictionary containing the key:value pairs for the parameters to
be written to CNS. Values cannot be of dictionary type.
Returns
-------
param_header: str
The string with the CNS parameters defined.
"""
non_empty_parameters = ((k, v) for k, v in params.items() if filter_empty_vars(v))
# types besides the ones in the if-statements should not enter this loop
for param, v in non_empty_parameters:
param_header += write_eval_line(param, v)
assert isinstance(param_header, str)
return param_header
[docs]
def write_eval_line(param: Any, value: Any, eval_line: str = "eval (${}={})") -> str:
"""Write the CNS eval line depending on the type of `value`."""
eval_line += linesep
if isinstance(value, bool):
value = str(value).lower()
return eval_line.format(param, value)
elif isinstance(value, str):
value = '"' + value + '"'
return eval_line.format(param, value)
elif isinstance(value, Path):
value = '"' + str(value) + '"'
return eval_line.format(param, value)
elif isinstance(value, EmptyPath):
return eval_line.format(param, '""')
elif isinstance(value, (int, float)):
return eval_line.format(param, value)
else:
emsg = f"Unexpected type when writing CNS header: {type(value)}"
log.error(emsg)
raise TypeError(emsg)
[docs]
def load_link(mol_link: Path) -> str:
"""Add the link header."""
return load_workflow_params(
param_header=f"{linesep}! Link file{linesep}",
prot_link_infile=mol_link,
)
load_axis = partial(load_workflow_params, param_header=f"{linesep}! Axis{linesep}") # noqa: E501
load_tensor = partial(load_workflow_params, param_header=f"{linesep}! Tensors{linesep}") # noqa: E501
prepare_output = partial(
load_workflow_params, param_header=f"{linesep}! Output structure{linesep}"
) # noqa: E501
load_trans_vectors = partial(
load_workflow_params, param_header=f"{linesep}! Translation vectors{linesep}"
) # noqa: E501
load_ambig = partial(write_eval_line, "ambig_fname")
load_unambig = partial(write_eval_line, "unambig_fname")
load_hbond = partial(write_eval_line, "hbond_fname")
load_dihe = partial(write_eval_line, "dihe_f")
load_tensor_tbl = partial(write_eval_line, "tensor_tbl")
[docs]
def load_scatter(scatter_lib: Path) -> str:
"""Add scatter library."""
return load_workflow_params(
param_header=f"{linesep}! Scatter lib{linesep}", scatter_lib=scatter_lib
)
[docs]
def load_boxtyp20(waterbox_param: Path) -> str:
"""Add boxtyp20 eval line."""
return load_workflow_params(
param_header=f"{linesep}! Water box{linesep}", boxtyp20=waterbox_param
)
# This is used by docking
[docs]
def prepare_multiple_input(
pdb_input_list: list[str],
psf_input_list: list[str]
) -> str:
"""Prepare multiple input files."""
input_str = f"{linesep}! Input structure{linesep}"
for psf in psf_input_list:
input_str += f"structure{linesep}"
input_str += f" @@{psf}{linesep}"
input_str += f"end{linesep}"
ncount = 1
for pdb in pdb_input_list:
input_str += f"coor @@{pdb}{linesep}"
input_str += write_eval_line(f"input_pdb_filename_{ncount}", pdb)
ncount += 1
# check how many chains there are across all the PDBs
chain_l: list[list[str]] = []
for pdb in pdb_input_list:
for element in libpdb.identify_chainseg(pdb):
chain_l.append(element)
ncomponents = len(set(itertools.chain(*chain_l)))
input_str += write_eval_line("ncomponents", ncomponents)
return input_str
# This is used by Topology and Scoring
[docs]
def prepare_single_input(
pdb_input: FilePath, psf_input: Union[None, FilePath, list[FilePathT]] = None
) -> str:
"""Input of the CNS file.
This section will be written for any recipe even if some CNS variables
are not used, it should not be an issue.
"""
input_str = f"{linesep}! Input structure{linesep}"
if psf_input:
# if isinstance(psf_input, str):
input_str += f"structure{linesep}"
input_str += f" @@{psf_input}{linesep}"
input_str += f"end{linesep}"
input_str += f"coor @@{pdb_input}{linesep}"
if isinstance(psf_input, list):
input_str += f"structure{linesep}"
for psf in psf_input:
input_str += f" @@{psf}{linesep}"
input_str += f"end{linesep}"
# $file variable is still used by some CNS recipes, need refactoring!
input_str += write_eval_line("file", pdb_input)
segids, chains = libpdb.identify_chainseg(pdb_input)
chainsegs = sorted(list(set(segids) | set(chains)))
ncomponents = len(chainsegs)
input_str += write_eval_line("ncomponents", ncomponents)
for i, segid in enumerate(chainsegs, start=1):
input_str += write_eval_line(f"prot_segid_{i}", segid)
seed = RND.randint(100, 99999)
input_str += write_eval_line("seed", seed)
return input_str
def _add_cg_backmapping_arguments(
input_element: Union[PDBFile, list[PDBFile]],
) -> str:
"""Build CG backmapping CNS arguments string.
Args:
input_element (Union[PDBFile, list[PDBFile]]):
Input structure.
Raises:
ValueError: All atom topology not found.
ValueError: CG-to-AA restraints not found.
Returns:
input_str: str
String of arguments understood by CNS.
"""
aa_psf_list: list[Path] = []
cgtoaa_tbl_list: list[Path] = []
input_str: str = ""
# Structure composed of multiple entries
if isinstance(input_element.aa_topology, (list, tuple,)):
for psf in input_element.aa_topology:
if psf is None:
raise ValueError(
f"All-Atom Topology not found {input_element.rel_path}. "
"Conversion to all-atom requires a topology generated "
"with [topoaa] and [topocg]."
)
else:
aa_psf_list.append(psf.rel_path.as_posix())
for i, tbl in enumerate(input_element.cgtoaa_tbl):
if tbl is None and not input_element.shape[i]:
raise ValueError(
f"Coarse-Crain to All-Atom restraint file not found "
f"{input_element.rel_path}. Conversion to all-atom "
"requires a restraint file generated with [topocg]."
)
elif not input_element.shape[i]:
cgtoaa_tbl_list.append(tbl.as_posix())
# Structure composed of only one entry
else:
pdb = input_element
shape = libpdb.check_mol_shape(pdb.rel_path)
if pdb.aa_topology is None:
raise ValueError(
f"All-Atom Topology not found {input_element.rel_path}."
"Conversion to all-atom requires a topology generated with"
" [topoaa] and [topocg]."
)
aa_psf_list.append(pdb.aa_topology.rel_path.as_posix())
if pdb.cgtoaa_tbl is None and not shape:
raise ValueError(
"Coarse-Crain to All-Atom restraint file not found for"
f" entry: {input_element.rel_path}. Conversion to all-atom"
" requires a restraint file generated with [topocg]."
)
cgtoaa_tbl_list.append(pdb.cgtoaa_tbl.as_posix())
# Loop over all the files that need to be added
for ind, (aa_psf, cg2aa_tbl) in enumerate(zip(aa_psf_list, cgtoaa_tbl_list), start=1):
# eval line for psf
input_str += write_eval_line(f"input_aa_psf_filename_{ind}", aa_psf)
# eval line for pdb
input_str += write_eval_line(
f"input_aa_pdb_filename_{ind}", f"{aa_psf[:-4]}.pdb"
)
# eval line for tbl
input_str += write_eval_line(f"input_cgtbl_filename_{ind}", cg2aa_tbl)
return input_str
[docs]
def prepare_cns_input(
model_number: int,
input_element: Union[PDBFile, list[PDBFile]],
step_path: FilePath,
recipe_str: str,
defaults: Any,
identifier: str,
ambig_fname: FilePath = "",
native_segid: bool = False,
cgtoaa: bool = False,
default_params_path: Optional[Path] = None,
debug: Optional[bool] = False,
seed: Optional[int] = None,
) -> Union[Path, str]:
"""
Generate the .inp file needed by the CNS engine.
Parameters
----------
model_number : int
The number of the model. Will be used as file name suffix.
input_element : `libs.libontology.Persisten`, list of those
"""
# TODO: Refactor this function into smaller functions or classes
# read the default parameters
default_params = load_workflow_params(**defaults)
default_params += write_eval_line("ambig_fname", ambig_fname)
# Check if any PDBFile has ligand files and override global parameters
# This is important for ensembles with autotoppar where each model has different ligand files
pdb_files = transform_to_list(input_element)
for pdb in pdb_files:
if hasattr(pdb, "ligand_top_fname") and pdb.ligand_top_fname:
default_params += write_eval_line("ligand_top_fname", pdb.ligand_top_fname)
if hasattr(pdb, "ligand_param_fname") and pdb.ligand_param_fname:
default_params += write_eval_line(
"ligand_param_fname", pdb.ligand_param_fname
)
# write the PDBs
pdb_list = [pdb.rel_path for pdb in pdb_files]
# write the PSFs
psf_list: list[Path] = []
if isinstance(input_element, (list, tuple)):
for pdb in input_element:
if isinstance(pdb.topology, (list, tuple)):
for psf in pdb.topology:
psf_fname = psf.rel_path
psf_list.append(psf_fname)
else:
if pdb.topology is None:
raise ValueError(f"Topology not found for pdb {pdb.rel_path}.")
psf_fname = pdb.topology.rel_path
psf_list.append(psf_fname)
elif isinstance(input_element.topology, (list, tuple)):
pdb = input_element # for clarity
if pdb.topology is None:
raise ValueError(f"Topology not found for pdb {pdb.rel_path}.")
for psf in pdb.topology:
psf_fname = psf.rel_path
psf_list.append(psf_fname)
else:
pdb = input_element # for clarity
if pdb.topology is None:
raise ValueError(f"Topology not found for pdb {pdb.rel_path}.")
psf_fname = pdb.topology.rel_path
psf_list.append(psf_fname)
input_str = prepare_multiple_input(
pdb_input_list=[str(p) for p in pdb_list],
psf_input_list=[str(p) for p in psf_list],
)
# Add the CG-to-AA backmapping CNS arguments
if cgtoaa:
input_str += _add_cg_backmapping_arguments(input_element)
# Setup output filename
output_pdb_filename = f"{identifier}_{model_number}.pdb"
output = f"{linesep}! Output structure{linesep}"
output += write_eval_line("output_pdb_filename", output_pdb_filename)
# prepare chain/seg IDs
segid_str = ""
if native_segid:
if isinstance(input_element, (list, tuple)):
chainid_list = check_combination_chains(input_element)
for i, _chainseg in enumerate(chainid_list, start=1):
segid_str += write_eval_line(f"prot_segid_{i}", _chainseg)
else:
chainid_list: list[str] = []
segids, chains = libpdb.identify_chainseg(
input_element.rel_path, sort=False
)
chainsegs = sorted(list(set(segids) | set(chains)))
for i, _chainseg in enumerate(chainsegs, start=1):
segid_str += write_eval_line(f"prot_segid_{i}", _chainseg)
output += write_eval_line("count", model_number)
# Set pseudo-random seed
if seed is None:
seed = RND.randint(100, 99999)
seed_str = write_eval_line("seed", seed)
# Combine all input parts
inp = default_params + input_str + seed_str + output + segid_str + recipe_str
if debug:
# In debug mode we return a file (allowing to debug)
inp_file = Path(f"{identifier}_{model_number}.inp")
inp_file.write_text(inp)
return inp_file
# Return input string
return inp
[docs]
def prepare_expected_pdb(
model_obj: Union[PDBFile, tuple[PDBFile, ...]],
model_nb: int,
path: FilePath,
identifier: str,
) -> PDBFile:
"""Prepare a PDBobject."""
# Build filepath
expected_pdb_fname = Path(path, f"{identifier}_{model_nb}.pdb")
# Initiate a PDBFile object
pdb = PDBFile(expected_pdb_fname, path=path)
# Multiple models as input
if isinstance(model_obj, (tuple, list, set)):
pdb.topology = [p.topology for p in model_obj]
pdb.aa_topology = [p.aa_topology for p in model_obj]
pdb.cgtoaa_tbl = [p.cgtoaa_tbl for p in model_obj]
pdb.shape = [p.shape for p in model_obj]
# Search for Topology/Parameters among the input models
# to pass it to the about-to-become complex
for m in model_obj:
if m.ligand_param_fname and m.ligand_top_fname:
pdb.ligand_param_fname = m.ligand_param_fname
pdb.ligand_top_fname = m.ligand_top_fname
# Single model / complex
else:
pdb.topology = model_obj.topology
pdb.seed = model_obj.seed
pdb.aa_topology = model_obj.aa_topology
pdb.cgtoaa_tbl = model_obj.cgtoaa_tbl
pdb.restr_fname = model_obj.restr_fname
pdb.shape = model_obj.shape
pdb.ligand_param_fname = model_obj.ligand_param_fname
pdb.ligand_top_fname = model_obj.ligand_top_fname
return pdb