Source code for haddock.libs.libligand

"""Ligand topology utilities."""

import re
import subprocess
import tempfile
from pathlib import Path

from haddock.core.supported_molecules import supported_residues
from haddock.core.defaults import prodrg_exec, prodrg_param
from haddock import log
import shutil

from haddock.core.typing import FilePath


[docs] def identify_unknown_hetatms(pdb_file: FilePath) -> list[str]: """Return residue names in a PDB that are not in the supported residues. Parameters ---------- pdb_file : FilePath Path to the PDB file to inspect. Returns ------- list[str] Unique residue names not found in the supported residues set, in order of first appearance. """ seen: list[str] = [] with open(pdb_file) as fh: for line in fh: if not line.startswith(("ATOM ", "HETATM")): continue resname = line[17:20].strip() if resname and resname not in supported_residues and resname not in seen: seen.append(resname) return seen
[docs] def run_prodrg( pdb_file: FilePath, output_dir: FilePath, ) -> tuple[Path, Path]: """Run prodrg on a ligand PDB and write CNS topology and parameter files. prodrg writes its output to fixed filenames in the current working directory, so the process is executed inside a managed temporary directory that is cleaned up automatically. The resulting files are written to ``output_dir`` named after the input PDB stem. Parameters ---------- pdb_file : FilePath Path to the ligand PDB file. output_dir : FilePath Directory where the named ``.top`` and ``.param`` files are written. Returns ------- tuple[Path, Path] Paths to the written ``<stem>_prodrg.top`` and ``<stem>_prodg.param`` files. Raises ------ RuntimeError If prodrg exits with a non-zero return code or the expected output files are not created. """ if prodrg_exec is None or prodrg_param is None: raise RuntimeError( "prodrg is not available on this platform. " "Provide the binary path via the PRODRG_EXEC environment variable." ) pdb_file = Path(pdb_file).resolve() output_dir = Path(output_dir).resolve() # PRODRG will write its output in its `pwd`, so run it in a temporary one with tempfile.TemporaryDirectory() as tmpdir: # NOTE: PRODRG has a max of 79 chars as input, make sure to pass relative paths # so we need to copy everything to this temporary dir dst = Path(tmpdir, prodrg_param.name) shutil.copy(prodrg_param, dst) dst = Path(tmpdir, pdb_file.name) shutil.copy(pdb_file, dst) result = subprocess.run( # NOTE: We need this `PDBELEM` flag here [str(prodrg_exec), str(pdb_file.name), str(prodrg_param.name), "PDBELEM"], cwd=tmpdir, capture_output=True, text=True, ) if result.returncode != 0: raise RuntimeError( f"prodrg failed with return code {result.returncode}:\n{result.stderr}" ) tmp_top = Path(tmpdir) / "DRGCNS.TOP" tmp_par = Path(tmpdir) / "DRGCNS.PAR" # TODO: Check if the atom names have been changed! # tmp_pdb = Path(tmpdir) / "<???>.pdb" if not tmp_top.exists() or not tmp_par.exists(): ls = list(Path(tmpdir).iterdir()) prodrg_err = Path(tmpdir) / "DRGDRG.ERR" prodrg_log = Path(tmpdir) / "DRGDRG.LOG" log.error(f"DRGDRG.log: {prodrg_log.read_text()}") log.error(f"DRGDRG.err: {prodrg_err.read_text()}") log.debug(f"ls: {ls}") raise RuntimeError( f"prodrg finished but expected output files are missing in {tmpdir} " ) top_path = output_dir / f"{pdb_file.stem}_prodrg.top" par_path = output_dir / f"{pdb_file.stem}_prodrg.param" top_content = _sanitize_atom_names(tmp_top.read_text()) par_content = _sanitize_atom_names(_remove_nbonds(tmp_par.read_text())) top_path.write_text(top_content) par_path.write_text(par_content) return top_path, par_path
def _sanitize_atom_names(content: str) -> str: """Remove colons from atom type names in prodrg CNS output. prodrg may generate atom type names containing colons (e.g. ``HT:A``) and this is not compatible with CNS so they must be removed. Parameters ---------- content : str Contents of a prodrg-generated CNS file. Returns ------- str Content with colons stripped from non-comment lines. """ lines = [] for line in content.splitlines(keepends=True): if line.lstrip().startswith("!"): lines.append(line) else: lines.append(line.replace(":", "")) return "".join(lines) def _remove_nbonds(par_content: str) -> str: """Remove the NBONds...END block from a prodrg CNS parameter string. PRODRG definition of NBONds might interfere with HADDOCK's internal parameters, so we must remove the NBONds lines from the param generated by HADDOCK Parameters ---------- par_content : str Contents of the ``DRGCNS.PAR`` file. Returns ------- str Parameter content with the NBONds block removed. """ return re.sub(r"(?s)NBONds.*?END", "", par_content)