Source code for haddock.clis.restraints.calc_accessibility

"""haddock3-restraints calc_accessibility subcommand.

Given a pdb file, it will calculate the relative accessibility of
the side chains and return a list of surface exposed residues.

Nucleic acids bases are considered to be always accessible.

Usage:
    haddock3-restraints calc_accessibility
        <input_pdb_file>            Path to a PDB file to analyse
        [-c <cutoff>]               Relative side-chain accessibility cutoff
        [--log_level <log_level>]   DEBUG, INFO, WARNING, ERROR, or CRITICAL
        [--export_to_actpass]       Flag to export accessible resiudes
"""


import logging
import os
from freesasa import Structure
from pathlib import Path

from haddock.core.typing import Union


# As this script is a subcommand of the `haddock3-restraints` cli,
# it requires its own options and arguments that are managed here.
[docs]def add_calc_accessibility_arguments(calc_accessibility_subcommand): """Add arguments to the calc_accessibility subcommand.""" calc_accessibility_subcommand.add_argument( "input_pdb_file", type=str, help="input PDB structure.", ) calc_accessibility_subcommand.add_argument( "-c", "--cutoff", help="Relative cutoff for sidechain accessibility", required=False, default=0.4, type=float, ) calc_accessibility_subcommand.add_argument( "--log_level", default='INFO', choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'), help="Logging level", required=False, ) calc_accessibility_subcommand.add_argument( "--export_to_actpass", default=False, action="store_true", help="Export the exposed residues as passive to an actpass file", required=False, ) return calc_accessibility_subcommand
########################################################## # Define set of resiudes / bases handeled by this script # ########################################################## DEFAULT_BASES = ["DA", "DC", "DG", "DT", "A", "C", "G", "U"] MODIFIED_BASES = ["DJ"] STANDARD_AA = [ 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', ] MODIFIED_AA = [ 'ACE', 'ALY', 'ASH', 'CFE', 'CHX', 'CSP', 'CTN', 'CYC', 'CYF', 'CYM', 'DDZ', 'DUM', 'GLH', 'HLY', 'HYP', 'M3L', 'MLY', 'MLZ', 'MSE', 'NEP', 'PNS', 'PTR', 'SEP', 'TOP', 'TYP', 'TYS', ] # , 'THP'='TOP'? VALID_AA = STANDARD_AA + MODIFIED_AA VALID_BASES = DEFAULT_BASES + MODIFIED_BASES VALID_IONS = [ "LI", "F", "NA", "MG", "AL", "CL", "K", "CA", "V", "CR", "MN", "FE", "NI", "CO", "CU", "ZN", "BR", "KR", "SR", "MO", "AG", "CD", "I", "CS", "HO", "YB", "OS", "IR", "PT", "AU", "HG", "PB", ] # Taken from NACCESS REL_ASA = { 'total': { 'ALA': 107.95, 'CYS': 134.28, 'ASP': 140.39, 'GLU': 172.25, 'PHE': 199.48, 'GLY': 80.10, 'HIS': 182.88, 'ILE': 175.12, 'LYS': 200.81, 'LEU': 178.63, 'MET': 194.15, 'ASN': 143.94, 'PRO': 136.13, 'GLN': 178.50, 'ARG': 238.76, 'SER': 116.50, 'THR': 139.27, 'VAL': 151.44, 'TRP': 249.36, 'TYR': 212.76, 'ASH': 140.39, 'DDZ': 107.95, 'GLH': 172.25, 'CYM': 134.28, 'CSP': 134.28, 'CYF': 134.28, 'CYC': 134.28, 'CFE': 134.28, 'NEP': 182.88, 'ALY': 200.81, 'MLZ': 200.81, 'MLY': 200.81, 'M3L': 200.81, 'HYP': 136.13, 'SEP': 116.50, 'TOP': 139.27, 'TYP': 212.76, 'PTR': 212.76, 'TYS': 212.76, 'PNS': 116.50, }, 'bb': { 'ALA': 38.54, 'CYS': 37.53, 'ASP': 37.70, 'GLU': 37.51, 'PHE': 35.37, 'GLY': 47.77, 'HIS': 35.80, 'ILE': 37.16, 'LYS': 37.51, 'LEU': 37.51, 'MET': 37.51, 'ASN': 37.70, 'PRO': 16.23, 'GLN': 37.51, 'ARG': 37.51, 'SER': 38.40, 'THR': 37.57, 'VAL': 37.16, 'TRP': 38.10, 'TYR': 35.38, 'ASH': 37.70, 'DDZ': 38.54, 'GLH': 37.51, 'CYM': 37.53, 'CYC': 37.53, 'CSP': 37.53, 'CYF': 37.53, 'CFE': 37.53, 'NEP': 35.80, 'ALY': 37.51, 'MLZ': 37.51, 'MLY': 37.51, 'M3L': 37.51, 'HYP': 16.23, 'SEP': 38.40, 'TOP': 37.57, 'TYP': 35.38, 'PTR': 35.38, 'TYS': 35.38, 'PNS': 38.40, }, 'sc': { 'ALA': 69.41, 'CYS': 96.75, 'ASP': 102.69, 'GLU': 134.74, 'PHE': 164.11, 'GLY': 32.33, 'HIS': 147.08, 'ILE': 137.96, 'LYS': 163.30, 'LEU': 141.12, 'MET': 156.64, 'ASN': 106.24, 'PRO': 119.90, 'GLN': 140.99, 'ARG': 201.25, 'SER': 78.11, 'THR': 101.70, 'VAL': 114.28, 'TRP': 211.26, 'TYR': 177.38, 'ASH': 102.69, 'DDZ': 69.41, 'GLH': 134.74, 'CYM': 96.75, 'CYC': 96.75, 'CSP': 96.75, 'CYF': 96.75, 'CFE': 96.75, 'NEP': 147.08, 'ALY': 163.30, 'MLZ': 163.30, 'MLY': 163.30, 'M3L': 163.30, 'HYP': 119.90, 'SEP': 78.11, 'TOP': 101.70, 'TYP': 177.38, 'PTR': 177.38, 'TYS': 177.38, 'PNS': 78.11, } }
[docs]def get_accessibility( pdb_f: Union[Path, str] ) -> dict[str, dict[int, dict[str, float]]]: """Compute per-residue accessibility values. Calls `FreeSASA <https://freesasa.github.io/>`_ using its Python API and returns per-residue accessibility values. Parameters ---------- pdb_f : Union[Path, str] Path to the PDB file of interest. Return ------ resid_access : dict[str, dict[int, dict[str, float]]] Dictionary containing a list of accessible residues for each chain(s). """ naccess_unsupported_aa = ['HEC', 'TIP', 'ACE', 'THP', 'HEB', 'CTN'] logging.info("Calculate accessibility...") try: from freesasa import Classifier, calc except ImportError as err: logging.error("calc_accessibility requires the 'freesasa' Python API") raise ImportError(err) # Initiate data holders asa_data: dict[tuple[str, str, int, str], float] = {} rsa_data: dict[tuple[str, str, int], float] = {} rel_main_chain: dict[tuple[str, str, int], float] = {} rel_side_chain: dict[tuple[str, str, int], float] = {} # Point relative asa data _rsa = REL_ASA['total'] _rsa_bb = REL_ASA['bb'] _rsa_sc = REL_ASA['sc'] # Workaround to get relative accessibility values for Nucleic Acids # -> will always be accessible ! for na in VALID_BASES: _rsa[na] = 1 _rsa_bb[na] = 1 _rsa_sc[na] = 1 _script_path = '/'.join(os.path.realpath(__file__).split('/')[:-1]) config_path = _script_path + '/naccess.config' classifier = Classifier(config_path) struct = Structure(pdb_f, classifier, options={}) struct.setRadiiWithClassifier(classifier) result = calc(struct) # iterate over all atoms to get SASA and residue name for idx in range(struct.nAtoms()): atname = struct.atomName(idx).strip() resname = struct.residueName(idx).strip() resid = int(struct.residueNumber(idx)) chain = struct.chainLabel(idx) at_uid = (chain, resname, resid, atname) res_uid = (chain, resname, resid) asa = result.atomArea(idx) asa_data[at_uid] = asa # add asa to residue rsa_data[res_uid] = rsa_data.get(res_uid, 0) + asa # If residue name is unknown, this is probably an ion, # set a relative accessibility of -1 if resname not in _rsa and ( resname not in VALID_AA + VALID_BASES or resname[:2] in VALID_IONS or resname in naccess_unsupported_aa ): logging.warning(f"UPDATED RSA for {resname}") _rsa[resname] = -1 _rsa_bb[resname] = -1 _rsa_sc[resname] = -1 # 3 cases: Regular amino-acid, regular nucleic acid, ion if (resname not in VALID_BASES and atname in ('C', 'N', 'O')) or \ (resname in VALID_BASES and atname in ('P', 'C1', 'C9')): rel_main_chain[res_uid] = rel_main_chain.get(res_uid, 0) + asa elif all([ resname not in VALID_AA + VALID_BASES, resname[:2] in VALID_IONS, ]): rel_main_chain[res_uid] = rel_main_chain.get(res_uid, 0) + asa rel_side_chain[res_uid] = rel_side_chain.get(res_uid, 0) + asa else: rel_side_chain[res_uid] = rel_side_chain.get(res_uid, 0) + asa # convert total asa to relative asa rsa_data.update( (res_uid, asa / _rsa[res_uid[1]]) for res_uid, asa in rsa_data.items() ) rel_main_chain.update( (res_uid, asa / _rsa_bb[res_uid[1]] * 100) for res_uid, asa in rel_main_chain.items() ) rel_side_chain.update( (res_uid, asa / _rsa_sc[res_uid[1]] * 100) for res_uid, asa in rel_side_chain.items() ) # We format to fit the pipeline resid_access: dict[str, dict[int, dict[str, float]]] = {} for res_uid, access in rel_main_chain.items(): chain = res_uid[0] # resname = res_uid[1] resnum = res_uid[2] if chain not in resid_access: resid_access[chain] = {} resid_access[chain][resnum] = { 'side_chain_rel': rel_side_chain.get(res_uid, 0), 'main_chain_rel': access, } # Display accessible residues for chain in resid_access: logging.info(f"Chain: {chain} - {len(resid_access[chain])} residues") return resid_access
[docs]def apply_cutoff( access_data: dict[str, dict[int, dict[str, float]]], cutoff: float, ) -> dict[str, list[int]]: """Apply a cutoff to the sidechain relative accessibility.""" logging.info(f'Applying cutoff to side_chain_rel - {cutoff}') # saving the results in a dictionary result_dict: dict[str, list[int]] = {} for chain in access_data: result_list: list[int] = [] for res in access_data[chain]: sc_rel_accessibility = access_data[chain][res]['side_chain_rel'] if sc_rel_accessibility >= cutoff * 100: result_list.append(res) result_list = list(set(result_list)) result_list.sort() result_str = ','.join(map(str, result_list)) logging.info(f'Chain {chain} - {result_str}') # putting the data in the dictionary result_dict[chain] = result_list return result_dict
[docs]def export_passive( result_dict: dict[str, list[int]], prefix: str = '', ) -> None: """Export the exposed residues as passive to an actpass file.""" for chain in result_dict: filename = Path(f"{prefix}passive_{chain}.actpass") logging.info(f"Writing exposed residues as passive in: {filename}") if filename.exists(): logging.error( f"File {filename} already exists, nothing performed!" ) else: with open(filename, "w") as f: f.write(os.linesep) # Add empty line as no active residues f.write(f"{' '.join(map(str, result_dict[chain]))}")
[docs]def calc_accessibility( input_pdb_file: Union[Path, str], cutoff: float = 0.4, log_level: str = "INFO", export_to_actpass: bool = False, ) -> None: """Calculate the accessibility of the side chains and apply a cutoff. Parameters ---------- input_pdb_file : str Path to the PDB file. cutoff : float Relative cutoff for sidechain accessibility, default = 0.4 log_level : str Logging level. export_to_actpass : bool Export the exposed residues as passive to an actpass file. """ logging.basicConfig( level=log_level, format='%(asctime)s L%(lineno)d %(levelname)s - %(message)s', datefmt='%d/%m/%Y %H:%M:%S', ) # Compute per-residues accessibilities access_dic = get_accessibility(input_pdb_file) # Filter residues based on accessibility cutoff result_dict = apply_cutoff(access_dic, cutoff) # export residues as passive to an actpass file if export_to_actpass: export_passive( result_dict, prefix=f'{Path(input_pdb_file).stem}_', )