Source code for haddock.libs.libnotebooks

"""
Helper functions for HADDOCK3 notebooks
"""

import gzip
import os
from io import StringIO

from Bio.PDB import PDBIO, PDBParser, Superimposer

# NOTE: `py3Dmol` is an optional dependency, it might not be installed
#  this is why we have the conditional import here
try:
    import py3Dmol
except ImportError:
    py3Dmol = None


[docs] def load_pdb_file(file_path): """.""" if not os.path.exists(file_path): print(f"Error: File not found at {file_path}") return None if file_path.endswith(".gz"): with gzip.open(file_path, "rt") as f: return f.read() else: with open(file_path, "r") as f: return f.read()
[docs] def pdb_string_to_structure(pdb_string, structure_id): """.""" parser = PDBParser(QUIET=True) pdb_io = StringIO(pdb_string) structure = parser.get_structure(structure_id, pdb_io) return structure
[docs] def structure_to_pdb_string(structure): """.""" pdb_io = PDBIO() pdb_io.set_structure(structure) output = StringIO() pdb_io.save(output) return output.getvalue()
# Full alignement - user-given chains used # FIXME: This function has too many arguments, needs to be refactored
[docs] def align_full( pdb_path1, pdb_path2, chains=["A", "B"], width=800, height=600, model1_colors={"A": "red", "B": "orange", "C": "pink"}, model2_colors={"A": "blue", "B": "green", "C": "lime"}, atom_types=["P", "C1", "CA"], show_labels=False, show_per_chain_rmsd=True, ): """.""" """ Align two protein structures using all specified chains and visualize with py3Dmol. Parameters: ----------- pdb_path1 : str Path to the first (reference) PDB file pdb_path2 : str Path to the second PDB file to align to the first chains : list, default ['A', 'B'] List of chain IDs to include in alignment width : int, default 800 Viewer width in pixels height : int, default 600 Viewer height in pixels model1_colors : dict, default {'A': 'red', 'B': 'orange'} Colors for chains in model 1 model2_colors : dict, default {'A': 'blue', 'B': 'green'} Colors for chains in model 2 atom_types : list, default ['CA', 'P', 'C1'] Atom types to use for alignment (['CA'] or ['CA', 'CB', 'N', 'C']) show_labels : bool, default False Whether to show descriptive labels show_per_chain_rmsd : bool, default True Whether to calculate and display per-chain RMSD values Returns: -------- py3Dmol.view object Example: -------- align_full_molecule('model1.pdb.gz', 'model2.pdb.gz') """ def get_atoms_from_chains(structure, chain_ids, atom_types): atoms = [] chain_info = {} for model in structure: for chain_id in chain_ids: if chain_id in model: chain = model[chain_id] chain_atoms = [] for residue in chain: for atom_type in atom_types: if atom_type in residue: atoms.append(residue[atom_type]) chain_atoms.append(residue[atom_type]) chain_info[chain_id] = len(chain_atoms) return atoms, chain_info # Create viewer view = py3Dmol.view(width=width, height=height) # Load PDB files model_1_data = load_pdb_file(pdb_path1) model_2_data = load_pdb_file(pdb_path2) if not (model_1_data and model_2_data): print("Failed to load one or both PDB files") return view, None, {} overall_rmsd = None per_chain_rmsd = {} try: # Parse structures struct1 = pdb_string_to_structure(model_1_data, "model1") struct2 = pdb_string_to_structure(model_2_data, "model2") # Get atoms from all specified chains atoms_1, chain_info_1 = get_atoms_from_chains(struct1, chains, atom_types) atoms_2, chain_info_2 = get_atoms_from_chains(struct2, chains, atom_types) print(f"Atoms for alignment - Model 1: {chain_info_1}, Total: {len(atoms_1)}") print(f"Atoms for alignment - Model 2: {chain_info_2}, Total: {len(atoms_2)}") print( "Model 1: " + "; ".join( [ f"chain {chain} in {color}" for chain, color in model1_colors.items() if chain in chains ] ) ) print( "Model 2: " + "; ".join( [ f"chain {chain} in {color}" for chain, color in model2_colors.items() if chain in chains ] ) ) if len(atoms_1) > 0 and len(atoms_2) > 0: # Align using all atoms from specified chains min_atoms = min(len(atoms_1), len(atoms_2)) ref_atoms = atoms_1[:min_atoms] alt_atoms = atoms_2[:min_atoms] print(f"Using {min_atoms} atom pairs for alignment") # Perform superimposition sup = Superimposer() sup.set_atoms(ref_atoms, alt_atoms) overall_rmsd = sup.rms print(f"Whole molecule alignment RMSD: {overall_rmsd:.3f} Å") # Apply transformation to all atoms in structure 2 sup.apply(struct2.get_atoms()) # Calculate per-chain RMSD if requested if show_per_chain_rmsd: for chain_id in chains: try: chain_atoms_1, _ = get_atoms_from_chains( struct1, [chain_id], atom_types ) chain_atoms_2, _ = get_atoms_from_chains( struct2, [chain_id], atom_types ) if len(chain_atoms_1) > 0 and len(chain_atoms_2) > 0: min_chain = min(len(chain_atoms_1), len(chain_atoms_2)) sup_chain = Superimposer() sup_chain.set_atoms( chain_atoms_1[:min_chain], chain_atoms_2[:min_chain] ) per_chain_rmsd[chain_id] = sup_chain.rms print(f"Chain {chain_id} RMSD: {sup_chain.rms:.3f} Å") except Exception as e: print(f"Could not calculate RMSD for chain {chain_id}: {e}") # Convert back to PDB strings aligned_pdb_1 = structure_to_pdb_string(struct1) aligned_pdb_2 = structure_to_pdb_string(struct2) # Add models to viewer view.addModel(aligned_pdb_1, "pdb") view.addModel(aligned_pdb_2, "pdb") else: print( "Could not find sufficient atoms for alignment, adding original models" ) view.addModel(model_1_data, "pdb") view.addModel(model_2_data, "pdb") except Exception as e: print(f"Alignment failed: {e}") view.addModel(model_1_data, "pdb") view.addModel(model_2_data, "pdb") # Apply styling view.setStyle({"model": 0}, {"cartoon": {}}) view.setStyle({"model": 1}, {"cartoon": {}}) for chain, color in model1_colors.items(): if chain in chains: view.addStyle( {"model": 0, "chain": chain}, {"cartoon": {"color": color, "opacity": 0.9}}, ) for chain, color in model2_colors.items(): if chain in chains: view.addStyle( {"model": 1, "chain": chain}, {"cartoon": {"color": color, "opacity": 0.6}}, ) # Add labels if requested if show_labels: view.addLabel( "Model 1 (Reference)", { "position": {"x": -20, "y": 20, "z": 0}, "backgroundColor": "darkred", "fontColor": "white", }, ) view.addLabel( "Model 2 (Aligned)", { "position": {"x": 20, "y": 20, "z": 0}, "backgroundColor": "darkgreen", "fontColor": "white", }, ) view.addLabel( "Full Molecule Alignment", { "position": {"x": 0, "y": -20, "z": 0}, "backgroundColor": "navy", "fontColor": "white", }, ) if overall_rmsd: view.addLabel( f"Overall RMSD: {overall_rmsd:.3f} Å", { "position": {"x": 0, "y": -30, "z": 0}, "backgroundColor": "purple", "fontColor": "white", }, ) # Add per-chain RMSD labels y_offset = -40 for chain_id, rmsd_val in per_chain_rmsd.items(): view.addLabel( f"Chain {chain_id}: {rmsd_val:.3f} Å", { "position": {"x": 0, "y": y_offset, "z": 0}, "backgroundColor": "gray", "fontColor": "white", "fontSize": 10, }, ) y_offset -= 8 view.zoomTo() return view