Source code for haddock.clis.restraints.random_removal

"""haddock3-restraints random_removal subcommand.

Given an input ambiguous file (.tbl), this subcommand will generate an archive
containing multiple tbl files, each containing a subset of the initial ones.

The size of the subset is tuned by the optional argument --ratio (-r).
The number of generated files in the archive is tuned by the argument --nb-tbl (-n).
The random seed can be set using --seed (-s), for reproducibility.

Usage:
    haddock3-restraints random_removal <tbl_file> [-r <ratio>] [-n <nb-tbl>] [-s <seed>]

positional arguments:
  tblfile               input tbl restraint file.

options:
  -h, --help            show this help message and exit
  -r RATIO, --ratio RATIO
                        Ratio of restraints to be randomly removed.
  -s SEED, --seed SEED  Pseudo-random seed.
  -n NB_TBL, --nb-tbl NB_TBL
                        Number of ambiguous files to generate in the archive.
"""

import os
import sys
import tarfile
from io import BytesIO
from pathlib import Path

from haddock.core.typing import Union
from haddock.libs.librestraints import get_restraint_subset


def add_rand_removal_arguments(rand_removal_subcommand):
    """Add arguments to the random removal subcommand.

    Parameters
    ----------
    rand_removal_subcommand
        Object exposing an ``add_argument`` method (e.g. an argparse
        parser or sub-parser) on which the arguments are registered.

    Returns
    -------
    The same object that was passed in, with the ``tblfile``, ``--ratio``,
    ``--seed`` and ``--nb-tbl`` arguments added.
    """
    rand_removal_subcommand.add_argument(
        "tblfile",
        type=str,
        help="input tbl restraint file.",
    )

    rand_removal_subcommand.add_argument(
        "-r",
        "--ratio",
        help=(
            "Ratio of restraints to be randomly removed. "
            "(default: %(default)s)"
        ),
        required=False,
        default=0.5,
        type=float,
    )

    rand_removal_subcommand.add_argument(
        "-s",
        "--seed",
        help="Pseudo-random seed. (default: %(default)s)",
        required=False,
        default=917,  # Same as the one in various modules (iniseed)
        # Without an explicit type, a seed given on the command line would
        # be forwarded as a str while the default (and the annotations of
        # the functions consuming it) are int.
        type=int,
    )

    rand_removal_subcommand.add_argument(
        "-n",
        "--nb-tbl",
        help=(
            "Number of ambiguous files to generate in the archive. "
            "(default: %(default)s)"
        ),
        required=False,
        type=int,
        default=10,
    )

    return rand_removal_subcommand
def random_removal(
    tblfile: Union[str, Path],
    ratio: float,
    nb_tbl: int = 10,
    seed: int = 917,
) -> Path:
    """Generate an archive containing the randomly removed restraints.

    The archive is written next to the input file and is named after it
    with a ``.tgz`` suffix appended. Its members are named
    ``<input stem>_rr<i>.tbl`` with ``i`` running from 1 to `nb_tbl`.

    Parameters
    ----------
    tblfile : Union[str, Path]
        Path to the input ambiguous file
    ratio : float
        Ratio of restraints to be removed
    nb_tbl : int, optional
        Number of ambig files to generate in the archive, by default 10
    seed : int, optional
        Initial random seed, by default 917

    Returns
    -------
    Path
        Path to the generated archive containing `nb_tbl` restraints in it.

    Raises
    ------
    SystemExit
        If `nb_tbl` is lower than 1, or if creating the restraints subset
        generator raises a ``ValueError``.
    """
    if nb_tbl < 1:
        sys.exit(
            "Number of restraints files to generate must be "
            f">= 1 (now set to {nb_tbl})"
        )

    tbl_path = Path(tblfile)
    # Initiate the tbl archive holding all the restraints
    tbl_archive_fpath = Path(
        tbl_path.parent.resolve(),
        tbl_path.name + ".tgz",
    )

    # The context manager guarantees the archive is closed even when the
    # function exits early (sys.exit) or an exception is raised in the loop.
    with tarfile.open(tbl_archive_fpath, "w:gz") as tbl_archive:
        # Initiate restraints subset generator
        try:
            subsets_restraints = get_restraint_subset(
                tblfile,
                ratio,
                seed=seed,
            )
        except ValueError as e:
            sys.exit(e)

        try:
            # Loop over number of tbl file to generate
            for i in range(1, nb_tbl + 1):
                # Obtain a subset of restraints
                subset_restraints = next(subsets_restraints)
                # Encode once: the tar header size must be the number of
                # BYTES written, which differs from the number of characters
                # as soon as the restraints contain non-ASCII text.
                payload = os.linesep.join(subset_restraints).encode("utf-8")
                # Create tarinfo object
                tarinfo = tarfile.TarInfo(f"{tbl_path.stem}_rr{i}.tbl")
                tarinfo.size = len(payload)
                # Add this file to the archive
                tbl_archive.addfile(tarinfo, BytesIO(payload))
        finally:
            # Always release the generator, also on failure
            subsets_restraints.close()

    # Return the generated archive path
    return tbl_archive_fpath
def main(
    tblfile: Union[str, Path],
    ratio: float,
    nb_tbl: int = 10,
    seed: int = 917,
) -> None:
    """Run `random_removal` and report the resulting archive on stdout.

    Parameters
    ----------
    tblfile : Union[str, Path]
        Path to the input ambiguous file
    ratio : float
        Ratio of restraints to be removed
    nb_tbl : int, optional
        Number of ambig files to generate in the archive, by default 10
    seed : int, optional
        Initial random seed, by default 917

    Returns
    -------
    None
        The archive path is printed, together with usage advice.
    """
    archive_fpath = random_removal(tblfile, ratio, nb_tbl=nb_tbl, seed=seed)
    # One entry per printed line; joined with the platform line separator
    message_lines = (
        f"Generated archive path: {archive_fpath}!",
        "Note that resulting restraints might be redundant",
        "By using this file, we suggest to:",
        "- turn off random removal (random_removal = false)",
        "- turn on the previous ambig in later stages (previous_ambig = true)",
    )
    print(os.linesep.join(message_lines))