Skip to content

CLI Reference

Documentation for the protein-detective script.

$ protein-detective --help
usage: protein-detective [-h]
                         [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
                         [--version]
                         {search,retrieve,density-filter,prune-pdbs,powerfit} ...

Protein Detective CLI

positional arguments:
  {search,retrieve,density-filter,prune-pdbs,powerfit}
    search              Search UniProt for structures
    retrieve            Retrieve structures
    density-filter      Filter AlphaFoldDB structures based on density
                        confidence
    prune-pdbs          Prune PDBe files to keep only the first chain and
                        rename it to A
    powerfit            PowerFit related commands

options:
  -h, --help            show this help message and exit
  --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
  --version             show program's version number and exit

search

$ protein-detective search --help
usage: protein-detective search [-h] [--taxon-id TAXON_ID]
                                [--reviewed | --no-reviewed]
                                [--subcellular-location-uniprot SUBCELLULAR_LOCATION_UNIPROT]
                                [--subcellular-location-go SUBCELLULAR_LOCATION_GO]
                                [--molecular-function-go MOLECULAR_FUNCTION_GO]
                                [--limit LIMIT]
                                session_dir

positional arguments:
  session_dir           Session directory to store results

options:
  -h, --help            show this help message and exit
  --taxon-id TAXON_ID   NCBI Taxon ID
  --reviewed, --no-reviewed
                        Reviewed=swissprot, no-reviewed=trembl. Default is
                        uniprot=swissprot+trembl.
  --subcellular-location-uniprot SUBCELLULAR_LOCATION_UNIPROT
                        Subcellular location (UniProt)
  --subcellular-location-go SUBCELLULAR_LOCATION_GO
                        Subcellular location (GO term, e.g. GO:0005737)
  --molecular-function-go MOLECULAR_FUNCTION_GO
                        Molecular function (GO term, e.g. GO:0003677)
  --limit LIMIT         Limit number of results

retrieve

$ protein-detective retrieve --help
usage: protein-detective retrieve [-h] [--what {alphafold,pdbe}]
                                  [--what-af-formats {amAnnotations,amAnnotationsHg19,amAnnotationsHg38,bcif,cif,paeDoc,paeImage,pdb}]
                                  session_dir

positional arguments:
  session_dir           Session directory to store results

options:
  -h, --help            show this help message and exit
  --what {alphafold,pdbe}
                        What to retrieve. Can be specified multiple times.
                        Default is pdbe and alphafold.
  --what-af-formats {amAnnotations,amAnnotationsHg19,amAnnotationsHg38,bcif,cif,paeDoc,paeImage,pdb}
                        AlphaFold formats to retrieve. Can be specified
                        multiple times. Default is 'pdb'.

density-filter

$ protein-detective density-filter --help
usage: protein-detective density-filter [-h]
                                        [--confidence-threshold CONFIDENCE_THRESHOLD]
                                        [--min-residues MIN_RESIDUES]
                                        [--max-residues MAX_RESIDUES]
                                        session_dir

positional arguments:
  session_dir           Session directory for input and output

options:
  -h, --help            show this help message and exit
  --confidence-threshold CONFIDENCE_THRESHOLD
                        pLDDT confidence threshold (0-100)
  --min-residues MIN_RESIDUES
                        Minimum number of residues above confidence threshold
  --max-residues MAX_RESIDUES
                        Maximum number of residues above confidence threshold.

prune-pdbs

$ protein-detective prune-pdbs --help
usage: protein-detective prune-pdbs [-h] session_dir

positional arguments:
  session_dir  Session directory containing PDB files

options:
  -h, --help   show this help message and exit

powerfit

$ protein-detective powerfit --help
usage: protein-detective powerfit [-h]
                                  {commands,run,report,fit-models,list-runs,list-lcc} ...

positional arguments:
  {commands,run,report,fit-models,list-runs,list-lcc}
    commands            Generate PowerFit commands for PDB files in the
                        session directory
    run                 Run PowerFit on PDB files in the session directory
    report              Generate a report of the best PowerFit solutions.
    fit-models          Fit models based on PowerFit solutions
    list-runs           List all PowerFit runs in the session directory
    list-lcc            List Local Cross-Correlation (lcc.mrc) files for
                        PowerFit runs

options:
  -h, --help            show this help message and exit

powerfit commands

$ protein-detective powerfit commands --help
usage: protein-detective powerfit commands [-h] [-a <float>] [-l] [-cw] [-nr]
                                           [-rr <float>] [-nt] [-tc <float>]
                                           [-p <int>] [-g] [--output OUTPUT]
                                           target resolution session_dir

positional arguments:
  target                Target density map to fit the model in. Data should
                        either be in CCP4 or MRC format
  resolution            Resolution of map in angstrom
  session_dir           Session directory for input and output

options:
  -h, --help            show this help message and exit
  -a, --angle <float>   Rotational sampling density in degrees. Decreasing
                        this number by a factor of 2 results in approximately
                        8 times more rotations sampled.
  -l, --laplace         Apply a Laplace pre-filter to the density data. Can be
                        combined with the core-weighted local cross-
                        correlation.
  -cw, --core-weighted  Use core-weighted local cross-correlation score. Can
                        be combined with the Laplace pre-filter.
  -nr, --no-resampling  Do not resample the density map.
  -rr, --resampling-rate <float>
                        Resampling rate compared to Nyquist.
  -nt, --no-trimming    Do not trim the density map.
  -tc, --trimming-cutoff <float>
                        Intensity cutoff to which the map will be trimmed.
                        Default is 10 percent of maximum intensity.
  -p, --nproc <int>     Number of processors used during search. The number
                        will be capped at the total number of available
                        processors on your machine.
  -g, --gpu             Off-load the intensive calculations to the GPU.
  --output OUTPUT       Output file for powerfit commands. If set to '-'
                        (default) will print to stdout.

powerfit run

$ protein-detective powerfit run --help
usage: protein-detective powerfit run [-h] [-a <float>] [-l] [-cw] [-nr]
                                      [-rr <float>] [-nt] [-tc <float>]
                                      [-p <int>] [-g]
                                      target resolution session_dir

Run PowerFit on PDB files in the session directory and store results.

positional arguments:
  target                Target density map to fit the model in. Data should
                        either be in CCP4 or MRC format
  resolution            Resolution of map in angstrom
  session_dir           Session directory containing PDB files

options:
  -h, --help            show this help message and exit
  -a, --angle <float>   Rotational sampling density in degrees. Decreasing
                        this number by a factor of 2 results in approximately
                        8 times more rotations sampled.
  -l, --laplace         Apply a Laplace pre-filter to the density data. Can be
                        combined with the core-weighted local cross-
                        correlation.
  -cw, --core-weighted  Use core-weighted local cross-correlation score. Can
                        be combined with the Laplace pre-filter.
  -nr, --no-resampling  Do not resample the density map.
  -rr, --resampling-rate <float>
                        Resampling rate compared to Nyquist.
  -nt, --no-trimming    Do not trim the density map.
  -tc, --trimming-cutoff <float>
                        Intensity cutoff to which the map will be trimmed.
                        Default is 10 percent of maximum intensity.
  -p, --nproc <int>     Number of processors used during search. The number
                        will be capped at the total number of available
                        processors on your machine.
  -g, --gpu             Off-load the intensive calculations to the GPU.

powerfit report

$ protein-detective powerfit report --help
usage: protein-detective powerfit report [-h]
                                         [--powerfit_run_id POWERFIT_RUN_ID]
                                         [--top TOP] [--output OUTPUT]
                                         session_dir

positional arguments:
  session_dir           Session directory containing PowerFit results

options:
  -h, --help            show this help message and exit
  --powerfit_run_id POWERFIT_RUN_ID
                        ID of the PowerFit run to report on
  --top TOP             Number of top solutions to report
  --output OUTPUT       Output file for solutions table. If set to '-'
                        (default) will print to stdout.

powerfit fit-models

$ protein-detective powerfit fit-models --help
usage: protein-detective powerfit fit-models [-h]
                                             [--powerfit_run_id POWERFIT_RUN_ID]
                                             [--top TOP] [--output OUTPUT]
                                             session_dir

positional arguments:
  session_dir           Session directory containing PowerFit results

options:
  -h, --help            show this help message and exit
  --powerfit_run_id POWERFIT_RUN_ID
                        ID of the PowerFit run to report on. If not provided,
                        will use all runs.
  --top TOP             Number of top solutions to fit models for
  --output OUTPUT       Output file for fitted model table. If set to '-'
                        (default) will print to stdout.

powerfit list-runs

$ protein-detective powerfit list-runs --help
usage: protein-detective powerfit list-runs [-h] session_dir

positional arguments:
  session_dir  Session directory containing PowerFit results

options:
  -h, --help   show this help message and exit

powerfit list-lcc

$ protein-detective powerfit list-lcc --help
usage: protein-detective powerfit list-lcc [-h] session_dir

positional arguments:
  session_dir  Session directory containing PowerFit results

options:
  -h, --help   show this help message and exit