Source code for jade.basic.structure.util

import sys
import os
import gzip

import Bio
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Residue import Residue
from jade.basic.RestypeDefinitions import RestypeDefinitions
from jade.basic.numeric import *

### NOTE: All Utility function have been replaced by a Bio Structure wrapper: BioPose.
### Please see this new class for future developments!

########  NEW Biopython utility functions ##########

[docs]def peptide_bond_distance(res1, res2):
    """
    Return the bond distance between two residues using Numpy array math.
    :param res1: Bio.PDB.Residue.Residue
    :param res2: Bio.PDB.Residue.Residue
    :rtype: float
    """
    return atomic_distance(res1, res2, 'C', 'N')

[docs]def atomic_distance(res1, res2, res1_atom_name, res2_atom_name):
    """
    Return the atomic distance between two arbitrary Bio residues and two arbitrary atom names.
    :param res1: Bio.PDB.Residue.Residue
    :param res2: Bio.PDB.Residue.Residue
    :param res1_atom_name: str
    :param res2_atom_name: str
    :rtype: float
    """
    try:
        return distance_numpy(res1[res1_atom_name].get_vector().get_array(), res2[res2_atom_name].get_vector().get_array())
    except Exception:
        print "Residue does not have the atom name or there is a problem in the vector.  Returning 0"
        raise IndexError

########  OLD Biopython Utility Functions replaced by BIOPose ########

[docs]def has_id(model, id):
    """
    Returns true or false if the model has the chain.  Because biopython is not updating it's index that has_id is using.  WTF.
    """
    for i in model:
        if i.id == id:
            return True
    return False

[docs]def get_biopython_structure(path, model_id = None):
    structure = None
    path = path.strip()
    parser = PDBParser()
    if not model_id:
        model_id = os.path.basename(path)
    if os.path.basename(path).split('.')[-1] == "pdb":
        structure = parser.get_structure(model_id, path)
    elif os.path.basename(path).split('.')[-1] == 'gz':
        GZ = gzip.open(path, 'rb')
        structure = parser.get_structure(model_id, GZ)
        GZ.close()
    else:
        sys.exit("Unknown extension to read PDB: "+path)

    return structure



[docs]def get_seq_from_biostructure(structure, chain_id):
    for biochain in structure[0]:
        if get_chain_length(biochain) == 0:
            continue
        if biochain.id == chain_id:
            return get_seq_from_biochain(biochain)

    print "Chain not found!"
    raise LookupError

[docs]def get_seq_from_biochain(bio_chain):

    if get_chain_length(bio_chain) == 0:
            return ""

    seq = ""
    d = RestypeDefinitions()

    for res in bio_chain:
        if res.id[0]==' ':
            aa = d.get_one_letter_from_three(res.resname)
            if not aa:
                print "Skipping non-canonical resname: "+res.resname
                print "This could pose a problem!"
                continue
            seq = seq+aa
    return seq

[docs]def get_chain_length(bio_chain):

    l = 0
    for res in bio_chain:
        if res.id[0]==' ':
            l+=1
    return l

[docs]def get_num_biochains(model):
    return len(model[0])