#!/usr/bin/env python
#Jared Adolf-Bryfogle (jadolfbr@gmail.com)
import os
import gzip
import re
import math
from collections import defaultdict
from Bio.PDB import PDBParser
from Bio.PDB import PPBuilder
from Bio.PDB import MMCIFParser
from Bio.PDB import calc_dihedral
from Bio.PDB import Vector
from jade.basic.RestypeDefinitions import RestypeDefinitions
from jade.basic.structure.Structure import PDBInfo
from jade.basic.structure.Structure import ResidueRecord
from jade.basic.structure.util import peptide_bond_distance
from jade.basic.path import *
from jade.basic.numeric import *
from jade.utility import vector1
[docs]class BioPose(object):
"""
This is my biopython meta class. Because biopython's interface kinda sucks.
This is a little cleaner.
The other way is to sublclass each Biopython class structure, which I'm not ready to do.
Right now, you need a path as I don't know how we would use this from sequence, etc as you do in Rosetta.
:path: Is a path to an RCSB file. PDB (.pdb), mmCIF(.cif), and gzipped (.gz) versions.
"""
def __init__(self, path, model_num=0):
self.res_definitions = RestypeDefinitions()
self.struct, self.header = self.load_from_file(path) #Bio struct, Header dictionary
self._setup_self(model_num)
############ IO ###################
[docs] def load_from_file(self, path):
"""
Load a file from PDB or mmCIF. .gz is supported.
:param path: Path to PDB or mmCIF file
:rtype: tuple(bio.PDB.Structure.Structure, dict)
"""
structure = None
if re.search(".pdb", path):
parser = PDBParser()
else:
parser = MMCIFParser()
path = path.strip()
model_id = os.path.basename(path)
#if os.path.basename(path).split('.')[-1] == 'gz':
# GZ = gzip.open(path, 'rb')
# GZ.close()
#else :
structure = parser.get_structure(model_id, open_file( path ))
header = parser.get_header()
return structure, header
[docs] def reload_from_file(self, path, model_num=0):
"""
Reload a BioPose from a file path.
:param path: str
:param model_num: int
:return:
"""
self.struct, self.header = self.load_from_file(path)
self._setup_self(model_num)
def _setup_self(self, model_num =0):
"""
Setup all info for bio pose after loading from file.
:param model_num: int
:return:
"""
self.all_residues = self._setup_all_residues(model_num) #vector1 of Bio Residues
self.pdb_info = self._setup_pdb_info() #PDBInfo to map the vector1
self.peptide_bond_distances = self._setup_peptide_bond_distances() #map of bond distances to next residue in pose.
############ Getting Components #############
return self.pdb_info
[docs] def resnum(self, pdb_num, chain, icode=' '):
return self.pdb_info.get_resnum(pdb_num, chain, icode)
[docs] def structure(self):
"""
Get the Bio Structure stored in this class.
:rtype: bio.PDB.Structure.Structure
"""
return self.struct
[docs] def model(self, model_num = 0):
"""
Get a Bio Model of the stored structure
:param id: int
:rtype: bio.PDB.Model.Model
"""
return self.struct[model_num]
[docs] def chain(self, chain_id, model_num = 0):
"""
Get a Bio Chain of the stored structure
:param chain_id: str
:param model_num: int
:rtype: bio.PDB.Chain.Chain
"""
return self.struct[model_num][chain_id]
[docs] def residue(self, resnum, chain_id, icode=' ', alt=' ', model_num = 0):
"""
Get a Bio Residue of the stored structure.
Adds a chain_id attribute.
:param resnum: int
:param chain_id: str
:param icode: str
:param alt: str
:param model_num: int
:rtype: bio.PDB.Residue.Residue
"""
res = self.struct[model_num][chain_id][(alt, resnum, icode)]
res.chain_id = chain_id
return res
[docs] def atom(self, atom_name, resnum, chain_id, icode=' ', alt=' ', model_num=0):
"""
Get a Bio Atom of the stored structure
:param atom_name: str
:param resnum: int
:param chain_id: str
:param icode: str
:param alt: str
:param model_num: int
:rtype: bio.PDB.Atom.Atom
"""
return self.struct[model_num][chain_id][(alt, resnum, icode)][atom_name]
### Lists of Structures ###
[docs] def chains(self, model_num = 0):
"""
Get a list of Bio Chains
:param model_num: int
:rtype: list[bio.PDB.Chain.Chain
"""
return [c for c in self.struct]
[docs] def residues(self, chain_id, model_num = 0, include_alt = False):
"""
Get residues, including or not including residues with alternate location codes - which can be a PITA
Adds chain_id attribute to residue.
:param chain_id: str
:param model_num: int
:param include_alt: bool
:rtype: list[bio.PDB.Residue.Residue]
"""
resi = []
for res in self.chain(chain_id, model_num):
res.chain_id = chain_id
if res.id[0] ==' ':
resi.append(res)
elif include_alt:
resi.append(res)
else:
continue
return resi
[docs] def atoms(self, resnum, chain_id, icode=' ', alt=' ', model_num = 0):
"""
Get a list of Bio Atoms
:param resnum: int
:param chain_id: str
:param icode: str
:param alt: str
:param model_num: int
:rtype: list[bio.PDB.Atom.Atom]
"""
return [atm for atm in self.residue(resnum, chain_id, icode, alt, model_num)]
############ Helper Funtions ################
[docs] def total_residue(self):
return len(self.all_residues)
[docs] def phi(self, i):
"""
Get the Phi Angle of i in radians
:param i: int
:rtype: float
"""
if i == 1 or not self.connected_to_previous(i):
return 0.0
res = self.all_residues[i]
try:
n = res['N'].get_vector()
ca = res['CA'].get_vector()
c = res['C'].get_vector()
res_minus_one = self.all_residues[i -1]
cp = res_minus_one['C'].get_vector()
phi = calc_dihedral(cp, n, ca, c)
return phi
except Exception:
print "Could not get phi for "+repr(i)
raise LookupError
[docs] def psi(self, i):
"""
Get the Psi Angle of i in radians
:param i: int
:rtype: float
"""
res = self.all_residues[i]
if i == len(self.all_residues) or not self.connected_to_next(i):
return 0.0
try:
n = res['N'].get_vector()
ca = res['CA'].get_vector()
c = res['C'].get_vector()
res_plus_one = self.all_residues[i + 1]
nn = res_plus_one['N'].get_vector()
psi = calc_dihedral(n, ca, c, nn)
return psi
except Exception:
print "Could not get psi for "+repr(i)
raise LookupError
[docs] def omega(self, i, rosetta_definitions = True):
"""
Get the Omega Angle of i in radians
Omega is defined as the dihedral angle between the peptide bond of i and i + 1, as in Rosetta.
If rosetta_definitions are False, omega is then treated as being between i and i -1
:param i: int
:param reverse_rosetta_definitions: bool
:rtype: float
"""
res = self.all_residues[i]
try:
n = res['N'].get_vector()
ca = res['CA'].get_vector()
c = res['C'].get_vector()
if rosetta_definitions and i < len(self.all_residues) -1 and self.connected_to_next(i):
res_plus_one = self.all_residues[i + 1]
next_n = res_plus_one['N'].get_vector()
next_ca = res_plus_one['CA'].get_vector()
omega = calc_dihedral(ca, c, next_n, next_ca)
return omega
elif not rosetta_definitions and i > 1 and self.connected_to_previous(i):
res_minus_one = self.all_residues[i - 1]
pre_c = res_minus_one['C'].get_vector()
pre_ca = res_minus_one['CA'].get_vector()
omega = calc_dihedral(pre_ca, pre_c, n, ca)
return omega
else:
return 0.0
except BaseException:
print "Could not get omega for "+repr(i)
raise LookupError
[docs] def res_bond_distance(self, resi):
"""
Get the stored bond distances between residue and residue+1
:param res: int
:rtype: float
"""
return self.peptide_bond_distances[resi]
[docs] def connected_to_next(self, resi, peptide_bond_distance_cutoff=1.8):
if self.res_bond_distance(resi) <= peptide_bond_distance_cutoff:
return True
else:
return False
[docs] def connected_to_previous(self, resi, peptide_bond_distance_cutoff=1.8):
if resi == 0:
return False
elif self.res_bond_distance(resi-1) <= peptide_bond_distance_cutoff:
return True
else:
return False
[docs] def get_sequence(self, chain_id, model_num = 0):
"""
Get a sequence of a chain - Not including alternate res locations
:param chain_id: str
:param model_num: int
:rtype: str
"""
if self.get_chain_length(chain_id, model_num) == 0:
return ""
seq = ""
for res in self.residues(chain_id, model_num):
aa = self.res_definitions.get_one_letter_from_three(res.resname)
if not aa:
print "Setting NCAA as X: "+res.resname
print "This could pose a problem!"
seq = seq+'X'
continue
seq = seq+aa
return seq
[docs] def get_chain_length(self, chain_id, model_num = 0):
"""
Get the number of AA in a chain - Not including alternate res locations
:param chain_id: str
:rtype: int
"""
return len(self.residues(chain_id, model_num))
[docs] def get_chain_ids(self, model_num):
"""
Get all chain IDS for a model.
:param model_num: int
:rtype: list[str]
"""
ids = []
for chain in self.model(model_num):
ids.append(chain.id)
return ids
###### Private Members #######
def _setup_all_residues(self, model_num=0):
"""
Setup all residues in the model, for ease of use like a pose.
:param model_num: int
:rtype: list[bio.PDB.Residue.Residue]
"""
all_residues = vector1()
for chain_id in self.get_chain_ids(model_num):
#print "ChainID: "+chain_id
residues = self.residues(chain_id, model_num)
all_residues.extend(residues)
return all_residues
def _setup_pdb_info(self):
"""
Setup the PDBInfo mapping from residues to pdb iformation.
:param model_num: int
:rtype: PDBInfo
"""
pdb_info = PDBInfo()
i = 1
for res in self.all_residues:
s = ResidueRecord(self.res_definitions.get_one_letter_from_three(res.resname), res.id[1], res.chain_id, res.id[2])
pdb_info.add_residue_record(s)
return pdb_info
def _setup_peptide_bond_distances(self):
bond_distances = defaultdict()
for i in range(1, len(self.all_residues)+1):
res = self.all_residues[i]
if i == len(self.all_residues):
bond_distances[i] = 0
else:
try:
bond_distances[i] = peptide_bond_distance(self.all_residues[i], self.all_residues[i+1])
except Exception:
bond_distances[i] = None
return bond_distances
########## Testing Functions #############
[docs]def test_dihedrals(pose):
"""
Simple Test for Dihedral output
:param pose: BioPose
:rtype: bool
"""
for i in range(1, pose.total_residue()+1):
print "\n"+str(pose.pdb_info.pose2pdb(i))
try:
print "Phi: "+repr(math.degrees(pose.phi(i)))
print "Psi: "+repr(math.degrees(pose.psi(i)))
print "Omega:"+repr(math.degrees(pose.omega(i)))
except Exception:
"Print could not get dihedral for resnum "+repr(i)
return True
[docs]def test_pdbinfo(pose):
"""
Simple Test for pdbinfo output.
:param pose: BioPose
:rtype: bool
"""
for i in range(1, pose.total_residue() +1):
print repr(i)
print pose.all_residues[i].id
print pose.pdb_info.pose2pdb(i)
######## Testing Main ############
if __name__ == "__main__":
v = vector1([1, 2, 3])
for i in v:
print repr(i)
print len(v)
print repr(v[3])
test_pdb = os.path.join(get_testing_inputs_path(),"2j88.pdb")
pose = BioPose(test_pdb)
test_dihedrals(pose)