import sqlite3
import os
import sys
from collections import defaultdict
from jade.antibody import outliers
#A Collection of functions to query the AbDb
[docs]def get_pdb_chain_subset(db, gene, use_cutoffs = False, res_cutoff = 2.8, rfac_cutoff = .3):
"""
Return a list of tuples of [pdb, chain] of the particular gene
"""
#print db_fname
db.row_factory = sqlite3.Row
c = db.cursor()
if not use_cutoffs:
c.execute("SELECT DISTINCT PDB, original_chain FROM cdr_data WHERE gene =?", (gene,))
else:
c.execute("SELECT DISTINCT PDB, original_chain FROM cdr_data WHERE gene = ? and resolution <= ? and rfactor <= ?", [gene, res_cutoff, rfac_cutoff])
rows = c.fetchall()
entries = []
for row in rows:
row.keys()
entries.append([row['PDB'],row['original_chain']])
return entries
[docs]def get_all_lengths(db, cdr, limit_to_known = True, res_cutoff = 2.8, rfac_cutoff = .3):
"""
Get all unique lengths for a CDR
"""
c = db.cursor()
lengths = []
if limit_to_known:
in_data = ['loopKeyNotInPaper', cdr, res_cutoff, float(rfac_cutoff)]
for row in c.execute("SELECT DISTINCT length from "+"cdr_data"+" WHERE datatag!=? and CDR=? and resolution<=? and rfactor<=?", in_data):
lengths.append(row[0])
else:
in_data = [cdr, res_cutoff, rfac_cutoff]
for row in c.execute("SELECT DISTINCT length from "+"cdr_data WHERE CDR=? and resolution<=? and rfactor<=?", in_data):
lengths.append(row[0])
c.close()
return lengths
[docs]def get_all_clusters_for_length(db, cdr, length, limit_to_known = True, res_cutoff = 2.8, rfac_cutoff = .3):
"""
Get all unique clusters for a length and a cdr
"""
c = db.cursor()
clusters = []
if limit_to_known:
data = ['loopKeyNotInPaper', cdr, length, res_cutoff, rfac_cutoff]
for row in c.execute("SELECT DISTINCT fullcluster from cdr_data"+" WHERE datatag!=? and CDR=? and length=? and resolution <= ? and rfactor <= ?", data):
clusters.append(row[0])
else:
data = [cdr, length, res_cutoff, rfac_cutoff]
for row in c.execute("SELECT DISTINCT fullcluster from cdr_data WHERE CDR=? and length=? and resolution<=? and rfac_cutoff<=?", data):
clusters.append(row[0])
c.close()
return clusters
[docs]def get_center_for_cluster_and_length(db, cdr, length, cluster, data_names_array):
"""
Get the center for a particular cluster and length
"""
sele = ", ".join(data_names_array)
#print sele
data = []
c = db.cursor()
#print "Getting data for: "+" ".join([cdr, repr(length), cluster])
in_data = ['loopKeyNotInPaper', cdr, length, cluster]
for row in c.execute("SELECT "+sele+" FROM cdr_data WHERE center=1 and datatag!=? and CDR=? and length=? and fullcluster=?",in_data):
data.append(row)
#print data
if len(data) == 0: return data
else: return data[0]
[docs]def get_center_dih_degrees_for_cluster_and_length(db, cdr, length, cluster):
"""
Returns a dictionary of center dihedral angles in positional order. Or returns False if not found.
result["phis'] = [phis as floats]
result["psis"] = [Psis as floats]
result["omegas"] = [Omegas as floats]
"""
result = defaultdict()
result["phis"] = []
result["psis"] = []
result["omegas"] = []
data_names_array = ["dihedrals"]
data = get_center_for_cluster_and_length(db, cdr, length, cluster, data_names_array)
if len(data) == 0: return False
dihedrals = data[0]
dihSP = dihedrals.split(":")
phii = 0; psii = 1; omega_i = 2
for i in range(0, length):
phi = dihSP[phii]; psi = dihSP[psii]; omega = dihSP[omega_i]
result["phis"].append(float(phi)); result["psis"].append(float(psi)); result["omegas"].append(float(omega))
phii+=3; psii+=3; omega_i+=3
assert(len(result["phis"]) == len(result["psis"]) == len(result["omegas"]))
return result
[docs]def get_dihedral_string_for_centers(db, limit_to_known = True):
"""
Get a string of the dihedral angles for all centers
"""
sele = "SELECT fullcluster, length_type, ss, dihedrals from cdr_data where center=1"
data = defaultdict()
c = db.cursor()
for row in c.execute(sele):
data[row[0]] = [row[1], row[2], row[3]]
return data
[docs]def get_data_for_cluster_and_length(db, cdr, length, cluster, data_names_array, limit_to_known = True, res_cutoff = 2.8, rfac_cutoff = .3):
"""
Get a set of data of a particular length, cdr, and cluster.
data_names_array is a list of the types of data. Can include DISTINCT keyword
Example: data_names_array = ["PDB", "original_chain", "new_chain", "sequence"]
"""
sele = ", ".join(data_names_array)
#print sele
data = []
c = db.cursor()
if limit_to_known:
in_data = ['loopKeyNotInPaper', cdr, length, cluster, res_cutoff, rfac_cutoff]
for row in c.execute("SELECT "+sele+" FROM cdr_data WHERE datatag!=? and CDR=? and length=? and fullcluster=? and resolution<=? and rfactor <=?", in_data):
data.append(row)
else:
in_data = [cdr, length, cluster, res_cutoff, rfac_cutoff]
for row in c.execute("SELECT "+sele+" FROM cdr_data WHERE CDR=? and length=? and fullcluster=? and resolution<=? and rfactor<=?", in_data):
data.append(row)
c.close()
return data
[docs]def get_unique_sequences_for_cluster(db, cluster, include_outliers, outlier_definition = "conservative"):
sequences = []
cur = db.cursor()
for row in cur.execute("select DISTINCT seq FROM cdr_data WHERE fullcluster=? "+outliers.get_outlier_string(include_outliers, outlier_definition), [cluster]):
sequences.append(row[0])
cur.close()
return sequences
[docs]def get_cdr_rmsd_for_entry(db, pdb, original_chain, cdr, length, fullcluster):
#Make sure string to to unicode bullshit.
sele = [str(pdb), str(original_chain), str(cdr), length, str(fullcluster)]
cur = db.cursor()
rmsds = []
for row in cur.execute("select bb_rmsd_cdr_align FROM cdr_data WHERE PDB=? AND original_chain = ? AND CDR=? AND length=? and fullcluster=?", sele):
rmsds.append(row[0])
cur.close()
if len(rmsds) < 1:
return -1
else:
return rmsds[0]
[docs]def get_stem_rmsd_for_entry(db, pdb, original_chain, cdr, length, fullcluster):
#Make sure string to to unicode bullshit.
sele = [str(pdb), str(original_chain), str(cdr), length, str(fullcluster)]
cur = db.cursor()
rmsds = []
for row in cur.execute("select bb_rmsd_stem_align FROM cdr_data WHERE PDB=? AND original_chain = ? AND CDR=? AND length=? and fullcluster=?", sele):
rmsds.append(row[0])
cur.close()
if len(rmsds) < 1:
return -1
else:
return rmsds[0]