Source code for jade.RAbD_BM.benchmark_plotting

#!/usr/bin/env python

import sqlite3
import os
import sys
import re
import numpy
import matplotlib.pyplot as plot
from pylab import polyfit, poly1d

from collections import defaultdict

[docs]class NativeCDRData: def __init__(self, datatype, native_path, data_table = "cdr_metrics"): self.native_path = native_path self.db = sqlite3.connect(native_path) self.datatype = datatype self.data_table = data_table self.data = defaultdict() self.setup_data(datatype)
[docs] def setup_data(self, datatype): cur = self.db.cursor() query = "SELECT input_tag, CDR, "+datatype+" FROM "+self.data_table+", structures WHERE structures.struct_id = "+self.data_table+".struct_id" print query for row in cur.execute(query): pdbid = row[0]; cdr = row[1]; data = row[2] if not self.data.has_key(pdbid): self.data[pdbid] = defaultdict()
self.data[pdbid][cdr] = float(data)
[docs] def get_all_data(self):
return self.data
[docs] def get_data(self, pdbid, cdr):
return self.data[pdbid][cdr]
[docs]class RecoveryCDRData: def __init__(self, db_paths, type = "length"): self.type = type self.table = "all_"+type self.db_paths = db_paths self.recovery_data = defaultdict() self.risk_ratio_data = defaultdict() self.setup_data()
[docs] def setup_data(self): self.connections = [] for db_path in self.db_paths: nameSP = os.path.basename(db_path).split(".") #exp_name = nameSP[0]+"_"+nameSP[1]+"_"+nameSP[3] exp_name = nameSP[1]+"_"+nameSP[3] print exp_name self.recovery_data[exp_name] = defaultdict() self.risk_ratio_data[exp_name] = defaultdict() con = sqlite3.connect(db_path) cur = con.cursor() query = "SELECT native, CDR, top_rec, top_rr FROM "+self.table print query for row in cur.execute(query): pdb = row[0]; cdr = row[1]; rec = row[2]; rr = row[3] if rr == None: rr = 0 #print repr(row) if not self.recovery_data[exp_name].has_key(pdb): self.recovery_data[exp_name][pdb] = defaultdict() self.risk_ratio_data[exp_name][pdb] = defaultdict() self.recovery_data[exp_name][pdb][cdr] = float(rec)
self.risk_ratio_data[exp_name][pdb][cdr] = float(rr)
[docs]class PlotData: def __init__(self,native_data, rec_data): if isinstance(native_data, NativeCDRData):pass if isinstance(rec_data, RecoveryCDRData): pass self.native_data = native_data self.rec_data = rec_data
[docs] def get_xy_of_exp(self, exp, rec = True, skip_H3 = True): x = [] y = [] for pdb in self.native_data.data.keys(): for cdr in self.native_data.data[pdb].keys(): if cdr == "H3" and skip_H3: continue; pdb_cdr = pdb.split(".")[0]+"_"+cdr x0 = self.native_data.data[pdb][cdr] if rec: y0 = self.rec_data.recovery_data[exp][pdb][cdr] else: y0 = self.rec_data.risk_ratio_data[exp][pdb][cdr] x.append(x0) y.append(y0)
return x, y
[docs] def plot_data(self, outname, rec = True): for exp in self.rec_data.recovery_data.keys(): x, y = self.get_xy_of_exp(exp, rec) plot.plot(x, y, 'o', label = exp) fitx = polyfit(x,y,1) fit_fnx=poly1d(fitx) #plot.plot(x,fit_fnx(x),'b-') # Regr line (x,fit_fn(x)) ylab = self.rec_data.type+" " if rec: ylab = ylab+"Recovery" else: ylab = ylab+"Risk Ratio" plot.ylabel(ylab) plot.xlabel(self.native_data.datatype) plot.legend()
plot.show() if __name__ == "__main__": outname = "testing" type = "ag_ab_contacts_total" #type = "ag_ab_dSASA" native_db = sys.argv[1] dbs = sys.argv[2:] native_data = NativeCDRData(type, native_db) rec_data = RecoveryCDRData(dbs, "cluster") plotter = PlotData(native_data, rec_data) plotter.plot_data(outname, False)