# Source code for jade.antibody.cdr_data.CDRData

import pandas
import os
from collections import defaultdict


class CDRDataInfo(object):
    """
    Simple class for holding and accessing Cluster and Length data for a particular Decoy.

    Values are stored in ``self.data``, keyed by CDR name
    (L1, L2, L3, H1, H2, H3).
    """

    def __init__(self, name, strategy, decoy):
        """
        :param name: Name of the kind of data held
        :param strategy: Strategy string the decoy belongs to
        :param decoy: Decoy identifier
        """
        self.name = name
        self.strategy = strategy
        self.decoy = decoy

        self.data = defaultdict()
        self.cdrs = ["L1", "L2", "L3", "H1", "H2", "H3"]

    def __repr__(self):
        # NOTE(review): the statement nesting of the original was lost with its
        # indentation; this keeps the reading where every present CDR is
        # written as "<cdr>:<repr(value)>" back to back and a single newline is
        # appended at the end -- confirm against the upstream file.
        entries = [cdr + ":" + repr(self.get_value_for_cdr(cdr))
                   for cdr in self.cdrs if self.has_data(cdr)]
        return "".join(entries) + "\n"

    def get_data(self):
        """
        Get the underlying CDR -> value mapping.
        """
        return self.data

    def get_value_for_cdr(self, cdr):
        """
        Get the value stored for a CDR. Raises KeyError if the CDR has no data.

        :param cdr: CDR name string
        """
        return self.data[cdr]

    def set_value_for_cdr(self, cdr, value):
        """
        Store a value for a CDR (same as set_value).
        """
        self.data[cdr] = value

    def set_data(self, data):
        """
        Dictionary for each CDR: L1, L2, L3, H1, H2, H3
        """
        self.data = data

    def set_value(self, cdr, value):
        """
        Store a value for a CDR (same as set_value_for_cdr).
        """
        self.data[cdr] = value

    def has_data(self, cdr):
        """
        Return True if a value is stored for the given CDR.

        :rtype: bool
        """
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        return cdr in self.data

    def is_camelid(self):
        """
        Return True if missing light chain data

        :rtype: bool
        """
        return not any(self.has_data(cdr) for cdr in ("L1", "L2", "L3"))
class CDRData(object):
    """
    Class holding cluster and length data from cluster or antibody features database.

    ``self.all_data`` maps strategy -> {decoy: per-decoy info object}, where the
    info objects are the CDRDataInfo instances built in _get_add_data.
    """

    # Order in which CDR columns are selected by _get_stmt (heavy chain first).
    _HEAVY_CDRS = ("H1", "H2", "H3")
    _LIGHT_CDRS = ("L1", "L2", "L3")

    def __init__(self, name, native_path, is_camelid = False):
        """
        :param name: Name of the data set; used as the suffix of dataframe columns
        :param native_path: Path handed to _setup_native_data (may be None)
        :param is_camelid: If True, only heavy chain CDRs (H1-H3) are used
        """
        self.is_camelid = is_camelid
        self.name = name
        self.all_data = defaultdict()
        if is_camelid:
            self.cdrs = ["H1", "H2", "H3"]
        else:
            self.cdrs = ["L1", "L2", "L3", "H1", "H2", "H3"]

        self.native_data = None
        self._setup_native_data(native_path)

    #Public

    def get_pandas_dataframe(self, cdrs=None, drop_dir_prefix = False):
        """
        Gets all data as a pandas dataframe.  Uses the set name as the score.
        You can then order, or select specific ones using the data frame.

        The frame is indexed by decoy and has a "strategy" column followed by
        one "<cdr>_<name>" column per requested CDR.

        :param cdrs: CDR names to include; defaults to self.cdrs when empty/None
        :param drop_dir_prefix: If True, only the basename of each decoy is used
        :rtype: pandas.DataFrame
        """
        if not cdrs:
            cdrs = self.cdrs

        value_columns = ["_".join([cdr, self.name]) for cdr in cdrs]

        temp_dict = defaultdict(list)
        for strategy in self.all_data:
            for decoy in self.all_data[strategy]:
                info = self.all_data[strategy][decoy]
                temp_dict["strategy"].append(strategy)
                if drop_dir_prefix:
                    temp_dict["decoy"].append(os.path.basename(decoy))
                else:
                    temp_dict["decoy"].append(decoy)
                for cdr, column in zip(cdrs, value_columns):
                    temp_dict[column].append(info.get_value_for_cdr(cdr))

        # list.extend() returns None, so the original `columns = columns.extend(...)`
        # silently discarded the column list. Build it explicitly, and from the
        # requested `cdrs` so a subset request does not ask for missing columns.
        columns = ["strategy", "decoy"] + value_columns
        df = pandas.DataFrame(dict(temp_dict), columns=columns)
        df = df.set_index("decoy")
        return self._convert_numeric_columns(df)

    @staticmethod
    def _convert_numeric_columns(df):
        """
        Convert non-numeric columns of df to numbers where possible, in place.

        Stands in for DataFrame.convert_objects(convert_numeric=True), which
        newer pandas no longer provides: unconvertible entries become NaN, but
        a column in which nothing converts is left untouched.

        :rtype: pandas.DataFrame
        """
        for column in df.columns:
            if pandas.api.types.is_numeric_dtype(df[column]):
                continue
            try:
                converted = pandas.to_numeric(df[column], errors="coerce")
            except (TypeError, ValueError):
                continue
            if not converted.isnull().all():
                df[column] = converted
        return df

    def add_data(self, strategy, con):
        """
        Function to add data to the class.  Needs to be defined in subclass.

        :param strategy: Strategy for which we are adding data
        :param con: Sqlite3 Connection object
        """
        pass

    def get_strategy_data(self, strategy):
        """
        Get data for each decoy

        :param strategy: Strategy string
        :rtype: mapping of decoy to CDRDataInfo
        """
        return self.all_data[strategy]

    def get_strategy_data_for_decoy(self, strategy, decoy):
        """
        Get the data of the decoy

        :param strategy: Strategy string
        :param decoy: decoy including path and suffix
        :rtype: CDRDataInfo
        """
        return self.all_data[strategy][decoy]

    def get_native_data(self):
        """
        Get the native data, or None if it has not been set.
        """
        return self.native_data

    def set_native_data_input_tag(self, con, input_tag):
        """
        Forward to _set_native_data_input_tag using the column name recorded
        by the last _get_add_data call.

        :param con: Sqlite3 Connection object
        :param input_tag: Input tag string
        """
        # self.column_name only exists after _get_add_data has run.
        self._set_native_data_input_tag(con, input_tag, self.column_name)

    def get_concatonated_map(self, cdr = None, decoy_list = None):
        """
        Returns the data of all strategies merged into one map.

        Default:
          decoy: CDRDataInfo
        If cdr is given:
          (cdr_value, decoy): CDRDataInfo, for sorting on cdr_value

        :param cdr: Optional CDR name whose value becomes part of the key
        :param decoy_list: Optional collection of decoys to restrict the result to
        :rtype: defaultdict
        """
        result_data = defaultdict()
        for strategy in self.all_data:
            for decoy in self.all_data[strategy]:
                if decoy_list and decoy not in decoy_list:
                    continue

                info = self.all_data[strategy][decoy]
                if cdr:
                    result_data[(info.get_value_for_cdr(cdr), decoy)] = info
                else:
                    result_data[decoy] = info

        return result_data

    #Private

    def _selected_cdrs(self):
        """
        CDR names in the order _get_stmt selects them (after the decoy column).
        """
        selected = list(self._HEAVY_CDRS)
        if not self.is_camelid:
            selected.extend(self._LIGHT_CDRS)
        return selected

    def _get_stmt(self, column_name):
        """
        Build the SELECT statement returning one row per structure:
        the decoy (structures.input_tag) followed by column_name of each CDR.

        :param column_name: Column of the cdr_clusters table to read
        :rtype: str
        """
        # column_name is spliced into the SQL text (identifiers cannot be bound
        # as parameters), so it must come from trusted code.
        selected = self._selected_cdrs()
        # The original statements list the light chain first in FROM/WHERE.
        joined = ([] if self.is_camelid else list(self._LIGHT_CDRS)) + list(self._HEAVY_CDRS)

        fields = ["structures.input_tag as decoy"]
        fields.extend("{0}.{1} as {0}".format(cdr, column_name) for cdr in selected)

        tables = ["cdr_clusters as " + cdr for cdr in joined]
        tables.append("structures")

        conditions = ["structures.struct_id = {0}.struct_id".format(cdr) for cdr in joined]
        conditions.extend("{0}.CDR = '{0}'".format(cdr) for cdr in joined)

        stmt = "SELECT " + ",".join(fields)
        stmt = stmt + " FROM " + ", ".join(tables)
        stmt = stmt + " WHERE " + " AND ".join(conditions)
        return stmt

    def _get_add_data(self, strategy, con, column_name):
        """
        Query column_name for every decoy and store the result for strategy.

        :param strategy: Strategy for which we are adding data
        :param con: Sqlite3 Connection object
        :param column_name: Column of the cdr_clusters table to read
        """
        self.column_name = column_name
        data = defaultdict()
        cur = con.cursor()

        # Single-argument call form prints the same text on Python 2 and 3.
        print("Adding "+column_name+" data for "+strategy)

        selected = self._selected_cdrs()
        for row in cur.execute(self._get_stmt(column_name)):
            d = CDRDataInfo(self.name, strategy, row[0])
            # row[0] is the decoy; CDR values follow in selection order.
            for index, cdr in enumerate(selected, 1):
                d.set_value(cdr, row[index])
            data[row[0]] = d

        self._add_data(strategy, data)

    def _add_data(self, strategy, decoy_data_map):
        """
        Add data in the form of a dict of decoy:DataTriple

        Any data previously stored for the strategy is replaced.
        """
        # NOTE(review): the original first created an empty map for unseen
        # strategies (via has_key) and then assigned decoy_data_map; read as an
        # unconditional assignment, which makes the pre-creation redundant.
        self.all_data[strategy] = decoy_data_map

    def _setup_native_data(self, pdb_path):
        """
        Nothing is loaded here; returns None.
        """
        if not pdb_path:
            return None

    def _set_native_data_from_biopose(self, pdb_path):
        """
        Placeholder; does nothing.
        """
        pass

    def _set_native_data(self, data):
        """
        Store the native data returned by get_native_data.
        """
        self.native_data = data

    def _set_native_data_input_tag(self, con, input_tag, column_name):
        """
        Placeholder; does nothing in this class.
        """
        pass