# Source code for jade.antibody.decoy_data.DecoyData

import math
import os

import pandas

from jade.basic.sql.StatementCreator import *


########################################################################################################################
###   DecoyData
########################################################################################################################
class DecoyDataTriple(object):
    """
    Simple record used in place of a tuple to carry one decoy's data.

    Attributes set by the constructor:
      strategy, struct_id, decoy, score  - stored under the same names
      score_type                         - holds the ``out_name`` argument
      raw_name                           - stored under the same name
    """

    def __init__(self, strategy, struct_id, decoy, score, out_name, raw_name):
        self.strategy, self.struct_id = strategy, struct_id
        self.decoy, self.score = decoy, score
        # Note the rename: the out_name argument is exposed as score_type.
        self.score_type, self.raw_name = out_name, raw_name
class DecoyData(object):
    """
    Holds the values of one named score for a set of decoys, grouped by strategy.

    Data is kept in ``all_data`` as [ strategy ][ decoy ] = DecoyDataTriple.
    Subclasses are expected to override :meth:`add_data` to pull their values
    out of a database connection (typically through :meth:`_get_add_data`).

    Ordering: decoys are sorted on (score, decoy) tuples, ascending by default
    and descending when ``reverse_top`` is true.
    """

    def __init__(self, name, has_real_values=True, reverse_top=False):
        """
        :param name: Name of this data set; also used as the score column name
            in :meth:`get_pandas_dataframe`.
        :param has_real_values: Stored and returned by :meth:`has_real_values`.
        :param reverse_top: If true, "top" means the largest values
            (sorting is done in descending order).
        """
        self.name = name
        self.interface = 'LH_A'
        self._has_real_values = has_real_values
        self.reverse_top = reverse_top

        # all_data is dict of [ strategy ][ input_tag (decoy) ][ DecoyDataTriple ]
        self.all_data = defaultdict()

        self.filters = None
        self.filter_name = None

    def get_pandas_dataframe(self, top_n=None, drop_dir_prfix=False):
        """
        Gets all data as a pandas dataframe.  Uses the set name as the score column.
        You can then order, or select specific ones using the data frame.

        :param top_n: If given (and non-zero), only the top_n decoys of each
            strategy are included.
        :param drop_dir_prfix: If true, decoy names are reduced to their
            basename (os.path.basename).
        :return: pandas.DataFrame indexed by decoy, with columns
            "strategy" and <self.name>
        """
        temp_dict = defaultdict(list)
        for strategy in sorted(self.all_data):
            for decoy in self.get_ordered_decoy_list(strategy, top_n):
                triple = self.all_data[strategy][decoy]
                temp_dict["strategy"].append(strategy)
                if drop_dir_prfix:
                    temp_dict["decoy"].append(os.path.basename(decoy))
                else:
                    temp_dict["decoy"].append(decoy)
                temp_dict[self.name].append(float(triple.score))

        columns = ["strategy", "decoy", self.name]
        df = pandas.DataFrame(temp_dict, columns=columns)
        df.index = df["decoy"]
        del df["decoy"]

        # DataFrame.convert_objects was deprecated and later removed from
        # pandas.  Keep using it where it still exists (unchanged behavior on
        # old installs); otherwise fall back to infer_objects.  The score
        # column is already built from float() values above.
        if hasattr(df, "convert_objects"):
            df = df.convert_objects(convert_numeric=True)
        else:
            df = df.infer_objects()
        return df

    def set_interface(self, interface):
        """
        Set the Antibody-Antigen interface - used mainly for H_A vs LH_A
        """
        self.interface = interface

    def add_filters(self, filters, filter_name):
        """
        Store data filters and the name used to tag output produced with them.

        :param filters: Iterable of filters; each one is handed to the
            statement creator's add_data_filter() in :meth:`_get_add_data`.
        :param filter_name: Suffix appended to the name by :meth:`get_outname`.
        """
        self.filters = filters
        self.filter_name = filter_name

    def get_outname(self):
        """
        Get the output name: the plain name, or name_filtername if filters are set.

        :rtype: str
        """
        if not self.filters:
            return self.name
        return self.name + "_" + self.filter_name

    def has_real_values(self):
        """
        Return the has_real_values flag given at construction.
        """
        return self._has_real_values

    def add_data(self, strategy, con):
        """
        Baseclass method - needs to be overridden in subclass

        :param strategy: Strategy to which we are adding data.
        :param con: Sqlite3 Connection object
        """
        pass

    def _get_add_data(self, strategy, stmt_creator, con):
        """
        Run the statement built by stmt_creator and store the rows for strategy.

        Each row's first three columns are passed to DecoyDataTriple as
        struct_id, decoy and score; the decoy is converted with str() and is
        also used as the key of the resulting map.

        :param strategy: Strategy to which we are adding data.
        :param stmt_creator: Object providing add_data_filter() and
            create_statement() (a StatementCreator).
        :param con: Connection object providing cursor()
        """
        if self.filters:
            for data_filter in self.filters:
                stmt_creator.add_data_filter(data_filter)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Getting data for: " + self.name)
        stmt = stmt_creator.create_statement()

        data = defaultdict()
        cur = con.cursor()
        for row in cur.execute(stmt):
            decoy = str(row[1])
            data[decoy] = DecoyDataTriple(strategy, row[0], decoy, row[2],
                                          self.get_outname(), self.name)

        self._add_data(strategy, data)

    def _add_data(self, strategy, decoy_data_map):
        """
        Add data in the form of a dict of decoy:DataTriple.

        Any data previously held for the strategy is replaced.
        """
        self.all_data[strategy] = decoy_data_map

    def get_data_for_decoy(self, strategy, decoy):
        """
        Get the held data for the decoy

        :param strategy: Strategy Name
        :param decoy: Decoy name (with dir and suffix)
        :rtype: DecoyDataTriple
        """
        return self.all_data[strategy][decoy]

    def get_top_x_percent_cutoff_value(self, strategy, top_percent):
        """
        Get the score of the last decoy that falls within the top X percent.

        :param strategy: Strategy Name
        :param top_percent: Percentage of decoys to keep.  Values below 1.0
            are treated as fractions and multiplied by 100 (0.05 -> 5 percent).
        :return: Score of the last decoy inside the top subset.
        :raises IndexError: If the strategy holds no decoys.
        """
        if top_percent < 1.0:
            top_percent = top_percent * 100

        decoys = self.get_ordered_decoy_list(strategy)
        if not decoys:
            raise IndexError("No decoys held for strategy: " + repr(strategy))

        # Number of entries in the top X% is N * X / 100.  round() guards
        # against float noise (e.g. 0.07 * 100 == 7.000000000000001) pushing
        # ceil() up by one.
        last_entry = int(math.ceil(round(len(decoys) * float(top_percent) / 100.0, 9)))
        # Always use at least one entry and never more than we have.
        last_entry = min(max(last_entry, 1), len(decoys))

        print("Using top " + repr(last_entry) + " entries as subset")
        return self.get_data_for_decoy(strategy, decoys[last_entry - 1]).score

    #############################################################################

    def get_strategy_data(self, strategy, by_score_tuple=False):
        """
        For a particular strategy:
          Return a dictionary of decoy:DataTriple or
          if by_score_tuple: (score, decoy) = DataTriple
        """
        if not by_score_tuple:
            return self.all_data[strategy]

        out_dict = defaultdict()
        for triple in self.all_data[strategy].values():
            out_dict[(triple.score, triple.decoy)] = triple
        return out_dict

    def get_top_strategy_data(self, strategy, top_n, by_score_tuple=False):
        """
        For a particular strategy:
          Return a dictionary of decoy:DataTriple or
          if by_score_tuple: (score, decoy) = DataTriple

        For only the top scoring decoys
        """
        data = self.get_strategy_data(strategy, True)
        return self._select_top(data, int(top_n), by_score_tuple)

    def get_top_all_data(self, top_n, by_score_tuple=False):
        """
        Over all the strategies:
          Return a dictionary of decoy:DataTriple or
          if by_score_tuple: (score, decoy) = DataTriple

        For only the top scoring decoys
        """
        data = self.get_concatonated_map(True)
        return self._select_top(data, int(top_n), by_score_tuple)

    def get_ordered_decoy_list(self, strategy, top_n=None):
        """
        Get an ordered array of decoy names by energy for a particular strategy

        :param top_n: If given (and non-zero), limit the list to the top_n decoys.
        :rtype: list of str
        """
        if top_n:
            data = self.get_top_strategy_data(strategy, top_n, True)
        else:
            data = self.get_strategy_data(strategy, True)
        return self._ordered_decoys(data)

    def get_ordered_decoy_list_all(self, top_n=None):
        """
        Get an ordered array of decoy names by energy over all the strategies

        :param top_n: If given (and non-zero), limit the list to the top_n decoys.
        :rtype: list of str
        """
        if top_n:
            all_data = self.get_top_all_data(top_n, True)
        else:
            all_data = self.get_concatonated_map(True)
        return self._ordered_decoys(all_data)

    def get_concatonated_map(self, by_score_tuple=False):
        """
        Returns a defaultdict merging every strategy:
          Default: decoy: DecoyDataTriple
          by_score_tuple (for sorting on score and having possible redundancy)
           (score, decoy): DecoyDataTriple
        """
        result_data = defaultdict()
        for strategy in self.all_data:
            for decoy in self.all_data[strategy]:
                triple = self.all_data[strategy][decoy]
                if by_score_tuple:
                    result_data[(triple.score, decoy)] = triple
                else:
                    result_data[decoy] = triple
        return result_data

    def _select_top(self, data, top_n, by_score_tuple):
        """
        Keep the first top_n entries of a (score, decoy)-keyed map, in sort order.

        The result is keyed by the same tuples if by_score_tuple, else by decoy.
        """
        top_points = sorted(data.keys(), reverse=bool(self.reverse_top))[0:top_n]
        top_data = defaultdict()
        for point in top_points:
            key = point if by_score_tuple else point[1]
            top_data[key] = data[point]
        return top_data

    def _ordered_decoys(self, data):
        """
        Return the decoy names of a (score, decoy)-keyed map in sort order.
        """
        points = sorted(data.keys(), reverse=bool(self.reverse_top))
        return [data[tup].decoy for tup in points]