Source code for jade.nnk.NNKEnrichments

import sys, os, numpy
from collections import defaultdict
from copy import deepcopy
import pandas

from jade.nnk import NNKAbMaturation


class NNKEnrichments(object):
    """
    Simple class that holds all the enrichment data for a particular class, antibody, and antigen.

    After construction ``self.df`` holds the natural log of the 2D frequency
    data returned by the loader (columns are looked up by ``str(position)``,
    rows by amino-acid code), and ``self.data_1D`` holds the loader's 1D data
    unchanged.
    """
    def __init__(self, data_dir, zeros = -2.0, class_type = 'VRC01', antibody = 'glCHA31', antigen = 'GT81', sort = 'S1'):
        """

        :param data_dir: the directory with sort data. Each set of data + antibody should be in a separate directory (Ex: glCHA31, etc.)
        :param zeros: The value stored wherever the enrichment is zero (whose log is -inf).
                      This is the natural log of the enrichment, so the default of -2.0
                      corresponds to an enrichment of about 0.135 (e**-2).
        :param class_type: Accepted for interface compatibility; not used by this constructor.
        :param antibody: Antibody name handed to the data loader.
        :param antigen: Antigen name used to select the data.
        :param sort: Sort name used to select the data.
        :raises TypeError: if the loader does not return a pandas.DataFrame.
        """

        data_loader = NNKAbMaturation.GetNNKData(data_dir, antibody)

        frequencies = data_loader.get_2D_data_freq_nnk_data(antigen=antigen, sort=sort)

        # Validate before touching any DataFrame API, and fail loudly rather
        # than exiting silently with a success status.
        if not isinstance(frequencies, pandas.DataFrame):
            raise TypeError("Expected a pandas.DataFrame of 2D NNK data, got %s"
                            % type(frequencies).__name__)

        # Zero frequencies are expected; log(0) = -inf is replaced just below,
        # so the divide-by-zero warning is only noise.
        with numpy.errstate(divide='ignore'):
            logged = numpy.log(frequencies.astype(float))

        # -numpy.inf instead of numpy.NINF, which NumPy 2.0 removed.
        self.df = logged.replace(-numpy.inf, float(zeros))

        self.data_1D = data_loader.get_1d_data_tuple_freq_nnk_data(antigen=antigen, sort=sort)

    def max(self, position):
        """

        Get the maximum enrichment at a particular position, and the amino acid

        Note: If several amino acids share the maximum, only the first one is returned.

        :param position: Position; converted with str() to select the column.
        :return: tuple of (enrichment value, row label of the maximum)
        """

        max_index = self.df[str(position)].idxmax()
        return self.value(position, max_index), max_index

    def min(self, position):
        """

        Get the minimum enrichment at a particular position, and the amino acid

        Note: There may be multiple minimum amino acids - only the first one is returned.

        :param position: Position; converted with str() to select the column.
        :return: tuple of (enrichment value, row label of the minimum)
        """

        min_index = self.df[str(position)].idxmin()

        return self.value(position, min_index), min_index

    def value(self, position, three_letter_code):
        """

        Get the enrichment value of a particular position and code.

        :param position: Position; converted with str() to select the column.
        :param three_letter_code: Row label of the amino acid.
        :return: the stored (log) enrichment
        """
        # DataFrame.get_value was removed in pandas 1.0; .at is the scalar
        # label-based accessor and also exists in older pandas.
        return self.df.at[three_letter_code, str(position)]

    def mean(self, position):
        """
        Get the mean enrichment over all rows at a particular position.

        :param position: Position; converted with str() to select the column.
        :return: mean of that column
        """
        return numpy.mean(self.df[str(position)])

    def calculate_factors(self):
        """
        Return a dataframe of calculated factors

        Factor is Sergeys definition:

         (P-M)/MAD = scaling factor; where
            P - total propensity for amino acid at this position,
            M - mean total propensity for all amino acids at this position
            MAD - mean average deviation for propensities at this position.

        Positions whose MAD is zero (all values identical) get a factor of 0
        to avoid dividing by zero.  ``self.df`` is not modified.

        :rtype: pandas.DataFrame
        """

        factors = deepcopy(self.df)

        for pos in self.df.columns:
            centered = self.df[pos] - self.df[pos].mean()
            mad = numpy.absolute(centered).mean()
            if mad == 0:
                factors[pos] = 0.0
            else:
                factors[pos] = centered / mad

        return factors

def combine_enrichments( list_of_nnk_enrichments, additive_combine = False):
    """
    Combine a list of nnk_enrichments into a single one.

    With exactly one element, that same object is returned (not a copy).
    Otherwise a deep copy of the first element is made and its ``df`` and
    ``data_1D`` are replaced by the combination over all elements: rows of the
    2D frames are grouped by index label and either summed or averaged, and
    the 1D data are summed or averaged element-wise along the first axis.

    @type list_of_nnk_enrichments: [NNKEnrichments]
    :param additive_combine: sum the data if True, otherwise take the mean.
    :raises IndexError: if the list is empty.
    :rtype: NNKEnrichments
    """
    # Single-argument call form prints the same under Python 2 and Python 3.
    print("Len: "+repr(len(list_of_nnk_enrichments)))

    if len(list_of_nnk_enrichments) == 0:
        # Same exception type the bare [0] lookup used to raise, with a clear message.
        raise IndexError("combine_enrichments requires at least one NNKEnrichments object")

    if len(list_of_nnk_enrichments) == 1:
        return list_of_nnk_enrichments[0]

    new_enrich = deepcopy(list_of_nnk_enrichments[0])

    # Stack every 2D frame; rows sharing an index label are grouped together below.
    grouped_2D = pandas.concat([enrich.df for enrich in list_of_nnk_enrichments]).groupby(level=0)
    stacked_1D = numpy.array([enrich.data_1D for enrich in list_of_nnk_enrichments])

    if additive_combine:
        new_enrich.df = grouped_2D.sum()
        new_enrich.data_1D = numpy.sum(stacked_1D, axis=0)
    else:
        new_enrich.df = grouped_2D.mean()
        new_enrich.data_1D = numpy.mean(stacked_1D, axis=0)

    return new_enrich