Source code for jade.basic.pandas.stats

import math
from collections import defaultdict
import pandas




def calculate_stddev(df, x, y, hue=None):
    """
    Calculate standard deviations of a numerical column over X and Hue categories.

    For every unique value of ``df[x]`` one row is produced holding
    ``Series.std()`` of column ``y`` over the matching rows.  If ``hue`` is
    given, a ``hue`` column is added: the per-``x`` rows carry ``'ALL'`` in it,
    one extra row is produced for every (``x``, ``hue``) combination, and
    finally one row per unique ``hue`` value is produced over all of ``x``
    (those rows carry ``'ALL'`` in the ``x`` column).

    Example DataFrame output rows (x='exp', y='length_recovery_freq', hue='cdr')::

              SD  cdr                   exp                     y
        20  6.739596   H2                   ALL  length_recovery_freq
        21  7.373650   H2  min.remove_antigen-F  length_recovery_freq
        22  6.400637  ALL  min.remove_antigen-T  length_recovery_freq

    :param df: pandas.DataFrame
    :param x: str, name of the category column
    :param y: str, name of the numerical column; also written verbatim into
        the output column ``'y'``
    :param hue: str, optional name of a second category column
    :rtype: pandas.DataFrame with columns ``x``, ``'SD'``, ``'y'`` and,
        when ``hue`` is given, ``hue``
    """
    flat_dict = defaultdict(list)

    for x_name in df[x].unique():
        local = df[df[x] == x_name]

        # Overall SD for this x category.
        flat_dict[x].append(x_name)
        flat_dict['SD'].append(local[y].std())
        flat_dict['y'].append(y)

        if hue:
            flat_dict[hue].append('ALL')

            for hue_name in df[hue].unique():
                # Build the mask from `local`, not `df`, so that its index
                # matches the frame being filtered (no reindexing needed).
                local2 = local[local[hue] == hue_name]
                flat_dict[x].append(x_name)
                flat_dict[hue].append(hue_name)
                flat_dict['SD'].append(local2[y].std())
                flat_dict['y'].append(y)

    # Calculate Hue overall SDs (across every x category).
    if hue:
        for hue_name in df[hue].unique():
            local = df[df[hue] == hue_name]
            flat_dict[x].append('ALL')
            flat_dict[hue].append(hue_name)
            flat_dict['SD'].append(local[y].std())
            flat_dict['y'].append(y)

    stddev_df = pandas.DataFrame.from_dict(flat_dict)
    return stddev_df
def _binomial_stddev(subset, y, total_column):
    """Return sqrt(mean * (1 - mean / total)) for the rows of ``subset``.

    ``mean`` is the mean of column ``y`` and ``total`` is the sum of column
    ``total_column`` over ``subset``.
    """
    mean = subset[y].mean()
    total = subset[total_column].sum()
    return math.sqrt(mean * (1 - mean / float(total)))


def calculate_stddev_binomial_distribution(df, x, y, total_column, y_mean_column, hue=None):
    """
    Calculate standard deviations for a binomial distribution (like experiment
    True/False values) over X and Hue categories.  Typically used for bar-plots.

    For each group of rows the deviation is ``sqrt(mean * (1 - mean / total))``
    where ``mean`` is the mean of column ``y`` and ``total`` is the sum of
    column ``total_column`` within that group.

    One row is produced for every unique value of ``df[x]``.  If ``hue`` is
    given, a ``hue`` column is added: the per-``x`` rows carry ``'ALL'`` in it,
    one extra row is produced for every (``x``, ``hue``) combination, and
    finally one row per unique ``hue`` value is produced over all of ``x``
    (those rows carry ``'ALL'`` in the ``x`` column).

    Example DataFrame output rows (x='exp', y='length_recovery_freq', hue='cdr')::

              SD  cdr                   exp                     y
        20  6.739596   H2                   ALL  length_recovery_freq
        21  7.373650   H2  min.remove_antigen-F  length_recovery_freq
        22  6.400637  ALL  min.remove_antigen-T  length_recovery_freq

    :param df: pandas.DataFrame
    :param x: str, name of the category column
    :param y: str, name of the column whose mean is used
    :param total_column: str, name of the column that is summed to get the total
    :param y_mean_column: str, label written verbatim into the output column ``'y'``
    :param hue: str, optional name of a second category column
    :rtype: pandas.DataFrame with columns ``x``, ``'SD'``, ``'y'`` and,
        when ``hue`` is given, ``hue``
    """
    flat_dict = defaultdict(list)

    for x_name in df[x].unique():
        local = df[df[x] == x_name]

        # Overall SD for this x category.
        flat_dict[x].append(x_name)
        flat_dict['SD'].append(_binomial_stddev(local, y, total_column))
        flat_dict['y'].append(y_mean_column)

        if hue:
            flat_dict[hue].append('ALL')

            for hue_name in df[hue].unique():
                # Build the mask from `local`, not `df`, so that its index
                # matches the frame being filtered (no reindexing needed).
                local2 = local[local[hue] == hue_name]
                flat_dict[x].append(x_name)
                flat_dict[hue].append(hue_name)
                flat_dict['SD'].append(_binomial_stddev(local2, y, total_column))
                flat_dict['y'].append(y_mean_column)

    # Calculate Hue overall SDs (across every x category).
    if hue:
        for hue_name in df[hue].unique():
            local = df[df[hue] == hue_name]
            flat_dict[x].append('ALL')
            flat_dict[hue].append(hue_name)
            flat_dict['SD'].append(_binomial_stddev(local, y, total_column))
            flat_dict['y'].append(y_mean_column)

    stddev_df = pandas.DataFrame.from_dict(flat_dict)
    return stddev_df