Source code for jade.basic.plotting.error_bars

import seaborn as sea
import pandas
import matplotlib as mpl
import numpy

from jade.basic.pandas.stats import calculate_stddev_binomial_distribution
from jade.basic.pandas.stats import calculate_stddev



[docs]def calculate_set_errorbars_hist(ax, data, x, y,
                                 binomial_distro = True, total_column = 'total_entries', y_freq_column = None,
                                 x_order = None, hue_order = None,
                                 hue=None, caps=True, color='k', linewidth=.75, base_columnwidth=.8, full = True):
    """
    Calculates the standard deviation of the data, sets erorr bars for a bar chart.
    Default base_columnwidth for seaborn plots is .8

    Optionally give x_order and/or hue_order for the ordering of the columns.  Make sure to pass this while plotting.
    Note:
     If Hue is enabled, this base is divided by the number of hue_names for the final width used for plotting.

    :param ax: mpl.Axes
    :param data: pandas.DataFrame
    :param x: str
    :param y: str
    :param binomial_distro: bool
    :param total_column: str
    :param y_freq_column: str
    :param x_order: list
    :param hue_order: list
    :param hue: str
    :param caps: bool
    :param color: str
    :param linewidth: float
    :param base_columnwidth:  float
    :param full: bool
    :rtype: None
    """

    def get_sd(errors, x_name, hue_name = None):
        if hue:
            return errors[errors[x] == x_name][errors[hue] == hue_name][errors['y'] == y].iloc[0]['SD']
        else:
            return errors[errors[x] == x_name][errors['y'] == y].iloc[0]['SD']

    def get_mean(x_name, hue_name = None):
        if hue:
            # print "WTF?" + repr(data[data[x] == x_name][data[hue] == hue_name][y])
            f = data[data[x] == x_name][data[hue] == hue_name][y]
            return sum(float(embedding) for embedding in f) / len(f)
            # return data[data[x] == x_name][data[hue] == hue_name][y], dtype=float).mean()
            # return data[data[x] == x_name][data[hue] == hue_name][y].mean()
        else:
            return data[data[x] == x_name][y].mean()

    # This makes it easier for frequencies of x/100, instead of passing two columns
    #   - one to calc mean (y), and one for freq.


    if not y_freq_column:
        y_freq_column = y

    if binomial_distro:
        error_dfs = calculate_stddev_binomial_distribution(data, x, y_freq_column, total_column, y, hue)
    else:
        error_dfs = calculate_stddev(data, x, y, hue)

    if not hue and hue in error_dfs.columns:

        error_dfs = error_dfs[error_dfs[hue] == 'ALL']

    # Make sure ALL is not plotted.

    # Need X columns, and y data to plot

    # Need to only plot upper bars
    # (_, caps1, _) = ax.errorbar(x = [(.2)], y=[.25], yerr = [(0,), (.5,)], ls = 'None', capsize=5, color ='k', lw=1)


    x_indexes = []
    y_means = []
    yerr = []
    zeros = []

    x_names = sea.utils.categorical_order(data[x], x_order)

    # Start the coordinates at Zero, then minus i+base/2 from everything to get it centered at each i.
    if hue:
        hue_names = sea.utils.categorical_order(data[hue], hue_order)

        w = ( base_columnwidth /float(len(hue_names)))
        base_w = base_columnwidth

        ##Check Ordering here, should be good?
        for i, x_name in enumerate(x_names):

            for z, hue_name in enumerate(hue_names):
                print x_name + " "+ hue_name

                index = (w * z) + (w / 2) - base_w / float(2) + i
                x_indexes.append(index)
                # print "Index: "+repr(index)
                print "SD: " + repr(get_sd(error_dfs, x_name, hue_name))
                # print "mean: " + repr(get_mean(x_name, hue_name))
                yerr.append(get_sd(error_dfs, x_name, hue_name))
                y_means.append(get_mean(x_name, hue_name))


    else:
        for i, x_name in enumerate(x_names):
            x_indexes.append(i)
            yerr.append(get_sd(error_dfs, x_name))
            y_means.append(get_mean(x_name))

    zeros = list(numpy.zeros(len(x_indexes)))

    if full:
        (_, caps_list, _) = ax.errorbar(x=x_indexes, y=y_means, yerr=yerr,
                                        ls='None', capsize=5, color=color, lw=linewidth)
    else:
        (_, caps_list, _) = ax.errorbar(x=x_indexes, y=y_means, yerr=[zeros, yerr],
                                        ls='None', capsize=5, color=color, lw=linewidth)
    if caps:
        for cap in caps_list:
            cap.set_linewidth(linewidth)
            cap.set_markeredgewidth(linewidth)


[docs]def calculate_set_errorbars_scatter(ax, data, x, y,
                                    binomial_distro=False, total_column='total_entries',
                                    caps=False, color='k', lw=1.5):
    """
    (Untested) - Calculates the standard deviation of the data, sets error bars for a typical scatter plot
    """

    if binomial_distro:
        error_dfs = calculate_stddev_binomial_distribution(data, x, y, total_column, hue=None)
    else:
        error_dfs = calculate_stddev(data, x, y, hue=None)