Skip to content

Commit

Permalink
Proportion plots (#234)
Browse files Browse the repository at this point in the history
* DOC: Updating changelog

* ENH: Adding in proportion_plots

* DOC: Updating changelog

* TST: Adding test case for tick ordering

* TST: Adding in subplot check

* Getting rid of pseudocount

* renaming stuff

* refactoring

* Removing tests for coordinates, since they are unstable

* TST: Adding test for the last column, since it is stable

* fixing travis

* removing biom dependency

* pep8

* kick conda

* DOC: fixing radial test.  Also adding more documentation to proportion_plot

* Upgrade bokeh

* pep8

* flake8
  • Loading branch information
mortonjt authored Oct 13, 2017
1 parent 0f1d7df commit 716ac90
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ install:
- conda install --yes -n test_env cython
- source activate test_env
- pip install -r ci/pip_requirements.txt
- pip install -e '.[q2]'
- pip install -e .
script:
- WITH_COVERAGE=TRUE make all
- if [ ${MAKE_DOC} ]; then make -C doc clean html; fi
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# gneiss changelog

## Version 0.4.1
* Added `proportion_plot` to plot the mean proportions within a single balance [#234](https://github.com/biocore/gneiss/pull/234)
* Added colorbar for heatmap
* Decoupled qiime2 from gneiss. All qiime2 commands have now been ported to [q2-gneiss](https://github.com/qiime2/q2-gneiss)

Expand Down
5 changes: 3 additions & 2 deletions gneiss/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@

from ._heatmap import heatmap
from ._radial import radialplot
from ._decompose import balance_boxplot, balance_barplots
from ._decompose import balance_boxplot, balance_barplots, proportion_plot


__all__ = ["heatmap", "radialplot", "balance_boxplot", "balance_barplots"]
__all__ = ["heatmap", "radialplot", "balance_boxplot",
"balance_barplots", "proportion_plot"]
170 changes: 170 additions & 0 deletions gneiss/plot/_decompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,173 @@ def balance_barplots(tree, balance_name, header, feature_metadata,
ax_num.set_xlim([0, max([num_.max().values[1],
denom_.max().values[1]])])
return ax_num, ax_denom


def proportion_plot(table, metadata, category, left_group, right_group,
num_features, denom_features,
feature_metadata=None,
label_col='species',
num_color='#105d33', denom_color='#b0d78e',
axes=(None, None)):
""" Plot the mean proportions of features within a balance.
This plots the numerator and denominator components within a balance.
Parameters
----------
table : pd.DataFrame
Table of relative abundances.
metadata : pd.DataFrame
Samples metadata
spectrum : pd.Series
The component from partial least squares.
feature_metadata : pd.DataFrame
The metadata associated to the features.
category : str
Name of sample metadata category.
left_group : str
Name of group within sample metadata category to plot
on the left of the plot.
right_group : str
Name of group within sample metadata category to plot
on the right of the plot.
label_col : str
Column in the feature metadata table to summarize by.
num_color : str
Color to plot the numerator.
denom_color : str
Color to plot the denominator.
Returns
-------
ax_num : matplotlib.pyplot.Axes
Matplotlib axes for the numerator bars
ax_denom : matplotlib.pyplot.Axes
Matplotlib axes for the denominator bars
Examples
--------
First we'll want to set up the main objects to pass into the plot.
For starters, we'll pass in the feature table, metadata and
feature_metadata.
>>> table = pd.DataFrame({
... 'A': [1, 1.2, 1.1, 2.1, 2.2, 2],
... 'B': [9.9, 10, 10.1, 2, 2.4, 2.1],
... 'C': [5, 3, 1, 2, 2, 3],
... 'D': [5, 5, 5, 5, 5, 5],
... }, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
>>> feature_metadata = pd.DataFrame({
... 'A': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'],
... 'B': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'],
... 'C': ['k__poo', 'p__tar', 'c__', 'o__', 'f__', 'g__', 's__'],
... 'D': ['k__poo', 'p__far', 'c__', 'o__', 'f__', 'g__', 's__']
... }, index=['kingdom', 'phylum', 'class', 'order', 'family',
... 'genus', 'species']).T
>>> metadata = pd.DataFrame({
... 'groups': ['X', 'X', 'X', 'Y', 'Y', 'Y'],
... 'dry': [1, 2, 3, 4, 5, 6]},
... index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
Then we can specify which specific features to visualize and plot.
>>> num_features = ['A', 'B']
>>> denom_features = ['C', 'D']
>>> ax1, ax2 = proportion_plot(table, metadata, 'groups', 'X', 'Y',
... num_features, denom_features,
... feature_metadata, label_col='phylum')
Since this method will return the raw matplotlib object, labels, titles,
ticks, etc can directly modified using this object.
"""
import seaborn as sns
if axes[0] is None or axes[1] is None:
f, (ax_num, ax_denom) = plt.subplots(1, 2)
else:
ax_num, ax_denom = axes[0], axes[1]

fname = 'feature'
ptable = table.apply(lambda x: x / x.sum(), axis=1)
num_df = ptable[num_features]

denom_df = ptable[denom_features]

# merge together metadata and sequences
num_data_ = pd.merge(metadata, num_df,
left_index=True, right_index=True)
denom_data_ = pd.merge(metadata, denom_df,
left_index=True, right_index=True)

# merge the data frame, so that all of the proportions
# are in their own separate column
num_data = pd.melt(num_data_, id_vars=[category],
value_vars=list(num_df.columns),
value_name='proportion', var_name=fname)
num_data['part'] = 'numerator'
denom_data = pd.melt(denom_data_, id_vars=[category],
value_vars=list(denom_df.columns),
value_name='proportion', var_name=fname)
denom_data['part'] = 'denominator'
data = pd.concat((num_data, denom_data))
if feature_metadata is not None:
num_feature_metadata = feature_metadata.loc[num_df.columns,
label_col]
denom_feature_metadata = feature_metadata.loc[denom_df.columns,
label_col]
# order of the ids to plot
order = (list(num_feature_metadata.index) +
list(denom_feature_metadata.index))
else:
order = (list(num_df.columns) +
list(denom_df.columns))

less_df = data.loc[data[category] == left_group].dropna()

sns.barplot(x='proportion',
y=fname,
data=less_df,
color=denom_color,
order=order,
ax=ax_denom)
more_df = data.loc[data[category] == right_group].dropna()

sns.barplot(x='proportion',
y=fname,
data=more_df,
color=num_color,
order=order,
ax=ax_num)
if feature_metadata is not None:
ax_denom.set(yticklabels=(list(num_feature_metadata.values) +
list(denom_feature_metadata.values)),
title=left_group)
else:
ax_denom.set(yticklabels=order, title=left_group)

ax_num.set(yticklabels=[], ylabel='', yticks=[], title=right_group)

max_xlim = max(ax_denom.get_xlim()[1], ax_num.get_xlim()[1])
min_xlim = max(ax_denom.get_xlim()[0], ax_num.get_xlim()[0])

max_ylim, min_ylim = ax_denom.get_ylim()

ax_denom.set_xlim(max_xlim, min_xlim)
ax_num.set_xlim(min_xlim, max_xlim)
ax_denom.set_position([0.2, 0.125, 0.3, 0.75])
ax_num.set_position([0.5, 0.125, 0.3, 0.75])

num_h = num_df.shape[1]
denom_h = denom_df.shape[1]

space = (max_ylim - min_ylim) / (num_h + denom_h)
ymid = (max_ylim - min_ylim) * num_h / (num_h + denom_h) - 0.5 * space

ax_denom.axhspan(min_ylim, ymid, facecolor=num_color,
zorder=0, alpha=0.25)
ax_denom.axhspan(ymid, max_ylim, facecolor=denom_color,
zorder=0, alpha=0.25)

ax_num.axhspan(min_ylim, ymid, facecolor=num_color, zorder=0, alpha=0.25)
ax_num.axhspan(ymid, max_ylim, facecolor=denom_color, zorder=0, alpha=0.25)
return (ax_num, ax_denom)
116 changes: 115 additions & 1 deletion gneiss/plot/tests/test_decompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import unittest
from gneiss.plot import balance_boxplot, balance_barplots
from gneiss.plot import balance_boxplot, balance_barplots, proportion_plot
import numpy as np
import pandas as pd
import numpy.testing as npt
import matplotlib.pyplot as plt
from skbio import TreeNode


Expand Down Expand Up @@ -105,5 +106,118 @@ def test_basic_barplot(self):
feature_metadata=self.feature_df)


class TestProportionPlot(unittest.TestCase):
def setUp(self):
self.table = pd.DataFrame({
'A': [1, 1.2, 1.1, 2.1, 2.2, 2],
'B': [9.9, 10, 10.1, 2, 2.4, 2.1],
'C': [5, 3, 1, 2, 2, 3],
'D': [5, 5, 5, 5, 5, 5],
}, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])

self.feature_metadata = pd.DataFrame({
'A': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'],
'B': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'],
'C': ['k__poo', 'p__tar', 'c__', 'o__', 'f__', 'g__', 's__'],
'D': ['k__poo', 'p__far', 'c__', 'o__', 'f__', 'g__', 's__']
}, index=['kingdom', 'phylum', 'class', 'order',
'family', 'genus', 'species']).T

self.metadata = pd.DataFrame({
'groups': ['X', 'X', 'X', 'Y', 'Y', 'Y'],
'dry': [1, 2, 3, 4, 5, 6]
}, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])

def test_proportion_plot(self):
np.random.seed(0)
num_features = ['A', 'B']
denom_features = ['C', 'D']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
self.feature_metadata,
label_col='phylum')
res = np.vstack([l.get_xydata() for l in ax1.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, verbose=True)

res = np.vstack([l.get_xydata() for l in ax2.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, verbose=True)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['p__bar', 'p__bar', 'p__tar', 'p__far']
self.assertListEqual(res, exp)

def test_proportion_plot_order(self):
self.maxDiff = None
np.random.seed(0)
# tests for different ordering
num_features = ['A', 'B']
denom_features = ['D', 'C']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
self.feature_metadata,
label_col='phylum')
res = np.vstack([l.get_xydata() for l in ax1.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = np.vstack([l.get_xydata() for l in ax2.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['p__bar', 'p__bar', 'p__far', 'p__tar']
self.assertListEqual(res, exp)

def test_proportion_plot_order_figure(self):
self.maxDiff = None
np.random.seed(0)
# tests for different ordering
fig, axes = plt.subplots(1, 2)

num_features = ['A', 'B']
denom_features = ['D', 'C']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
self.feature_metadata,
label_col='phylum', axes=axes)
res = np.vstack([l.get_xydata() for l in ax1.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = np.vstack([l.get_xydata() for l in ax2.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['p__bar', 'p__bar', 'p__far', 'p__tar']
self.assertListEqual(res, exp)

def test_proportion_plot_original_labels(self):
# tests for different ordering
fig, axes = plt.subplots(1, 2)

num_features = ['A', 'B']
denom_features = ['D', 'C']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
axes=axes)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['A', 'B', 'D', 'C']
self.assertListEqual(res, exp)


if __name__ == '__main__':
unittest.main()
7 changes: 3 additions & 4 deletions gneiss/plot/tests/test_radial.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from skbio import TreeNode, DistanceMatrix
from gneiss.plot._radial import radialplot
from gneiss.plot._dendrogram import UnrootedDendrogram
import pandas.util.testing as pdt


class TestRadial(unittest.TestCase):
Expand Down Expand Up @@ -84,9 +83,9 @@ def test_basic_plot(self):
node_size='node_size', edge_width='edge_width')

for e in exp_edges.keys():
pdt.assert_series_equal(
p.renderers[0].data_source.data[e],
pd.Series(exp_edges[e], name=e))
self.assertListEqual(
list(p.renderers[0].data_source.data[e]),
exp_edges[e])

for e in exp_nodes.keys():
self.assertListEqual(
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def finalize_options(self):
'nose >= 1.3.7',
'scikit-bio==0.5.1',
'statsmodels>=0.8.0',
'biom-format',
'seaborn'
],
classifiers=classifiers,
Expand Down

0 comments on commit 716ac90

Please sign in to comment.