#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (c) The University of Strathclyde 2019
# Author: Leighton Pritchard
#
# Contact:
# leighton.pritchard@strath.ac.uk
#
# Leighton Pritchard,
# Strathclyde Institute of Pharmaceutical and Biomedical Sciences
# The University of Strathclyde
# Cathedral Street
# Glasgow
# G1 1XQ
# Scotland,
# UK
#
# The MIT License
#
# (c) The University of Strathclyde 2019
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""Code to implement Seaborn graphics output for ANI analyses."""
import warnings
import matplotlib # pylint: disable=C0411
import pandas as pd
import seaborn as sns
matplotlib.use("Agg")
import matplotlib.pyplot as plt # noqa: E402,E501 # pylint: disable=wrong-import-position,wrong-import-order,ungrouped-imports
# Add classes colorbar to Seaborn plot
[docs]def get_colorbar(dfr, classes):
"""Return a colorbar representing classes, for a Seaborn plot.
:param dfr:
:param classes:
The aim is to get a pd.Series for the passed dataframe columns,
in the form:
0 colour for class in col 0
1 colour for class in col 1
... colour for class in col ...
n colour for class in col n
"""
levels = sorted(list(set(classes.values())))
paldict = dict(
zip(
levels,
sns.cubehelix_palette(
len(levels), light=0.9, dark=0.1, reverse=True, start=1, rot=-2
),
)
)
lvl_pal = {cls: paldict[lvl] for (cls, lvl) in list(classes.items())}
# Have to use string conversion of the dataframe index, here
col_cb = pd.Series([str(_) for _ in dfr.index]).map(lvl_pal)
# The col_cb Series index now has to match the dfr.index, but
# we don't create the Series with this (and if we try, it
# fails) - so change it with this line
col_cb.index = dfr.index
return col_cb
# Add labels to the seaborn heatmap axes
[docs]def add_labels(fig, params):
"""Add labels to Seaborn heatmap axes, in-place.
:param fig:
:param params:
"""
if params.labels:
# If a label mapping is missing, use the key text as fall back
for _ in fig.ax_heatmap.get_yticklabels():
_.set_text(params.labels.get(_.get_text(), _.get_text()))
for _ in fig.ax_heatmap.get_xticklabels():
_.set_text(params.labels.get(_.get_text(), _.get_text()))
fig.ax_heatmap.set_xticklabels(fig.ax_heatmap.get_xticklabels(), rotation=90)
fig.ax_heatmap.set_yticklabels(fig.ax_heatmap.get_yticklabels(), rotation=0)
return fig
# Return a clustermap
[docs]def get_clustermap(dfr, params, title=None, annot=True):
"""Return a Seaborn clustermap for the passed dataframe.
:param dfr:
:param params:
:param title: str, plot title
:param annot: Boolean, add text for cell values?
"""
# If we do not catch warnings here, then we often get the following warning:
# ClusterWarning: scipy.cluster: The symmetric non-negative hollow
# observation matrix looks suspiciously like an uncondensed distance matrix
# The usual solution would be to convert the array with
# scipy.spatial.distance.squareform(), but this requires that all values in
# the main diagonal are zero, which is not the case for ANI.
# As we know this is a (1-distance) matrix, we could just set the diagonal
# to zero and fudge it, but this is not a good solution. Instead, we suppress
# the warning in a context manager for this function call only, because we
# know the warning is not relevant.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
"scipy.cluster: The symmetric non-negative "
"hollow observation matrix looks suspiciously like an "
"uncondensed distance matrix"
),
)
fig = sns.clustermap(
dfr,
cmap=params.cmap,
vmin=params.vmin,
vmax=params.vmax,
col_colors=params.colorbar,
row_colors=params.colorbar,
figsize=(params.figsize, params.figsize),
linewidths=params.linewidths,
annot=annot,
)
# add labels for each of the input genomes
add_labels(fig, params)
fig.cax.yaxis.set_label_position("left")
if title:
fig.cax.set_ylabel(title)
# Return clustermap
return fig
# Generate Seaborn heatmap output
[docs]def heatmap(dfr, outfilename=None, title=None, params=None):
"""Return seaborn heatmap with cluster dendrograms.
:param dfr: pandas DataFrame with relevant data
:param outfilename: path to output file (indicates output format)
:param title:
:param params:
"""
# Decide on figure layout size: a minimum size is required for
# aesthetics, and a maximum to avoid core dumps on rendering.
# If we hit the maximum size, we should modify font size.
maxfigsize = 120
calcfigsize = dfr.shape[0] * 1.1
figsize = min(max(8, calcfigsize), maxfigsize)
if figsize == maxfigsize:
scale = maxfigsize / calcfigsize
sns.set_context("notebook", font_scale=scale)
# Add a colorbar?
if params.classes is None:
col_cb = None
else:
col_cb = get_colorbar(dfr, params.classes)
# Add attributes to parameter object, and draw heatmap
params.colorbar = col_cb
params.figsize = figsize
params.linewidths = 0.25
fig = get_clustermap(dfr, params, title=title)
# Save to file
if outfilename:
fig.savefig(outfilename)
# Return clustermap
return fig
[docs]def distribution(dfr, outfilename, matname, title=None):
"""Return seaborn distribution plot for matrix.
:param drf: DataFrame with results matrix
:param outfilename: Path to output file for writing
:param matname: str, type of matrix being plotted
:param title: str, optional title
"""
fill = "#A6C8E0"
rug = "#2678B2"
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle(title)
sns.histplot(
dfr.values.flatten(),
ax=axes[0],
stat="count",
element="step",
color=fill,
edgecolor=fill,
)
axes[0].set_ylim(ymin=0)
sns.kdeplot(dfr.values.flatten(), ax=axes[1])
sns.rugplot(dfr.values.flatten(), ax=axes[1], color=rug)
# Modify axes after data is plotted
for _ in axes:
if matname == "sim_errors":
_.set_xlim(0, _.get_xlim()[1])
elif matname in ["hadamard", "coverage"]:
_.set_xlim(0, 1.01)
elif matname == "identity":
_.set_xlim(0.75, 1.01)
# Tidy figure
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
if outfilename:
# For some reason seaborn gives us an AxesSubPlot with
# sns.distplot, rather than a Figure, so we need this hack
fig.savefig(outfilename)
return fig
[docs]def scatter(
dfr1,
dfr2,
outfilename=None,
matname1="identity",
matname2="coverage",
title=None,
params=None,
):
"""Return seaborn scatterplot.
:param dfr1: pandas DataFrame with x-axis data
:param dfr2: pandas DataFrame with y-axis data
:param outfilename: path to output file (indicates output format)
:param matname1: name of x-axis data
:param matname2: name of y-axis data
:param title: title for the plot
:param params: a list of parameters for plotting: [colormap, vmin, vmax]
"""
# Make an empty dataframe to collect the input data in
combined = pd.DataFrame()
# Add data
combined[matname1] = dfr1.values.flatten()
combined[matname2] = dfr2.values.flatten()
# Add lable information, if available
# if params.labels:
# hue = "labels"
# combined['labels'] = # add labels to dataframe; unsure of their configuration at this point
# else:
hue = None
# Create the plot
fig = sns.lmplot(
x=matname1,
y=matname2,
data=combined,
hue=hue,
fit_reg=False,
scatter_kws={"s": 2},
)
fig.set(xlabel=matname1.title(), ylabel=matname2.title())
plt.title(title)
# Save to file
if outfilename:
fig.savefig(outfilename)
# Return clustermap
return fig