Source code for pyani.pyani_graphics.mpl

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (c) The University of Strathclyde 2019
# Author: Leighton Pritchard
#
# Contact:
# leighton.pritchard@strath.ac.uk
#
# Leighton Pritchard,
# Strathclyde Institute of Pharmaceutical and Biomedical Sciences
# The University of Strathclyde
# Cathedral Street
# Glasgow
# G1 1XQ
# Scotland,
# UK
#
# The MIT License
#
# (c) The University of Strathclyde 2019
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""Code to implement Matplotlib graphics output for ANI analyses."""

import warnings

from math import floor, log10

import matplotlib  # pylint: disable=C0411
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as distance

from scipy.stats import gaussian_kde

from pyani import pyani_config

# Specify matplotlib backend. This *must* be done before pyplot import, but
# raises errors with flake8 etc. So we comment out the specific error
matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402,E501 # pylint: disable=wrong-import-position,wrong-import-order,ungrouped-imports
import matplotlib.gridspec as gridspec  # noqa: E402,E501 # pylint: disable=wrong-import-position,wrong-import-order,ungrouped-imports


# Register the custom pyani colourmaps with Matplotlib
for _custom_cmap in (
    pyani_config.CMAP_SPBND_BURD,
    pyani_config.CMAP_HADAMARD_BURD,
    pyani_config.CMAP_BURD,
):
    plt.register_cmap(cmap=_custom_cmap)

# Matplotlib version dictates bug fixes
MPLVERSION = matplotlib.__version__


# helper for cleaning up matplotlib axes by removing ticks etc.
def clean_axis(axis):
    """Strip ticks, tick labels, and the surrounding frame from an axis.

    :param axis:  axes object to tidy; it is modified in place
    """
    # Setting an empty tick list on each of the x and y axes clears them
    for get_axis_line in (axis.get_xaxis, axis.get_yaxis):
        get_axis_line().set_ticks([])
    # Hide every spine so that no frame is drawn
    for frame_edge in tuple(axis.spines.values()):
        frame_edge.set_visible(False)
# Add dendrogram and axes to passed figure
def add_dendrogram(dfr, fig, params, heatmap_gs, orientation="col"):
    """Return a dendrogram and corresponding gridspec, attached to the fig.

    :param dfr:  Pandas DataFrame describing input data
    :param fig:  matplotlib Fig that holds graphical output
    :param params:  pyani_graphics.Params object
    :param heatmap_gs:  matplotlib GridSpec for this dendrogram
    :param orientation:  str, "row" or "col"

    Modifies the fig in-place. Orientation is either 'row' or 'col' and
    determines location and orientation of the rendered dendrogram.

    We expect that the row/column index values should be ordered
    identically. If they are not, the dendrogram will not match labels

    Returns a dict with two keys: "dendrogram" (the dict returned by
    scipy's dendrogram()) and "gridspec" (the nested GridSpec that holds
    the dendrogram axes).
    """
    # Row or column axes?
    if orientation == "row":
        dists = distance.pdist(dfr)
        spec = heatmap_gs[1, 0]
        orient = "left"
        nrows, ncols = 1, 2
        height_ratios = [1]
    else:  # Column dendrogram
        dists = distance.pdist(dfr.T)
        spec = heatmap_gs[0, 1]
        orient = "top"
        nrows, ncols = 2, 1
        height_ratios = [1, 0.15]

    # Create dendrogram axis inside the selected cell of the heatmap grid
    gspec = gridspec.GridSpecFromSubplotSpec(
        nrows,
        ncols,
        subplot_spec=spec,
        wspace=0.0,
        hspace=0.1,
        height_ratios=height_ratios,
    )
    dend_axes = fig.add_subplot(gspec[0, 0])

    # params.labels may be None or empty (add_labels() guards against a
    # falsy value too); in either case let scipy use its default leaf labels
    label_map = params.labels or {}
    labels = list(label_map.values()) or None

    # Draw on the axes created above explicitly, rather than relying on
    # them being pyplot's current axes
    dend = sch.dendrogram(
        sch.linkage(dists, method="complete"),
        color_threshold=np.inf,
        orientation=orient,
        labels=labels,
        get_leaves=True,
        ax=dend_axes,
    )
    clean_axis(dend_axes)

    return {"dendrogram": dend, "gridspec": gspec}
def distribution(dfr, outfilename, matname, title=None):
    """Return matplotlib distribution plot for matrix.

    :param dfr:  DataFrame with results matrix
    :param outfilename:  Path to output file for writing; if falsy, the
        figure is not written to disk
    :param matname:  str, type of matrix being plotted; "sim_errors",
        "hadamard", "coverage" and "identity" each get specific x-axis
        limits, any other value leaves the automatic limits in place
    :param title:  str, optional title

    The figure has two panels built from all values in the matrix: a
    50-bin histogram (left) and a Gaussian kernel density estimate (right).
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    fig.suptitle(title)

    data = dfr.values.flatten()
    xvals = np.linspace(data.min(), data.max(), 200)

    # Plot histogram
    axes[0].hist(data, bins=50)

    # Plot density; gaussian_kde computes its covariance when constructed,
    # so no explicit call to the protected _compute_covariance() is needed
    density = gaussian_kde(data)
    axes[1].plot(xvals, density(xvals))

    # Modify axes after data is plotted
    for axis in axes:
        if matname == "sim_errors":
            axis.set_xlim(0, axis.get_xlim()[1])
        elif matname in ("hadamard", "coverage"):
            axis.set_xlim(0, 1.01)
        elif matname == "identity":
            axis.set_xlim(axis.get_xlim()[0], 1.01)

    # Tidy figure, leaving a margin at the top for the suptitle
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    if outfilename:
        fig.savefig(outfilename)

    return fig
# Create heatmap axes for Matplotlib output
def get_heatmap_axes(dfr, fig, heatmap_gs):
    """Return axis for Matplotlib heatmap.

    :param dfr:  two-dimensional data (e.g. pandas DataFrame); only its
        ``shape`` is read, to place one tick per row and per column
    :param fig:  figure to which the heatmap axes are added
    :param heatmap_gs:  GridSpec for the heatmap layout; the heatmap
        occupies cell [1, 1]

    The returned axes have the grid disabled, x ticks drawn at the bottom
    and y ticks drawn on the right.
    """
    nrows, ncols = dfr.shape

    # Create heatmap axis
    heatmap_axes = fig.add_subplot(heatmap_gs[1, 1])
    # One tick per column on the x-axis and one per row on the y-axis, so
    # that non-square data also gets the right number of x ticks
    heatmap_axes.set_xticks(np.linspace(0, ncols - 1, ncols))
    heatmap_axes.set_yticks(np.linspace(0, nrows - 1, nrows))
    heatmap_axes.grid(False)
    heatmap_axes.xaxis.tick_bottom()
    heatmap_axes.yaxis.tick_right()
    return heatmap_axes
def add_colorbar(dfr, fig, dend, params, orientation="row"):
    """Add class colorbars to Matplotlib heatmap.

    :param dfr:  pandas DataFrame holding the heatmap data
    :param fig:  figure to which the colourbar axes are added
    :param dend:  dict with "dendrogram" and "gridspec" keys, as returned
        by add_dendrogram()
    :param params:  plotting parameters; ``params.classes`` is read as a
        mapping from sequence name to class
    :param orientation:  str, "row" for the row colourbar; any other
        value is treated as the column colourbar

    Returns a pandas Series with one integer class code per dendrogram
    leaf, in leaf order.
    """
    # Assign a numerical value to each class, for mpl
    # NOTE(review): sequences with no class are coded 0 below, which can
    # coincide with the code given to a real class here - confirm intended
    classdict = {cls: idx for (idx, cls) in enumerate(params.classes.values())}

    # Leaves of the row dendrogram index the rows of dfr, but leaves of the
    # column dendrogram (built from dfr.T) index its columns
    axis_labels = dfr.index if orientation == "row" else dfr.columns
    leaf_names = [str(_) for _ in axis_labels[dend["dendrogram"]["leaves"]]]

    # colourbar
    cblist = []
    for name in leaf_names:
        if name in params.classes:
            cblist.append(classdict[params.classes[name]])
        elif name in classdict:
            cblist.append(classdict[name])
        else:  # Catches genomes with no assigned class
            cblist.append(0)
    colbar = pd.Series(cblist)

    # Create colourbar axis - could capture if needed
    if orientation == "row":
        # Vertical strip: one single-cell image row per leaf
        spec = dend["gridspec"][0, 1]
        image = [[cbar] for cbar in colbar.values]
    else:
        # Horizontal strip: a single image row with one cell per leaf
        spec = dend["gridspec"][1, 0]
        image = [colbar]
    cbaxes = fig.add_subplot(spec)
    cbaxes.imshow(
        image,
        cmap=plt.get_cmap(pyani_config.MPL_CBAR),
        interpolation="nearest",
        aspect="auto",
        origin="lower",
    )
    clean_axis(cbaxes)
    return colbar
# Add labels to the heatmap axes
def add_labels(heatmap_axes, rowlabels, collabels, params):
    """Label the heatmap axes in-place, using alternative labels if given.

    :param heatmap_axes:  axes object whose tick labels are set
    :param rowlabels:  iterable of labels for the y-axis ticks
    :param collabels:  iterable of labels for the x-axis ticks
    :param params:  plotting parameters; if ``params.labels`` is truthy it
        is used as a mapping from default label to replacement label
    """
    label_map = params.labels
    if label_map:
        # Any label without a replacement is shown unchanged
        rowlabels = [label_map.get(key, key) for key in rowlabels]
        collabels = [label_map.get(key, key) for key in collabels]

    column_texts = heatmap_axes.set_xticklabels(collabels)
    row_texts = heatmap_axes.set_yticklabels(rowlabels)

    # Column labels are written vertically
    for text in column_texts:
        text.set_rotation(90)
    # All labels use a smaller font
    for text in [*column_texts, *row_texts]:
        text.set_fontsize(8)
# Add colour scale to heatmap
def add_colorscale(fig, heatmap_gs, ax_map, params, title=None):
    """Attach a colour scale for the heatmap to the figure and return it.

    :param fig:  figure to which the colour scale is added
    :param heatmap_gs:  GridSpec for the heatmap layout; the colour scale
        is placed in the middle of three columns inside cell [0, 0]
    :param ax_map:  the plotted heatmap image that the scale describes
    :param params:  plotting parameters; ``vmin``, ``vdiff`` and ``vmax``
        are read to position the ticks
    :param title:  optional label for the colour scale
    """
    # Five evenly spaced ticks from vmin to vmin + vdiff
    fractions = (0, 0.25, 0.5, 0.75, 1)
    cbticks = [params.vmin + frac * params.vdiff for frac in fractions]
    if params.vmax > 10:
        # For large scales, round ticks to two significant figures of vmax
        # and show them as integers
        ndigits = 1 - int(floor(log10(params.vmax)))
        cbticks = [int(round(tick, ndigits)) for tick in cbticks]

    scale_subplot = gridspec.GridSpecFromSubplotSpec(
        1, 3, subplot_spec=heatmap_gs[0, 0], wspace=0.0, hspace=0.0
    )
    scale_ax = fig.add_subplot(scale_subplot[0, 1])
    cbar = fig.colorbar(ax_map, scale_ax, ticks=cbticks)
    if title:
        cbar.set_label(title, fontsize=6)

    # Ticks and label go on the left of the scale, in a small font, and
    # the outline is drawn with zero width
    scale_yaxis = cbar.ax.yaxis
    scale_yaxis.set_ticks_position("left")
    scale_yaxis.set_label_position("left")
    cbar.ax.tick_params(labelsize=6)
    cbar.outline.set_linewidth(0)
    return cbar
# Generate Matplotlib heatmap output
def heatmap(dfr, outfilename=None, title=None, params=None):
    """Return matplotlib heatmap with cluster dendrograms.

    :param dfr:  pandas DataFrame with relevant data
    :param outfilename:  path to output file (indicates output format);
        the figure is only written if this is truthy
    :param title:  optional text, passed on to add_colorscale() as the
        label of the colour scale
    :param params:  plotting parameters object; its ``cmap``, ``vmin``,
        ``vmax`` and ``classes`` attributes are read here, and the object
        is passed on to the helper functions. Although the default is
        None, the attributes are read unconditionally, so a value must
        be supplied.
    """
    # Row indexes/labels recovered from the database can be integers in
    # arbitrary order, which would mismatch labels and dendrogram leaves.
    # Sorting the rows by index keeps the two in step.
    dfr = dfr.sort_index()

    # Square figure whose side grows with the number of rows (minimum 8)
    side = max(8, dfr.shape[0] * 0.175)
    fig = plt.figure(figsize=(side, side))
    heatmap_gs = gridspec.GridSpec(
        2, 2, wspace=0.0, hspace=0.0, width_ratios=[0.3, 1], height_ratios=[0.3, 1]
    )

    # Column dendrogram first, then row dendrogram
    coldend = add_dendrogram(dfr, fig, params, heatmap_gs, orientation="col")
    rowdend = add_dendrogram(dfr, fig, params, heatmap_gs, orientation="row")
    col_tree = coldend["dendrogram"]
    row_tree = rowdend["dendrogram"]

    # Heatmap body, with rows/columns reordered to follow the dendrograms
    heatmap_axes = get_heatmap_axes(dfr, fig, heatmap_gs)
    clustered = dfr.iloc[row_tree["leaves"], col_tree["leaves"]]
    ax_map = heatmap_axes.imshow(
        clustered,
        interpolation="nearest",
        cmap=params.cmap,
        origin="lower",
        vmin=params.vmin,
        vmax=params.vmax,
        aspect="auto",
    )

    # Class colourbars are only drawn when classes were provided
    if params.classes is not None:
        for dend, direction in ((coldend, "col"), (rowdend, "row")):
            add_colorbar(dfr, fig, dend, params, orientation=direction)

    # Tick labels come from the dendrogram leaf labels
    add_labels(heatmap_axes, row_tree["ivl"], col_tree["ivl"], params)

    # Colour scale
    add_colorscale(fig, heatmap_gs, ax_map, params, title)

    plt.subplots_adjust(top=0.85)  # Leave room for title

    # tight_layout is known to raise a UserWarning with the Agg renderer
    # on OSX; silence warnings around this call only, for cleanliness.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        heatmap_gs.tight_layout(fig, h_pad=0.1, w_pad=0.5)

    # Write the figure if requested, and hand it back to the caller
    if outfilename:
        fig.savefig(outfilename)
    return fig
def scatter(
    dfr1,
    dfr2,
    outfilename=None,
    matname1="identity",
    matname2="coverage",
    title=None,
    params=None,
):
    """Return matplotlib scatterplot.

    :param dfr1:  pandas DataFrame with x-axis data
    :param dfr2:  pandas DataFrame with y-axis data
    :param outfilename:  path to output file (indicates output format);
        the figure is only written if this is truthy
    :param matname1:  name of x-axis data
    :param matname2:  name of y-axis data
    :param title:  title for the plot
    :param params:  plotting parameters; accepted for consistency with
        heatmap() but not currently used by this function

    The values of both dataframes are flattened and plotted against each
    other point-for-point.
    """
    # Collect the flattened input data, one column per matrix
    combined = pd.DataFrame()
    combined[matname1] = dfr1.values.flatten()
    combined[matname2] = dfr2.values.flatten()

    fig, ax = plt.subplots(figsize=(8, 8))
    fig.suptitle(title)
    ax.set_xlabel(matname1.title())
    ax.set_ylabel(matname2.title())

    # Draw on the axes created above explicitly, rather than going through
    # pyplot's notion of the current axes
    ax.scatter(matname1, matname2, data=combined, c=None, s=2)

    # Return figure output, and write, if required
    fig.subplots_adjust(top=0.85)  # Leave room for title
    fig.set_tight_layout(True)
    if outfilename:
        fig.savefig(outfilename)

    return fig