# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2016-2019
# (c) University of Strathclyde 2019
# Author: Leighton Pritchard
#
# Contact:
# leighton.pritchard@strath.ac.uk
#
# Leighton Pritchard,
# Strathclyde Institute for Pharmacy and Biomedical Sciences,
# Cathedral Street,
# Glasgow,
# G1 1XQ
# Scotland,
# UK
#
# The MIT License
#
# Copyright (c) 2016-2019 The James Hutton Institute
# Copyright (c) 2019 University of Strathclyde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""Module providing functions for presenting analysis/db output."""
import sys
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
import pandas as pd # type: ignore
def colour_rows(
    series: pd.Series, even_colour: str = "#DDECF5", odd_colour: str = "#6CB6E4"
) -> List[str]:
    """Return alternating background-colour CSS declarations for a series.

    One declaration is produced per entry in the series index. Positions are
    counted from zero, so the first entry receives the even colour.

    :param series: pd.Series, its index determines how many styles are returned
    :param even_colour: str, hex colour for even-numbered positions
    :param odd_colour: str, hex colour for odd-numbered positions
    """
    # Index 0 holds the even style and index 1 the odd style, so the
    # remainder of position / 2 selects the right declaration directly.
    palette = (
        f"background-color: {even_colour}",
        f"background-color: {odd_colour}",
    )
    return [palette[position % 2] for position in range(len(series.index))]
def table_padding() -> Dict[str, Any]:
    """Return a table-style entry that pads every ``td`` cell by 15px.

    The result is a dictionary with ``selector`` and ``props`` keys.
    """
    cell_props = [("padding", "15px")]
    return {"selector": "td", "props": cell_props}
def hover_highlight(hover_colour: str = "#FFFF99") -> Dict[str, Any]:
    """Return a table-style entry that recolours a row while it is hovered.

    The result is a dictionary with ``selector`` (``tr:hover``) and ``props``
    keys, where ``props`` sets the row's background colour.

    :param hover_colour: str, hex colour for hover highlight
    """
    background = ("background-color", f"{hover_colour}")
    return {"selector": "tr:hover", "props": [background]}
def colour_identity(
    series: pd.Series, threshold: float = 0.95, colour: str = "#FF2222"
) -> List[str]:
    """Return text-colour CSS for identities at or above a threshold.

    Only a series named exactly ``percentage identity`` is highlighted; any
    other series receives an empty style string for each of its elements.

    :param series: pd.Series, values to style
    :param threshold: float, threshold for cell highlighting
    :param colour: str, hex colour for highlighted cells
    """
    # Guard clause: leave every other series unstyled
    if series.name != "percentage identity":
        return ["" for _ in series]
    highlight = f"color: {colour}"
    return [highlight if over else "" for over in series >= threshold]
def colour_coverage(
    series: pd.Series, threshold: float = 0.95, colour: str = "#FF2222"
) -> List[str]:
    """Return text-colour CSS for coverage values at or above a threshold.

    A series is highlighted when the string form of its name contains
    ``coverage``; any other series receives an empty style string for each of
    its elements.

    :param series: pd.Series, values to style
    :param threshold: float, threshold for cell highlighting
    :param colour: str, hex colour for highlighted cells
    """
    label = str(series.name)
    # Guard clause: leave series that are not coverage data unstyled
    if "coverage" not in label:
        return [""] * len(series)
    css = f"color: {colour}"
    flagged = series >= threshold
    return [css if hit else "" for hit in flagged]
def colour_numeric(val: float, threshold: float = 0.95, colour: str = "#FF2222") -> str:
    """Return a text-colour CSS declaration for a single numeric value.

    Values at or above the threshold are given the highlight colour. All
    other values, including NaN, are rendered in black.

    :param val: float, the cell value to be styled
    :param threshold: float, threshold for cell highlighting
    :param colour: str, hex colour for highlighted cell
    """
    # Test positively for "at or above threshold": every comparison against
    # NaN is False, so a missing value falls through to black rather than
    # being highlighted as if it had exceeded the threshold.
    if val >= threshold:
        return f"color: {colour}"
    return "color: black"
# Write a dataframe in pyani-styled HTML
def write_styled_html(
    path: Path, dfm: pd.DataFrame, index: Optional[str] = None, colour_num: bool = False
) -> None:
    """Add CSS styling to a dataframe and write as HTML.

    The dataframe passed in is not modified: when a new index column is
    requested, styling is applied to a re-indexed copy.

    :param path: path to write output file
    :param dfm: dataframe to be written out
    :param index: column to be set as index (if necessary); ignored when it
        is None or not one of the dataframe's columns
    :param colour_num: if True, additionally colour every cell with
        colour_numeric
    """
    # Reset the index to a specified column. A new frame is built rather than
    # using inplace=True, so the caller's dataframe is left untouched.
    if index is not None and index in dfm.columns:
        dfm = dfm.set_index(index)

    # Colour rows in alternating shades of blue
    styled = dfm.style.apply(colour_rows)

    # Colour percentage identity/coverage values at or above the default
    # threshold in red
    styled = styled.apply(colour_identity).apply(colour_coverage)

    # Colour numbers over a given threshold
    if colour_num:
        # Newer pandas names the element-wise styler ``map`` and deprecates
        # ``applymap``; use whichever this pandas version provides.
        elementwise = getattr(styled, "map", None) or styled.applymap
        styled = elementwise(colour_numeric)

    # Apply styles
    # NOTE(review): header_font is not defined in the visible part of this
    # module - assumed to be defined elsewhere in the file; confirm.
    styled = styled.set_table_styles(
        [hover_highlight(), header_font(), table_padding()]
    )

    # Set font to Helvetica
    styled = styled.set_properties(**{"font-family": "Helvetica", "font-size": "small"})

    # Write styled HTML to path. Styler.render was superseded by
    # Styler.to_html, so prefer the newer method when it is available.
    if hasattr(styled, "to_html"):
        html = styled.to_html()
    else:
        html = styled.render()
    # Explicit encoding keeps the output independent of the platform default
    with open(path, "w", encoding="utf-8") as ofh:
        ofh.write(html)
# Write a dataframe to STDOUT
def write_to_stdout(
    stem: str,
    dfm: pd.DataFrame,
    show_index: bool = False,
    line_width: Optional[float] = None,
) -> None:
    """Write a titled, plain-text rendering of a dataframe to STDOUT.

    The output is a ``TABLE: <stem>`` header line, followed by the result of
    ``dfm.to_string`` and a blank line.

    :param stem: str, label printed in the ``TABLE:`` header line
    :param dfm: pd.Dataframe, table to be printed
    :param show_index: Boolean, include index in output table
    :param line_width: passed unchanged to ``DataFrame.to_string`` as its
        ``line_width`` argument; None leaves the pandas default in place
    """
    sys.stdout.write(f"TABLE: {stem}\n")
    table = dfm.to_string(index=show_index, line_width=line_width)
    sys.stdout.write(table + "\n\n")
# Write a table returned from the pyani database in the requested format
def write_dbtable(
    dfm: pd.DataFrame,
    path: Path,
    formats: Sequence[str] = ("tab",),
    show_index: bool = True,
    colour_num: bool = False,
) -> None:
    """Write database result table to output file in named format.

    Each requested format is written to ``path`` with its suffix replaced by
    the extension for that format (``.tab``, ``.xlsx``, ``.html``). For
    ``stdout`` the suffix is removed and the resulting path is used as the
    table's label. A format whose writer raises ModuleNotFoundError is
    skipped with a logged warning.

    :param dfm: pd.Dataframe
    :param path: Path to output file
    :param formats: tuple of str, output file formats; recognised names are
        ``tab``, ``excel``, ``html`` and ``stdout``
    :param show_index: output row and column labels
    :param colour_num: use colours for values in HTML output
        colours are used for identity/coverage tables
    :raises KeyError: if a name in ``formats`` is not a recognised format
    """
    logger = logging.getLogger(__name__)

    # Maps format name -> (writer, keyword arguments, output file extension)
    formatdict = {
        "tab": (dfm.to_csv, {"sep": "\t", "index": show_index}, ".tab"),
        "excel": (dfm.to_excel, {"index": show_index}, ".xlsx"),
        # write_styled_html's ``index`` argument names a column to promote to
        # the index; it is not a show/hide flag, so the boolean show_index is
        # deliberately not forwarded to it.
        "html": (
            write_styled_html,
            {"dfm": dfm, "colour_num": colour_num},
            ".html",
        ),
        "stdout": (write_to_stdout, {"dfm": dfm, "show_index": show_index}, ""),
    }

    for fmt in formats:
        func, args, ext = formatdict[fmt]
        ofname = path.with_suffix(ext)
        try:
            func(ofname, **args)
        except ModuleNotFoundError as err:
            # A missing optional writer dependency should not abort the
            # remaining formats, so warn and carry on.
            logger.warning("ModuleNotFoundError: %s", err)
            logger.warning("Skipping %s output", fmt)