Source code for scenicplus.cistromes

# -*- coding: utf-8 -*-
"""Merging, scoring and assessing TF correlations of cistromes

"""
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pycistarget
from pycistarget.motif_enrichment_dem import *
from pycisTopic.diff_features import *
from pycisTopic.signature_enrichment import *
import pyranges as pr
from random import sample
import seaborn as sns
from scipy.stats import pearsonr
from typing import List
from .utils import region_names_to_coordinates, target_to_overlapping_query, p_adjust_bh, Groupby, flatten_list

from .scenicplus_class import SCENICPLUS

def _signatures_to_iter(menr):
    for x in menr.keys():
        if isinstance(menr[x], pycistarget.motif_enrichment_dem.DEM):
            for y in menr[x].cistromes['Region_set'].keys():
                for z in menr[x].cistromes['Region_set'][y]:
                    yield x, y, z, menr[x].cistromes['Region_set'][y][z]
        elif isinstance(menr[x], dict):
            for y in menr[x].keys():
                if not isinstance(menr[x][y], pycistarget.motif_enrichment_cistarget.cisTarget):
                    raise ValueError(f'Only motif enrichment results from pycistarget or DEM are allowed, not {type(menr[x][y])}')
                for z in menr[x][y].cistromes['Region_set']:
                    yield x, y, z, menr[x][y].cistromes['Region_set'][z]
        else:
            raise ValueError(f'Only motif enrichment results from pycistarget or DEM are allowed, not {type(menr[x])}')

def _get_signatures_as_dict(i):
    return {z+'__'+x+'__'+y: regions for x, y, z, regions in i}

def _merge_dict_of_signatures(d, suffix = ''):
    arr_keys_signatures = np.array(list(d.keys()))
    grouper = Groupby([x.split('_')[0] for x in arr_keys_signatures])
    merged_signatures = {}
    for TF, idx in zip(grouper.keys, grouper.indices):
        merged_signatures[TF + suffix] = pr.PyRanges(
            region_names_to_coordinates(set(flatten_list([d[x] for x in arr_keys_signatures[idx]]))))
    return merged_signatures

def _overlap_if_necessary(d, test_regions, regions_to_overlap):
    d_overlap = {}
    for k in d.keys():
        s_query_regions = set(coord_to_region_names(d[k]))
        #if the signature regions are already in the scplus_obj coordinate system, do nothing, otherwise overlap
        if len(s_query_regions & test_regions) != len(s_query_regions):
            signature_regions = target_to_overlapping_query(regions_to_overlap, d[k])
        else:
            signature_regions = d[k]
        if len(signature_regions) != 0:
            d_overlap[k] = signature_regions
    return d_overlap


[docs]def merge_cistromes(scplus_obj: SCENICPLUS,
                    cistromes_key: str = 'Unfiltered',
                    subset: pr.PyRanges = None):
    """Generate cistromes from motif enrichment tables

    Parameters
    ---------
    scplus_obj: :class:`SCENICPLUS`
        A :class:`SCENICPLUS` object with motif enrichment results from pycistarget (`scplus_obj.menr`).
        Several analyses can be included in the slot (topics/DARs/other; and different methods [Homer/DEM/cistarget]).
    cistromes_key: str, optional
        Key to store cistromes. Cistromes will stored at `scplus_obj.uns['Cistromes'][siganture_key]`
    subset: list
        A PyRanges containing a set of regions that regions in cistromes must overlap. This is useful when
        aiming for cell type specific cistromes for example (e.g. providing the cell type's MACS peaks)
    """
    menr = scplus_obj.menr
    # Get signatures from Homer/Cistarget outputs
    signatures = _get_signatures_as_dict(_signatures_to_iter(menr))

    #split direct and indirect signatures
    signatures_direct = {x: signatures[x] for x in signatures.keys() if not 'extended' in x}
    signatures_extend = {x: signatures[x] for x in signatures.keys() if     'extended' in x}
    
    #Remove empty signature
    signatures_direct = {k:v for k,v in signatures_direct.items() if v}
    signatures_extend = {k:v for k,v in signatures_extend.items() if v}

    if len(signatures_direct.keys()) == 0 and len(signatures_extend.keys()) == 0:
        raise ValueError("No cistromes found! Make sure that the motif enrichment results look good!")
    
    #merge regions by TF name
    if len(signatures_direct.keys()) > 0:
        merged_signatures_direct = _merge_dict_of_signatures(signatures_direct, suffix = '')
    if len(signatures_extend.keys()) > 0:
        merged_signatures_extend = _merge_dict_of_signatures(signatures_extend, suffix = '_extended')
    
    #overlap regions with scplus_regions
    regions = set(scplus_obj.region_names)
    pr_regions = pr.PyRanges(region_names_to_coordinates(regions))
    if subset is not None:
        #make sure subset is in scplus_regions coordinate system
        regions_to_overlap = target_to_overlapping_query(pr_regions, subset)
    else:
        regions_to_overlap = pr_regions
    
    if len(signatures_direct.keys()) > 0:
        merged_signatures_direct = _overlap_if_necessary(merged_signatures_direct, regions, regions_to_overlap)
    if len(signatures_extend.keys()) > 0:
        merged_signatures_extend = _overlap_if_necessary(merged_signatures_extend, regions, regions_to_overlap)
    
    # Sort alphabetically
    if len(signatures_direct.keys()) > 0:
        merged_signatures_direct = dict(
            sorted(merged_signatures_direct.items(), key=lambda x: x[0].lower()))
    if len(signatures_extend.keys()) > 0:
        merged_signatures_extend = dict(
            sorted(merged_signatures_extend.items(), key=lambda x: x[0].lower()))
    # Combine
    if len(signatures_direct.keys()) > 0 and len(signatures_extend.keys()) > 0:
        merged_signatures = {**merged_signatures_direct,
                             **merged_signatures_extend}
    elif len(signatures_direct.keys()) > 0 and len(signatures_extend.keys()) == 0:
        merged_signatures = merged_signatures_direct
    elif len(signatures_extend.keys()) > 0 and len(signatures_direct.keys()) == 0:
        merged_signatures = merged_signatures_extend
    # Add number of regions
    merged_signatures = {
        x + '_(' + str(len(merged_signatures[x])) + 'r)': merged_signatures[x] for x in merged_signatures.keys()}
    # Store in object
    if not 'Cistromes' in scplus_obj.uns.keys():
        scplus_obj.uns['Cistromes'] = {}
    scplus_obj.uns['Cistromes'][cistromes_key] = merged_signatures

# Score cistromes in cells


[docs]def score_cistromes(scplus_obj: SCENICPLUS,
                    ranking: CistopicImputedFeatures,
                    cistromes_key: str = 'Unfiltered',
                    enrichment_type: str = 'region',
                    auc_threshold: float = 0.05,
                    normalize: bool = False,
                    n_cpu: int = 1):
    """
    Get enrichment of a region signature in cells  using AUCell (Van de Sande et al., 2020)

    Parameters
    ---------
    scplus_obj: :class:`SCENICPLUS`
        A :class:`SCENICPLUS` object with motif enrichment results from pycistarget (`scplus_obj.menr`).
    rankings: CistopicImputedFeatures
        A CistopicImputedFeatures object with ranking values
    cistromes_key: str, optional
        Key to store where cistromes are stored. Cistromes will retrieved from `scplus_obj.uns['Cistromes'][siganture_key]`
        and AUC values will be stored in `scplus_obj.uns['Cistromes_AUC'][cistromes_key]`.
    enrichment_type: str
        Whether features are genes or regions
    auc_threshold: float
        The fraction of the ranked genome to take into account for the calculation of the Area Under the recovery Curve. Default: 0.05
    normalize: bool
        Normalize the AUC values to a maximum of 1.0 per regulon. Default: False
    num_workers: int
        The number of cores to use. Default: 1

    References
    ---------
    Van de Sande, B., Flerin, C., Davie, K., De Waegeneer, M., Hulselmans, G., Aibar, S., ... & Aerts, S. (2020). A scalable SCENIC workflow for single-cell gene 
    regulatory network analysis. Nature Protocols, 15(7), 2247-2276.
    """
    if not 'Cistromes_AUC' in scplus_obj.uns.keys():
        scplus_obj.uns['Cistromes_AUC'] = {}
    scplus_obj.uns['Cistromes_AUC'][cistromes_key] = signature_enrichment(ranking,
                                                                          scplus_obj.uns['Cistromes'][cistromes_key],
                                                                          enrichment_type,
                                                                          auc_threshold,
                                                                          normalize,
                                                                          n_cpu)
# Create pseudobulks


[docs]def generate_pseudobulks(scplus_obj: SCENICPLUS,
                         variable: str,
                         normalize_expression: bool = True,
                         auc_key: str = 'Cistromes_AUC',
                         signature_key: str = 'Unfiltered',
                         nr_cells: int = 10,
                         nr_pseudobulks: int = 100,
                         seed: int = 555):
    """
    Generate pseudobulks based on the cistrome AUC matrix and gene expression

    Parameters
    ---------
    scplus_obj: :class:`SCENICPLUS`
        A :class:`SCENICPLUS` object with scored cistromes (`scplus_obj.uns['Cistromes_AUC'][cistromes_key]`)
    variable: str, optional
        Variable to create pseudobulks by. It must ve a column in `scplus_obj.metadata_cell`
    cistromes_key: str, optional
        Key to store where cistromes AUC values are stored. Cistromes AUC will retrieved from 
        `scplus_obj.uns['Cistromes_AUC'][cistromes_key]` and the pseudobulk matrix will be stored
        at `scplus_obj.uns['Pseudobulk']['Cistromes_AUC'][variable][cistromes_key]` and `scplus_obj.uns['Pseudobulk'][variable]['Expression']`
    nr_cells : int, optional
        Number of cells to include per pseudobulk.
    nr_pseudobulks: int, optional
        Number of pseudobulks to generate per class
    seed: int
        Seed to ensure that pseudobulk are reproducible.
    """
    cell_data = scplus_obj.metadata_cell
    cistromes_auc = scplus_obj.uns[auc_key][signature_key]
    cell_data = cell_data.loc[cistromes_auc.index, :]
    dgem = pd.DataFrame(
        scplus_obj.X_EXP, index=scplus_obj.cell_names, columns=scplus_obj.gene_names).copy()
    categories = list(set(cell_data.loc[:, variable]))
    cistrome_auc_agg_list = list()
    dgem_agg_list = list()
    cell_names = list()
    if normalize_expression:
        dgem = dgem.T / dgem.T.sum(0) * 10**6
        dgem = np.log1p(dgem).T
    for category in categories:
        cells = cell_data[cell_data.loc[:, variable]
                          == category].index.tolist()
        for x in range(nr_pseudobulks):
            random.seed(x)
            sample_cells = sample(cells, nr_cells)
            sub_dgem = dgem.loc[sample_cells, :].mean(axis=0)
            sub_auc = cistromes_auc.loc[sample_cells, :].mean(axis=0)
            cistrome_auc_agg_list.append(sub_auc)
            dgem_agg_list.append(sub_dgem)
            cell_names.append(category + '_' + str(x))
    cistrome_auc_agg = pd.concat(cistrome_auc_agg_list, axis=1)
    cistrome_auc_agg.columns = cell_names
    dgem_agg = pd.concat(dgem_agg_list, axis=1)
    dgem_agg.columns = cell_names
    if not 'Pseudobulk' in scplus_obj.uns.keys():
        scplus_obj.uns['Pseudobulk'] = {}
    if not variable in scplus_obj.uns['Pseudobulk'].keys():
        scplus_obj.uns['Pseudobulk'][variable] = {}
    scplus_obj.uns['Pseudobulk'][variable]['Expression'] = dgem_agg
    if not auc_key in scplus_obj.uns['Pseudobulk'][variable].keys():
        scplus_obj.uns['Pseudobulk'][variable][auc_key] = {}
    scplus_obj.uns['Pseudobulk'][variable][auc_key][signature_key] = cistrome_auc_agg


[docs]def TF_cistrome_correlation(scplus_obj: SCENICPLUS,
                            variable: str = None,
                            use_pseudobulk: bool = True,
                            auc_key: str = 'Cistromes_AUC',
                            signature_key: str = 'Unfiltered',
                            out_key: str = 'Unfiltered',
                            subset: List[str] = None):
    """
    Get correlation between gene expression and cistrome accessibility

    Parameters
    ---------
    scplus_obj: :class:`SCENICPLUS`
        A :class:`SCENICPLUS` object with pseudobulk matrices (`scplus_obj.uns['Pseudobulk']`)
    variable: str, optional
        Variable used to create the pseudobulks. Must be a key in `scplus_obj.uns['Pseudobulk']`.
        Only required if use_pseudobulk is False.
    use_pseudobulk: bool, optional
        Whether to use pseudobulk matrix or actual values.
    cistromes_key: str, optional
        Key to retrieve the pseudobulk matrices.  Cistrome accessibility will be retrieved from
        `scplus_obj.uns['Pseudobulk'][variable]['Cistromes_AUC'][cistromes_key]` and 
        gene expression from `scplus_obj.uns['Pseudobulk'][variable]['Expression']`.
    out_key : str, optional
        Ouput key. Correlations will be stored at `scplus_obj.uns['TF_cistrome_correlation'][variable][out_key]`.
    subset: List, optional
        Subset of cells to be used to calculate correlations. Default: None (All)
    """
    if use_pseudobulk:
        dgem_agg = scplus_obj.uns['Pseudobulk'][variable]['Expression']
        cistromes_auc_agg = scplus_obj.uns['Pseudobulk'][variable][auc_key][signature_key]
    else:
        dgem_agg = pd.DataFrame(
            scplus_obj.X_EXP, index=scplus_obj.cell_names, columns=scplus_obj.gene_names).copy().T
        cistromes_auc_agg = scplus_obj.uns[auc_key][signature_key].copy().T

    if subset is not None:
        cell_data = pd.DataFrame([x.rsplit('_', 1)[0] for x in cistromes_auc_agg.columns],
                                 index=cistromes_auc_agg.columns).iloc[:, 0]
        subset_cells = cell_data[cell_data.isin(subset)].index.tolist()
        cistromes_auc_agg = cistromes_auc_agg.loc[:, subset_cells]
        dgem_agg = dgem_agg.loc[:, subset_cells]
    corr_df = pd.DataFrame(columns=['TF', 'Cistrome', 'Rho', 'P-value'])
    for tf in cistromes_auc_agg.index:
        # Handle _extended
        tf_rna = tf.split('_')[0]
        if tf_rna in dgem_agg.index:
            cistromes_auc_tf = cistromes_auc_agg.loc[tf, :]
            tf_expr = dgem_agg.loc[tf_rna, :]
            # Exception in case TF is only expressed in 1 cell
            # TFs expressed in few cells could be filtered too
            try:
                corr_1, _1 = pearsonr(tf_expr, cistromes_auc_tf)
                x = {'TF': tf_rna,
                     'Cistrome': tf,
                     'Rho': corr_1,
                     'P-value': _1}
                corr_df = corr_df.append(pd.DataFrame(
                    data=x, index=[0]), ignore_index=True)
            except:
                continue
    corr_df = corr_df.dropna()
    corr_df['Adjusted_p-value'] = p_adjust_bh(corr_df['P-value'])

    if not 'TF_cistrome_correlation' in scplus_obj.uns.keys():
        scplus_obj.uns['TF_cistrome_correlation'] = {}
    if not out_key in scplus_obj.uns['TF_cistrome_correlation'].keys():
        scplus_obj.uns['TF_cistrome_correlation'][out_key] = {}
    scplus_obj.uns['TF_cistrome_correlation'][out_key] = corr_df

[docs]def cistrome_correlation(scplus_obj: SCENICPLUS,
                         variable: str = None,
                         use_pseudobulk: bool = True,
                         auc_key: str = 'Cistromes_AUC',
                         signature_key1: str = 'Gene_based',
                         signature_key2: str = 'Region_based',
                         out_key: str = 'Unfiltered',
                         subset: List[str] = None):
        """
        Get correlation between gene-based and region-based AUC

        Parameters
        ---------
        scplus_obj: :class:`SCENICPLUS`
            A :class:`SCENICPLUS` object with pseudobulk matrices (`scplus_obj.uns['Pseudobulk']`)
        variable: str, optional
            Variable used to create the pseudobulks. Must be a key in `scplus_obj.uns['Pseudobulk']`.
            Only required if use_pseudobulk is False.
        use_pseudobulk: bool, optional
            Whether to use pseudobulk matrix or actual values.
        auc_key: str, optional
            Key to retrieve the pseudobulk matrices.  Cistrome AUCs will be retrieved from
            `scplus_obj.uns['Pseudobulk'][variable]['Cistromes_AUC'][cistromes_key][signature_key1]` and
            gene expression from
            `scplus_obj.uns['Pseudobulk'][variable]['Cistromes_AUC'][cistromes_key][signature_key2]`.
        out_key : str, optional
            Output key. Correlations will be stored at `scplus_obj.uns['cistrome_correlation'][variable][out_key]`.
        subset: List, optional
            Subset of cells to be used to calculate correlations. Default: None (All)
        """
        if use_pseudobulk:
            gene_auc_agg = scplus_obj.uns['Pseudobulk'][variable][auc_key][signature_key1].copy()
            region_auc_agg = scplus_obj.uns['Pseudobulk'][variable][auc_key][signature_key2].copy()
        else:
            gene_auc_agg = scplus_obj.uns[auc_key][signature_key1].copy().T
            region_auc_agg = scplus_obj.uns[auc_key][signature_key2].copy().T

        if subset is not None:
            cell_data = pd.DataFrame([x.rsplit('_', 1)[0] for x in gene_auc_agg.columns],
                                     index=gene_auc_agg.columns).iloc[:, 0]
            subset_cells = cell_data[cell_data.isin(subset)].index.tolist()
            gene_auc_agg = gene_auc_agg.loc[:, subset_cells]
            region_auc_agg = region_auc_agg.loc[:, subset_cells]

        corr_df = pd.DataFrame(columns=['Signature1', 'Signature2', 'Rho', 'P-value'])

        # cistrome naming includes number of genes/regions, so need to create matching names
        # x.rsplit('_', 1) splits at first _ from the right
        gene_auc_agg['cistrome_id_short'] = gene_auc_agg.index.map(lambda x: x.rsplit('_', 1)[0])
        gene_auc_agg['cistrome_id'] = gene_auc_agg.index
        gene_auc_agg = gene_auc_agg.set_index('cistrome_id_short')

        region_auc_agg['cistrome_id_short'] = region_auc_agg.index.map(lambda x: x.rsplit('_', 1)[0])
        region_auc_agg['cistrome_id'] = region_auc_agg.index
        region_auc_agg = region_auc_agg.set_index('cistrome_id_short')

        for tf in gene_auc_agg.index:
            # I think all TFs should match, but just in case
            if tf in region_auc_agg.index:
                gene_auc_agg_tf = gene_auc_agg.loc[tf, gene_auc_agg.columns != 'cistrome_id']
                region_auc_agg_tf = region_auc_agg.loc[tf, gene_auc_agg.columns != 'cistrome_id']

                # record orginal cistrome name for results
                signature1_id = gene_auc_agg.loc[tf, 'cistrome_id']
                signature2_id = region_auc_agg.loc[tf, 'cistrome_id']
                # Exception in case TF is only expressed in 1 cell
                # TFs expressed in few cells could be filtered too
                try:
                    corr_1, _1 = pearsonr(gene_auc_agg_tf, region_auc_agg_tf)
                    x = {'Signature1': signature1_id,
                         'Signature2': signature2_id,
                         'Rho': corr_1,
                         'P-value': _1}
                    corr_df = pd.concat([corr_df,
                                         pd.DataFrame(data=x, index=[0])],
                                        ignore_index=True)
                except:
                    continue
        corr_df = corr_df.dropna()
        corr_df['Adjusted_p-value'] = p_adjust_bh(corr_df['P-value'])

        if not 'cistrome_correlation' in scplus_obj.uns.keys():
            scplus_obj.uns['cistrome_correlation'] = {}
        if not out_key in scplus_obj.uns['cistrome_correlation'].keys():
            scplus_obj.uns['cistrome_correlation'][out_key] = {}
        scplus_obj.uns['cistrome_correlation'][out_key] = corr_df


[docs]def prune_plot(scplus_obj: SCENICPLUS,
               name: str,
               pseudobulk_variable: str = None,
               auc_key: str = 'Cistromes_AUC',
               signature_key: str = 'Unfiltered',
               use_pseudobulk: bool = True,
               show_dot_plot: bool = True,
               show_line_plot: bool = False,
               color_dict=None,
               subset=None,
               seed=555,
               ax: plt.axes = None,
               **kwargs):
    """
    Plot cistrome accessibility versus TF expression

    Parameters
    ---------
    scplus_obj: :class:`SCENICPLUS`
        A :class:`SCENICPLUS` object.
    cistrome_name: str, optional
        Cistrome to plot. The TF name (or followed by extended) is enough to quert.
    pseudobulk_variable: str, optional
        Key to retrieve the pseudobulk matrices.  Cistrome accessibility will be retrieved from
        `scplus_obj.uns['Pseudobulk'][pseudobulk_variable]['Cistromes_AUC'][cistromes_key]` and 
        gene expression from `scplus_obj.uns['Pseudobulk'][pseudobulk_variable]['Expression']`.
    cistromes_key: str, optional
        Key to retrieve the pseudobulk matrices.  Cistrome accessibility will be retrieved from
        `scplus_obj.uns['Pseudobulk'][pseudobulk_variable]['Cistromes_AUC'][cistromes_key]` and 
        gene expression from `scplus_obj.uns['Pseudobulk'][pseudobulk_variable]['Expression']`.
    use_pseudobulk: bool, optional
        Whether to use pseudobulk matrix or actual values.
    show_dot_plot: bool, optional
        Whether to show dots in plot
    show_line_plot: bool, optional
        Whether to show line fitting to plot
    color_dict: Dict, optional
        Color dictionary to specify colors
    subset: List, optional
        Subset of pseudobulks/cells to use
    seed: int
        Seed to ensure that pseudobulk are reproducible.
    ax: plt.axes, optional
        matplotlib axes to plot to.
    **kwargs:
        Parameters for seaborn plotting.

    """
    if use_pseudobulk:
        dgem = scplus_obj.uns['Pseudobulk'][pseudobulk_variable]['Expression'].copy(
        )
        cistromes_auc = scplus_obj.uns['Pseudobulk'][pseudobulk_variable][auc_key][signature_key].copy(
        )
        cell_data = pd.DataFrame([x.rsplit('_', 1)[
                                 0] for x in cistromes_auc.columns], index=cistromes_auc.columns).iloc[:, 0]
    else:
        dgem = pd.DataFrame(scplus_obj.X_EXP, index=scplus_obj.cell_names,
                            columns=scplus_obj.gene_names).copy().T
        cistromes_auc = scplus_obj.uns[auc_key][signature_key].copy().T
        cell_data = scplus_obj.metadata_cell.loc[cistromes_auc.columns,
                                                 pseudobulk_variable]
    if subset is None:
        tf_expr = dgem.loc[name.split('_')[0], :]
        tf_acc = cistromes_auc.index[cistromes_auc.index.str.contains(
            name + '_(', regex=False)][0]
        cistromes_auc_tf = cistromes_auc.loc[tf_acc, :]
    else:
        subset_cells = cell_data[cell_data.isin(subset)].index.tolist()
        cell_data = cell_data.loc[subset_cells]
        tf_expr = dgem.loc[name.split('_')[0], subset_cells]
        tf_acc = cistromes_auc.index[cistromes_auc.index.str.contains(
            name + '_(', regex=False)][0]
        cistromes_auc_tf = cistromes_auc.loc[tf_acc, subset_cells]
    random.seed(seed)
    if cell_data is not None:
        categories = list(set(cell_data))
        if color_dict is None:
            color = list(map(
                lambda i: "#" +
                "%06x" % random.randint(0, 0xFFFFFF), range(len(categories))
            ))
            color_dict = dict(zip(categories, color))
        color = [color_dict[x] for x in cell_data]
        patchList = []
        for key in color_dict:
            data_key = mpatches.Patch(color=color_dict[key], label=key)
            patchList.append(data_key)
        if show_dot_plot:
            data = pd.DataFrame(list(zip(tf_expr, cistromes_auc_tf, cell_data)),
                                columns=['TF_Expression', auc_key, 'Variable'])
            sns.scatterplot(x="TF_Expression", y=auc_key, data=data,
                            hue='Variable', palette=color_dict, ax=ax,  **kwargs)
            if ax is None:
                plt.legend(handles=patchList, bbox_to_anchor=(
                    1.04, 1), loc="upper left")
            else:
                ax.legend(handles=patchList, bbox_to_anchor=(
                    1.04, 1), loc="upper left")
        if show_line_plot:
            data = pd.DataFrame(list(zip(tf_expr, cistromes_auc_tf, cell_data)),
                                columns=['TF_Expression', auc_key, 'Variable'])
            sns.regplot(x="TF_Expression", y=auc_key, data=data,
                        scatter_kws={'color': color}, ax=ax, **kwargs)
            if ax is None:
                plt.legend(handles=patchList, bbox_to_anchor=(
                    1.04, 1), loc="upper left")
            else:
                ax.legend(handles=patchList, bbox_to_anchor=(
                    1.04, 1), loc="upper left")
    else:
        if show_dot_plot:
            data = pd.DataFrame(list(zip(tf_expr, cistromes_auc_tf, cell_data)),
                                columns=['TF_Expression', auc_key, 'Variable'])
            sns.scatterplot(x="TF_Expression", y=auc_key,
                            data=data, ax=ax,  **kwargs)
        if show_line_plot:
            data = pd.DataFrame(list(zip(tf_expr, cistromes_auc_tf)),
                                columns=['TF_Expression', auc_key])
            sns.regplot(x="TF_Expression", y=auc_key,
                        data=data, ax=ax, **kwargs)

    corr, _ = pearsonr(tf_expr, cistromes_auc_tf)
    if ax is None:
        plt.xlabel('TF_Expression\nCorrelation ' + str(corr) +
                   '\n' + 'P-value:' + str(_), fontsize=10)
        plt.title(name)
    else:
        ax.set_xlabel('TF_Expression\nCorrelation ' + str(corr) +
                      '\n' + 'P-value:' + str(_), fontsize=10)
        ax.set_title(name)
    if ax is None:
        plt.show()