# Source code for scenicplus.networks

"""export eRegulons to eGRN network and plot.
"""

import json
import pandas as pd
from typing import Union, Dict, Sequence, Optional, List
import anndata
import scanpy as sc
import random
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import networkx as nx
from matplotlib.colors import to_rgba, to_hex
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def _format_df_nx(df, key, var):
    """
    A helper function to format differential test results

    Parameters
    ---------
    df: pd.DataFrame
        Table holding at least the columns 'names' and 'logfoldchanges'.
    key: str
        Group label, used as suffix of the output column name.
    var: str
        Variable name, used as prefix of the output column name.

    Return
    ---------
    A new one-column data frame named '<var>_Log2FC_<key>' holding the
    'logfoldchanges' values, indexed by the 'names' values (index has no name).
    The input data frame is not modified.
    """
    # Build a fresh frame instead of re-indexing `df` in place, so the
    # caller's table keeps its original index.
    return pd.DataFrame(
        {var + '_Log2FC_' + key: df['logfoldchanges'].to_numpy()},
        index=pd.Index(df['names'].to_numpy(), name=None))


def _get_log2fc_nx(scplus_obj: 'SCENICPLUS',
                  variable,
                  features,
                  contrast: Optional[str] = 'gene'
                  ):
    """
    A helper function to derive log2fc changes

    Parameters
    ---------
    scplus_obj: SCENICPLUS
        Object providing `X_EXP`, `X_ACC`, `cell_names`, `gene_names`, `region_names`
        and `metadata_cell`.
    variable: str
        Column of `scplus_obj.metadata_cell` defining the groups to compare.
    features: list
        Genes (contrast='gene') or regions (contrast='region') to test.
    contrast: str, optional
        Either 'gene' (uses `X_EXP`) or 'region' (uses the transpose of `X_ACC`).

    Return
    ---------
    A data frame with one '<variable>_Log2FC_<group>' column per group, as formatted
    by `_format_df_nx`.

    Raises
    ---------
    ValueError
        If `contrast` is neither 'gene' nor 'region'.
    """
    if contrast == 'gene':
        adata = anndata.AnnData(X=scplus_obj.X_EXP, obs=pd.DataFrame(
            index=scplus_obj.cell_names), var=pd.DataFrame(index=scplus_obj.gene_names))
    elif contrast == 'region':
        adata = anndata.AnnData(X=scplus_obj.X_ACC.T, obs=pd.DataFrame(
            index=scplus_obj.cell_names), var=pd.DataFrame(index=scplus_obj.region_names))
    else:
        # Fail early with a clear message instead of hitting an unbound `adata` below.
        raise ValueError(
            "contrast must be 'gene' or 'region', got {!r}".format(contrast))
    adata.obs = pd.DataFrame(scplus_obj.metadata_cell[variable])
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    # Work on a real copy of the subset so the test results are stored on it
    # rather than on a view of the full matrix.
    adata = adata[:, features].copy()
    sc.tl.rank_genes_groups(
        adata, variable, method='wilcoxon', corr_method='bonferroni')
    groups = adata.uns['rank_genes_groups']['names'].dtype.names
    diff_list = [_format_df_nx(sc.get.rank_genes_groups_df(
        adata, group=group), group, variable) for group in groups]
    return pd.concat(diff_list, axis=1)



# [docs]
def create_nx_tables(scplus_obj: 'SCENICPLUS',
                     eRegulon_metadata_key: str ='eRegulon_metadata',
                     subset_eRegulons: List = None,
                     subset_regions: List = None,
                     subset_genes: List = None,
                     add_differential_gene_expression: bool = False,
                     add_differential_region_accessibility: bool = False,
                     differential_variable: List = None):
    """
    A function to format eRegulon data into tables for plotting eGRNs.

    Parameters
    ---------
    scplus_obj: SCENICPLUS
        A SCENICPLUS object with eRegulons
    eRegulon_metadata_key: str, optional
        Key where the eRegulon metadata dataframe is stored
    subset_eRegulons: list, optional
        List of eRegulons to subset. Each entry is matched literally (regular expression
        characters such as '+' are escaped) against 'Region_signature_name', and must be
        followed there by '_' and a non-alphanumeric character.
    subset_regions: list, optional
        List of regions to subset
    subset_genes: list, optional
        List of genes to subset
    add_differential_gene_expression: bool, optional
        Whether to calculate differential gene expression logFC for a given variable
    add_differential_region_accessibility: bool, optional
        Whether to calculate differential region accessibility logFC for a given variable
    differential_variable: list, optional
        Variable(s) to calculate differential gene expression or region accessibility.
        A single string is treated as a one-element list.

    Return
    ---------
    A dictionary with two entries: 'Edge', holding the edge feature tables ('TF2R', 'R2G', 'TF2G'),
    and 'Node', holding the node feature tables ('TF', 'Gene', 'Region').
    """
    import re

    if differential_variable is None:
        differential_variable = []
    elif isinstance(differential_variable, str):
        # Iterating a bare string would test each character as a variable name.
        differential_variable = [differential_variable]

    er_metadata = scplus_obj.uns[eRegulon_metadata_key].copy()
    if subset_eRegulons is not None:
        # Escape the names: eRegulon names may contain '+', which is a quantifier in a regex.
        pattern = '|'.join(
            re.escape(x) + '_[^a-zA-Z0-9]' for x in subset_eRegulons)
        er_metadata = er_metadata[er_metadata['Region_signature_name'].str.contains(
            pattern, na=False)]
    if subset_regions is not None:
        er_metadata = er_metadata[er_metadata['Region'].isin(subset_regions)]
    if subset_genes is not None:
        er_metadata = er_metadata[er_metadata['Gene'].isin(subset_genes)]

    def _edge_table(source, target, drop_column, exclude_columns):
        # Keep the columns relevant for this edge type, drop the node column that is
        # not part of the edge, de-duplicate, and put source/target first.
        keep = er_metadata.columns.difference(exclude_columns)
        table = er_metadata[keep].drop(drop_column, axis=1).drop_duplicates()
        attribute_columns = table.columns.difference([source, target]).tolist()
        return table[[source, target] + attribute_columns]

    def _node_table(node_type, names):
        # One row per node; first column is the node type, second the node name.
        table = pd.DataFrame(node_type, index=names, columns=['Node_type'])
        table[node_type] = names
        return table

    nx_tables = {}
    nx_tables['Edge'] = {}
    nx_tables['Node'] = {}
    # Generate edge tables
    r2g_columns = [x for x in er_metadata.columns if 'R2G' in x]
    tf2g_columns = [x for x in er_metadata.columns if 'TF2G' in x]
    nx_tables['Edge']['TF2R'] = _edge_table(
        'TF', 'Region', 'Gene', r2g_columns + tf2g_columns)
    nx_tables['Edge']['R2G'] = _edge_table(
        'Region', 'Gene', 'TF', tf2g_columns)
    nx_tables['Edge']['TF2G'] = _edge_table(
        'TF', 'Gene', 'Region', r2g_columns)
    # Generate node tables (order of first appearance, so the result is reproducible)
    tfs = er_metadata['TF'].unique().tolist()
    tf_set = set(tfs)
    # Genes that are also TFs are only listed in the TF table
    genes = [x for x in er_metadata['Gene'].unique().tolist()
             if x not in tf_set]
    regions = er_metadata['Region'].unique().tolist()
    nx_tables['Node']['TF'] = _node_table('TF', tfs)
    nx_tables['Node']['Gene'] = _node_table('Gene', genes)
    nx_tables['Node']['Region'] = _node_table('Region', regions)
    # Add gene logFC
    if add_differential_gene_expression:
        for var in differential_variable:
            for node_type in ('TF', 'Gene'):
                table = nx_tables['Node'][node_type]
                nx_tables['Node'][node_type] = pd.concat([table, _get_log2fc_nx(
                    scplus_obj, var, table.index.tolist(), contrast='gene')], axis=1)
    # Add region logFC
    if add_differential_region_accessibility:
        for var in differential_variable:
            table = nx_tables['Node']['Region']
            nx_tables['Node']['Region'] = pd.concat([table, _get_log2fc_nx(
                scplus_obj, var, table.index.tolist(), contrast='region')], axis=1)
    return nx_tables



def _format_nx_table_internal(nx_tables, table_type, table_id, color_by=None, transparency_by=None,
                              size_by=None, shape_by=None, label_size_by=None, label_color_by=None):
    """
    A helper function to format edge and node tables into graphs

    Parameters
    ---------
    nx_tables: dict
        Nested dictionary of tables, indexed as `nx_tables[table_type][table_id]`.
    table_type: str
        'Edge' produces an edge table; any other value produces a node table.
    table_id: str
        Name of the table to format. Each of the `*_by` dictionaries is only used if it
        has an entry for this name.
    color_by: dict, optional
        Per table: 'fixed_color', or 'variable' together with 'category_color'
        (alias 'categorical_color'; None draws seeded random colors) or 'continuous_color'
        (None uses 'viridis'; optional 'v_min'/'v_max'). Default is grey.
    transparency_by: dict, optional
        Per table: 'variable' (optional 'v_min', 'v_max', 'min_alpha'), or 'fixed_alpha'.
    size_by: dict, optional
        Per table: 'fixed_size', or 'variable' (optional 'min_size', 'max_size'). Default is 1.
    shape_by: dict, optional
        Per table: 'fixed_shape', or 'variable' with 'categorical_shape'. Default is 'ellipse'.
    label_size_by: dict, optional
        Per table: 'fixed_label_size', or 'variable' with 'categorical_label_size'. Default is 14.
    label_color_by: dict, optional
        Per table: 'fixed_label_color', or 'variable' with 'categorical_label_color'.
        Default is black.

    Return
    ---------
    A data frame with columns 'source', 'target', 'color', 'width', 'color_rgb', 'color_alpha'
    for edges, or 'group', 'label', 'color', 'size', 'shape', 'font_size', 'font_color',
    'color_rgb', 'color_alpha', 'font_color_rgb', 'font_color_alpha' for nodes. The first two
    columns are the first two columns of the input table.
    """
    color_by = {} if color_by is None else color_by
    transparency_by = {} if transparency_by is None else transparency_by
    size_by = {} if size_by is None else size_by
    shape_by = {} if shape_by is None else shape_by
    label_size_by = {} if label_size_by is None else label_size_by
    label_color_by = {} if label_color_by is None else label_color_by

    nx_tb = nx_tables[table_type][table_id]
    n_rows = nx_tb.shape[0]

    # Color
    if table_id in color_by:
        color_cfg = color_by[table_id]
        if 'fixed_color' in color_cfg:
            # Convert to RGBA right away so the alpha channel can be set below,
            # also when the color is given as a name or hex string.
            color = [to_rgba(color_cfg['fixed_color'])] * n_rows
        else:
            color_var = nx_tb[color_cfg['variable']]
            if 'category_color' in color_cfg or 'categorical_color' in color_cfg:
                color_dict = color_cfg.get(
                    'category_color', color_cfg.get('categorical_color'))
                if color_dict is None:
                    # Private generator: same seeded sequence, without reseeding the
                    # global `random` state.
                    rng = random.Random(555)
                    color_dict = {category: "#" + "%06x" % rng.randint(0, 0xFFFFFF)
                                  for category in pd.unique(color_var)}
                color = [to_rgba(color_dict[x]) for x in color_var]
            elif 'continuous_color' in color_cfg:
                color_map = color_cfg['continuous_color']
                if color_map is None:
                    color_map = 'viridis'
                color = _get_colors(color_var, color_map,
                                    color_cfg.get('v_min'), color_cfg.get('v_max'))
            else:
                raise ValueError(
                    "Color settings for '{}' need one of 'fixed_color', 'category_color' "
                    "or 'continuous_color'".format(table_id))
    else:
        color = [to_rgba('grey')] * n_rows
    # One RGBA row per table row (also for an empty table)
    color = np.array(color, dtype=float).reshape(-1, 4)

    # Transparency
    if table_id in transparency_by:
        alpha_cfg = transparency_by[table_id]
        alpha_variable = alpha_cfg.get('variable')
        if alpha_variable is None:
            if 'fixed_alpha' not in alpha_cfg:
                raise KeyError(
                    "Transparency settings for '{}' need 'variable' or 'fixed_alpha'".format(table_id))
            use_fixed_alpha = True
        else:
            # Historical convention: a variable named 'fixed_alpha' selects the fixed value
            use_fixed_alpha = 'fixed_alpha' in alpha_variable
        if use_fixed_alpha:
            color[:, -1] = alpha_cfg['fixed_alpha']
        else:
            norm = plt.Normalize(alpha_cfg.get('v_min'), alpha_cfg.get('v_max'))
            alpha = np.asarray(norm(nx_tb[alpha_variable].to_numpy()), dtype=float)
            min_alpha = alpha_cfg.get('min_alpha', 0.5)
            # Values above v_max would give alpha > 1, which is not a valid color
            color[:, -1] = np.clip(alpha, min_alpha, 1.0)

    # Size/Width
    if table_id in size_by:
        size_cfg = size_by[table_id]
        if 'fixed_size' in size_cfg:
            sw_var = [size_cfg['fixed_size']] * n_rows
        else:
            # Float copy: keeps the input table untouched and avoids truncating the
            # rescaled values when the column holds integers.
            sw_var = nx_tb[size_cfg['variable']].to_numpy().flatten(
                'F').astype(float)
            p_min = size_cfg.get('min_size', 3)
            p_max = size_cfg.get('max_size', 10)
            non_zero = sw_var != 0
            if non_zero.any():
                s_min = sw_var[non_zero].min()
                s_max = sw_var.max()
                if s_max == s_min:
                    # A single distinct value cannot be rescaled (division by zero);
                    # use the maximum size for it.
                    sw_var[non_zero] = p_max
                else:
                    sw_var[non_zero] = p_min + \
                        (sw_var[non_zero] - s_min) * \
                        ((p_max - p_min) / (s_max - s_min))
    else:
        sw_var = [1] * n_rows

    # Node shape
    if table_id in shape_by:
        shape_cfg = shape_by[table_id]
        if 'fixed_shape' in shape_cfg:
            shape_var = np.array([shape_cfg['fixed_shape']] * n_rows)
        elif 'categorical_shape' not in shape_cfg:
            print(
                'No categorical_shape dictionary provided, making all nodes circular!')
            shape_var = ['circular'] * n_rows
        else:
            shape_dict = shape_cfg['categorical_shape']
            shape_var = nx_tb[shape_cfg['variable']].apply(
                lambda x: shape_dict[x]).to_numpy()
    else:
        shape_var = ['ellipse'] * n_rows

    # Label size
    if table_id in label_size_by:
        label_size_cfg = label_size_by[table_id]
        if 'fixed_label_size' in label_size_cfg:
            label_size_var = np.array(
                [label_size_cfg['fixed_label_size']] * n_rows)
        elif 'categorical_label_size' not in label_size_cfg:
            print(
                'No categorical_label_size dictionary provided, using size 14 for all nodes!')
            label_size_var = [14] * n_rows
        else:
            label_size_dict = label_size_cfg['categorical_label_size']
            label_size_var = nx_tb[label_size_cfg['variable']].apply(
                lambda x: label_size_dict[x]).to_numpy()
    else:
        label_size_var = [14] * n_rows

    # Label color
    if table_id in label_color_by:
        label_color_cfg = label_color_by[table_id]
        if 'fixed_label_color' in label_color_cfg:
            label_color_var = np.array(
                [label_color_cfg['fixed_label_color']] * n_rows)
        elif 'categorical_label_color' not in label_color_cfg:
            print(
                'No categorical_label_color dictionary provided, using black for all nodes!')
            label_color_var = np.array([to_rgba('black')] * n_rows)
        else:
            label_color_dict = label_color_cfg['categorical_label_color']
            label_color_var = [to_rgba(label_color_dict[x])
                               for x in nx_tb[label_color_cfg['variable']]]
    else:
        label_color_var = np.array([to_rgba('black')] * n_rows)

    color = [to_hex(x, keep_alpha=True) for x in color]
    label_color_var = [to_hex(x, keep_alpha=True) for x in label_color_var]
    dt1 = nx_tb.iloc[:, 0:2].reset_index(drop=True)
    if table_type == 'Edge':
        dt2 = pd.DataFrame([color, sw_var]).T.reset_index(drop=True)
    else:
        dt2 = pd.DataFrame([color, sw_var, shape_var, label_size_var,
                           label_color_var]).T.reset_index(drop=True)

    scaler = MinMaxScaler(feature_range=(200, 255))

    def _scale_alpha(alpha_values):
        # Rescale alpha values to 200-255; a constant alpha becomes fully opaque (255).
        if len(alpha_values) == 0:
            return []
        scaled = scaler.fit_transform(
            np.array(alpha_values).reshape(-1, 1)).ravel()
        if len(set(scaled)) == 1:
            return [255] * len(scaled)
        return scaled

    dt = pd.concat([dt1, dt2], axis=1)
    # Column 2 is the '#rrggbbaa' color; split it into opaque color and alpha
    color = dt.iloc[:, 2]
    dt['color_rgb'] = [to_hex(to_rgba(x)) for x in color]
    dt['color_alpha'] = _scale_alpha([to_rgba(x)[3] for x in color])
    if table_type == 'Edge':
        dt.columns = ['source', 'target', 'color', 'width', 'color_rgb', 'color_alpha']
    else:
        # Column 6 is the label color
        font_color = dt.iloc[:, 6]
        dt['font_color_rgb'] = [to_hex(to_rgba(x)) for x in font_color]
        dt['font_color_alpha'] = _scale_alpha(
            [to_rgba(x)[3] for x in font_color])
        dt.columns = ['group', 'label', 'color',
                      'size', 'shape', 'font_size', 'font_color', 'color_rgb', 'color_alpha',
                      'font_color_rgb', 'font_color_alpha']
    return dt


def _get_colors(inp, cmap_name, vmin=None, vmax=None):
    """
    A function to get color values from a continuous vector and a color map

    Parameters
    ---------
    inp:
        Continuous values to map to colors.
    cmap_name:
        Color map, passed to `matplotlib.pyplot.get_cmap`.
    vmin, vmax: float, optional
        Limits passed to `plt.Normalize`.

    Return
    ---------
    The result of calling the color map on the normalized values.
    """
    # plt.get_cmap instead of matplotlib.cm.get_cmap, which was deprecated and later
    # removed from matplotlib.
    color_map = plt.get_cmap(cmap_name)
    norm = plt.Normalize(vmin, vmax)
    return color_map(norm(inp))



# [docs]
def create_nx_graph(nx_tables: Dict,
                    use_edge_tables: List = None,
                    color_edge_by: Dict = None,
                    transparency_edge_by: Dict = None,
                    width_edge_by: Dict = None,
                    color_node_by: Dict = None,
                    transparency_node_by: Dict = None,
                    size_node_by: Dict = None,
                    shape_node_by: Dict = None,
                    label_size_by: Dict = None,
                    label_color_by: Dict = None,
                    layout: str = 'concentrical_layout',
                    lc_dist_genes: float = 0.8,
                    lc_dist_TF: float = 0.1,
                    scale_position_by: float = 250):
    """
    Format node/edge feature tables into a graph

    Parameters
    ---------
    nx_tables: Dict
        Dictionary with node/edge feature tables as produced by `create_nx_tables`
    use_edge_tables: List, optional
        List of edge tables to use. Default: ['TF2R', 'R2G'].
    color_edge_by: Dict, optional
        A dictionary containing for a given edge key the variable and color map to color edges by.
        If the variable is categorical, the entry 'category_color' can be provided as a dictionary with
        category: color. If it is a continuous variable, a color map can be provided as 'continuous_color' and
        the entries v_max and v_min can be provided to control the min and max values of the scale.
        Alternatively, one fixed color can be used by adding an entry fixed_color: color to the dictionary.
    transparency_edge_by: Dict, optional
        A dictionary containing for a given edge key the variable and the alpha settings. The variable
        name has to be provided (only continuous variables accepted), together with v_max/v_min/min_alpha
        entries if desired. Alternatively, one fixed alpha can be used by using 'fixed_alpha' as variable
        and adding an entry fixed_alpha: alpha to the dictionary.
    width_edge_by: Dict, optional
        A dictionary containing for a given edge key the variable and the max and min sizes. The variable
        name has to be provided (only continuous variables accepted), together with max_size/min_size
        entries if desired. Alternatively, one fixed size can be used by adding an entry fixed_size: size
        to the dictionary.
    color_node_by: Dict, optional
        Same as `color_edge_by`, for a given node key.
    transparency_node_by: Dict, optional
        Same as `transparency_edge_by`, for a given node key.
    size_node_by: Dict, optional
        Same as `width_edge_by`, for a given node key.
    shape_node_by: Dict, optional
        A dictionary containing for a given node key the variable and shapes. The variable
        name has to be provided (only categorical variables accepted), together with an entry
        'categorical_shape' holding a dictionary category: shape. Alternatively, one fixed shape can be
        used by adding an entry fixed_shape: shape to the dictionary.
    label_size_by: Dict, optional
        A dictionary containing for a given node key the variable and label sizes. The variable
        name has to be provided (only categorical variables accepted), together with an entry
        'categorical_label_size' holding a dictionary category: size. Alternatively, one fixed size can
        be used by adding an entry fixed_label_size: size to the dictionary.
    label_color_by: Dict, optional
        A dictionary containing for a given node key the variable and a color dictionary. The variable
        name has to be provided (only categorical variables accepted), together with an entry
        'categorical_label_color' holding a dictionary category: color. Alternatively, one fixed color
        can be used by adding an entry fixed_label_color: color to the dictionary.
    layout: str, optional
        Layout to use. 'concentrical_layout' selects the SCENIC+ custom layout; any other value uses
        kamada_kawai_layout (from networkx).
    lc_dist_genes: float, optional
        Distance between regions and genes. Only used if using concentrical_layout.
    lc_dist_TF: float, optional
        Distance between TF and regions. Only used if using concentrical_layout.
    scale_position_by: int, optional
        Value to scale positions for visualization in pyvis.

    Return
    ---------
    A networkx graph, positions, and node/edges feature tables
    (in the order: graph, positions, edge table, node table).
    """
    if use_edge_tables is None:
        use_edge_tables = ['TF2R', 'R2G']
    color_edge_by = {} if color_edge_by is None else color_edge_by
    transparency_edge_by = {} if transparency_edge_by is None else transparency_edge_by
    width_edge_by = {} if width_edge_by is None else width_edge_by
    color_node_by = {} if color_node_by is None else color_node_by
    transparency_node_by = {} if transparency_node_by is None else transparency_node_by
    size_node_by = {} if size_node_by is None else size_node_by
    shape_node_by = {} if shape_node_by is None else shape_node_by
    label_size_by = {} if label_size_by is None else label_size_by
    label_color_by = {} if label_color_by is None else label_color_by

    # Get node table names
    nodes_per_edge_table = {'TF2R': ('TF', 'Region'),
                            'TF2G': ('TF', 'Gene'),
                            'R2G': ('Region', 'Gene')}
    use_node_tables = set()
    for edge_table in use_edge_tables:
        use_node_tables.update(nodes_per_edge_table.get(edge_table, ()))
    use_node_tables = sorted(use_node_tables, reverse=True)

    # Create graph
    edge_tables = pd.concat([_format_nx_table_internal(
        nx_tables, 'Edge', x, color_edge_by, transparency_edge_by, width_edge_by, {}) for x in use_edge_tables])
    edge_tables.dropna(axis = 0, how = 'any', inplace = True)
    G = nx.from_pandas_edgelist(edge_tables, edge_attr=True)
    # Add node tables
    node_tables = pd.concat([_format_nx_table_internal(nx_tables, 'Node', x, color_node_by, transparency_node_by,
                            size_node_by, shape_node_by, label_size_by, label_color_by) for x in use_node_tables])
    node_tables.index = node_tables['label']
    node_tables.dropna(axis = 0, how = 'any', inplace = True)
    node_tables_d = node_tables.to_dict()
    for key, values in node_tables_d.items():
        nx.set_node_attributes(G, values, name=key)
    nx.set_node_attributes(G, node_tables_d['label'], name='title')
    # Per-node font settings, stored as one dictionary with 'size' and 'color'
    font_nt_d = node_tables[['font_size', 'font_color']].rename(
        columns={'font_size': 'size', 'font_color': 'color'}).to_dict(orient='index')
    nx.set_node_attributes(G, font_nt_d, name='font')
    if layout == 'concentrical_layout':
        pos = concentrical_layout(G, dist_genes=lc_dist_genes, dist_TF=lc_dist_TF)
    else:
        pos = nx.kamada_kawai_layout(G)

    # Only nodes whose x AND y coordinates are defined get an 'x'/'y' attribute
    valid_pos = {node: xy for node, xy in pos.items()
                 if not (np.isnan(xy[0]) or np.isnan(xy[1]))}
    x_pos_dict = {node: xy[0]*scale_position_by for node, xy in valid_pos.items()}
    y_pos_dict = {node: xy[1]*scale_position_by for node, xy in valid_pos.items()}
    fixed_dict = {node: {'fixed.x': True, 'fixed.y': True} for node in pos}
    nx.set_node_attributes(G, x_pos_dict, name='x')
    nx.set_node_attributes(G, y_pos_dict, name='y')
    nx.set_node_attributes(G, fixed_dict, name='fixed')

    return G, pos, edge_tables, node_tables



# [docs]
def plot_networkx(G, pos):
    """
    A function to plot networks with networkx

    Nodes are drawn as diamonds using their 'color' and 'size' attributes, edges using
    their 'color' and 'width' attributes. A node label is drawn (using the 'size' and
    'color' entries of its 'font' attribute) unless the font size is 0. The figure is
    shown with `plt.show()`.

    Parameters
    ---------
    G: Graph
        A networkx graph
    pos: Dict
        Position values
    """
    nx.draw_networkx_nodes(G, pos,
                           node_color=list(nx.get_node_attributes(G, 'color').values()),
                           node_size=list(nx.get_node_attributes(G, 'size').values()),
                           node_shape='D')
    nx.draw_networkx_edges(G, pos,
                           edge_color=list(nx.get_edge_attributes(G, 'color').values()),
                           width=list(nx.get_edge_attributes(G, 'width').values()))
    # Read 'font' and 'label' from the same node, so they cannot get out of step
    # when a node has only one of the two attributes.
    fontsize_d = {}
    fontcolor_d = {}
    for _, attributes in G.nodes(data=True):
        font = attributes.get('font')
        label = attributes.get('label')
        if font is None or label is None:
            continue
        if font['size'] != 0.0:
            fontsize_d[label] = font['size']
            fontcolor_d[label] = font['color']
    for node, (x, y) in pos.items():
        if node in fontsize_d:
            plt.text(x, y, node, fontsize=fontsize_d[node], color=fontcolor_d[node],  ha='center', va='center')
    ax = plt.gca()
    ax.margins(0.11)
    plt.tight_layout()
    plt.axis("off")
    plt.show()



def _distance(p1, p2):
    """
    Helper function for custom layout

    Returns the Euclidean distance between `p1` and `p2`, using the first two
    coordinates of each point.
    """
    delta_x = p2[0] - p1[0]
    delta_y = p2[1] - p1[1]
    return np.sqrt(delta_x**2 + delta_y**2)


def _pairwise_distance(points):
    """
    Helper function for custom layout

    Returns a square matrix holding the Euclidean distance (based on the first two
    coordinates) between every pair of rows of `points`. The diagonal is set to
    minus infinity, so that an argmax over the matrix never selects a point
    paired with itself when more than one point is given.
    """
    points = np.asarray(points)
    n_points = points.shape[0]
    distances = np.zeros((n_points, n_points))
    if n_points > 0:
        x = points[:, 0]
        y = points[:, 1]
        # Broadcast to all pairs at once instead of a Python double loop
        distances[...] = np.sqrt(
            (x[None, :] - x[:, None])**2 + (y[None, :] - y[:, None])**2)
    # -np.inf: the np.NINF alias no longer exists in NumPy 2
    np.fill_diagonal(distances, -np.inf)
    return distances


def _line_two_points(p1, p2, return_func=True):
    """
    Helper function for custom layout

    Computes the line through `p1` and `p2` (first two coordinates of each).
    Returns a function x -> y of that line if `return_func` is true, otherwise
    the tuple (slope, intercept).
    """
    x_start, y_start = p1[0], p1[1]
    x_end, y_end = p2[0], p2[1]

    slope = (y_end - y_start) / (x_end - x_start)
    intercept = y_start - slope * x_start
    if not return_func:
        return slope, intercept
    return lambda x: slope * x + intercept


def _line_slope_point(m, p, return_func=True):
    """
    Helper function for custom layout

    Computes the line with slope `m` through point `p` (first two coordinates).
    Returns a function x -> y of that line if `return_func` is true, otherwise
    the tuple (slope, intercept).
    """
    intercept = p[1] - m * p[0]

    if not return_func:
        return m, intercept
    return lambda x: m * x + intercept


# [docs]
def concentrical_layout(G,
               dist_genes=1,
               dist_TF=0.1):
    """
    Generate custom concentrical layout

    Regions are placed on a circle of radius 1. Genes and TFs are placed relative to the
    regions they share an edge with. Nodes that cannot be placed that way, and regions
    that appear as the first node of an edge whose second node is a TF, are positioned
    with `networkx.spring_layout` (called without a seed).

    Parameters
    ---------
    G: Graph
        A networkx graph. Every node needs a 'group' attribute ('TF', 'Region' or 'Gene').
    dist_genes: int, optional
        Distance from the regions to the genes
    dist_TF
        Distance from the TF to the regions

    Return
    ---------
    A dictionary mapping node to position.
    """
    node_type = nx.get_node_attributes(G, 'group')
    TF_nodes = [n for n in G.nodes if node_type[n] == 'TF']
    region_nodes = [n for n in G.nodes if node_type[n] == 'Region']
    gene_nodes = [n for n in G.nodes if node_type[n] == 'Gene']

    # get regions with TFs as target
    tf_set = set(TF_nodes)
    regions_targetting_TFs = [
        source for source, target in G.edges if target in tf_set]
    # set for fast membership tests in the loops below
    excluded_regions = set(regions_targetting_TFs)
    # keep graph order so the layout does not depend on set iteration order
    region_nodes = [r for r in region_nodes if r not in excluded_regions]

    # layout regions in a circle
    n_region_nodes = len(region_nodes)

    theta = np.linspace(0, 1, n_region_nodes + 1)[:-1] * 2 * np.pi
    theta = theta.astype(np.float32)
    pos_regions = np.column_stack(
        [np.cos(theta), np.sin(theta), np.zeros((n_region_nodes, 0))]
    )

    # sort regions by target
    source_target_dict = {}
    target_source_dict = {}
    for source, target in G.edges:
        targets = source_target_dict.setdefault(source, [])
        if target not in targets:
            targets.append(target)
        sources = target_source_dict.setdefault(target, [])
        if source not in sources:
            sources.append(source)

    region_nodes = sorted(region_nodes, key=lambda x: target_source_dict[x][0])
    pos_regions = dict(zip(region_nodes, pos_regions))

    # layout target genes in concentric circle around regions
    pos_genes = {}
    additional_genes_to_position = []
    for gene in gene_nodes:
        # get regions targetting this gene and their position
        regions = [r for r in target_source_dict[gene]
                   if r not in excluded_regions]
        if not regions:
            additional_genes_to_position.append(gene)
            continue
        pos_genes[gene] = _position_next_to_regions(
            np.array([pos_regions[r] for r in regions]), dist_genes)

    pos_TF = {}
    for TF in TF_nodes:
        # get regions targetted by this TF and their position
        regions = [r for r in source_target_dict.get(TF, [])
                   if r not in excluded_regions]
        if not regions:
            additional_genes_to_position.append(TF)
            continue
        pos_TF[TF] = _position_next_to_regions(
            np.array([pos_regions[r] for r in regions]), dist_TF,
            flip_when_outside=True)

    # layout the nodes that could not be placed relative to regions
    G_add = G.subgraph(nodes=additional_genes_to_position)
    pos_add = nx.spring_layout(G_add, scale=0.1)

    # layout the excluded regions together with the TFs, keeping only the regions
    G_regions_TF = G.subgraph(nodes=[*regions_targetting_TFs, *TF_nodes])
    pos_regions_TF = nx.spring_layout(G_regions_TF, scale=1)
    pos_regions_TF = {k: v for k, v in pos_regions_TF.items()
                      if k in excluded_regions}

    return {**pos_TF, **pos_regions, **pos_genes, **pos_add, **pos_regions_TF}


def _position_next_to_regions(region_positions, dist, flip_when_outside=False):
    """
    Helper function for custom layout

    Returns a point at distance `dist` from the given region positions (one row per
    region). With several regions the point lies on the line through their mean
    position, perpendicular to the line through the two regions furthest apart.
    With one region it lies on the line through that region and the origin.

    Of the two candidate points, the first is replaced by the second when it falls
    inside the unit circle. With `flip_when_outside` and several regions, it is
    instead replaced when it falls outside the unit circle.
    """
    # The number of positioned regions decides the branch: a single point cannot
    # define the "furthest pair" line and would give NaN coordinates.
    several_regions = len(region_positions) > 1
    if several_regions:
        # get the positions which are furthest apart and "draw" a line through them
        pairwise_distances = _pairwise_distance(region_positions)
        first, second = np.unravel_index(
            pairwise_distances.argmax(), pairwise_distances.shape)
        m, _ = _line_two_points(
            region_positions[first], region_positions[second], return_func=False)
        # draw a perpendicular line through the first line and the mean position
        anchor = region_positions.mean(0)
        m, _ = _line_slope_point(-1/m, anchor, return_func=False)
    else:
        # draw line trough origin and pos of region
        anchor = region_positions[0]
        m, _ = _line_two_points(anchor, [0, 0], return_func=False)

    # get the point which is a distance dist away from the anchor along the line
    step = np.sqrt(1 / (1 + m**2))
    p_new = [anchor[0] - dist * step,
             anchor[1] - m * dist * step]
    radius_sq = p_new[0]**2 + p_new[1]**2
    # NOTE(review): with flip_when_outside (used for TFs) the single-region case still
    # flips when the point is inside the circle, as the original code did - confirm
    # whether TFs with one region are meant to end up outside the circle.
    if several_regions and flip_when_outside:
        take_other_point = radius_sq > 1
    else:
        take_other_point = radius_sq < 1
    if take_other_point:
        p_new = [anchor[0] + dist * step,
                 anchor[1] + m * dist * step]
    return np.array(p_new)

    

# [docs]
def export_to_cytoscape(G, pos, out_file: str, pos_scaling_factor:int = 200, size_scaling_factor:int=1):
    """
    A function to export to cytoscape

    Parameters
    ---------
    G: Graph
        A networkx graph. Every node needs the attributes 'font_size', 'size' and 'shape'.
    pos: coordinates of graph nodes
        Dictionary mapping node to position, as generated by running create_nx_graph.
        Nodes without a position, or with a NaN coordinate, are placed at (0, 0).
    out_file: str
        Path to which to save the export.
    pos_scaling_factor: int, optional
        Factor by which to scale the graph node coordinates.
    size_scaling_factor: int, optional
        Factor by which to scale the graph node sizes.
    """
    def _to_builtin(value):
        # json cannot serialise numpy scalars/arrays; convert them to Python objects
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(
            'Object of type {} is not JSON serializable'.format(type(value).__name__))

    cy = nx.cytoscape_data(G)
    for node in cy["elements"]["nodes"]:
        data = node["data"]
        # 'value' holds the graph node; it becomes the label and is used to look
        # up the position, so positions do not depend on the ordering of `pos`.
        node_id = data.pop("value")
        data["label"] = node_id
        p = pos.get(node_id)
        if p is not None and not np.isnan(p[0]) and not np.isnan(p[1]):
            node["position"] = {"x": int(p[0] * pos_scaling_factor), "y": int(p[1] * pos_scaling_factor)}
        else:
            node["position"] = {"x": 0, "y": 0}
        data['font_size'] = int(data['font_size'])
        data['size'] = data['size']*size_scaling_factor
        data['shape'] = data['shape'].capitalize()
    # Serialise first, so no file is created when serialisation fails
    json_string = json.dumps(cy, indent = 2, default=_to_builtin)
    with open(out_file, 'w') as outfile:
        outfile.write(json_string)