Source code for scenicplus.networks

"""export eRegulons to eGRN network and plot.
"""

import json
import pandas as pd
from typing import Union, Dict, Sequence, Optional, List
import anndata
import scanpy as sc
import random
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import networkx as nx
from matplotlib.colors import to_rgba, to_hex
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def _format_df_nx(df, key, var):
    """
    A helper function to format differential test results
    """
    df.index = df['names']
    df = pd.DataFrame(df['logfoldchanges'])
    df.columns = [var+'_Log2FC_'+key]
    df.index.name = None
    return df


def _get_log2fc_nx(scplus_obj: 'SCENICPLUS',
                  variable,
                  features,
                  contrast: Optional[str] = 'gene'
                  ):
    """
    A helper function to derive log2fc changes
    """
    if contrast == 'gene':
        adata = anndata.AnnData(X=scplus_obj.X_EXP, obs=pd.DataFrame(
            index=scplus_obj.cell_names), var=pd.DataFrame(index=scplus_obj.gene_names))
    if contrast == 'region':
        adata = anndata.AnnData(X=scplus_obj.X_ACC.T, obs=pd.DataFrame(
            index=scplus_obj.cell_names), var=pd.DataFrame(index=scplus_obj.region_names))
    adata.obs = pd.DataFrame(scplus_obj.metadata_cell[variable])
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    adata = adata[:, features]
    sc.tl.rank_genes_groups(
        adata, variable, method='wilcoxon', corr_method='bonferroni')
    groups = adata.uns['rank_genes_groups']['names'].dtype.names
    diff_list = [_format_df_nx(sc.get.rank_genes_groups_df(
        adata, group=group), group, variable) for group in groups]
    return pd.concat(diff_list, axis=1)


def create_nx_tables(scplus_obj: 'SCENICPLUS',
                     eRegulon_metadata_key: str = 'eRegulon_metadata',
                     subset_eRegulons: List = None,
                     subset_regions: List = None,
                     subset_genes: List = None,
                     add_differential_gene_expression: bool = False,
                     add_differential_region_accessibility: bool = False,
                     differential_variable: List = []):
    """
    A function to format eRegulon data into tables for plotting eGRNs.

    Parameters
    ---------
    scplus_obj: SCENICPLUS
        A SCENICPLUS object with eRegulons
    eRegulon_metadata_key: str, optional
        Key where the eRegulon metadata dataframe is stored
    subset_eRegulons: list, optional
        List of eRegulons to subset
    subset_regions: list, optional
        List of regions to subset
    subset_genes: list, optional
        List of genes to subset
    add_differential_gene_expression: bool, optional
        Whether to calculate differential gene expression logFC for a given variable
    add_differential_region_accessibility: bool, optional
        Whether to calculate differential region accessibility logFC for a given variable
    differential_variable: list, optional
        Variable to calculate differential gene expression or region accessibility by.

    Return
    ---------
    A dictionary with edge feature tables ('TF2G', 'TF2R', 'R2G') and node feature tables ('TF', 'Gene', 'Region')
    """
    er_metadata = scplus_obj.uns[eRegulon_metadata_key].copy()
    if subset_eRegulons is not None:
        subset_eRegulons = [x + '_[^a-zA-Z0-9]' for x in subset_eRegulons]
        er_metadata = er_metadata[er_metadata['Region_signature_name'].str.contains(
            '|'.join(subset_eRegulons))]
    if subset_regions is not None:
        er_metadata = er_metadata[er_metadata['Region'].isin(subset_regions)]
    if subset_genes is not None:
        er_metadata = er_metadata[er_metadata['Gene'].isin(subset_genes)]
    nx_tables = {}
    nx_tables['Edge'] = {}
    nx_tables['Node'] = {}
    # Generate edge tables
    r2g_columns = [x for x in er_metadata.columns if 'R2G' in x]
    tf2g_columns = [x for x in er_metadata.columns if 'TF2G' in x]
    nx_tables['Edge']['TF2R'] = er_metadata[er_metadata.columns.difference(
        r2g_columns + tf2g_columns)].drop('Gene', axis=1).drop_duplicates()
    nx_tables['Edge']['TF2R'] = nx_tables['Edge']['TF2R'][
        ['TF', 'Region'] + nx_tables['Edge']['TF2R'].columns.difference(['TF', 'Region']).tolist()]
    nx_tables['Edge']['R2G'] = er_metadata[er_metadata.columns.difference(
        tf2g_columns)].drop('TF', axis=1).drop_duplicates()
    nx_tables['Edge']['R2G'] = nx_tables['Edge']['R2G'][
        ['Region', 'Gene'] + nx_tables['Edge']['R2G'].columns.difference(['Region', 'Gene']).tolist()]
    nx_tables['Edge']['TF2G'] = er_metadata[er_metadata.columns.difference(
        r2g_columns)].drop('Region', axis=1).drop_duplicates()
    nx_tables['Edge']['TF2G'] = nx_tables['Edge']['TF2G'][
        ['TF', 'Gene'] + nx_tables['Edge']['TF2G'].columns.difference(['TF', 'Gene']).tolist()]
    # Generate node tables
    tfs = list(set(er_metadata['TF']))
    nx_tables['Node']['TF'] = pd.DataFrame(
        'TF', index=tfs, columns=['Node_type'])
    nx_tables['Node']['TF']['TF'] = tfs
    genes = list(set(er_metadata['Gene']))
    genes = [x for x in genes if x not in tfs]
    nx_tables['Node']['Gene'] = pd.DataFrame(
        'Gene', index=genes, columns=['Node_type'])
    nx_tables['Node']['Gene']['Gene'] = genes
    regions = list(set(er_metadata['Region']))
    nx_tables['Node']['Region'] = pd.DataFrame(
        'Region', index=regions, columns=['Node_type'])
    nx_tables['Node']['Region']['Region'] = regions
    # Add gene logFC
    if add_differential_gene_expression is True:
        for var in differential_variable:
            nx_tables['Node']['TF'] = pd.concat([nx_tables['Node']['TF'], _get_log2fc_nx(
                scplus_obj, var, nx_tables['Node']['TF'].index.tolist(), contrast='gene')], axis=1)
            nx_tables['Node']['Gene'] = pd.concat([nx_tables['Node']['Gene'], _get_log2fc_nx(
                scplus_obj, var, nx_tables['Node']['Gene'].index.tolist(), contrast='gene')], axis=1)
    # Add region logFC
    if add_differential_region_accessibility is True:
        for var in differential_variable:
            nx_tables['Node']['Region'] = pd.concat([nx_tables['Node']['Region'], _get_log2fc_nx(
                scplus_obj, var, nx_tables['Node']['Region'].index.tolist(), contrast='region')], axis=1)
    return nx_tables
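

# Illustrative usage only (not part of the library): a minimal sketch of how
# create_nx_tables might be called. It assumes an existing SCENICPLUS object
# `scplus_obj` with an 'eRegulon_metadata' slot and a hypothetical cell metadata
# column named 'celltype'.
def _example_create_nx_tables(scplus_obj):
    nx_tables = create_nx_tables(
        scplus_obj,
        eRegulon_metadata_key='eRegulon_metadata',
        subset_eRegulons=None,  # optionally, a list of eRegulon names to keep
        add_differential_gene_expression=True,
        add_differential_region_accessibility=True,
        differential_variable=['celltype'])
    # nx_tables['Edge'] contains the 'TF2R', 'R2G' and 'TF2G' edge tables;
    # nx_tables['Node'] contains the 'TF', 'Gene' and 'Region' node tables.
    return nx_tables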


def _format_nx_table_internal(nx_tables, table_type, table_id, color_by={}, transparency_by={},
                              size_by={}, shape_by={}, label_size_by={}, label_color_by={}):
    """
    A helper function to format edge and node tables into graphs
    """
    nx_tb = nx_tables[table_type][table_id]
    # Color
    if table_id in color_by.keys():
        if 'fixed_color' not in color_by[table_id].keys():
            color_var = nx_tables[table_type][table_id][color_by[table_id]['variable']]
            if 'category_color' in color_by[table_id].keys():
                if color_by[table_id]['category_color'] is None:
                    random.seed(555)
                    categories = set(color_var)
                    color = list(map(
                        lambda i: "#" + "%06x" % random.randint(0, 0xFFFFFF),
                        range(len(categories))
                    ))
                    color_dict = dict(zip(categories, color))
                else:
                    color_dict = color_by[table_id]['category_color']
                color = color_var.apply(
                    lambda x: to_rgba(color_dict[x])).to_numpy()
            elif 'continuous_color' in color_by[table_id].keys():
                if color_by[table_id]['continuous_color'] is None:
                    color_map = 'viridis'
                else:
                    color_map = color_by[table_id]['continuous_color']
                if 'v_min' in color_by[table_id].keys():
                    v_min = color_by[table_id]['v_min']
                else:
                    v_min = None
                if 'v_max' in color_by[table_id].keys():
                    v_max = color_by[table_id]['v_max']
                else:
                    v_max = None
                color = _get_colors(color_var, color_map, v_min, v_max)
        else:
            color = np.array([color_by[table_id]['fixed_color']]
                             * nx_tables[table_type][table_id].shape[0])
    else:
        color = np.array([to_rgba('grey')]
                         * nx_tables[table_type][table_id].shape[0])
    # Transparency
    if table_id in transparency_by.keys():
        if 'fixed_alpha' not in transparency_by[table_id]['variable']:
            transparency_var = nx_tables[table_type][table_id][transparency_by[table_id]['variable']]
            if 'v_min' in transparency_by[table_id].keys():
                v_min = transparency_by[table_id]['v_min']
            else:
                v_min = None
            if 'v_max' in transparency_by[table_id].keys():
                v_max = transparency_by[table_id]['v_max']
            else:
                v_max = None
            if 'min_alpha' in transparency_by[table_id].keys():
                min_alpha = transparency_by[table_id]['min_alpha']
            else:
                min_alpha = 0.5
            norm = plt.Normalize(v_min, v_max)
            x = norm(transparency_var)
            x[x < min_alpha] = min_alpha
            for i in range(0, len(color)):
                c = list(color[i])
                c[-1] = x[i]
                color[i] = tuple(c)
            # color[:, -1] = x
        else:
            for i in range(0, len(color)):
                c = list(color[i])
                c[-1] = transparency_by[table_id]['fixed_alpha']
                color[i] = tuple(c)
    # Size/Width
    if table_id in size_by.keys():
        if 'fixed_size' not in size_by[table_id].keys():
            sw_var = nx_tables[table_type][table_id][size_by[table_id]
                                                     ['variable']].to_numpy().flatten('F')
            if 'min_size' in size_by[table_id].keys():
                p_min = size_by[table_id]['min_size']
            else:
                p_min = 3
            if 'max_size' in size_by[table_id].keys():
                p_max = size_by[table_id]['max_size']
            else:
                p_max = 10
            # rescale non-zero values to the [p_min, p_max] range
            s_min = sw_var[sw_var != 0].min()
            s_max = sw_var.max()
            sw_var[sw_var != 0] = p_min + \
                (sw_var[sw_var != 0] - s_min) * \
                ((p_max - p_min) / (s_max - s_min))
        else:
            sw_var = [size_by[table_id]['fixed_size']] * \
                nx_tables[table_type][table_id].shape[0]
    else:
        sw_var = [1] * nx_tables[table_type][table_id].shape[0]
    # Node shape
    if table_id in shape_by.keys():
        if 'fixed_shape' not in shape_by[table_id].keys():
            if 'categorical_shape' not in shape_by[table_id].keys():
                print('No categorical_shape dictionary provided, making all nodes circular!')
                shape_var = ['circular'] * \
                    nx_tables[table_type][table_id].shape[0]
            else:
                # map the categorical variable to the provided shapes
                shape_dict = shape_by[table_id]['categorical_shape']
                shape_var = nx_tb[shape_by[table_id]['variable']].apply(
                    lambda x: shape_dict[x]).to_numpy()
        else:
            shape_var = np.array(
                [shape_by[table_id]['fixed_shape']] * nx_tables[table_type][table_id].shape[0])
    else:
        shape_var = ['ellipse'] * nx_tables[table_type][table_id].shape[0]
    # Label size
    if table_id in label_size_by.keys():
        if 'fixed_label_size' not in label_size_by[table_id].keys():
            if 'categorical_label_size' not in label_size_by[table_id].keys():
                print('No categorical_label_size dictionary provided, using size 14 for all nodes!')
                label_size_var = [14] * nx_tables[table_type][table_id].shape[0]
            else:
                # map the categorical variable to the provided label sizes
                label_size_dict = label_size_by[table_id]['categorical_label_size']
                label_size_var = nx_tb[label_size_by[table_id]['variable']].apply(
                    lambda x: label_size_dict[x]).to_numpy()
        else:
            label_size_var = np.array(
                [label_size_by[table_id]['fixed_label_size']] * nx_tables[table_type][table_id].shape[0])
    else:
        label_size_var = [14] * nx_tables[table_type][table_id].shape[0]
    # Label color
    if table_id in label_color_by.keys():
        if 'fixed_label_color' not in label_color_by[table_id].keys():
            if 'categorical_label_color' not in label_color_by[table_id].keys():
                print('No categorical_label_color dictionary provided, using black for all nodes!')
                label_color_var = np.array(
                    [to_rgba('black')] * nx_tables[table_type][table_id].shape[0])
            else:
                # map the categorical variable to the provided label colors
                label_color_dict = label_color_by[table_id]['categorical_label_color']
                label_color_var = nx_tb[label_color_by[table_id]['variable']].apply(
                    lambda x: to_rgba(label_color_dict[x])).to_numpy()
        else:
            label_color_var = np.array(
                [label_color_by[table_id]['fixed_label_color']] * nx_tables[table_type][table_id].shape[0])
    else:
        label_color_var = np.array(
            [to_rgba('black')] * nx_tables[table_type][table_id].shape[0])
    color = [to_hex(x, keep_alpha=True) for x in color]
    label_color_var = [to_hex(x, keep_alpha=True) for x in label_color_var]
    if table_type == 'Edge':
        dt1 = nx_tb.iloc[:, 0:2].reset_index(drop=True)
        dt2 = pd.DataFrame([color, sw_var]).T.reset_index(drop=True)
    else:
        dt1 = nx_tb.iloc[:, 0:2].reset_index(drop=True)
        dt2 = pd.DataFrame([color, sw_var, shape_var, label_size_var,
                            label_color_var]).T.reset_index(drop=True)
    dt = pd.concat([dt1, dt2], axis=1)
    color = dt.iloc[:, 2]
    dt['color_rgb'] = [to_hex(to_rgba(x)) for x in color]
    dt['color_alpha'] = [to_rgba(x)[3] for x in color]
    scaler = MinMaxScaler(feature_range=(200, 255))
    dt['color_alpha'] = scaler.fit_transform(
        np.array(dt['color_alpha']).reshape(-1, 1))
    if len(set(dt['color_alpha'])) == 1:
        dt['color_alpha'] = [255] * dt.shape[0]
    if table_type == 'Edge':
        dt.columns = ['source', 'target', 'color',
                      'width', 'color_rgb', 'color_alpha']
    else:
        color = dt.iloc[:, 6]
        dt['font_color_rgb'] = [to_hex(to_rgba(x)) for x in color]
        dt['font_color_alpha'] = [to_rgba(x)[3] for x in color]
        dt['font_color_alpha'] = scaler.fit_transform(
            np.array(dt['font_color_alpha']).reshape(-1, 1))
        if len(set(dt['font_color_alpha'])) == 1:
            dt['font_color_alpha'] = [255] * dt.shape[0]
        dt.columns = ['group', 'label', 'color', 'size', 'shape', 'font_size', 'font_color',
                      'color_rgb', 'color_alpha', 'font_color_rgb', 'font_color_alpha']
    return dt


def _get_colors(inp, cmap_name, vmin=None, vmax=None):
    """
    A function to get color values from a continuous vector and a color map
    """
    color_map = cm.get_cmap(cmap_name)
    norm = plt.Normalize(vmin, vmax)
    return color_map(norm(inp))


def create_nx_graph(nx_tables: Dict,
                    use_edge_tables: List = ['TF2R', 'R2G'],
                    color_edge_by: Dict = {},
                    transparency_edge_by: Dict = {},
                    width_edge_by: Dict = {},
                    color_node_by: Dict = {},
                    transparency_node_by: Dict = {},
                    size_node_by: Dict = {},
                    shape_node_by: Dict = {},
                    label_size_by: Dict = {},
                    label_color_by: Dict = {},
                    layout: str = 'concentrical_layout',
                    lc_dist_genes: float = 0.8,
                    lc_dist_TF: float = 0.1,
                    scale_position_by: float = 250):
    """
    Format node/edge feature tables into a graph

    Parameters
    ---------
    nx_tables: Dict
        Dictionary with node/edge feature tables as produced by `create_nx_tables`
    use_edge_tables: List, optional
        List of edge tables to use
    color_edge_by: Dict, optional
        A dictionary containing, per edge table, the variable and color map to color edges by. If the variable is
        categorical, the entry 'category_color' can be provided as a dictionary mapping each category to a color. If it
        is continuous, a color map can be provided as 'continuous_color', and the entries 'v_min' and 'v_max' can be used
        to control the minimum and maximum values of the scale. Alternatively, a fixed color can be used by adding an
        entry 'fixed_color': color to the dictionary.
    transparency_edge_by: Dict, optional
        A dictionary containing, per edge table, the variable and the minimum and maximum alpha values. The variable name
        has to be provided (only continuous variables are accepted), together with 'v_min'/'v_max' parameters if desired.
        Alternatively, a fixed alpha can be used by setting 'fixed_alpha' as the variable and adding an entry
        'fixed_alpha': alpha to the dictionary.
    width_edge_by: Dict, optional
        A dictionary containing, per edge table, the variable and the minimum and maximum widths. The variable name has
        to be provided (only continuous variables are accepted), together with 'min_size'/'max_size' parameters if
        desired. Alternatively, a fixed width can be used by adding an entry 'fixed_size': size to the dictionary.
    color_node_by: Dict, optional
        A dictionary containing, per node table, the variable and color map to color nodes by. If the variable is
        categorical, the entry 'category_color' can be provided as a dictionary mapping each category to a color. If it
        is continuous, a color map can be provided as 'continuous_color', and the entries 'v_min' and 'v_max' can be used
        to control the minimum and maximum values of the scale. Alternatively, a fixed color can be used by adding an
        entry 'fixed_color': color to the dictionary.
    transparency_node_by: Dict, optional
        A dictionary containing, per node table, the variable and the minimum and maximum alpha values. The variable name
        has to be provided (only continuous variables are accepted), together with 'v_min'/'v_max' parameters if desired.
        Alternatively, a fixed alpha can be used by setting 'fixed_alpha' as the variable and adding an entry
        'fixed_alpha': alpha to the dictionary.
    size_node_by: Dict, optional
        A dictionary containing, per node table, the variable and the minimum and maximum sizes. The variable name has to
        be provided (only continuous variables are accepted), together with 'min_size'/'max_size' parameters if desired.
        Alternatively, a fixed size can be used by adding an entry 'fixed_size': size to the dictionary.
    shape_node_by: Dict, optional
        A dictionary containing, per node table, the variable and shapes. The variable name has to be provided (only
        categorical variables are accepted), together with a 'categorical_shape' dictionary (category: shape) if desired.
        Alternatively, a fixed shape can be used by adding an entry 'fixed_shape': shape to the dictionary.
    label_size_by: Dict, optional
        A dictionary containing, per node table, the variable and label sizes. The variable name has to be provided (only
        categorical variables are accepted), together with a 'categorical_label_size' dictionary (category: size) if
        desired. Alternatively, a fixed size can be used by adding an entry 'fixed_label_size': size to the dictionary.
    label_color_by: Dict, optional
        A dictionary containing, per node table, the variable and a color dictionary. The variable name has to be
        provided (only categorical variables are accepted), together with a 'categorical_label_color' dictionary
        (category: color) if desired. Alternatively, a fixed color can be used by adding an entry
        'fixed_label_color': color to the dictionary.
    layout: str, optional
        Layout to use. Options are: 'concentrical_layout' (SCENIC+ custom layout) or 'kamada_kawai_layout' (from networkx).
    lc_dist_genes: float, optional
        Distance between regions and genes. Only used with the concentrical layout.
    lc_dist_TF: float, optional
        Distance between TFs and regions. Only used with the concentrical layout.
    scale_position_by: int, optional
        Value to scale positions for visualization in pyvis.

    Return
    ---------
    A networkx graph, positions, and node/edge feature tables.
    """
    # Get node table names
    use_node_tables = []
    if 'TF2R' in use_edge_tables:
        use_node_tables = ['TF', 'Region'] + use_node_tables
    if 'TF2G' in use_edge_tables:
        use_node_tables = ['TF', 'Gene'] + use_node_tables
    if 'R2G' in use_edge_tables:
        use_node_tables = ['Region', 'Gene'] + use_node_tables
    use_node_tables = sorted(list(set(use_node_tables)), reverse=True)
    # Create graph
    edge_tables = pd.concat([_format_nx_table_internal(
        nx_tables, 'Edge', x, color_edge_by, transparency_edge_by, width_edge_by, {}) for x in use_edge_tables])
    edge_tables.dropna(axis=0, how='any', inplace=True)
    G = nx.from_pandas_edgelist(edge_tables, edge_attr=True)
    # Add node tables
    node_tables = pd.concat([_format_nx_table_internal(nx_tables, 'Node', x, color_node_by, transparency_node_by,
                                                       size_node_by, shape_node_by, label_size_by, label_color_by)
                             for x in use_node_tables])
    node_tables.index = node_tables['label']
    node_tables.dropna(axis=0, how='any', inplace=True)
    node_tables_d = node_tables.to_dict()
    for key in node_tables_d.keys():
        nx.set_node_attributes(G, node_tables_d[key], name=key)
    nx.set_node_attributes(G, node_tables_d['label'], name='title')
    font_nt_d = node_tables[['font_size', 'font_color']]
    font_nt_d.columns = ['size', 'color']
    font_nt_d = font_nt_d.to_dict(orient='index')
    nx.set_node_attributes(G, font_nt_d, name='font')
    if layout == 'concentrical_layout':
        pos = concentrical_layout(G, dist_genes=lc_dist_genes, dist_TF=lc_dist_TF)
    else:
        pos = nx.kamada_kawai_layout(G)
    x_pos_dict = {x: pos[x][0] * scale_position_by for x in pos.keys() if not np.isnan(pos[x][0])}
    y_pos_dict = {x: pos[x][1] * scale_position_by for x in pos.keys() if not np.isnan(pos[x][0])}
    fixed_dict = {x: {'fixed.x': True, 'fixed.y': True} for x in pos.keys()}
    nx.set_node_attributes(G, x_pos_dict, name='x')
    nx.set_node_attributes(G, y_pos_dict, name='y')
    nx.set_node_attributes(G, fixed_dict, name='fixed')
    return G, pos, edge_tables, node_tables
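

# Illustrative usage only (not part of the library): a hypothetical sketch of turning
# the tables from create_nx_tables into a graph. Fixed colors/sizes/shapes are used so
# that no eRegulon metadata column names have to be assumed; the shape strings follow
# the conventions used elsewhere in this module ('ellipse', 'diamond').
def _example_create_nx_graph(nx_tables):
    G, pos, edge_tables, node_tables = create_nx_graph(
        nx_tables,
        use_edge_tables=['TF2R', 'R2G'],
        color_edge_by={'TF2R': {'fixed_color': 'grey'},
                       'R2G': {'fixed_color': 'lightblue'}},
        width_edge_by={'TF2R': {'fixed_size': 1},
                       'R2G': {'fixed_size': 1}},
        color_node_by={'TF': {'fixed_color': 'purple'},
                       'Gene': {'fixed_color': 'orange'},
                       'Region': {'fixed_color': 'lightgrey'}},
        size_node_by={'TF': {'fixed_size': 30},
                      'Gene': {'fixed_size': 15},
                      'Region': {'fixed_size': 10}},
        shape_node_by={'TF': {'fixed_shape': 'ellipse'},
                       'Gene': {'fixed_shape': 'ellipse'},
                       'Region': {'fixed_shape': 'diamond'}},
        label_size_by={'TF': {'fixed_label_size': 20},
                       'Gene': {'fixed_label_size': 10},
                       'Region': {'fixed_label_size': 0}},  # size 0 hides region labels when plotting
        layout='concentrical_layout',
        scale_position_by=250)
    return G, pos, edge_tables, node_tables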


def plot_networkx(G, pos):
    """
    A function to plot networks with networkx

    Parameters
    ---------
    G: Graph
        A networkx graph
    pos: Dict
        Position values
    """
    nx.draw_networkx_nodes(G, pos, node_color=nx.get_node_attributes(G, 'color').values(),
                           node_size=list(nx.get_node_attributes(G, 'size').values()), node_shape='D')
    nx.draw_networkx_edges(G, pos, edge_color=nx.get_edge_attributes(G, 'color').values(),
                           width=list(nx.get_edge_attributes(G, 'width').values()))
    fontsize_d = {y: x['size'] for x, y in zip(list(nx.get_node_attributes(G, 'font').values()),
                                               list(nx.get_node_attributes(G, 'label').values()))
                  if x['size'] != 0.0}
    fontcolor_d = {y: x['color'] for x, y in zip(list(nx.get_node_attributes(G, 'font').values()),
                                                 list(nx.get_node_attributes(G, 'label').values()))
                   if x['size'] != 0.0}
    for node, (x, y) in pos.items():
        if node in fontsize_d.keys():
            plt.text(x, y, node, fontsize=fontsize_d[node],
                     color=fontcolor_d[node], ha='center', va='center')
    ax = plt.gca()
    ax.margins(0.11)
    plt.tight_layout()
    plt.axis("off")
    plt.show()
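

# Illustrative usage only (not part of the library): plotting a graph built with
# create_nx_graph, assuming `G` and `pos` were produced as in the sketch above.
def _example_plot_networkx(G, pos):
    plt.figure(figsize=(10, 10))
    plot_networkx(G, pos)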


def _distance(p1, p2):
    """
    Helper function for custom layout
    """
    x1 = p1[0]
    x2 = p2[0]
    y1 = p1[1]
    y2 = p2[1]
    return np.sqrt((x2 - x1)**2 + (y2 - y1)**2)


def _pairwise_distance(points):
    """
    Helper function for custom layout
    """
    distances = np.zeros((points.shape[0], points.shape[0]))
    for i in range(points.shape[0]):
        for j in range(points.shape[0]):
            distances[i, j] = _distance(points[i], points[j])
    np.fill_diagonal(distances, np.NINF)
    return distances


def _line_two_points(p1, p2, return_func=True):
    """
    Helper function for custom layout
    """
    x1 = p1[0]
    x2 = p2[0]
    y1 = p1[1]
    y2 = p2[1]
    m = (y2 - y1) / (x2 - x1)
    b = y1 - m * x1
    if return_func:
        return lambda x: m * x + b
    else:
        return m, b


def _line_slope_point(m, p, return_func=True):
    """
    Helper function for custom layout
    """
    x = p[0]
    y = p[1]
    b = y - m * x
    if return_func:
        return lambda x: m * x + b
    else:
        return m, b


def concentrical_layout(G, dist_genes=1, dist_TF=0.1):
    """
    Generate custom concentrical layout

    Parameters
    ---------
    G: Graph
        A networkx graph
    dist_genes: float, optional
        Distance from the regions to the genes
    dist_TF: float, optional
        Distance from the TF to the regions
    """
    node_type = nx.get_node_attributes(G, 'group')
    TF_nodes = [n for n in G.nodes if node_type[n] == 'TF']
    region_nodes = [n for n in G.nodes if node_type[n] == 'Region']
    gene_nodes = [n for n in G.nodes if node_type[n] == 'Gene']
    # get regions with TFs as target
    tmp = pd.DataFrame(list(G.edges))
    regions_targetting_TFs = tmp.loc[np.isin(tmp[1], TF_nodes), 0].to_list()
    del tmp
    region_nodes = list(set(region_nodes) - set(regions_targetting_TFs))
    # layout regions in a circle
    n_region_nodes = len(region_nodes)
    theta = np.linspace(0, 1, n_region_nodes + 1)[:-1] * 2 * np.pi
    theta = theta.astype(np.float32)
    pos_regions = np.column_stack(
        [np.cos(theta), np.sin(theta), np.zeros((n_region_nodes, 0))])
    # sort regions by target
    source_target_dict = {}
    target_source_dict = {}
    for edge in G.edges:
        source = edge[0]
        target = edge[1]
        if source in source_target_dict.keys():
            if target not in source_target_dict[source]:
                source_target_dict[source].append(target)
        else:
            source_target_dict[source] = [target]
        if target in target_source_dict.keys():
            if source not in target_source_dict[target]:
                target_source_dict[target].append(source)
        else:
            target_source_dict[target] = [source]
    region_nodes = sorted(region_nodes, key=lambda x: target_source_dict[x][0])
    pos_regions = dict(zip(region_nodes, pos_regions))
    # layout target genes in a concentric circle around the regions
    pos_genes = {}
    additional_genes_to_position = []
    for gene in gene_nodes:
        # get regions targetting this gene and their position
        regions = target_source_dict[gene]
        if all([r in regions_targetting_TFs for r in regions]):
            additional_genes_to_position.append(gene)
            continue
        pos_regions_gene = np.array(
            [pos_regions[r] for r in regions if r not in regions_targetting_TFs])
        if len(regions) > 1:
            # get the positions which are furthest apart and "draw" a line through them
            pairwise_distances = _pairwise_distance(pos_regions_gene)
            furthest_points = np.unravel_index(
                pairwise_distances.argmax(), pairwise_distances.shape)
            m, b = _line_two_points(
                pos_regions_gene[furthest_points[0]], pos_regions_gene[furthest_points[1]], return_func=False)
            # draw a perpendicular line through the first line and the mean position
            p_mean = pos_regions_gene.mean(0)
            m, b = _line_slope_point(-1 / m, p_mean, return_func=False)
            # get the point which is a distance dist_genes away from point p_mean
            p_new = [p_mean[0] - dist_genes * np.sqrt(1 / (1 + m**2)),
                     p_mean[1] - m * dist_genes * np.sqrt(1 / (1 + m**2))]
            # check if point is within the circle, otherwise take the other point (radius of the circle is 1)
            if p_new[0]**2 + p_new[1]**2 < 1:
                p_new = [p_mean[0] + dist_genes * np.sqrt(1 / (1 + m**2)),
                         p_mean[1] + m * dist_genes * np.sqrt(1 / (1 + m**2))]
        else:
            # draw line through origin and pos of region
            m, b = _line_two_points(pos_regions_gene[0], [0, 0], return_func=False)
            # get the point which is a distance dist_genes away from point pos_regions_gene
            p_new = [pos_regions_gene[0][0] - dist_genes * np.sqrt(1 / (1 + m**2)),
                     pos_regions_gene[0][1] - m * dist_genes * np.sqrt(1 / (1 + m**2))]
            # check if point is within the circle, otherwise take the other point (radius of the circle is 1)
            if p_new[0]**2 + p_new[1]**2 < 1:
                p_new = [pos_regions_gene[0][0] + dist_genes * np.sqrt(1 / (1 + m**2)),
                         pos_regions_gene[0][1] + m * dist_genes * np.sqrt(1 / (1 + m**2))]
        pos_genes[gene] = np.array(p_new)
    pos_TF = {}
    for TF in TF_nodes:
        # get regions targetted by this TF and their position
        if TF in source_target_dict.keys():
            regions = source_target_dict[TF]
        else:
            regions = []
        if all([r in regions_targetting_TFs for r in regions]):
            additional_genes_to_position.append(TF)
            continue
        pos_regions_TF = np.array(
            [pos_regions[r] for r in regions if r not in regions_targetting_TFs])
        if len(regions) > 1:
            # get the positions which are furthest apart and "draw" a line through them
            pairwise_distances = _pairwise_distance(pos_regions_TF)
            furthest_points = np.unravel_index(
                pairwise_distances.argmax(), pairwise_distances.shape)
            m, b = _line_two_points(
                pos_regions_TF[furthest_points[0]], pos_regions_TF[furthest_points[1]], return_func=False)
            # draw a perpendicular line through the first line and the mean position
            p_mean = pos_regions_TF.mean(0)
            m, b = _line_slope_point(-1 / m, p_mean, return_func=False)
            # get the point which is a distance dist_TF away from point p_mean
            p_new = [p_mean[0] - dist_TF * np.sqrt(1 / (1 + m**2)),
                     p_mean[1] - m * dist_TF * np.sqrt(1 / (1 + m**2))]
            # check if point is within the circle, otherwise take the other point (radius of the circle is 1)
            if p_new[0]**2 + p_new[1]**2 > 1:
                p_new = [p_mean[0] + dist_TF * np.sqrt(1 / (1 + m**2)),
                         p_mean[1] + m * dist_TF * np.sqrt(1 / (1 + m**2))]
        else:
            # draw line through origin and pos of region
            m, b = _line_two_points(
                pos_regions_TF[0], [0, 0], return_func=False)
            # get the point which is a distance dist_TF away from point pos_regions_TF
            p_new = [pos_regions_TF[0][0] - dist_TF * np.sqrt(1 / (1 + m**2)),
                     pos_regions_TF[0][1] - m * dist_TF * np.sqrt(1 / (1 + m**2))]
            # check if point is within the circle, otherwise take the other point (radius of the circle is 1)
            if p_new[0]**2 + p_new[1]**2 < 1:
                p_new = [pos_regions_TF[0][0] + dist_TF * np.sqrt(1 / (1 + m**2)),
                         pos_regions_TF[0][1] + m * dist_TF * np.sqrt(1 / (1 + m**2))]
        pos_TF[TF] = np.array(p_new)
    # layout TF nodes within circle
    #G_TF = G.subgraph(nodes = [*TF_nodes, *regions_targetting_TFs, *additional_genes_to_position])
    #additional_genes_to_position_init = {gene: [random.uniform(0, 1), random.uniform(0, 1)] for gene in additional_genes_to_position}
    #pos_TF = nx.spring_layout(G_TF, scale = 0.7, pos = {**pos_TF, **additional_genes_to_position_init})
    G_add = G.subgraph(nodes=additional_genes_to_position)
    pos_add = nx.spring_layout(G_add, scale=0.1)
    G_regions_TF = G.subgraph(nodes=[*regions_targetting_TFs, *TF_nodes])
    pos_regions_TF = nx.spring_layout(G_regions_TF, scale=1)
    pos_regions_TF = {k: pos_regions_TF[k] for k in pos_regions_TF.keys()
                      if k in regions_targetting_TFs}
    return {**pos_TF, **pos_regions, **pos_genes, **pos_add, **pos_regions_TF}
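

# Illustrative usage only (not part of the library): the concentrical layout can also
# be recomputed directly on an existing graph, e.g. to try a wider gene spacing before
# re-plotting; the distances shown here are arbitrary.
def _example_recompute_layout(G):
    pos = concentrical_layout(G, dist_genes=1.2, dist_TF=0.2)
    return pos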


def export_to_cytoscape(G, pos, out_file: str, pos_scaling_factor: int = 200, size_scaling_factor: int = 1):
    """
    A function to export to Cytoscape

    Parameters
    ---------
    G: Graph
        A networkx graph.
    pos: Dict
        Coordinates of the graph nodes, as generated by running `create_nx_graph`.
    out_file: str
        Path to which to save the export.
    pos_scaling_factor: int, optional
        Factor by which to scale the graph node coordinates.
    size_scaling_factor: int, optional
        Factor by which to scale the graph node sizes.
    """
    cy = nx.cytoscape_data(G)
    for n in cy["elements"]["nodes"]:
        for k, v in n.items():
            v["label"] = v.pop("value")
    for n, p in zip(cy["elements"]["nodes"], pos.values()):
        if not np.isnan(p[0]) and not np.isnan(p[1]):
            n["position"] = {"x": int(p[0] * pos_scaling_factor),
                             "y": int(p[1] * pos_scaling_factor)}
        else:
            n["position"] = {"x": 0, "y": 0}
    for n in cy["elements"]["nodes"]:
        n['data']['font_size'] = int(n['data']['font_size'])
        n['data']['size'] = n['data']['size'] * size_scaling_factor
        n['data']['shape'] = n['data']['shape'].capitalize()
    json_string = json.dumps(cy, indent=2)
    with open(out_file, 'w') as outfile:
        outfile.write(json_string)
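

# Illustrative usage only (not part of the library): exporting the graph and positions
# to a Cytoscape JSON file; the output file name is hypothetical.
def _example_export_to_cytoscape(G, pos):
    export_to_cytoscape(G, pos, out_file='eGRN_network.cyjs',
                        pos_scaling_factor=200, size_scaling_factor=1)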