Source code for scenicplus.eregulon_enrichment

"""Score eRegulon target genes and regions in cells using AUC algorithm.

"""

from pycisTopic.diff_features import *
from pycisTopic.signature_enrichment import *
from pyscenic.binarization import binarize

from .scenicplus_class import SCENICPLUS


[docs]def get_eRegulons_as_signatures(scplus_obj: SCENICPLUS, eRegulon_metadata_key: str = 'eRegulon_metadata', key_added: str = 'eRegulon_signatures'): """ Format eRegulons for scoring Parameters ---------- scplus_obj: `class::SCENICPLUS` A SCENICPLUS object with eRegulons metadata computed. eRegulon_metadata_key: str, optional Key where the eRegulon metadata is stored (in `scplus_obj.uns`) key_added: str, optional Key where formated signatures will be stored (in `scplus_obj.uns`) """ region_signatures = {x: list(set(scplus_obj.uns[eRegulon_metadata_key][scplus_obj.uns[eRegulon_metadata_key].Region_signature_name == x]['Region'])) for x in list( set(scplus_obj.uns[eRegulon_metadata_key].Region_signature_name))} gene_signatures = {x: list(set(scplus_obj.uns[eRegulon_metadata_key][scplus_obj.uns[eRegulon_metadata_key].Gene_signature_name == x]['Gene'])) for x in list( set(scplus_obj.uns[eRegulon_metadata_key].Gene_signature_name))} if not key_added in scplus_obj.uns.keys(): scplus_obj.uns[key_added] = {} scplus_obj.uns[key_added]['Gene_based'] = gene_signatures scplus_obj.uns[key_added]['Region_based'] = region_signatures
[docs]def make_rankings(scplus_obj: SCENICPLUS, target: str = 'region', seed: int = 123): """ A function to generate rankings per cell based on the imputed accessibility scores per region or the gene expression per cell. Parameters --------- scplus_obj: :class:`SCENICPLUS` A :class:`SCENICPLUS` object with motif enrichment results from pycistarget (`scplus_obj.menr`). target: str, optional Whether rankings should be done based on gene expression or region accessibilty. Default: 'region' seed: int, optional Random seed to ensure reproducibility of the rankings when there are ties Return ------ CistopicImputedFeatures A :class:`CistopicImputedFeatures` containing with ranking values rather than scores. """ # Initialize random number generator, for handling ties rng = np.random.default_rng(seed=seed) # Function to make rankings per array def rank_scores_and_assign_random_ranking_in_range_for_ties( scores_with_ties_for_motif_or_track_numpy: np.ndarray ) -> np.ndarray: # # Create random permutation so tied scores will have a different ranking each time. random_permutations_to_break_ties_numpy = rng.permutation( scores_with_ties_for_motif_or_track_numpy.shape[0] ) ranking_with_broken_ties_for_motif_or_track_numpy = random_permutations_to_break_ties_numpy[ (-scores_with_ties_for_motif_or_track_numpy)[ random_permutations_to_break_ties_numpy].argsort() ].argsort().astype(imputed_acc_obj_ranking_db_dtype) return ranking_with_broken_ties_for_motif_or_track_numpy # Create zeroed imputed object rankings database. if target == 'region': imputed_acc_ranking = CistopicImputedFeatures( np.zeros((len(scplus_obj.region_names), len(scplus_obj.cell_names)), dtype=np.int32), scplus_obj.region_names, scplus_obj.cell_names, 'Ranking') if target == 'gene': imputed_acc_ranking = CistopicImputedFeatures( np.zeros((len(scplus_obj.gene_names), len(scplus_obj.cell_names)), dtype=np.int32), scplus_obj.gene_names, scplus_obj.cell_names, 'Ranking') # Get dtype of the scores imputed_acc_obj_ranking_db_dtype = 'uint32' # Convert to csc if target == 'region': if sparse.issparse(scplus_obj.X_ACC): mtx = scplus_obj.X_ACC.tocsc() else: mtx = scplus_obj.X_ACC elif target == 'gene': if sparse.issparse(scplus_obj.X_EXP): mtx = scplus_obj.X_EXP.T.tocsc() else: mtx = scplus_obj.X_EXP.T # Rank all scores per motif/track and assign a random ranking in range for regions/genes with the same score. for col_idx in range(len(imputed_acc_ranking.cell_names)): imputed_acc_ranking.mtx[:, col_idx] = rank_scores_and_assign_random_ranking_in_range_for_ties( mtx[:, col_idx].toarray().flatten() if sparse.issparse( mtx) else mtx[:, col_idx].flatten() ) return imputed_acc_ranking
[docs]def score_eRegulons(scplus_obj: SCENICPLUS, ranking: CistopicImputedFeatures, inplace: bool = True, eRegulon_signatures_key: str = 'eRegulon_signatures', key_added: str = 'eRegulon_AUC', enrichment_type: str = 'region', auc_threshold: float = 0.05, normalize: bool = False, n_cpu: int = 1): """ Score eRegulons using AUCell Parameters ---------- scplus_obj: `class::SCENICPLUS` A SCENICPLUS object with formatted eRegulons. ranking: `class::CistopicImputedFeatures` A CistopicImputedFeatures object containing rankings, generated using the function make_rankings. inplace: bool, optional If set to True store result in scplus_obj, otherwise it is returned. eRegulon_signatures_key: str, optional Key where formated signatures are stored (in `scplus_obj.uns`) key_added: str, optional Key where formated AUC values will be stored (in `scplus_obj.uns`) enrichment_type: str, optional Whether region or gene signatures are being used auc_threshold: float The fraction of the ranked genome to take into account for the calculation of the Area Under the recovery Curve. Default: 0.05 normalize: bool Normalize the AUC values to a maximum of 1.0 per regulon. Default: False n_cpu: int The number of cores to use. Default: 1 """ if not key_added in scplus_obj.uns.keys(): scplus_obj.uns[key_added] = {} if enrichment_type == 'region': key = 'Region_based' if enrichment_type == 'gene': key = 'Gene_based' if inplace: scplus_obj.uns[key_added][key] = signature_enrichment(ranking, scplus_obj.uns[eRegulon_signatures_key][key], enrichment_type='gene', auc_threshold=auc_threshold, normalize=normalize, n_cpu=n_cpu) else: return signature_enrichment(ranking, scplus_obj.uns[eRegulon_signatures_key][key], enrichment_type='gene', auc_threshold=auc_threshold, normalize=normalize, n_cpu=n_cpu)
[docs]def binarize_AUC(scplus_obj: SCENICPLUS, auc_key: Optional[str] = 'eRegulon_AUC', out_key: Optional[str] = 'eRegulon_AUC_thresholds', signature_keys: Optional[List[str]] = ['Gene_based', 'Region_based'], n_cpu: Optional[int] = 1): """ Binarize eRegulons using AUCell Parameters ---------- scplus_obj: `class::SCENICPLUS` A SCENICPLUS object with eRegulons AUC. auc_key: str, optional Key where the AUC values are stored out_key: str, optional Key where the AUCell thresholds will be stored (in `scplus_obj.uns`) signature_keys: List, optional Keys to extract AUC values from. Default: ['Gene_based', 'Region_based'] n_cpu: int The number of cores to use. Default: 1 """ if not out_key in scplus_obj.uns.keys(): scplus_obj.uns[out_key] = {} for signature in signature_keys: auc_mtx = scplus_obj.uns[auc_key][signature] _, auc_thresholds = binarize(auc_mtx, num_workers=n_cpu) scplus_obj.uns[out_key][signature] = auc_thresholds