Source code for stlearn.tl.cci.analysis

"""Wrapper function for performing CCI analysis, varrying the analysis based on
the inputted data / state of the anndata object.
"""

import os

import numba
import numpy as np
import numpy.typing as npt
import pandas as pd
from anndata import AnnData
from statsmodels.stats.multitest import multipletests
from tqdm import tqdm

from .base import calc_distance, calc_neighbours, get_lrs_scores
from .go import run_go
from .het import (
    count,
    get_interaction_matrix,
    get_interaction_pvals,
    get_neighbourhoods,
    grid_parallel,
)
from .het_helpers import get_data_for_counting
from .permutation import perform_spot_testing


# Functions related to Ligand-Receptor interactions

[docs]
def load_lrs(
    names: str | list | None = None, species: str = "human"
) -> npt.NDArray[np.str_]:
    """Loads inputted LR database, & concatenates into consistent database set of
    pairs without duplicates. If None loads 'connectomeDB2020_lit'.

    Parameters
    ----------
    names: list
        Databases to load, options: 'connectomeDB2020_lit' (literature verified),
        'connectomeDB2020_put' (putative). If more than one specified, loads all &
        removes duplicates.
    species: str
        Format of the LR genes, either 'human' or 'mouse'.
    Returns
    -------
    lrs: np.ndarray
       lr pairs from the database in format ['L1_R1', 'LN_RN']
    """
    if names is None:
        names = ["connectomeDB2020_lit"]
    if isinstance(names, str):
        names = [names]

    path = os.path.dirname(os.path.realpath(__file__))
    dbs = [pd.read_csv(f"{path}/databases/{name}.txt", sep="\t") for name in names]
    lrs_full = []
    for db in dbs:
        lrs = [f"{db.values[i, 0]}_{db.values[i, 1]}" for i in range(db.shape[0])]
        lrs_full.extend(lrs)
    lrs_full_arr = np.unique(np.array(lrs_full))
    # If dealing with mouse, need to reformat #
    if species == "mouse":
        genes1 = [lr_.split("_")[0] for lr_ in lrs_full]
        genes2 = [lr_.split("_")[1] for lr_ in lrs_full]
        lrs_full_arr = np.array(
            [
                genes1[i][0]
                + genes1[i][1:].lower()
                + "_"
                + genes2[i][0]
                + genes2[i][1:].lower()
                for i in range(len(lrs_full))
            ],
        )

    return lrs_full_arr




[docs]
def grid(
    adata: AnnData,
    n_row: int = 10,
    n_col: int = 10,
    use_label: str | None = None,
    n_cpus: int | None = None,
    verbose: bool = True,
) -> AnnData:
    """Creates a new anndata representing a gridded version of the data; can be
        used upstream of CCI pipeline. NOTE: intended use is for single cell
        spatial data, not Visium or other lower resolution tech.

    Parameters
    ----------
    adata: AnnData
        The data object.
    n_row: int
        The number of rows in the grid.
    n_col: int
        The number of columns in the grid.
    use_label: str
        The cell type labels in adata.obs to join together & save as deconvolution data.
    n_cpus: int
        Number of threads to use or if None use os.cpu_count()
    Returns
    -------
    grid_data: AnnData
        Equivalent expression data to adata, except values have been summed by
        cells that fall within defined bins.
    """
    if verbose:
        print("Gridding...")

    # Setting threads for paralellisation #
    if n_cpus is not None:
        numba.set_num_threads(n_cpus)
    else:
        numba.set_num_threads(os.cpu_count())

    # Retrieving the coordinates of each grid #
    n_squares = n_row * n_col
    cell_bcs = adata.obs_names.values.astype(str)
    xs, ys = (
        adata.obs["imagecol"].values.astype(int),
        adata.obs["imagerow"].values.astype(int),
    )

    grid_counts, xedges, yedges = np.histogram2d(xs, ys, bins=[n_col, n_row])
    grid_counts, xedges, yedges = (
        grid_counts.astype(int),
        xedges.astype(float),
        yedges.astype(float),
    )

    grid_expr = np.zeros((n_squares, adata.shape[1]))
    grid_coords = np.zeros((n_squares, 2))
    grid_cell_counts = np.zeros(n_squares, dtype=np.int64)
    # If use_label is specified, then it will generate deconvolution information
    cell_labels, cell_set, cell_info = None, None, None
    if use_label is not None:
        cell_labels = adata.obs[use_label].values.astype(str)
        cell_set = np.unique(cell_labels).astype(str)
        cell_info = np.zeros((n_squares, len(cell_set)), dtype=np.float64)

    # Performing grid operation in parallel #
    grid_parallel(
        grid_coords,
        xedges,
        yedges,
        n_row,
        n_col,
        xs,
        ys,
        cell_bcs,
        grid_cell_counts,
        grid_expr,
        adata.X,
        use_label is not None,
        cell_labels,
        cell_info,
        cell_set,
    )

    # Creating gridded anndata #
    grid_expr = pd.DataFrame(
        grid_expr,
        index=[f"grid_{i}" for i in range(n_squares)],
        columns=adata.var_names.values.astype(str),
    )
    grid_data = AnnData(grid_expr)
    grid_data.obs["imagecol"] = grid_coords[:, 0]
    grid_data.obs["imagerow"] = grid_coords[:, 1]
    grid_data.obs["n_cells"] = grid_cell_counts
    grid_data.obsm["spatial"] = grid_coords
    grid_data.uns["spatial"] = adata.uns["spatial"]

    if use_label is not None and cell_info is not None and cell_set is not None:
        grid_data.uns[use_label] = pd.DataFrame(
            cell_info,
            index=grid_data.obs_names.values.astype(str),
            columns=cell_set,
        )
        max_indices = np.apply_along_axis(np.argmax, 1, cell_info)
        grid_data.obs[use_label] = [cell_set[index] for index in max_indices]
        grid_data.obs[use_label] = grid_data.obs[use_label].astype("category")
        grid_data.obs[use_label] = grid_data.obs[use_label].cat.set_categories(
            adata.obs[use_label].cat.categories,
        )
        if f"{use_label}_colors" in adata.uns:
            grid_data.uns[f"{use_label}_colors"] = adata.uns[f"{use_label}_colors"]

    # Subsetting to only gridded spots that contain cells #
    grid_data = grid_data[grid_data.obs["n_cells"] > 0, :].copy()
    if use_label is not None:
        grid_data.uns[use_label] = grid_data.uns[use_label].loc[grid_data.obs_names, :]

    grid_data.uns["grid_counts"] = grid_counts
    grid_data.uns["grid_xedges"] = xedges
    grid_data.uns["grid_yedges"] = yedges

    return grid_data




[docs]
def run(
    adata: AnnData,
    lrs: npt.NDArray[np.str_],
    min_spots: int = 10,
    distance: float | None = None,
    n_pairs: int = 1000,
    n_cpus: int | None = None,
    use_label: str | None = None,
    adj_method: str = "fdr_bh",
    pval_adj_cutoff: float = 0.05,
    min_expr: float = 0.0,
    save_bg: bool = False,
    neg_binom: bool = False,
    random_state: int = 0,
    verbose: bool = True,
) -> None:
    """Performs stLearn LR analysis.

    Parameters
    -----------
    adata: AnnData
        The data object.
    lrs: np.ndarray
        The LR pairs to score/test for enrichment (in format 'L1_R1').
    min_spots: int
        Minimum number of spots with an LR score for an LR to be considered for
        further testing.
    distance: float
        Distance to determine the neighbours (default [None] is immediately
        adjacent neighbours if using Visium), distance=0 means within spot
        (only for non-single-cell spatial data).
    n_pairs: int
        Number of random pairs of genes to generate when creating the background
        distribution per LR pair; higher than more accurate p-value estimation.
    n_cpus: int
        Number of threads to use or if None use os.cpu_count()
    use_label: str
        The cell type deconvolution results to use in counting stored in
        adata.uns; if not specified only considered LR expression without cell
        heterogeneity.
    adj_method: str
        Parsed to statsmodels.stats.multitest.multipletests for multiple
        hypothesis testing correction; see there for other options.
    pval_adj_cutoff: float
        P-value below which LR is considered significant in spot neighbourhood.
    min_expr: float
        Minimum gene expression of either L or R for spot to be considered to
        expression of either.
    save_bg: bool
        Whether to save the background per LR pair; for method development only.
        Not recommended since huge memory.
    neg_binom: bool
        Whether to fit a negative binomial distribution for all background
        scores generated across spots per LR after discretising the random
        scores. Can be extremely slow.
    random_state:
        A random seed for reproducibility.
    verbose: bool
        Whether print dialogue to user during run-time.
    Returns
    --------
    None
    Relevant information stored:
        adata.uns['lr_summary']
            Summary of significant spots detected per LR,
            the LRs listed in the index is the same order of LRs in the columns of
            results stored in adata.obsm below. Hence, the order of this must be
            maintained.
        adata.obsm
            Additional keys are added; 'lr_scores', 'lr_sig_scores', 'p_vals',
            'p_adjs', '-log10(p_adjs)'. All are numpy matrices, with columns
            referring to the LRs listed in adata.uns['lr_summary']. 'lr_scores'
            is the raw scores, while 'lr_sig_scores' is the same except only for
            significant scores; non-significant scores are set to zero.
        adata.obsm['cci_het']
            Only if use_label specified; contains the counts of the cell types found
            per spot.
    """
    # Setting threads for parallelisation
    if n_cpus is not None:
        numba.set_num_threads(n_cpus)
    else:
        numba.set_num_threads(os.cpu_count())

    # Making sure none of the var_names contains '_' already, these will need
    # to be renamed.
    prob_genes = [gene for gene in adata.var_names if "_" in gene]
    if len(prob_genes) > 0:
        raise Exception(
            "Detected '_' within some gene names, which breaks "
            + "internal string handling for the lrs in format 'L_R'.\n"
            + "Recommend to rename adata.var_names or remove these "
            + f"genes from adata:\n {prob_genes}",
        )

    # Calculating neighbour & storing #
    distance = calc_distance(adata, distance)
    neighbours = calc_neighbours(adata, distance, verbose=verbose)
    adata.obsm["spot_neighbours"] = pd.DataFrame(
        [",".join(x.astype(str)) for x in neighbours],
        index=adata.obs_names,
        columns=["neighbour_indices"],
    )
    spot_neighs_df = adata.obsm["spot_neighbours"]
    spot_neigh_bcs = []
    for i in range(spot_neighs_df.shape[0]):
        neigh_indices = [
            int(index)
            for index in spot_neighs_df.values[i, 0].split(",")
            if index != ""
        ]
        neigh_bcs = [adata.obs_names[index] for index in neigh_indices]
        spot_neigh_bcs.append(",".join(neigh_bcs))
    spot_neigh_bcs_df = pd.DataFrame(
        spot_neigh_bcs,
        index=spot_neighs_df.index,
        columns=["neighbour_bcs"],
    )
    # Important to store barcodes in-case adata subsetted #
    adata.obsm["spot_neigh_bcs"] = spot_neigh_bcs_df

    if verbose:
        print(
            "Spot neighbour indices stored in adata.obsm['spot_neighbours'] "
            "& adata.obsm['spot_neigh_bcs'].",
        )

    # Conduct with cell heterogeneity info if label_transfer provided #
    cell_het = use_label is not None and use_label in adata.uns
    if cell_het:
        if verbose:
            print("Calculating cell heterogeneity...")

        # Calculating cell heterogeneity #
        count(adata, distance=distance, use_label=use_label)

    het_vals = (
        np.array([1] * len(adata))
        if use_label not in adata.obsm
        else adata.obsm[use_label]
    )

    """ 1. Filter any LRs without stored expression.
    """
    # Calculating the lr_scores across spots for the inputted lrs #
    lr_scores, new_lrs = get_lrs_scores(adata, lrs, neighbours, het_vals, min_expr)
    lr_bool = (lr_scores > 0).sum(axis=0) > min_spots
    new_lrs = new_lrs[lr_bool]
    lr_scores = lr_scores[:, lr_bool]
    if verbose:
        print("Altogether " + str(len(new_lrs)) + " valid L-R pairs")
    if len(new_lrs) == 0:
        print("Exiting due to lack of valid LR pairs.")
        return

    """ Permutation methods generating background per spot, & test lrs in spot.
    """
    perform_spot_testing(
        adata,
        lr_scores,
        new_lrs,
        n_pairs,
        neighbours,
        het_vals,
        min_expr,
        adj_method,
        pval_adj_cutoff,
        verbose,
        save_bg=save_bg,
        neg_binom=neg_binom,
        random_state=random_state,
    )




[docs]
def adj_pvals(
    adata,
    pval_adj_cutoff: float = 0.05,
    correct_axis: str = "spot",
    adj_method: str = "fdr_bh",
):
    """Performs p-value adjustment and determination of significant spots.
        Default settings of this function are already run in st.tl.cci.run.

    Parameters
    ----------
    adata: AnnData
        Must have st.tl.cci.run performed prior.
    pval_adj_cutoff: float
        Cutoff for spot to be significant based on adjusted p-value.
    correct_axis: str
        Either 'spot', 'LR', or None; former corrects for number of LRs tested
        in each spot, middle corrects for number of spots tested per LR, and
        latter performs no adjustment (uses p-value for significant testing)
    adj_method: str
        Any method supported by statsmodels.stats.multitest.multipletests;
        https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html
    Returns
    -------
    adata: AnnData
        Adjusts all of the LR results; warning, does not adjust
        celltype-celltype results from running st.tl.run_cci downstream.
    """
    if "lr_summary" not in adata.uns:
        raise Exception("Need to run st.tl.cci.run first.")

    lr_scores = adata.obsm["lr_scores"]
    sig_scores = lr_scores.copy()
    ps = adata.obsm["p_vals"]
    padjs = np.ones(ps.shape)
    if correct_axis == "spot":
        for spot_i in range(ps.shape[0]):
            lr_indices = np.where(lr_scores[spot_i, :] > 0)[0]
            if len(lr_indices) > 0:
                spot_ps = ps[spot_i, lr_indices]
                spot_padjs = multipletests(spot_ps, method=adj_method)[1]
                padjs[spot_i, lr_indices] = spot_padjs
                sig_scores[spot_i, lr_indices[spot_padjs >= pval_adj_cutoff]] = 0
    elif correct_axis == "LR":
        for lr_i in range(ps.shape[1]):
            spot_indices = np.where(lr_scores[:, lr_i] > 0)[0]
            if len(spot_indices) > 0:
                lr_ps = ps[spot_indices, lr_i]
                spot_padjs = multipletests(lr_ps, method=adj_method)[1]
                padjs[spot_indices, lr_i] = spot_padjs
                sig_scores[spot_indices[spot_padjs >= pval_adj_cutoff], lr_i] = 0
    elif correct_axis is None:
        padjs = ps.copy()
        sig_scores[padjs >= pval_adj_cutoff] = 0
    else:
        raise Exception(
            "Invalid correct_axis input, must be one of: 'LR', 'spot', or None",
        )

    # Counting spots significant per lr #
    lr_counts = (padjs < pval_adj_cutoff).sum(axis=0)
    lr_counts_pval = (ps < pval_adj_cutoff).sum(axis=0)

    # Re-ranking LRs based on these counts & updating LR ordering #
    adata.uns["lr_summary"].loc[:, "n_spots_sig"] = lr_counts
    adata.uns["lr_summary"].loc[:, "n_spots_sig_pval"] = lr_counts_pval
    new_order = np.argsort(-adata.uns["lr_summary"].loc[:, "n_spots_sig"].values)
    adata.uns["lr_summary"] = adata.uns["lr_summary"].iloc[new_order, :]
    print("Updated adata.uns[lr_summary]")
    scores_ordered = lr_scores[:, new_order]
    sig_scores_ordered = sig_scores[:, new_order]
    ps_ordered = ps[:, new_order]
    padjs_ordered = padjs[:, new_order]
    log10padjs = -np.log10(padjs_ordered)

    keys = ["lr_scores", "lr_sig_scores", "p_vals", "p_adjs", "-log10(p_adjs)"]
    values = [scores_ordered, sig_scores_ordered, ps_ordered, padjs_ordered, log10padjs]
    for i in range(len(keys)):
        adata.obsm[keys[i]] = values[i]
        print(f"Updated adata.obsm[{keys[i]}]")




[docs]
def run_lr_go(
    adata: AnnData,
    r_path: str,
    n_top: int = 100,
    bg_genes: np.ndarray | None = None,
    min_sig_spots: int = 1,
    species: str = "human",
    p_cutoff: float = 0.01,
    q_cutoff: float = 0.5,
    onts: str = "BP",
    verbose: bool = True,
):
    """Runs a basic GO analysis on the genes in the top ranked LR pairs.
        Only supported for human and mouse species.

    Parameters
    ----------
    adata: AnnData
        Must have had st.tl.cci_rank.run() called prior.
    r_path: str
        Path to R, must have clusterProfiler, org.Mm.eg.db, and org.Hs.eg.db
        installed.
    bg_genes: np.ndarray
        Genes to be used as the background. If None, defaults to all genes in
        lr database: 'connectomeDB2020_put'.
    n_top: int
        The top number of LR pairs to use.
    min_sig_spots: int
        Minimum no. of significant spots pairs must have to be considered.
    species: str
        Species to perform the GO testing for.
    p_cutoff: float
        P-value & P-adj cutoff below which results will be returned.
    q_cutoff: float
        Q-value cutoff below which results will be returned.
    onts: str
        As per clusterProfiler; One of "BP", "MF", and "CC" subontologies, or "ALL"
        for all three.
    Returns
    -------
    adata: AnnData
        Relevant information stored in adata.uns['lr_go']
    """
    # Making sure inputted correct species
    all_species = ["human", "mouse"]
    if species not in all_species:
        raise Exception(f"Got {species} for species, must be one of {all_species}")

    # Getting the genes from the top LR pairs
    if "lr_summary" not in adata.uns:
        raise Exception("Need to run st.tl.cci.run first.")
    lrs = adata.uns["lr_summary"].index.values.astype(str)
    n_sig = adata.uns["lr_summary"].loc[:, "n_spots_sig"].values.astype(int)
    top_lrs = lrs[n_sig > min_sig_spots][0:n_top]
    top_genes = np.unique([lr.split("_") for lr in top_lrs])

    # Determining the background genes if not inputted
    if bg_genes is None:
        all_lrs = load_lrs("connectomeDB2020_put")
        all_genes = [lr_.split("_") for lr_ in all_lrs]
        bg_genes = np.unique(all_genes)

    # Running the GO analysis
    go_results = run_go(
        top_genes,
        bg_genes,
        species,
        r_path,
        p_cutoff=p_cutoff,
        q_cutoff=q_cutoff,
        onts=onts,
    )
    adata.uns["lr_go"] = go_results
    if verbose:
        print("GO results saved to adata.uns['lr_go']")



# Functions for calling Celltype-Celltype interactions

[docs]
def run_cci(
    adata: AnnData,
    use_label: str,
    spot_mixtures: bool = False,
    min_spots: int = 3,
    sig_spots: bool = True,
    cell_prop_cutoff: float = 0.2,
    p_cutoff: float = 0.05,
    n_perms: int = 100,
    n_cpus: int | None = None,
    random_state: int = 0,
    verbose: bool = True,
):
    """Calls significant celltype-celltype interactions based on cell-type data
    randomisation.

    Parameters
    ----------
    adata: AnnData
        Must have had st.tl.cci_rank.run() called prior.
    use_label: str
        If !spot_mixtures, is a key in adata.obs, else key in adata.uns.
        Note if spot_mixtures specified, must have both the deconvolution data
        in adata.uns[use_label] and the dominant cell type per spot stored in
        adata.obs[use_label]. See tutorial for example.
    spot_mixtures: bool
        If true, indicates using deconvolution data, hence use_label
        refers to adata.uns.
    min_spots: int
        Specifies the minimum number of spots where LR score present to
        include in subsequent analysis.
    sig_spots: bool
        If true, only consider edges which include a signficant spot from
        calling st.tl.cci.run()
    cell_prop_cutoff: float
        Only relevant if spot_mixtures==True, indicates cutoff where cell type
        considered found in spot.
    p_cutoff: float
        Value at which p is considered significant.
    n_perms: int
        Number of randomisations of cell data to generate p-values.
        If set to 0, then performs no permutations, but still does perform
        raw counting of the cell type interactions with each LR hotspot. This
        can still be visualised downstream by setting paramters to plot
        significant interactions to false.
    n_cpus: int | None
        cpu resources to use.
    random_state:
        A random seed for reproducibility.
    verbose: bool
        True if print dialogue to user during run-time.
    Returns
    -------
    adata: AnnData
        Relevant information stored
            adata.uns['lr_summary']
                Additional columns; f"n_cci_sig_{use_label}",
                f"n-spot_cci_{use_label}", f"n-spot_cci_sig_{use_label}".
                Former is the no. of CCIs significant for the LR, middle is
                the no. of individual spot-spot interactions across all CCIs for
                LR, and latter is the no. of significant individual spot
                interactions.
            adata.uns
                Dataframes added:
                    f"lr_cci_raw_{use_label}"
                        The raw count of spot-spot interactions across all LR
                        pairs for each possible CCI.
                    f"lr_cci_raw_{use_label}"
                        The count of significant spot-spot interactions across
                        all LR pairs for each possible CCI.
                Dictionaries added:
                    f"per_lr_cci_pvals_{use_label}"
                        Each key refers to a LR, with the value being a dataframe
                        listing the p-values for each potential CCI.
                    f"per_lr_cci_raw_{use_label}"
                        Each key refers to a LR, with the value being a dataframe
                        listing the count of spot-spot interactions via the LR in
                        significant LR neighbourhoods stratified by each
                        celltype-celltype combination.
                    f"per_lr_cci_{use_label}"
                        The same as f"per_lr_cci_raw_{use_label}", except
                        subsetted to significant CCIs.
    """
    # Setting threads for paralellisation #
    if n_cpus is not None:
        numba.set_num_threads(n_cpus)
    else:
        numba.set_num_threads(os.cpu_count())

    ran_lr = "lr_summary" in adata.uns
    ran_sig = False if not ran_lr else "n_spots_sig" in adata.uns["lr_summary"].columns
    if not ran_lr or not ran_sig:
        raise Exception(
            "No LR results testing results found, please run st.tl.cci.run first",
        )

    # Ensuring compatibility with current way of adding label_transfer to object
    if use_label == "label_transfer" or use_label == "predictions":
        obs_key, uns_key = "predictions", "label_transfer"
    else:
        obs_key, uns_key = use_label, use_label

    # Getting the cell/tissue types that we are actually testing #
    if obs_key not in adata.obs:
        raise Exception(
            f"Missing {obs_key} from adata.obs, need this even if using mixture mode.",
        )
    tissue_types = adata.obs[obs_key].values.astype(str)
    all_set = np.unique(tissue_types)

    # Mixture mode
    mix_mode = spot_mixtures and uns_key in adata.uns
    if not mix_mode and spot_mixtures:
        print(
            f"Warning: specified spot_mixtures but no deconvolution data in "
            f"adata.uns['{uns_key}'].\nFalling back to discrete mode.",
        )
    if mix_mode:  # Checking the deconvolution results stored correctly.
        cols_present = np.all(
            [cell_type in adata.uns[uns_key] for cell_type in all_set],
        )
        rows_present = np.all(adata.uns[uns_key].index.values == adata.obs_names.values)
        msg = f"Cell type scores misformatted in adata.uns[{uns_key}]:\n"
        if not cols_present or not rows_present:
            if not cols_present:
                msg = (
                    msg + f"Cell types missing from adata.uns[{uns_key}] columns:\n"
                    f"{[cell for cell in all_set if cell not in adata.uns[uns_key]]}\n"
                )
            elif not rows_present:
                msg = msg + "Rows do not correspond to adata.obs_names.\n"
            raise Exception(msg)

        # Checking for case where have cell types that are never dominant
        # in a spot, so need to include these in all_set
        if len(all_set) < adata.uns[uns_key].shape[1]:
            all_set = adata.uns[uns_key].columns.values.astype(str)

    # Getting minimum necessary information for edge counting
    if verbose:
        print("Getting cached neighbourhood information...")
    # Getting the neighbourhoods #
    _, neighbourhood_indices = get_neighbourhoods(adata)

    if verbose:
        print("Getting information for CCI counting...")

    cell_data = get_data_for_counting(adata, use_label, mix_mode, all_set)

    lr_summary = adata.uns["lr_summary"]
    col_i = 1 if sig_spots else 0
    col = "lr_sig_scores" if sig_spots else "lr_scores"
    best_lr_indices = np.where(lr_summary.values[:, col_i] > min_spots)[0]
    best_lrs = lr_summary.index.values[best_lr_indices]
    lr_genes = np.unique([lr.split("_") for lr in best_lrs])
    if len(lr_genes) == 0:
        raise Exception(
            "No LR pairs returned with current filtering params; \n"
            "may need to adjust min_spots, sig_spots parameters, "
            "or re-run st.tl.cci.run with more relaxed parameters.",
        )
    lr_expr = adata[:, lr_genes].to_df()

    # Sig-CCIs across all LRs #
    all_matrix = np.zeros((len(all_set), len(all_set)), dtype=int)
    # CCIs across all LRs #
    raw_matrix = np.zeros((len(all_set), len(all_set)), dtype=int)
    per_lr_cci = {}  # Per LR significant CCI counts #
    per_lr_cci_pvals = {}  # Per LR CCI p-values #
    per_lr_cci_raw = {}  # Per LR raw CCI counts #
    lr_n_spot_cci = np.zeros(lr_summary.shape[0])
    lr_n_spot_cci_sig = np.zeros(lr_summary.shape[0])
    lr_n_cci_sig = np.zeros(lr_summary.shape[0])
    with tqdm(
        total=len(best_lrs),
        desc="Counting celltype-celltype interactions per LR and permuting "
        + f"{n_perms} times.",
        bar_format="{l_bar}{bar} [ time left: {remaining} ]",
        disable=not verbose,
    ) as pbar:
        for i, best_lr in enumerate(best_lrs):
            ligand, receptor = best_lr.split("_")

            L_bool = lr_expr.loc[:, ligand].values > 0
            R_bool = lr_expr.loc[:, receptor].values > 0
            lr_index = np.where(adata.uns["lr_summary"].index.values == best_lr)[0][0]
            sig_bool = adata.obsm[col][:, lr_index] > 0

            int_matrix = get_interaction_matrix(
                cell_data,
                neighbourhood_indices,
                all_set,
                sig_bool,
                L_bool,
                R_bool,
                cell_prop_cutoff,
            ).astype(int)

            if n_perms > 0:
                int_pvals = get_interaction_pvals(
                    int_matrix,
                    n_perms,
                    cell_data,
                    neighbourhood_indices,
                    all_set,
                    sig_bool,
                    L_bool,
                    R_bool,
                    cell_prop_cutoff,
                    random_state + i,
                )
            else:
                int_pvals = np.ones(int_matrix.shape)

            # Setting spot counts to 0 for non-significant ccis #
            sig_int_matrix = int_matrix.copy()
            sig_int_matrix[int_pvals > p_cutoff] = 0

            # Summarising n-spots sig/non-sig across ccis & total cci_rank for lr pair
            lr_n_spot_cci[best_lr_indices[i]] = int_matrix.sum()
            lr_n_spot_cci_sig[best_lr_indices[i]] = sig_int_matrix.sum()
            lr_n_cci_sig[best_lr_indices[i]] = (int_pvals < p_cutoff).sum()

            raw_matrix += int_matrix
            all_matrix += sig_int_matrix
            int_df = pd.DataFrame(int_matrix, index=all_set, columns=all_set)
            sig_int_df = pd.DataFrame(sig_int_matrix, index=all_set, columns=all_set)
            pval_df = pd.DataFrame(int_pvals, index=all_set, columns=all_set)
            per_lr_cci[best_lr] = sig_int_df
            per_lr_cci_pvals[best_lr] = pval_df
            per_lr_cci_raw[best_lr] = int_df

            pbar.update(1)

    # Saving results to anndata #
    adata.uns["lr_summary"][f"n_cci_sig_{use_label}"] = lr_n_cci_sig
    adata.uns["lr_summary"][f"n-spot_cci_{use_label}"] = lr_n_spot_cci
    adata.uns["lr_summary"][f"n-spot_cci_sig_{use_label}"] = lr_n_spot_cci_sig
    adata.uns[f"lr_cci_{use_label}"] = pd.DataFrame(
        all_matrix,
        index=all_set,
        columns=all_set,
    )
    adata.uns[f"lr_cci_raw_{use_label}"] = pd.DataFrame(
        raw_matrix,
        index=all_set,
        columns=all_set,
    )
    adata.uns[f"per_lr_cci_{use_label}"] = per_lr_cci
    adata.uns[f"per_lr_cci_pvals_{use_label}"] = per_lr_cci_pvals
    adata.uns[f"per_lr_cci_raw_{use_label}"] = per_lr_cci_raw
    if verbose:
        print(
            f"Significant counts of cci_rank interactions for all LR pairs in "
            f"{f'data.uns[lr_cci_{use_label}]'}",
        )
        print(
            f"Significant counts of cci_rank interactions for each LR pair "
            f"stored in dictionary {f'data.uns[per_lr_cci_{use_label}]'}",
        )