Source code for stlearn.spatial.clustering.localization

import numpy as np
import pandas as pd
from anndata import AnnData
from natsort import natsorted
from sklearn.cluster import DBSCAN



[docs]
def localization(
    adata: AnnData,
    use_label: str = "leiden",
    eps: float = 20.0,
    min_samples: int = 1,
    copy: bool = False,
) -> AnnData | None:
    """\
    Split every cluster into spatially local sub-clusters using DBSCAN.

    For each distinct value of ``adata.obs[use_label]``, DBSCAN is run on the
    ``imagerow`` / ``imagecol`` coordinates of the observations carrying that
    label. Every (cluster, DBSCAN label) pair becomes one sub-cluster; the
    sub-clusters are numbered ``"0"``, ``"1"``, ... in order of first
    appearance in ``adata.obs``.

    Parameters
    ----------
    adata: AnnData
        Annotated data matrix. ``adata.obs`` must contain the columns
        ``imagerow`` and ``imagecol`` as well as ``use_label``.
    use_label: str, default = "leiden"
        Use label result of cluster method. The column must be categorical
        and its categories must be convertible with ``int``.
    eps: float, default 20.0
        The maximum distance between two samples for one to be considered as
        in the neighborhood of the other. This is not a maximum bound on the
        distances of points within a cluster. This is the most important DBSCAN
        parameter to choose appropriately for your data set and distance function.
    min_samples: int, default = 1
        The number of samples (or total weight) in a neighborhood for a point to be
        considered as a core point. This includes the point itself. Passed into DBSCAN's
        min_samples parameter.
    copy: bool, default = False
        Return a copy instead of writing to adata.

    Returns
    -------
    AnnData | None
        The modified copy if ``copy=True``, otherwise ``None`` (``adata`` is
        updated in place). The following fields are written:

        ``adata.obs["sub_cluster_labels"]``
            Categorical sub-cluster id (as a string) of every observation.
        ``adata.uns[use_label + "_index_dict"]``
            Mapping from each integer cluster label to its position in the
            categories of ``adata.obs[use_label]``.

    Raises
    ------
    ValueError
        If ``adata.obs[use_label]`` contains missing values, or if one of its
        categories cannot be converted with ``int``.
    """

    adata = adata.copy() if copy else adata

    obs = adata.obs
    cluster_labels = obs[use_label]
    if cluster_labels.isna().any():
        # A missing label would yield an empty group and an obscure DBSCAN
        # error ("0 samples"); fail early with an actionable message instead.
        raise ValueError(
            f"adata.obs[{use_label!r}] contains missing values; every "
            "observation needs a cluster label before sub-clustering."
        )

    # Built before anything is written so that a non-categorical or
    # non-integer label column fails without leaving adata half-updated.
    cat_ind = {
        int(category): position
        for position, category in enumerate(cluster_labels.cat.categories)
    }

    coords = obs[["imagerow", "imagecol"]]

    # Sub-labels are filled positionally. This avoids assigning into a slice
    # of adata.obs (and therefore any need to toggle pandas' global
    # chained-assignment option) and does not rely on obs names being unique.
    sub_labels = np.empty(len(obs), dtype=object)
    for cluster in cluster_labels.unique():
        in_cluster = (cluster_labels == cluster).to_numpy(dtype=bool)

        clustering = DBSCAN(eps=eps, min_samples=min_samples, algorithm="kd_tree").fit(
            coords.loc[in_cluster]
        )

        # NOTE: DBSCAN marks noise points with -1 (possible when
        # min_samples > 1); they end up together in one "<cluster>_-1" group.
        sub_labels[in_cluster] = [
            f"{cluster}_{sub_label}" for sub_label in clustering.labels_
        ]

    # Convert to numeric: consecutive ids in order of first appearance.
    codes, uniques = pd.factorize(sub_labels)

    if "sub_cluster_labels" in adata.obs.columns:
        adata.obs = adata.obs.drop("sub_cluster_labels", axis=1)

    adata.obs["sub_cluster_labels"] = pd.Categorical(
        values=codes.astype("U"),
        categories=[str(code) for code in range(len(uniques))],
    )

    adata.uns[use_label + "_index_dict"] = cat_ind

    return adata if copy else None