Source code for stlearn.adds.parsing

from os import PathLike

import numpy as np
from anndata import AnnData



[docs]
def parsing(
    adata: AnnData,
    coordinates_file: int | str | bytes | PathLike[str] | PathLike[bytes],
    copy: bool = True,
) -> AnnData | None:
    """\
    Parsing the old spaital transcriptomics data

    Parameters
    ----------
    adata
        Annotated data matrix.
    coordinates_file
        Coordinate file generated by st_spot_detector.
    copy
        Return a copy instead of writing to adata.
    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.
    **imagecol** and **imagerow** : `adata.obs` field
        Spatial information of the tissue image.
    """

    # Get a map of the new coordinates
    new_coordinates = dict()
    with open(coordinates_file) as filehandler:
        for line in filehandler.readlines():
            tokens = line.split()
            assert len(tokens) >= 6 or len(tokens) == 4
            if tokens[0] != "x":
                old_x = int(tokens[0])
                old_y = int(tokens[1])
                new_x = round(float(tokens[2]), 2)
                new_y = round(float(tokens[3]), 2)
                if len(tokens) >= 6:
                    pixel_x = float(tokens[4])
                    pixel_y = float(tokens[5])
                    new_coordinates[(old_x, old_y)] = (pixel_x, pixel_y)
                else:
                    raise ValueError(
                        "Error, output format is pixel coordinates but\n "
                        "the coordinates file only contains 4 columns\n"
                    )

    adata = adata.copy() if copy else adata

    counts_table = adata.to_df()
    new_index_values = list()

    imgcol = []
    imgrow = []
    for index in counts_table.index:
        tokens = index.split("x")
        x = int(tokens[0])
        y = int(tokens[1])
        try:
            new_x, new_y = new_coordinates[(x, y)]
            imgcol.append(new_x)
            imgrow.append(new_y)

            new_index_values.append(f"{new_x}x{new_y}")
        except KeyError:
            counts_table.drop(index, inplace=True)

    # Assign the new indexes
    # counts_table.index = new_index_values

    # Remove genes that have now a total count of zero
    counts_table = counts_table.transpose()[counts_table.sum(axis=0) > 0].transpose()

    adata = AnnData(counts_table)

    adata.obs["imagecol"] = imgcol
    adata.obs["imagerow"] = imgrow
    adata.obsm["spatial"] = np.c_[[imgcol, imgrow]].reshape(-1, 2)

    return adata if copy else None