Source code for stlearn.adds.parsing

from os import PathLike

import numpy as np
from anndata import AnnData


def _read_pixel_coordinates(
    coordinates_file: int | str | bytes | PathLike[str] | PathLike[bytes],
) -> dict[tuple[int, int], tuple[float, float]]:
    """Read the coordinates file into an ``(x, y) -> (pixel_x, pixel_y)`` map."""
    pixel_by_spot: dict[tuple[int, int], tuple[float, float]] = {}
    with open(coordinates_file) as filehandler:
        # Iterate the handle lazily instead of loading every line up front.
        for line in filehandler:
            tokens = line.split()
            if not tokens:
                # Tolerate blank lines (e.g. an extra newline at end of file).
                continue
            # Explicit raise rather than `assert`: asserts are stripped when
            # Python runs with -O, which would let malformed rows through.
            if len(tokens) != 4 and len(tokens) < 6:
                raise ValueError(
                    "Error, expected 4 or at least 6 columns in the "
                    f"coordinates file but found {len(tokens)}: {line!r}"
                )
            if tokens[0] == "x":
                # Header row.
                continue
            if len(tokens) < 6:
                raise ValueError(
                    "Error, output format is pixel coordinates but\n "
                    "the coordinates file only contains 4 columns\n"
                )
            # Columns 1-2 are the integer array position, columns 5-6 the pixel
            # position. Columns 3-4 are not used by this function.
            spot = (int(tokens[0]), int(tokens[1]))
            pixel_by_spot[spot] = (float(tokens[4]), float(tokens[5]))
    return pixel_by_spot


def parsing(
    adata: AnnData,
    coordinates_file: int | str | bytes | PathLike[str] | PathLike[bytes],
    copy: bool = True,
) -> AnnData | None:
    """\
    Parsing the old spatial transcriptomics data

    Spots of `adata` are matched against the coordinates file through their
    observation names, which must have the form ``"<x>x<y>"`` with integer
    `x` and `y`. Spots without an entry in the file are discarded, and genes
    whose total count over the remaining spots is not positive are removed.

    Parameters
    ----------
    adata
        Annotated data matrix.
    coordinates_file
        Coordinate file generated by st_spot_detector. Whitespace-separated
        rows with either 4 or at least 6 columns; a row whose first column is
        ``x`` is treated as a header. Columns 1-2 hold the integer spot
        position and columns 5-6 the pixel position.
    copy
        If `True`, return the resulting object; if `False`, return `None`.
        NOTE(review): the result is always assembled as a new `AnnData` from
        the filtered count table, so the object passed in is not modified.
        With ``copy=False`` the result is therefore discarded - confirm
        whether in-place updating was intended.

    Returns
    -------
    If `copy` is true, a new `AnnData` built from the filtered count table
    with the following fields, otherwise `None`.

    **imagecol** and **imagerow** : `adata.obs` field
        Spatial information of the tissue image (pixel x and pixel y).
    **spatial** : `adata.obsm` field
        Array of shape ``(n_spots, 2)`` whose rows are ``(imagecol, imagerow)``.

    Raises
    ------
    ValueError
        If a row of the coordinates file has an unsupported number of
        columns, if a data row has only 4 columns (no pixel coordinates), or
        if a value cannot be converted to a number.
    """
    new_coordinates = _read_pixel_coordinates(coordinates_file)

    # to_df() already yields a separate table, and the input object is never
    # written to, so no defensive adata.copy() is needed here.
    counts_table = adata.to_df()

    keep = []
    imgcol = []
    imgrow = []
    for index in counts_table.index:
        tokens = index.split("x")
        pixel = new_coordinates.get((int(tokens[0]), int(tokens[1])))
        keep.append(pixel is not None)
        if pixel is not None:
            imgcol.append(pixel[0])
            imgrow.append(pixel[1])

    # Drop all unmatched spots in one selection instead of one drop() per row.
    # Observation names keep their original "<x>x<y>" labels.
    counts_table = counts_table.loc[np.asarray(keep, dtype=bool)]

    # Remove genes that have now a total count of zero
    counts_table = counts_table.loc[:, counts_table.sum(axis=0) > 0]

    adata = AnnData(counts_table)

    adata.obs["imagecol"] = imgcol
    adata.obs["imagerow"] = imgrow

    # One (col, row) pair per spot. np.c_[[imgcol, imgrow]] would yield a
    # 2 x n array, and reshaping that to (-1, 2) mixes up the pairs.
    adata.obsm["spatial"] = np.column_stack((imgcol, imgrow))

    return adata if copy else None