from typing import Optional, Union
from anndata import AnnData
from matplotlib import pyplot as plt
from pathlib import Path
import os
import sys
import numpy as np
def parsing(
    adata: AnnData,
    coordinates_file: Union[Path, str],
    copy: bool = True,
) -> Optional[AnnData]:
    """\
    Parse legacy Spatial Transcriptomics data.

    Spots of `adata` are matched against a coordinates file and annotated
    with their pixel position. Spots that have no entry in the coordinates
    file are removed, and genes whose total count over the remaining spots
    is zero are removed afterwards.

    Parameters
    ----------
    adata
        Annotated data matrix. Observation names must have the form
        ``"<x>x<y>"`` with integer array coordinates ``x`` and ``y``.
    coordinates_file
        Coordinate file generated by st_spot_detector. It is read as
        whitespace-separated columns: columns 1-2 are the integer array
        coordinates of a spot, columns 5-6 its pixel coordinates. Blank
        lines and lines whose first column is ``x`` (header) are skipped.
    copy
        Return a copy instead of writing to adata.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.
    With ``copy=True`` the returned object is rebuilt from the filtered count
    table; with ``copy=False`` `adata` is subset and annotated in place and
    `None` is returned.

    **imagecol** and **imagerow** : `adata.obs` field
        Spatial information of the tissue image.
    **spatial** : `adata.obsm` field
        ``(n_spots, 2)`` array with one ``(imagecol, imagerow)`` pair per spot.

    Raises
    ------
    ValueError
        If a data line of the coordinates file has fewer than 6 columns, or
        if a coordinate or an observation name cannot be parsed as a number.
    """
    # Map (array_x, array_y) -> (pixel_x, pixel_y).
    pixel_by_spot = {}
    with open(coordinates_file, "r") as filehandler:
        for line_number, line in enumerate(filehandler, start=1):
            tokens = line.split()
            # Skip blank lines and the header line.
            if not tokens or tokens[0] == "x":
                continue
            if len(tokens) == 4:
                raise ValueError(
                    "Error, output format is pixel coordinates but\n "
                    "the coordinates file only contains 4 columns\n"
                )
            if len(tokens) < 6:
                # Explicit exception instead of `assert`, which is stripped
                # when Python runs with -O.
                raise ValueError(
                    "Error, line {0} of the coordinates file has {1} columns "
                    "but at least 6 are required\n".format(line_number, len(tokens))
                )
            spot = (int(tokens[0]), int(tokens[1]))
            pixel_by_spot[spot] = (float(tokens[4]), float(tokens[5]))

    counts_table = adata.to_df()

    # Collect pixel coordinates for the spots that are present in the
    # coordinates file, and remember which spots to keep.
    imgcol = []
    imgrow = []
    keep_spots = []
    for index in counts_table.index:
        tokens = index.split("x")
        pixel = pixel_by_spot.get((int(tokens[0]), int(tokens[1])))
        keep_spots.append(pixel is not None)
        if pixel is not None:
            imgcol.append(pixel[0])
            imgrow.append(pixel[1])
    keep_spots = np.asarray(keep_spots, dtype=bool)

    # Drop all unmatched spots in one pass (instead of one DataFrame.drop
    # call per spot), then find genes that still have a non-zero total count.
    counts_table = counts_table.loc[keep_spots]
    keep_genes = (counts_table.sum(axis=0) > 0).to_numpy()

    if copy:
        result = AnnData(counts_table.loc[:, keep_genes])
    else:
        # Resize the caller's object itself; rebinding the local name would
        # leave the caller's `adata` untouched.
        adata._inplace_subset_obs(keep_spots)
        adata._inplace_subset_var(keep_genes)
        result = adata

    result.obs["imagecol"] = imgcol
    result.obs["imagerow"] = imgrow
    # One (col, row) pair per spot. Stacking as columns keeps each spot's two
    # coordinates together; reshaping a (2, n) array to (n, 2) would
    # interleave them.
    result.obsm["spatial"] = np.column_stack((imgcol, imgrow))

    return result if copy else None