Source code for stlearn.image_preprocessing.feature_extractor
from typing import Literal
import numpy as np
from anndata import AnnData
from PIL import Image
from sklearn.decomposition import PCA
from tqdm import tqdm
from .model_zoo import Model
# Names of the pre-trained CNN backbones that `extract_feature` accepts
# for its `cnn_base` argument.
_CNN_BASE = Literal[
    "resnet50",
    "vgg16",
    "inception_v3",
    "xception",
]
[docs]
def extract_feature(
    adata: AnnData,
    cnn_base: _CNN_BASE = "resnet50",
    n_components: int = 50,
    seeds: int = 1,
    verbose: bool = False,
    copy: bool = False,
) -> AnnData | None:
    """\
    Extract latent morphological features from H&E images using pre-trained
    convolutional neural network base

    Parameters
    ----------
    adata:
        Annotated data matrix. Must contain a ``tile_path`` column in
        ``adata.obs`` holding one image path per spot.
    cnn_base:
        Established convolutional neural network bases
        choose one from ['resnet50', 'vgg16', 'inception_v3', 'xception']
    n_components:
        Number of principal components to compute for latent morphological
        features
    seeds:
        Fix random state (passed to PCA as ``random_state``)
    verbose:
        Verbose output (prints each tile path as it is loaded)
    copy:
        Return a copy instead of writing to adata.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.

    **X_tile_feature** : `adata.obsm` field
        Raw per-spot feature vectors produced by the CNN (float32).
    **X_morphology** : `adata.obsm` field
        Dimension reduced latent morphological features.

    Raises
    ------
    ValueError
        If any image fails to process, if the tile_path column is missing,
        or if it contains no entries.
    """
    # Validate before copying so invalid input does not pay for a full copy.
    if "tile_path" not in adata.obs:
        raise ValueError("Please run the function stlearn.pp.tiling")

    adata = adata.copy() if copy else adata

    tile_paths = adata.obs["tile_path"].values
    n_spots = len(tile_paths)
    if n_spots == 0:
        raise ValueError("No tile paths found in adata.obs['tile_path']")

    # Build the model only after the cheap input checks have passed.
    model = Model(cnn_base)

    # The first tile is processed up front so the feature dimensionality is
    # known and the full matrix can be pre-allocated.
    first_features = _read_and_predict(tile_paths[0], model, verbose=verbose)
    n_features = len(first_features)

    feature_matrix = np.empty((n_spots, n_features), dtype=np.float32)
    feature_matrix[0] = first_features

    with tqdm(
        total=n_spots,
        desc="Extract feature",
        bar_format="{l_bar}{bar} [ time left: {remaining} ]",
        initial=1,  # We already processed the first image
    ) as pbar:
        for i in range(1, n_spots):
            feature_matrix[i] = _read_and_predict(
                tile_paths[i], model, verbose=verbose
            )
            # Advance once per tile so the bar also moves for small inputs
            # and ends exactly at `total`; tqdm throttles its own redraws.
            pbar.update(1)

    adata.obsm["X_tile_feature"] = feature_matrix

    pca = PCA(n_components=n_components, random_state=seeds)
    pca.fit(feature_matrix)
    adata.obsm["X_morphology"] = pca.transform(feature_matrix)

    print("The morphology feature is added to adata.obsm['X_morphology']!")

    return adata if copy else None
def _read_and_predict(path, model, verbose=False):
    """Load one tile image and return its flattened model prediction.

    Parameters
    ----------
    path:
        Location of the image file, opened with ``PIL.Image.open``.
    model:
        Object exposing ``predict``; it is called with the image converted
        to a float32 array with one extra leading axis.
    verbose:
        If True, print the path once the image has been loaded.

    Returns
    -------
    The output of ``model.predict`` after ``.ravel()``
    (presumably a 1-D NumPy array -- depends on the model implementation).

    Raises
    ------
    ValueError
        If reading the image or running the model fails for any reason; the
        original exception is attached as ``__cause__``.
    """
    try:
        with Image.open(path) as img:
            # Conversion to float32 materialises the pixel data, so the array
            # remains usable after the file handle is closed.
            tile = np.asarray(img, dtype=np.float32)
        if verbose:
            print(f"Loaded image: {path}")
        # Add a leading axis so the model receives a batch of one image.
        tile = tile[np.newaxis, ...]
        return model.predict(tile).ravel()
    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the
        # direct cause of this error.
        raise ValueError(f"Failed to process image: {path}. Error: {e}") from e