# Source code for stlearn.image_preprocessing.feature_extractor

from .model_zoo import encode, Model
from typing import Optional, Union
from anndata import AnnData
import numpy as np
from .._compat import Literal
from PIL import Image
import pandas as pd
from pathlib import Path

# Test progress bar
from tqdm import tqdm

# Names accepted for the `cnn_base` argument of `extract_feature`; the value
# is handed to `Model(...)` to pick the convolutional network base.
_CNN_BASE = Literal["resnet50", "vgg16", "inception_v3", "xception"]


def extract_feature(
    adata: AnnData,
    cnn_base: _CNN_BASE = "resnet50",
    n_components: int = 50,
    verbose: bool = False,
    copy: bool = False,
    seeds: int = 1,
) -> Optional[AnnData]:
    """\
    Extract latent morphological features from H&E images using
    pre-trained convolutional neural network base

    Every tile listed in ``adata.obs["tile_path"]`` is opened, encoded with
    the selected network, and the per-spot feature vectors are then reduced
    with PCA.

    Parameters
    ----------
    adata
        Annotated data matrix. Must contain a ``tile_path`` column in
        ``adata.obs`` holding one image path per spot.
    cnn_base
        Established convolutional neural network bases
        choose one from ['resnet50', 'vgg16', 'inception_v3', 'xception']
    n_components
        Number of principal components to compute for latent morphological features
    verbose
        Verbose output (prints one line per processed spot)
    copy
        Return a copy instead of writing to adata.
    seeds
        Fix random state (passed to PCA as ``random_state``)

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.

    **X_tile_feature** : `adata.obsm` field
        Encoded tile features before dimension reduction, one row per spot.
    **X_morphology** : `adata.obsm` field
        Dimension reduced latent morphological features.

    Raises
    ------
    ValueError
        If ``adata.obs`` has no ``tile_path`` column.
    """
    # Validate before building the model so a missing column fails fast.
    if "tile_path" not in adata.obs:
        raise ValueError("Please run the function stlearn.pp.tiling")

    # Honour `copy`: with copy=True the caller's object must stay untouched
    # and all results go to the returned copy.
    adata = adata.copy() if copy else adata

    model = Model(cnn_base)
    feature_dfs = []

    with tqdm(
        total=len(adata),
        desc="Extract feature",
        bar_format="{l_bar}{bar} [ time left: {remaining} ]",
    ) as pbar:
        for spot, tile_path in adata.obs["tile_path"].items():
            # Release the image file handle as soon as the pixels are read;
            # the dtype conversion yields an array independent of the file.
            with Image.open(tile_path) as tile_image:
                tile = np.asarray(tile_image, dtype="int32")
            tile = tile.astype(np.float32)
            # Add a leading axis of size 1 so a single tile forms a batch.
            tile = np.stack([tile])
            if verbose:
                print("extract feature for spot: {}".format(str(spot)))
            features = encode(tile, model)
            # One single-column frame per spot, labelled with the spot name.
            feature_dfs.append(pd.DataFrame(features, columns=[spot]))
            pbar.update(1)

    # Columns are spots after concatenation; transpose once so that rows are
    # spots, and reuse the same matrix for storage and for PCA.
    tile_features = pd.concat(feature_dfs, axis=1).transpose().to_numpy()
    adata.obsm["X_tile_feature"] = tile_features

    # Imported lazily, as in the original, so scikit-learn is only needed
    # once features have actually been extracted.
    from sklearn.decomposition import PCA

    pca = PCA(n_components=n_components, random_state=seeds)
    adata.obsm["X_morphology"] = pca.fit_transform(tile_features)

    print("The morphology feature is added to adata.obsm['X_morphology']!")

    return adata if copy else None