Skip to content

Commit

Permalink
Merge pull request #2 from mvinyard/test-release
Browse files Browse the repository at this point in the history
Test release `v0.0.1rc0`
  • Loading branch information
mvinyard committed Aug 3, 2023
2 parents 6da4c65 + 7e890b2 commit 05af47d
Show file tree
Hide file tree
Showing 5 changed files with 769 additions and 32 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# 🔎 AnnDataQuery

Fetch and format data matrices from AnnData.
[![PyPI pyversions](https://img.shields.io/pypi/pyversions/adata_query.svg)](https://pypi.python.org/pypi/adata_query/)
[![PyPI version](https://badge.fury.io/py/adata_query.svg)](https://badge.fury.io/py/adata_query)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

Fetch data matrices from AnnData and format as `np.ndarray` or `torch.Tensor`, on any device.

Example: [notebook](https://colab.research.google.com/github/mvinyard/AnnDataQuery/blob/test-release/notebooks/anndata_query_tutorial.ipynb)

For more information, see: [documentation](https://michael-vinyard.gitbook.io/anndataquery/)
4 changes: 3 additions & 1 deletion adata_query/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# __init__.py

__version__ = "v0.0.1"

from . import _utils
from . import _core

from ._core import format_data, fetch
from ._core import format_data, fetch, locate
90 changes: 63 additions & 27 deletions adata_query/_core/_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import autodevice
import anndata
import torch as _torch
import numpy as np


# -- import local dependencies: ------------------------------------------------
Expand All @@ -12,7 +13,7 @@


# -- set typing: ---------------------------------------------------------------
from typing import Optional
from typing import Dict, List, Optional, Union


# -- operational class: --------------------------------------------------------
Expand All @@ -31,8 +32,12 @@ def _forward(self, adata, key):
return format_data(data=data, torch = self._torch, device = self._device)

def _grouped_subroutine(self, adata, key):
for group, group_df in self._GROUPED:
yield self._forward(adata[group_df.index], key)
if self._as_dict:
for group, group_df in self._GROUPED:
yield group, self._forward(adata[group_df.index], key)
else:
for group, group_df in self._GROUPED:
yield self._forward(adata[group_df.index], key)

def __call__(
self,
Expand All @@ -41,22 +46,41 @@ def __call__(
groupby: Optional[str] = None,
torch: bool = False,
device: _torch.device = autodevice.AutoDevice(),
as_dict: bool = True,
):
"""
adata: anndata.AnnData [required]
adata: anndata.AnnData [ required ]
Annotated single-cell data object.
key: str [required]
key: str [ required ]
Key to access a matrix in adata. For example, if you wanted to access
adata.obsm['X_pca'], you would pass: "X_pca".
groupby: Optional[str], default = None
Optionally, one may choose to group data according to a cell-specific
annotation in adata.obs. When passed, data is returned per group, as a Dict
(default) or a List, depending on `as_dict`.
torch: bool, default = False
Boolean indicator of whether data should be formatted as torch.Tensor. If
False (default), data is formatted as np.ndarray.device (torch.device) =
autodevice.AutoDevice(). Should torch=True, the device ("cpu", "cuda:N",
"mps:N") may be set. The default value, autodevice.AutoDevice() will
indicate the use of GPU, if available.
device: torch.device, default = autodevice.AutoDevice()
as_dict: bool, default = True
Only relevant when `groupby` is not None. Boolean indicator to return
data in a Dict where the key for each value corresponds to the respective
`groupby` value. If False, returns List.
"""

self.__update__(locals(), public=[None])

if hasattr(self, "_groupby"):
if self._as_dict:
return dict(self._grouped_subroutine(adata, key))
return list(self._grouped_subroutine(adata, key))
return self._forward(adata, key)

Expand All @@ -66,51 +90,63 @@ def fetch(
groupby: Optional[str] = None,
torch: bool = False,
device: _torch.device = autodevice.AutoDevice(),
as_dict: bool = True,
*args,
**kwargs,
):
) -> Union[
_torch.Tensor,
np.ndarray,
List[Union[_torch.Tensor, np.ndarray]],
Dict[Union[str, int], Union[_torch.Tensor, np.ndarray]],
]:
"""
Given adata and a key that points to a specific matrix stored in adata,
return the data, formatted either as np.ndarray or torch.Tensor. If formatted
as torch.Tensor, the device may be specified based on available devices.
Parameters
----------
adata: anndata.AnnData [ required ]
Annotated single-cell data object.
key: str [ required ]
Key to access a matrix in adata. For example, if you wanted to access
adata.obsm['X_pca'], you would pass: "X_pca".
groupby: Optional[str], default = None
Optionally, one may choose to group data according to a cell-specific
annotation in adata.obs. When passed, data is returned per group, as a Dict
(default) or a List, depending on `as_dict`.
torch: bool, default = False
Boolean indicator of whether data should be formatted as torch.Tensor. If
False (default), data is formatted as np.ndarray.device (torch.device) =
autodevice.AutoDevice(). Should torch=True, the device ("cpu", "cuda:N",
"mps:N") may be set. The default value, autodevice.AutoDevice() will
autodevice.AutoDevice(). Should torch=True, the device ("cpu", "cuda:N",
"mps:N") may be set. The default value, autodevice.AutoDevice() will
indicate the use of GPU, if available.
as_dict: bool, default = True
Only relevant when `groupby` is not None. Boolean indicator to return
data in a Dict where the key for each value corresponds to the respective
`groupby` value. If False, returns List.
Returns
-------
data: Union[torch.Tensor, np.ndarray, List[torch.Tensor], List[np.ndarray]
data: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, np.ndarray]], Dict[Union[str, int], Union[torch.Tensor, np.ndarray]]]
Formatted data as np.ndarray or torch.Tensor. If torch=True the torch.Tensor
is allocated to the device indicated by the device argument. If groupby is passed,
returned as a List[np.ndarray] or List[torch.Tensor]
is allocated to the device indicated by the device argument. If `groupby` is passed,
returns a Dict mapping each `groupby` value to its np.ndarray or torch.Tensor. If `groupby`
is passed and `as_dict` = False, returns List[np.ndarray] or List[torch.Tensor].
"""



fetcher = AnnDataFetcher()

return fetcher(
adata = adata,
key = key,
groupby = groupby,
torch = torch,
device = device,
adata=adata,
key=key,
groupby=groupby,
torch=torch,
device=device,
as_dict=as_dict,
*args,
**kwargs,
)
)
Loading

0 comments on commit 05af47d

Please sign in to comment.