Merge pull request #2 from mvinyard/test-release

Test release `v0.0.1rc0`
mvinyard · Aug 3, 2023 · 05af47d · 05af47d
2 parents 6da4c65 + 7e890b2
commit 05af47d
Show file tree

Hide file tree

Showing 5 changed files with 769 additions and 32 deletions.
diff --git a/README.md b/README.md
@@ -1,5 +1,11 @@
 # 🔎 AnnDataQuery
 
-Fetch and format data matrices from AnnData.
+[![PyPI pyversions](https://img.shields.io/pypi/pyversions/adata_query.svg)](https://pypi.python.org/pypi/adata_query/)
+[![PyPI version](https://badge.fury.io/py/adata_query.svg)](https://badge.fury.io/py/adata_query)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+Fetch data matrices from AnnData and format as `np.ndarray` or `torch.Tensor`, on any device.
+
+Example: [notebook](https://colab.research.google.com/github/mvinyard/AnnDataQuery/blob/test-release/notebooks/anndata_query_tutorial.ipynb)
 
 For more information, see: [documentation](https://michael-vinyard.gitbook.io/anndataquery/)
diff --git a/adata_query/__init__.py b/adata_query/__init__.py
@@ -1,6 +1,8 @@
 # __init__.py
 
+__version__ = "v0.0.1"
+
 from . import _utils
 from . import _core
 
-from ._core import format_data, fetch
+from ._core import format_data, fetch, locate
diff --git a/adata_query/_core/_fetcher.py b/adata_query/_core/_fetcher.py
@@ -4,6 +4,7 @@
 import autodevice
 import anndata
 import torch as _torch
+import numpy as np
 
 
 # -- import local dependencies: ------------------------------------------------
@@ -12,7 +13,7 @@
 
 
 # -- set typing: ---------------------------------------------------------------
-from typing import Optional
+from typing import Dict, List, Optional, Union
 
 
 # -- operational class: --------------------------------------------------------
@@ -31,8 +32,12 @@ def _forward(self, adata, key):
         return format_data(data=data, torch = self._torch, device = self._device)
 
     def _grouped_subroutine(self, adata, key):
-        for group, group_df in self._GROUPED:
-            yield self._forward(adata[group_df.index], key)
+        if self._as_dict:
+            for group, group_df in self._GROUPED:
+                yield group, self._forward(adata[group_df.index], key)
+        else:
+            for group, group_df in self._GROUPED:
+                yield self._forward(adata[group_df.index], key)
 
     def __call__(
         self,
@@ -41,22 +46,41 @@ def __call__(
         groupby: Optional[str] = None,
         torch: bool = False,
         device: _torch.device = autodevice.AutoDevice(),
+        as_dict: bool = True,
     ):
         """
-        adata: anndata.AnnData [required]
+        adata: anndata.AnnData [ required ]
+            Annotated single-cell data object.
         
-        key: str [required]
+        key: str [ required ]
+            Key to access a matrix in adata. For example, if you wanted to access
+            adata.obsm['X_pca'], you would pass: "X_pca".
         
         groupby: Optional[str], default = None
-        
+            Optionally, one may choose to group data according to a cell-specific
+            annotation in adata.obs. Grouped data is returned as Dict or List (see `as_dict`).
+            
         torch: bool, default = False
-        
+            Boolean indicator of whether data should be formatted as torch.Tensor. If
+            False (default), data is formatted as np.ndarray.
+
         device: torch.device, default = autodevice.AutoDevice()
+            Device on which a torch.Tensor is placed. Should torch=True, the
+            device ("cpu", "cuda:N", "mps:N") may be set. The default value,
+            autodevice.AutoDevice(), will indicate the use of GPU, if
+            available.
+
+        as_dict: bool, default = True
+            Only relevant when `groupby` is not None. Boolean indicator to return
+            data in a Dict where the key for each value corresponds to the respective
+            `groupby` value. If False, returns List.
         """
 
         self.__update__(locals(), public=[None])
 
         if hasattr(self, "_groupby"):
+            if self._as_dict:
+                return dict(self._grouped_subroutine(adata, key))
             return list(self._grouped_subroutine(adata, key))
         return self._forward(adata, key)
 
@@ -66,51 +90,63 @@ def fetch(
     groupby: Optional[str] = None,
     torch: bool = False,
     device: _torch.device = autodevice.AutoDevice(),
+    as_dict: bool = True,
     *args,
     **kwargs,
-):
+) -> Union[
+    _torch.Tensor,
+    np.ndarray,
+    List[Union[_torch.Tensor, np.ndarray]],
+    Dict[Union[str, int], Union[_torch.Tensor, np.ndarray]],
+]:
     """
     Given, adata and a key that points to a specific matrix stored in adata,
     return the data, formatted either as np.ndarray or torch.Tensor. If formatted
     as torch.Tensor, device may be specified based on available devices.
-    
+
     Parameters
     ----------
     adata: anndata.AnnData [ required ]
         Annotated single-cell data object.
-        
+
     key: str [ required ]
         Key to access a matrix in adata. For example, if you wanted to access
         adata.obsm['X_pca'], you would pass: "X_pca".
-    
+
     groupby: Optional[str], default = None
         Optionally, one may choose to group data according to a cell-specific
         annotation in adata.obs. This would invoke returning data as List
-        
+
     torch: bool, default = False
         Boolean indicator of whether data should be formatted as torch.Tensor. If
         False (default), data is formatted as np.ndarray.device (torch.device) =
-        autodevice.AutoDevice(). Should torch=True, the device ("cpu", "cuda:N", 
-        "mps:N") may be set. The default value, autodevice.AutoDevice() will 
+        autodevice.AutoDevice(). Should torch=True, the device ("cpu", "cuda:N",
+        "mps:N") may be set. The default value, autodevice.AutoDevice() will
         indicate the use of GPU, if available.
-    
+
+    as_dict: bool, default = True
+        Only relevant when `groupby` is not None. Boolean indicator to return
+        data in a Dict where the key for each value corresponds to the respective
+        `groupby` value. If False, returns List.
+
     Returns
     -------
-    data: Union[torch.Tensor, np.ndarray, List[torch.Tensor], List[np.ndarray]
+    data: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, np.ndarray]], Dict[Union[str, int], Union[torch.Tensor, np.ndarray]]]
         Formatted data as np.ndarray or torch.Tensor. If torch=True the torch.Tensor
-        is allocated to the device indicated by the device argument. If groupby is passed,
-        returned as a List[np.ndarray] or List[torch.Tensor]
+        is allocated to the device indicated by the device argument. If `groupby` is passed,
+        returned as a Dict keyed by `groupby` value, with np.ndarray or torch.Tensor values. If
+        `groupby` is passed and `as_dict` = False, returns List[np.ndarray] or List[torch.Tensor].
     """
-
-
+
     fetcher = AnnDataFetcher()
-    
+
     return fetcher(
-        adata = adata,
-        key = key,
-        groupby = groupby,
-        torch = torch,
-        device = device,
+        adata=adata,
+        key=key,
+        groupby=groupby,
+        torch=torch,
+        device=device,
+        as_dict=as_dict,
         *args,
         **kwargs,
-    )
+    )