-
Notifications
You must be signed in to change notification settings - Fork 78
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
154 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import abc | ||
from typing import Generic, TypeVar | ||
|
||
from jax.random import PRNGKey | ||
|
||
|
||
# from levanter.data import ShardCache | ||
|
||
T = TypeVar("T") | ||
|
||
|
||
class ItemSampler(Generic[T], abc.ABC):
    """
    Abstract interface for drawing items of type ``T`` from a dataset.

    Concrete samplers subclass this and implement :meth:`sample`.
    """

    # TODO: getstate/setstate

    @abc.abstractmethod
    def sample(self, index: int, *, key: PRNGKey) -> T:
        """
        Produce the item associated with ``index``.

        Args:
            index: Which item to sample. Any nonnegative integer is allowed.
            key: A random key, passed by keyword, for implementations that need extra randomness.

        Returns:
            The sampled data.

        Raises:
            NotImplementedError: if a subclass delegates to this base implementation.
        """
        raise NotImplementedError()
|
||
|
||
class RowSampler(ItemSampler[T]):
    """
    Looks up rows of a shard cache by index.

    The lookup itself is deterministic: the requested index is wrapped modulo the
    cache's row count and the row at that position is returned. The random key is
    accepted to satisfy the ``ItemSampler`` interface but is not used here.
    """

    def __init__(self, cache):
        """
        Args:
            cache: The backing cache. It must provide ``final_row_count()`` and
                ``get_row(index)``, which are the only members used by this class.
        """
        self.cache = cache

    def sample(self, index: int, *, key: PRNGKey) -> T:
        """
        Return the row at ``index``, wrapped into the cache's row range.

        Args:
            index: The index of the row to fetch. Values at or beyond the row count
                wrap around via modulo.
            key: Unused; present for compatibility with ``ItemSampler.sample``.

        Returns:
            Whatever ``cache.get_row`` returns for the wrapped index.

        Raises:
            ValueError: if the cache reports zero rows, since there is nothing to sample.
        """
        row_count = self.cache.final_row_count()
        if row_count == 0:
            # Without this guard the modulo below fails with an opaque ZeroDivisionError.
            raise ValueError("Cannot sample from a cache with no rows")

        return self.cache.get_row(index % row_count)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters