Skip to content

Commit

Permalink
missed some prints
Browse files Browse the repository at this point in the history
  • Loading branch information
dlwh committed Sep 13, 2024
1 parent e33a905 commit 2645efb
Showing 1 changed file with 0 additions and 3 deletions.
3 changes: 0 additions & 3 deletions src/levanter/data/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,16 @@ async def current_len(self) -> Optional[int]:
async def get_batch(self, indices: Sequence[int]) -> Sequence[T_co]:
    """Asynchronously read one ``seq_len``-long slice of tokens per requested index.

    Each index ``i`` maps to the token range
    ``[i * self.seq_len, (i + 1) * self.seq_len)`` of ``token_arrays.data``.

    Args:
        indices: Positions of the sequences to fetch.

    Returns:
        The read results, in the same order as ``indices``. An empty request
        yields an empty list.

    Raises:
        ValueError: If ``wait_until_len_at_least`` reports a length smaller
            than the largest requested index requires.
    """
    # Nothing to wait for or read; this also keeps max() from raising its own
    # (unrelated) ValueError on an empty sequence.
    if len(indices) == 0:
        return []

    token_arrays = await self._await_token_cache()

    required_len = max(indices) + 1
    available_len = await self.wait_until_len_at_least(required_len)
    # A None result carries no length information, so the bound check is skipped.
    if available_len is not None and available_len < required_len:
        raise ValueError("Requested indices beyond the end of the dataset")

    offsets = np.array(indices) * self.seq_len
    # Start every read inside a single ts.Batch() context (presumably so the
    # storage layer can coalesce them -- confirm against the ts docs), then
    # await them all together once the context has exited.
    with ts.Batch():
        pending_reads = [
            token_arrays.data[offset : offset + self.seq_len].read() for offset in offsets
        ]

    return await asyncio.gather(*pending_reads)

def get_batch_sync(self, indices: Sequence[int]) -> Sequence[T_co]:
Expand Down

0 comments on commit 2645efb

Please sign in to comment.