Skip to content

Commit

Permalink
made new updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Anu-Ra-g committed Jun 27, 2024
1 parent 678d1a7 commit f5ec849
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions kerchunk/grib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from collections import defaultdict
from typing import Iterable, List, Dict, Set, Optional
import pandas as pd
import gcsfs
import s3fs


import ujson
Expand Down Expand Up @@ -74,7 +72,7 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr):
shape = tuple(data.shape or ())
if nbytes < inline_threshold:
logger.debug(f"Store {var} inline")
d = z.create_dataset(
date = z.create_dataset(
name=var,
shape=shape,
chunks=shape,
Expand All @@ -94,7 +92,7 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr):
store[f"{var}/0"] = b.decode("ascii")
else:
logger.debug(f"Store {var} reference")
d = z.create_dataset(
date = z.create_dataset(
name=var,
shape=shape,
chunks=shape,
Expand All @@ -105,7 +103,7 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr):
overwrite=True,
)
store[f"{var}/" + ".".join(["0"] * len(shape))] = ["{{u}}", offset, size]
d.attrs.update(attr)
date.attrs.update(attr)


def scan_grib(
Expand Down Expand Up @@ -432,7 +430,7 @@ def grib_tree(
# If you process the groups from a single file in order, you can use the msg# to compare with the
# IDX file. The idx file's message index is 1-based, whereas the grib_tree message count is zero-based.
logger.warning(
"Dropping unknown variable in msg# %d. Compare with the grib idx file to help identify it"
"Dropping unknown variable in msg# %date. Compare with the grib idx file to help identify it"
" and build an ecCodes local grib definitions file to fix it.",
msg_ind,
)
Expand Down Expand Up @@ -615,7 +613,14 @@ def parse_grib_idx(
for line in f.readlines():
try:
idx, offset, date, attrs = line.split(":", maxsplit=3)
splits.append([int(idx), int(offset), date, attrs])
splits.append(
[
int(idx),
int(offset),
f"{date[2:6]}-{date[6:8]}-{date[8:10]}-{date[10:]}",
attrs,
]
)
except ValueError:
# Wrap the ValueError in a new one that includes the bad line
# If building the mapping, pick a different forecast run where the idx file is not broken
Expand All @@ -638,7 +643,7 @@ def parse_grib_idx(
tstamp = pd.Timestamp.now()
result.loc[:, "indexed_at"] = tstamp

if isinstance(fs, gcsfs.GCSFileSystem):
if fs.protocol[0] == "gs":
result.loc[:, "grib_crc32"] = baseinfo["crc32c"]
result.loc[:, "grib_updated_at"] = pd.to_datetime(
baseinfo["updated"]
Expand All @@ -649,7 +654,7 @@ def parse_grib_idx(
result.loc[:, "idx_updated_at"] = pd.to_datetime(
idxinfo["updated"]
).tz_localize(None)
elif isinstance(fs, s3fs.S3FileSystem):
elif fs.protocol[0] == "s3":
result.loc[:, "grib_Etag"] = baseinfo["ETag"]
result.loc[:, "grib_updated_at"] = pd.to_datetime(
baseinfo["LastModified"]
Expand Down

0 comments on commit f5ec849

Please sign in to comment.