Drop direct gcsfs/s3fs imports, detect the filesystem via fs.protocol, and reformat idx dates in parse_grib_idx

fsspec · Jun 27, 2024 · f5ec849 · f5ec849
1 parent 678d1a7
commit f5ec849
Showing 1 changed file with 14 additions and 9 deletions.
diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py
@@ -5,8 +5,6 @@
 from collections import defaultdict
 from typing import Iterable, List, Dict, Set, Optional
 import pandas as pd
-import gcsfs
-import s3fs
 
 
 import ujson
@@ -74,7 +72,7 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr):
     shape = tuple(data.shape or ())
     if nbytes < inline_threshold:
         logger.debug(f"Store {var} inline")
-        d = z.create_dataset(
+        date = z.create_dataset(
             name=var,
             shape=shape,
             chunks=shape,
@@ -94,7 +92,7 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr):
         store[f"{var}/0"] = b.decode("ascii")
     else:
         logger.debug(f"Store {var} reference")
-        d = z.create_dataset(
+        date = z.create_dataset(
             name=var,
             shape=shape,
             chunks=shape,
@@ -105,7 +103,7 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr):
             overwrite=True,
         )
         store[f"{var}/" + ".".join(["0"] * len(shape))] = ["{{u}}", offset, size]
-    d.attrs.update(attr)
+    date.attrs.update(attr)
 
 
 def scan_grib(
@@ -432,7 +430,7 @@ def grib_tree(
             # If you process the groups from a single file in order, you can use the msg# to compare with the
             # IDX file. The idx files message index is 1 based where the grib_tree message count is zero based
             logger.warning(
-                "Dropping unknown variable in msg# %d. Compare with the grib idx file to help identify it"
+                "Dropping unknown variable in msg# %d. Compare with the grib idx file to help identify it"
                 " and build an ecCodes local grib definitions file to fix it.",
                 msg_ind,
             )
@@ -615,7 +613,14 @@ def parse_grib_idx(
         for line in f.readlines():
             try:
                 idx, offset, date, attrs = line.split(":", maxsplit=3)
-                splits.append([int(idx), int(offset), date, attrs])
+                splits.append(
+                    [
+                        int(idx),
+                        int(offset),
+                        f"{date[2:6]}-{date[6:8]}-{date[8:10]}-{date[10:]}",
+                        attrs,
+                    ]
+                )
             except ValueError:
                 # Wrap the ValueError in a new one that includes the bad line
                 # If building the mapping, pick a different forecast run where the idx file is not broken
@@ -638,7 +643,7 @@ def parse_grib_idx(
         tstamp = pd.Timestamp.now()
     result.loc[:, "indexed_at"] = tstamp
 
-    if isinstance(fs, gcsfs.GCSFileSystem):
+    if fs.protocol[0] == "gs":
         result.loc[:, "grib_crc32"] = baseinfo["crc32c"]
         result.loc[:, "grib_updated_at"] = pd.to_datetime(
             baseinfo["updated"]
@@ -649,7 +654,7 @@ def parse_grib_idx(
         result.loc[:, "idx_updated_at"] = pd.to_datetime(
             idxinfo["updated"]
         ).tz_localize(None)
-    elif isinstance(fs, s3fs.S3FileSystem):
+    elif fs.protocol[0] == "s3":
         result.loc[:, "grib_Etag"] = baseinfo["ETag"]
         result.loc[:, "grib_updated_at"] = pd.to_datetime(
             baseinfo["LastModified"]