Skip to content

Commit

Permalink
Save pandas table
Browse files Browse the repository at this point in the history
  • Loading branch information
mcmahom5 committed Aug 28, 2023
1 parent fdeb72e commit eec345e
Showing 1 changed file with 50 additions and 0 deletions.
50 changes: 50 additions & 0 deletions src/mridle/extras/datasets/pandas_styler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from pathlib import PurePosixPath

from kedro.io.core import (
AbstractVersionedDataSet,
get_filepath_str,
get_protocol_and_path,
DataSetError,
Version,
)

import fsspec
import numpy as np

from typing import Any, Dict


class PandasStylerHtml(AbstractVersionedDataSet):
    """Write-only Kedro dataset that saves a pandas ``Styler``'s table as HTML.

    On save, the DataFrame wrapped by the styler (``styler.data``) is rendered
    with ``DataFrame.to_html(index=False)`` and written to ``filepath``.
    Note that only the underlying data is written; the styler's formatting is
    not part of the output. Loading is not supported.
    """

    def __init__(self, filepath: str, version: Version = None):
        """Create a new dataset instance that saves an HTML table to ``filepath``.

        Args:
            filepath: The location of the HTML file to save, optionally
                prefixed with a protocol (e.g. ``s3://``).
            version: Optional Kedro ``Version`` used to resolve versioned
                save paths; ``None`` disables versioning.
        """
        # parse the path and protocol (e.g. file, http, s3, etc.)
        protocol, path = get_protocol_and_path(filepath)
        self._protocol = protocol
        self._filepath = PurePosixPath(path)
        # On the local filesystem, let fsspec create missing parent
        # directories on write; versioned saves always target a new directory.
        fs_kwargs = {"auto_mkdir": True} if protocol == "file" else {}
        self._fs = fsspec.filesystem(self._protocol, **fs_kwargs)
        super().__init__(
            filepath=PurePosixPath(path),
            version=version,
            exists_function=self._fs.exists,
            glob_function=self._fs.glob,
        )

    def _load(self) -> np.ndarray:
        """Always raise: this dataset is write-only.

        Raises:
            DataSetError: On every call.
        """
        raise DataSetError("`load` is not supported on PandasStylerHtml")

    def _save(self, data) -> None:
        """Save the DataFrame behind a pandas ``Styler`` as an HTML table.

        Args:
            data: An object exposing the underlying DataFrame as ``.data``
                (e.g. a pandas ``Styler``).
        """
        # using get_filepath_str ensures that the protocol and path are appended correctly for different filesystems
        save_path = get_filepath_str(self._get_save_path(), self._protocol)
        df = data.data  # Extract the DataFrame from the styler
        df_html = df.to_html(index=False)  # Convert DataFrame to HTML
        # Write through the fsspec filesystem (not the builtin ``open``) so
        # that non-local protocols are honoured.
        with self._fs.open(save_path, "w") as f:
            f.write(df_html)

    def _describe(self) -> Dict[str, Any]:
        """Returns a dict that describes the attributes of the dataset."""
        return dict(filepath=self._filepath, protocol=self._protocol)

0 comments on commit eec345e

Please sign in to comment.