Skip to content

Commit

Permalink
chore: prepare for release 0.1.4
Browse files Browse the repository at this point in the history
Letsql uses maturin as the build backend and poetry to manage its
dependencies. Maturin does not recognize the dependencies specified by
poetry, and is not expected to in the future; see this issue:

PyO3/maturin#632

It also does not provide an alternative way to support dynamic dependencies;
the following issue is still open:

PyO3/maturin#1537

On the other hand, poetry will support PEP 621 project-style dependencies
in version 2.0:

python-poetry/poetry#3332

Therefore, one simple solution is to duplicate the dependencies section, as
done in this package:

https://github.com/tmtenbrink/rustfrc/blob/main/pyproject.toml

To do so, a semi-automated approach is to generate the dependencies using poetry
export

poetry export -f requirements.txt --without="test,dev,docs" \
--all-extras --without-hashes --output requirements.txt

And then update the dependencies section in the pyproject.toml file.

For more details on how to express poetry optional dependencies as PEP-621 optional
dependencies, see the following resources:

https://astarvienna.github.io/howtotoml.html#extras
https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#dependencies-and-requirements
https://python-poetry.org/docs/pyproject/#extras

Additionally, this commit resolves a few inconsistencies: some packages were
listed as optional (duckdb, ibis), yet the code raised ImportError at runtime.

See penguins_example.py for code that was raising ImportError.
  • Loading branch information
mesejo committed Jun 26, 2024
1 parent 37df954 commit 09ade00
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "letsql"
version = "0.1.3"
version = "0.1.4"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
11 changes: 11 additions & 0 deletions examples/penguins_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import letsql as ls
from letsql.common.caching import ParquetCacheStorage
from pathlib import Path

# Fetch the penguins example table.
t = ls.examples.penguins.fetch()

# Reuse the backend the table came from as the source for the cache storage.
con = t.op().source

# Keep only the Adelie rows, then cache the result through a
# ParquetCacheStorage rooted at the current working directory and execute it.
adelie = t.filter([t.species == "Adelie"])
storage = ParquetCacheStorage(source=con, path=Path.cwd())
adelie.cache(storage=storage).execute()
2 changes: 2 additions & 0 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,27 @@ build-backend = "maturin"
[project]
name = "letsql"
dynamic = ["version"]
# These pins duplicate the Poetry dependency section because maturin does not
# read dependencies specified by Poetry. They were generated with:
#   poetry export -f requirements.txt --without="test,dev,docs" \
#     --all-extras --without-hashes --output requirements.txt
# NOTE(review): presumably this list must be regenerated whenever the Poetry
# dependencies change — confirm against the release process.
dependencies = [
"ibis-framework==9.0.0 ; python_version >= '3.10' and python_version < '4.0'",
"dask==2023.12.1 ; python_version >= '3.10' and python_version < '4.0'",
"attrs==23.2.0 ; python_version >= '3.10' and python_version < '4.0'",
"connectorx==0.3.2 ; python_version >= '3.10' and python_version < '4.0'",
"psycopg2-binary==2.9.9 ; python_version >= '3.10' and python_version < '4.0'",
"sqlalchemy==2.0.29 ; python_version >= '3.10' and python_version < '4.0'",
"pyarrow==13.0.0 ; python_version >= '3.10' and python_version < '4.0'",
"palmerpenguins==0.1.4 ; python_version >= '3.10' and python_version < '4.0'",
"structlog==24.2.0 ; python_version >= '3.10' and python_version < '4.0'",
"pytest-mock==3.14.0 ; python_version >= '3.10' and python_version < '4.0'",
]
# Every pinned dependency carries a `python_version >= '3.10'` marker, so on
# older interpreters the package would install without any of its
# dependencies. Declare the real minimum so installers refuse early instead.
requires-python = ">=3.10"
authors = [
{ name = "Hussain Sultan", email = "hussain@letsql.com" },
]
# `name` and `email` are separate keys; `email` must hold a bare address
# rather than a "Name <address>" string.
maintainers = [
    { name = "Dan Lovell", email = "dan@letsql.com" },
    { name = "Daniel Mesejo", email = "mesejo@letsql.com" },
]
description = "Data processing library built on top of Ibis and DataFusion to write multi-engine data workflows."
readme = "README.md"
license = { file = "LICENSE" }
classifiers = [
Expand All @@ -25,6 +45,18 @@ Repository = "https://github.com/letsql/letsql.git"
Issues = "https://github.com/letsql/letsql/issues"
Changelog = "https://github.com/letsql/letsql/blob/main/CHANGELOG.md"

# Optional backends, installable as extras (e.g. `pip install "letsql[duckdb]"`).
[project.optional-dependencies]
# Extra name corrected from the misspelled `duckb` so it matches the package.
duckdb = [
    "duckdb==0.10.3 ; python_version >= '3.10' and python_version < '4.0'",
]
datafusion = [
    "datafusion==34.0.0 ; python_version >= '3.10' and python_version < '4.0'",
]
snowflake = [
    "snowflake-connector-python==3.10.1 ; python_version >= '3.10' and python_version < '4.0'",
]


[tool.maturin]
module-name = "letsql._internal"
python-source = "python"
Expand Down
6 changes: 4 additions & 2 deletions python/letsql/backends/let/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from ibis import BaseBackend
from ibis.expr import types as ir
from ibis.expr.schema import SchemaLike
from ibis.backends.datafusion import Backend as IbisDataFusionBackend
from sqlglot import exp, parse_one

import letsql.backends.let.hotfix # noqa: F401
Expand Down Expand Up @@ -64,7 +63,10 @@ def register(
table_or_expr = self._sources.get_table_or_op(table_or_expr)
backend = self._sources.get_backend(table_or_expr)

if isinstance(backend, (DataFusionBackend, IbisDataFusionBackend)):
if (
isinstance(backend, DataFusionBackend)
or getattr(backend, "name", "") == DataFusionBackend.name
):
source = _get_datafusion_dataframe(backend, source)

registered_table = super().register(source, table_name=table_name, **kwargs)
Expand Down
10 changes: 4 additions & 6 deletions python/letsql/common/utils/dask_normalize_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ibis.expr.operations.relations as ir
import sqlglot as sg

import letsql
from letsql.expr.relations import (
make_native_op,
)
Expand All @@ -18,15 +19,12 @@ def expr_is_bound(expr):
def unbound_expr_to_default_sql(expr):
if expr_is_bound(expr):
raise ValueError
default_sql = ibis.to_sql(
expr,
dialect=ibis.options.sql.default_dialect,
)
default_sql = letsql.to_sql(expr)
return str(default_sql)


def normalize_memory_databasetable(dt):
if dt.source.name not in ("pandas", "datafusion", "duckdb"):
if dt.source.name not in ("pandas", "let", "datafusion", "duckdb"):
raise ValueError
return dask.base._normalize_seq_func(
(
Expand Down Expand Up @@ -175,7 +173,7 @@ def normalize_backend(con):
con_details = {k: con_dct[k] for k in ("host", "port", "dbname")}
elif name == "pandas":
con_details = id(con.dictionary)
elif name in ("datafusion", "duckdb"):
elif name in ("datafusion", "duckdb", "let"):
con_details = id(con.con)
else:
raise ValueError
Expand Down
14 changes: 14 additions & 0 deletions python/letsql/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ class Repr(Config):
interactive: Interactive = Interactive()


class SQL(Config):
    """SQL-related options.

    Attributes
    ----------
    dialect : str
        Dialect to use for printing SQL when the backend cannot be determined.

    """

    dialect: str = "datafusion"


class Options(Config):
"""LETSQL configuration options
Expand All @@ -100,6 +113,7 @@ class Options(Config):
cache: Cache = Cache()
backend: Optional[Any] = None
repr: Repr = Repr()
sql: SQL = SQL()

@property
def interactive(self) -> bool:
Expand Down
27 changes: 27 additions & 0 deletions python/letsql/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import api
from ibis.backends.sql.dialects import DataFusion
from ibis.common.deferred import Deferred, _, deferrable
from ibis.expr.schema import Schema
from ibis.expr.sql import SQLString
from ibis.expr.types import (
Column,
DateValue,
Expand Down Expand Up @@ -90,6 +92,7 @@
"table",
"time",
"today",
"to_sql",
"timestamp",
"union",
"uuid",
Expand Down Expand Up @@ -1515,3 +1518,27 @@ def interval(
microseconds=microseconds,
nanoseconds=nanoseconds,
)


def to_sql(expr: ir.Expr, pretty: bool = True) -> SQLString:
    """Compile an expression to SQL text in the DataFusion dialect.

    Parameters
    ----------
    expr
        Ibis expression to render.
    pretty
        Whether the generated SQL should be pretty-printed.

    Returns
    -------
    SQLString
        The SQL generated for the unbound form of `expr`.

    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably this avoids a circular import with
    # letsql.config — confirm.
    from letsql.config import _backend_init

    backend = _backend_init()
    # The expression is unbound first, so the SQL is produced from the
    # expression alone rather than from whatever backend it was bound to.
    sqlglot_tree = backend._to_sqlglot(expr.unbind())
    return SQLString(sqlglot_tree.sql(dialect=DataFusion, pretty=pretty))

0 comments on commit 09ade00

Please sign in to comment.