Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise error if function is used with parameters to read no geometry, columns, or fids #280

3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
### Improvements

- Support writing dataframes without geometry column (#267)

- Calculate feature count by iterating over features if GDAL returns an
unknown count for a data layer (e.g., OSM driver); this may have signficant
performance impacts for some data sources that would otherwise return an
unknown count (count is used in `read_info`, `read`, `read_dataframe`) (#271).
- Raise error if `read` or `read_dataframe` is called with parameters to read no
columns, geometry, or fids (#280)

### Bug fixes

Expand Down
12 changes: 12 additions & 0 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,12 @@ def ogr_read(
if sql is not None and layer is not None:
raise ValueError("'sql' paramater cannot be combined with 'layer'")

if not (read_geometry or return_fids or columns is None or len(columns) > 0):
raise ValueError(
"at least one of read_geometry or return_fids must be True or columns must "
"be None or non-empty"
)

try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)
Expand Down Expand Up @@ -1193,6 +1199,12 @@ def ogr_open_arrow(
if sql is not None and layer is not None:
raise ValueError("'sql' paramater cannot be combined with 'layer'")

if not (read_geometry or return_fids or columns is None or len(columns) > 0):
raise ValueError(
"at least one of read_geometry or return_fids must be True or columns must "
"be None or non-empty"
)

reader = None
try:
dataset_options = dict_to_options(dataset_kwargs)
Expand Down
6 changes: 5 additions & 1 deletion pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,12 @@ def read_dataframe(
if fid_as_index:
df = df.set_index(meta["fid_column"])
df.index.names = ["fid"]

geometry_name = meta["geometry_name"] or "wkb_geometry"
if geometry_name in df.columns:
if not fid_as_index and len(df.columns) == 0:
# Index not asked, no geometry column and no attribute columns: return empty
return pd.DataFrame()
elif geometry_name in df.columns:
df["geometry"] = from_wkb(df.pop(geometry_name), crs=meta["crs"])
return gp.GeoDataFrame(df, geometry="geometry")
else:
Expand Down
5 changes: 0 additions & 5 deletions pyogrio/tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,6 @@ def test_read_arrow_columns(naturalearth_lowres):
assert result.columns.tolist() == ["continent", "geometry"]


def test_read_arrow_layer_without_geometry(test_fgdb_vsi):
result = read_dataframe(test_fgdb_vsi, layer="basetable", use_arrow=True)
assert type(result) is pd.DataFrame


def test_read_arrow_ignore_geometry(naturalearth_lowres):
result = read_dataframe(naturalearth_lowres, use_arrow=True, read_geometry=False)
assert type(result) is pd.DataFrame
Expand Down
92 changes: 91 additions & 1 deletion pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,36 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi):
assert len(df) == 177


@pytest.mark.parametrize(
"use_arrow",
[
pytest.param(
True,
marks=pytest.mark.skipif(
not has_pyarrow or __gdal_version__ < (3, 6, 0),
reason="Arrow tests require pyarrow and GDAL>=3.6",
),
),
False,
],
)
@pytest.mark.parametrize(
"columns, fid_as_index, exp_len", [(None, False, 2), ([], True, 2), ([], False, 0)]
)
def test_read_layer_without_geometry(
test_fgdb_vsi, columns, fid_as_index, use_arrow, exp_len
):
result = read_dataframe(
test_fgdb_vsi,
layer="basetable",
columns=columns,
fid_as_index=fid_as_index,
use_arrow=use_arrow,
)
assert type(result) is pd.DataFrame
assert len(result) == exp_len


@pytest.mark.parametrize(
"naturalearth_lowres, expected_ext",
[(".gpkg", ".gpkg"), (".shp", ".shp")],
Expand All @@ -93,6 +123,36 @@ def test_read_no_geometry(naturalearth_lowres_all_ext):
assert not isinstance(df, gp.GeoDataFrame)


@pytest.mark.parametrize(
"use_arrow",
[
pytest.param(
True,
marks=pytest.mark.skipif(
not has_pyarrow or __gdal_version__ < (3, 6, 0),
reason="Arrow tests require pyarrow and GDAL>=3.6",
),
),
False,
],
)
def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
with pytest.raises(
ValueError,
match=(
"at least one of read_geometry or return_fids must be True or columns must "
"be None or non-empty"
),
):
_ = read_dataframe(
naturalearth_lowres,
columns=[],
read_geometry=False,
fid_as_index=False,
use_arrow=use_arrow,
)


def test_read_force_2d(test_fgdb_vsi):
with pytest.warns(
UserWarning, match=r"Measured \(M\) geometry types are not supported"
Expand Down Expand Up @@ -157,7 +217,11 @@ def test_read_fid_as_index(naturalearth_lowres_all_ext):
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
assert_index_equal(df.index, pd.RangeIndex(0, 2))

df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **kwargs)
df = read_dataframe(
naturalearth_lowres_all_ext,
fid_as_index=True,
**kwargs,
)
if naturalearth_lowres_all_ext.suffix in [".gpkg"]:
# File format where fid starts at 1
assert_index_equal(df.index, pd.Index([3, 4], name="fid"))
Expand All @@ -166,6 +230,32 @@ def test_read_fid_as_index(naturalearth_lowres_all_ext):
assert_index_equal(df.index, pd.Index([2, 3], name="fid"))


@pytest.mark.parametrize(
"use_arrow",
[
pytest.param(
True,
marks=pytest.mark.skipif(
not has_pyarrow or __gdal_version__ < (3, 6, 0),
reason="Arrow tests require pyarrow and GDAL>=3.6",
),
),
False,
],
)
def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
df = read_dataframe(
naturalearth_lowres,
columns=[],
read_geometry=False,
fid_as_index=True,
use_arrow=use_arrow,
)
assert df is not None
assert len(df) == 177
assert len(df.columns) == 0


@pytest.mark.filterwarnings("ignore:.*Layer .* does not have any features to read")
def test_read_where(naturalearth_lowres_all_ext):
# empty filter should return full set of records
Expand Down
65 changes: 40 additions & 25 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,20 @@ def test_read_no_geometry(naturalearth_lowres):
assert geometry is None


def test_read_columns(naturalearth_lowres):
# read no columns or geometry
meta, _, geometry, fields = read(
naturalearth_lowres, columns=[], read_geometry=False
)
assert geometry is None
assert len(fields) == 0
array_equal(meta["fields"], np.empty(shape=(0, 4), dtype="object"))
def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres):
with pytest.raises(
ValueError,
match=(
"at least one of read_geometry or return_fids must be True or columns must "
"be None or non-empty"
),
):
_ = read(
naturalearth_lowres, columns=[], read_geometry=False, return_fids=False
)


def test_read_columns(naturalearth_lowres):
columns = ["NAME", "NAME_LONG"]
meta, _, geometry, fields = read(
naturalearth_lowres, columns=columns, read_geometry=False
Expand Down Expand Up @@ -212,23 +217,6 @@ def test_read_fids(naturalearth_lowres):
assert np.array_equal(fields[-1], expected_fields[-1][subset])


def test_return_fids(naturalearth_lowres):
# default is to not return fids
fids = read(naturalearth_lowres)[1]
assert fids is None

fids = read(naturalearth_lowres, return_fids=False)[1]
assert fids is None

fids = read(naturalearth_lowres, return_fids=True, skip_features=2, max_features=2)[
1
]
assert fids is not None
assert fids.dtype == np.int64
# Note: shapefile FIDS start at 0
assert np.array_equal(fids, np.array([2, 3], dtype="int64"))


def test_read_fids_out_of_bounds(naturalearth_lowres):
with pytest.raises(
FeatureError,
Expand Down Expand Up @@ -257,6 +245,33 @@ def test_read_fids_unsupported_keywords(naturalearth_lowres):
read(naturalearth_lowres, fids=[1], max_features=5)


def test_read_return_fids(naturalearth_lowres):
# default is to not return fids
fids = read(naturalearth_lowres)[1]
assert fids is None

fids = read(naturalearth_lowres, return_fids=False)[1]
assert fids is None

fids = read(naturalearth_lowres, return_fids=True, skip_features=2, max_features=2)[
1
]
assert fids is not None
assert fids.dtype == np.int64
# Note: shapefile FIDS start at 0
assert np.array_equal(fids, np.array([2, 3], dtype="int64"))


def test_read_return_only_fids(naturalearth_lowres):
_, fids, geometry, field_data = read(
naturalearth_lowres, columns=[], read_geometry=False, return_fids=True
)
assert fids is not None
assert len(fids) == 177
assert geometry is None
assert len(field_data) == 0


def test_write(tmpdir, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)

Expand Down
Loading