Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle geopandas and shapely geometries via geo_interface link #1000

Merged
merged 15 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/ci_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,14 @@ jobs:
# python-version: 3.7
# isDraft: true
# Pair Python 3.7 with NumPy 1.17 and Python 3.9 with NumPy 1.20
# Only install geopandas on Python 3.9/NumPy 1.20
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for this?

Copy link
Member Author

@weiji14 weiji14 Apr 6, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since geopandas is an optional dependency, it's probably good to have a test matrix entry that doesn't include it.

include:
- python-version: 3.7
numpy-version: '1.17'
geopandas: ''
- python-version: 3.9
numpy-version: '1.20'
geopandas: 'geopandas'
defaults:
run:
shell: bash -l {0}
Expand Down Expand Up @@ -87,7 +90,7 @@ jobs:
- name: Install dependencies
run: |
conda install gmt=6.1.1 numpy=${{ matrix.numpy-version }} \
pandas xarray netCDF4 packaging \
pandas xarray netCDF4 packaging ${{ matrix.geopandas }} \
codecov coverage[toml] dvc ipython make \
pytest-cov pytest-mpl pytest>=6.0 \
sphinx-gallery
Expand Down
9 changes: 6 additions & 3 deletions MAINTENANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,12 @@ There are 9 configuration files located in `.github/workflows`:

This is run on every commit to the *master* and Pull Request branches.
It is also scheduled to run daily on the *master* branch.
In draft Pull Requests, only two jobs on Linux (minimum NEP29 Python/NumPy versions
and latest Python/NumPy versions) are triggered to save on Continuous Integration
resources.
In draft Pull Requests, only two jobs on Linux are triggered to save on
Continuous Integration resources:

- Minimum [NEP29](https://numpy.org/neps/nep-0029-deprecation_policy)
Python/NumPy versions
- Latest Python/NumPy versions + GeoPandas

3. `ci_docs.yml` (Build documentation on Linux/macOS/Windows)

Expand Down
1 change: 1 addition & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
# intersphinx configuration
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"geopandas": ("https://geopandas.org/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"xarray": ("https://xarray.pydata.org/en/stable/", None),
Expand Down
5 changes: 3 additions & 2 deletions doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,10 @@ PyGMT requires the following libraries to be installed:
* `netCDF4 <https://unidata.github.io/netcdf4-python>`__
* `packaging <https://packaging.pypa.io>`__

The following are optional (but recommended) dependencies:
The following are optional dependencies:

* `IPython <https://ipython.org>`__: For embedding the figures in Jupyter notebooks.
* `IPython <https://ipython.org>`__: For embedding the figures in Jupyter notebooks (recommended).
* `GeoPandas <https://geopandas.org>`__: For using and plotting GeoDataFrame objects.

Installing GMT and other dependencies
-------------------------------------
Expand Down
14 changes: 10 additions & 4 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
GMTInvalidInput,
GMTVersionError,
)
from pygmt.helpers import data_kind, dummy_context
from pygmt.helpers import data_kind, dummy_context, tempfile_from_geojson

FAMILIES = [
"GMT_IS_DATASET",
Expand Down Expand Up @@ -1417,12 +1417,18 @@ def virtualfile_from_data(

if check_kind == "raster" and kind not in ("file", "grid"):
raise GMTInvalidInput(f"Unrecognized data type for grid: {type(data)}")
if check_kind == "vector" and kind not in ("file", "matrix", "vectors"):
raise GMTInvalidInput(f"Unrecognized data type: {type(data)}")
if check_kind == "vector" and kind not in (
"file",
"matrix",
"vectors",
"geojson",
):
raise GMTInvalidInput(f"Unrecognized data type for vector: {type(data)}")

# Decide which virtualfile_from_ function to use
_virtualfile_from = {
"file": dummy_context,
"geojson": tempfile_from_geojson,
"grid": self.virtualfile_from_grid,
# Note: virtualfile_from_matrix is not used because a matrix can be
# converted to vectors instead, and using vectors allows for better
Expand All @@ -1432,7 +1438,7 @@ def virtualfile_from_data(
}[kind]

# Ensure the data is an iterable (Python list or tuple)
if kind in ("file", "grid"):
if kind in ("file", "geojson", "grid"):
_data = (data,)
elif kind == "vectors":
_data = [np.atleast_1d(x), np.atleast_1d(y)]
Expand Down
2 changes: 1 addition & 1 deletion pygmt/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Functions, classes, decorators, and context managers to help wrap GMT modules.
"""
from pygmt.helpers.decorators import fmt_docstring, kwargs_to_strings, use_alias
from pygmt.helpers.tempfile import GMTTempFile, unique_name
from pygmt.helpers.tempfile import GMTTempFile, tempfile_from_geojson, unique_name
from pygmt.helpers.utils import (
args_in_kwargs,
build_arg_string,
Expand Down
52 changes: 52 additions & 0 deletions pygmt/helpers/tempfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import os
import uuid
from contextlib import contextmanager
from tempfile import NamedTemporaryFile

import numpy as np
Expand Down Expand Up @@ -104,3 +105,54 @@ def loadtxt(self, **kwargs):
Data read from the text file.
"""
return np.loadtxt(self.name, **kwargs)


@contextmanager
def tempfile_from_geojson(geojson):
"""
Saves any geo-like Python object which implements ``__geo_interface__``
(e.g. a geopandas GeoDataFrame) to a temporary OGR_GMT text file.

Parameters
----------
geojson : geopandas.GeoDataFrame
A geopandas GeoDataFrame, or any geo-like Python object which
implements ``__geo_interface__``, i.e. a GeoJSON-like object.
weiji14 marked this conversation as resolved.
Show resolved Hide resolved

Yields
------
tmpfilename : str
A temporary OGR_GMT format file holding the geographical data.
E.g. '1a2b3c4d5e6.gmt'.
"""
with GMTTempFile(suffix=".gmt") as tmpfile:
os.remove(tmpfile.name) # ensure file is deleted first
ogrgmt_kwargs = dict(filename=tmpfile.name, driver="OGR_GMT", mode="w")
try:
# Using geopandas.to_file to directly export to OGR_GMT format
geojson.to_file(**ogrgmt_kwargs)
except AttributeError:
# pylint: disable=import-outside-toplevel
# Other 'geo' formats which implement __geo_interface__
import json

import fiona
import geopandas as gpd

with fiona.Env():
jsontext = json.dumps(geojson.__geo_interface__)
# Do Input/Output via Fiona virtual memory
with fiona.io.MemoryFile(file_or_bytes=jsontext.encode()) as memfile:
geoseries = gpd.GeoSeries.from_file(filename=memfile)
geoseries.to_file(**ogrgmt_kwargs)

# with memfile.open(driver="GeoJSON") as collection:
# # Get schema from GeoJSON
# schema = collection.schema
# # Write to temporary OGR_GMT format file
# with fiona.open(
# fp=tmpfile.name, mode="w", driver="OGR_GMT", schema=schema
# ) as ogrgmtfile:
# ogrgmtfile.write(geojson.__geo_interface__)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was my failed attempt to convert a GeoJSON string to an OGR_GMT (*.gmt) format file purely using fiona (i.e. no geopandas installed). The problem I found was that a schema had to be set, and that requires a lot of lines of code to do, which is why I stuck to using geopandas in the lines above.

If someone can figure out a good way to solve the schema problem, that would be fantastic!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I've removed this chunk of unusable code in 34de789. Hopefully this can be revisited in the future so that GeoJSON objects can be converted into OGR_GMT purely using fiona without the need for installing geopandas (a bit of a big dependency).


yield tmpfile.name
2 changes: 2 additions & 0 deletions pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def data_kind(data, x=None, y=None, z=None):
kind = "file"
elif isinstance(data, xr.DataArray):
kind = "grid"
elif hasattr(data, "__geo_interface__"):
kind = "geojson"
elif data is not None:
kind = "matrix"
else:
Expand Down
3 changes: 2 additions & 1 deletion pygmt/src/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def info(table, **kwargs):

Parameters
----------
table : str or np.ndarray or pandas.DataFrame or xarray.Dataset
table : str or numpy.ndarray or pandas.DataFrame or xarray.Dataset or
geopandas.GeoDataFrame
weiji14 marked this conversation as resolved.
Show resolved Hide resolved
Pass in either a file name to an ASCII data table, a 1D/2D numpy array,
a pandas dataframe, an xarray dataset made up of 1D xarray.DataArray
data variables, or a geopandas GeoDataFrame.
Expand Down
66 changes: 66 additions & 0 deletions pygmt/tests/test_geopandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Tests on integration with geopandas.
"""
import numpy.testing as npt
import pytest
from pygmt import info

gpd = pytest.importorskip("geopandas")
shapely = pytest.importorskip("shapely")


@pytest.fixture(scope="module", name="gdf")
def fixture_gdf():
    """
    Build a small geopandas GeoDataFrame that mixes several shapely geometry
    types.

    The frame has one geometry per row and is indexed by the labels
    "multipolygon", "polygon" and "linestring", in that order.
    """
    geom = shapely.geometry
    multipolygon_mapping = {
        "type": "MultiPolygon",
        "coordinates": [
            [
                [[0, 0], [20, 0], [10, 20], [0, 0]],  # Counter-clockwise
                [[3, 2], [10, 16], [17, 2], [3, 2]],  # Clockwise
            ],
            [[[6, 4], [14, 4], [10, 12], [6, 4]]],  # Counter-clockwise
            [[[25, 5], [30, 10], [35, 5], [25, 5]]],
        ],
    }
    # Row order matters: the multipolygon goes first so that the OGR_GMT file
    # has @GMULTIPOLYGON in the header.
    geometries = {
        "multipolygon": geom.shape(multipolygon_mapping),
        "polygon": geom.Polygon([(20, 10), (23, 10), (23, 14), (20, 14)]),
        "linestring": geom.LineString([(20, 15), (30, 15)]),
    }
    return gpd.GeoDataFrame(
        index=list(geometries),
        geometry=list(geometries.values()),
    )


def test_geopandas_info_geodataframe(gdf):
    """
    Verify that info reports the bounding box region when it is given a
    geopandas.GeoDataFrame.
    """
    expected_region = [0.0, 35.0, 0.0, 20.0]
    region = info(table=gdf, per_column=True)
    npt.assert_allclose(actual=region, desired=expected_region)


@pytest.mark.parametrize(
    "geomtype,desired",
    [
        ("multipolygon", [0.0, 35.0, 0.0, 20.0]),
        ("polygon", [20.0, 23.0, 10.0, 14.0]),
        ("linestring", [20.0, 30.0, 15.0, 15.0]),
    ],
)
def test_geopandas_info_shapely(gdf, geomtype, desired):
    """
    Verify that info reports the bounding box region when it is given a
    single shapely.geometry object, which exposes a __geo_interface__
    property.
    """
    # Pick the row labelled with the geometry type, then take its geometry.
    row = gdf.loc[geomtype]
    region = info(table=row.geometry, per_column=True)
    npt.assert_allclose(actual=region, desired=desired)