From 47dbb5dceed5748b1d8c64d85b135e18bf2bcd86 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 31 Aug 2024 02:31:10 +0200 Subject: [PATCH] GTI driver: recognize STAC GeoParquet catalogs --- autotest/gdrivers/gti.py | 35 +++++ doc/source/drivers/raster/gti.rst | 13 ++ frmts/vrt/CMakeLists.txt | 3 +- frmts/vrt/gdaltileindexdataset.cpp | 206 +++++++++++++++++++++++++++-- 4 files changed, 246 insertions(+), 11 deletions(-) diff --git a/autotest/gdrivers/gti.py b/autotest/gdrivers/gti.py index 75719ac07d26..2bab88273269 100755 --- a/autotest/gdrivers/gti.py +++ b/autotest/gdrivers/gti.py @@ -2947,3 +2947,38 @@ def test_gti_read_multi_threaded_disabled_because_truncated_source(tmp_vsimem): assert vrt_ds.GetMetadataItem("MULTI_THREADED_RASTERIO_LAST_USED", "__DEBUG__") == ( "1" if gdal.GetNumCPUs() >= 2 else "0" ) + + +############################################################################### + + +@pytest.mark.require_curl() +@pytest.mark.require_driver("Parquet") +def test_gti_stac_geoparquet(): + + url = ( + "https://github.com/stac-utils/stac-geoparquet/raw/main/tests/data/naip.parquet" + ) + + conn = gdaltest.gdalurlopen(url, timeout=4) + if conn is None: + pytest.skip("cannot open URL") + + ds = gdal.Open("GTI:/vsicurl/" + url) + assert ds.GetSpatialRef().GetAuthorityCode(None) == "26914" + assert ds.GetGeoTransform() == pytest.approx( + (408231.0, 1.0, 0.0, 3873862.0, 0.0, -1.0), rel=1e-5 + ) + assert ds.RasterCount == 4 + assert [band.GetColorInterpretation() for band in ds] == [ + gdal.GCI_RedBand, + gdal.GCI_GreenBand, + gdal.GCI_BlueBand, + gdal.GCI_Undefined, + ] + assert [band.GetDescription() for band in ds] == [ + "Red", + "Green", + "Blue", + "NIR (near-infrared)", + ] diff --git a/doc/source/drivers/raster/gti.rst b/doc/source/drivers/raster/gti.rst index a380c8bf7382..93afa4fc0948 100644 --- a/doc/source/drivers/raster/gti.rst +++ b/doc/source/drivers/raster/gti.rst @@ -73,6 +73,19 @@ The GTI driver accepts different types of connection strings: For example: ``tileindex.gti`` +STAC GeoParquet support +----------------------- + +.. versionadded:: 3.10 + +The driver can support `STAC GeoParquet catalogs `_, +provided GDAL is build with :ref:`vector.parquet` support. +It can make use of fields ``proj:epsg`` and ``proj:transform`` from the +`Projection Extension Specification `_, +to correctly infer the appropriate projection and resolution. + +Example of a valid connection string: ``GTI:/vsicurl/https://github.com/stac-utils/stac-geoparquet/raw/main/tests/data/naip.parquet`` + Tile index requirements ----------------------- diff --git a/frmts/vrt/CMakeLists.txt b/frmts/vrt/CMakeLists.txt index 447d48eaedbe..777c49af525a 100644 --- a/frmts/vrt/CMakeLists.txt +++ b/frmts/vrt/CMakeLists.txt @@ -20,7 +20,8 @@ add_gdal_driver( gdaltileindexdataset.cpp STRONG_CXX_WFLAGS) gdal_standard_includes(gdal_vrt) -target_include_directories(gdal_vrt PRIVATE ${GDAL_RASTER_FORMAT_SOURCE_DIR}/raw) +target_include_directories(gdal_vrt PRIVATE ${GDAL_RASTER_FORMAT_SOURCE_DIR}/raw + $) set(GDAL_DATA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/data/gdalvrt.xsd diff --git a/frmts/vrt/gdaltileindexdataset.cpp b/frmts/vrt/gdaltileindexdataset.cpp index b33ea497a488..660daa41e500 100644 --- a/frmts/vrt/gdaltileindexdataset.cpp +++ b/frmts/vrt/gdaltileindexdataset.cpp @@ -40,12 +40,14 @@ #include "cpl_port.h" #include "cpl_error_internal.h" +#include "cpl_json.h" #include "cpl_mem_cache.h" #include "cpl_minixml.h" #include "cpl_quad_tree.h" #include "vrtdataset.h" #include "vrt_priv.h" #include "ogrsf_frmts.h" +#include "ogrwarpedlayer.h" #include "gdal_proxy.h" #include "gdal_thread_pool.h" #include "gdal_utils.h" @@ -215,6 +217,9 @@ class GDALTileIndexDataset final : public GDALPamDataset //! Vector layer with the sources OGRLayer *m_poLayer = nullptr; + //! When the SRS of m_poLayer is not the one we expose + std::unique_ptr m_poWarpedLayerKeeper{}; + //! Geotransform matrix of the tile index std::array m_adfGeoTransform{0, 0, 0, 0, 0, 0}; @@ -583,6 +588,9 @@ GDALTileIndexDataset::GDALTileIndexDataset() static std::string GetAbsoluteFileName(const char *pszTileName, const char *pszVRTName) { + if (STARTS_WITH(pszTileName, "https://")) + return std::string("/vsicurl/").append(pszTileName); + if (CPLIsFilenameRelative(pszTileName) && !STARTS_WITH(pszTileName, "GetLayerDefn(); + + // Is this a https://stac-utils.github.io/stac-geoparquet/latest/spec/stac-geoparquet-spec ? + const bool bIsStacGeoParquet = + poLayerDefn->GetFieldIndex("assets.image.href") >= 0; + const char *pszLocationFieldName = GetOption(MD_LOCATION_FIELD); if (!pszLocationFieldName) { - constexpr const char *DEFAULT_LOCATION_FIELD_NAME = "location"; - pszLocationFieldName = DEFAULT_LOCATION_FIELD_NAME; + if (bIsStacGeoParquet) + { + pszLocationFieldName = "assets.image.href"; + } + else + { + constexpr const char *DEFAULT_LOCATION_FIELD_NAME = "location"; + pszLocationFieldName = DEFAULT_LOCATION_FIELD_NAME; + } } - auto poLayerDefn = m_poLayer->GetLayerDefn(); + m_nLocationFieldIndex = poLayerDefn->GetFieldIndex(pszLocationFieldName); if (m_nLocationFieldIndex < 0) { @@ -979,8 +1000,8 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) const char *pszMinY = GetOption(MD_MINY); const char *pszMaxX = GetOption(MD_MAXX); const char *pszMaxY = GetOption(MD_MAXY); - const int nCountMinMaxXY = (pszMinX ? 1 : 0) + (pszMinY ? 1 : 0) + - (pszMaxX ? 1 : 0) + (pszMaxY ? 1 : 0); + int nCountMinMaxXY = (pszMinX ? 1 : 0) + (pszMinY ? 1 : 0) + + (pszMaxX ? 1 : 0) + (pszMaxY ? 1 : 0); if (nCountMinMaxXY != 0 && nCountMinMaxXY != 4) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1022,11 +1043,10 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) return false; } } - else + else if (const auto poSRS = m_poLayer->GetSpatialRef()) { - const auto poSRS = m_poLayer->GetSpatialRef(); // Ignore GPKG "Undefined geographic SRS" and "Undefined Cartesian SRS" - if (poSRS && !STARTS_WITH(poSRS->GetName(), "Undefined ")) + if (!STARTS_WITH(poSRS->GetName(), "Undefined ")) m_oSRS = *poSRS; } @@ -1086,6 +1106,108 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) } } + // Take into STAC GeoParquet proj:epsg and proj:transform fields + std::unique_ptr poFeature; + std::string osResX, osResY, osMinX, osMinY, osMaxX, osMaxY; + int iProjEPSG = -1; + int iProjTransform = -1; + if (bIsStacGeoParquet && !pszSRS && !pszResX && !pszResY && !pszMinX && + !pszMinY && !pszMaxX && !pszMaxY && + (iProjEPSG = poLayerDefn->GetFieldIndex("proj:epsg")) >= 0 && + (iProjTransform = poLayerDefn->GetFieldIndex("proj:transform")) >= 0) + { + poFeature.reset(m_poLayer->GetNextFeature()); + if (poFeature && poFeature->IsFieldSet(iProjEPSG) && + poFeature->IsFieldSet(iProjTransform)) + { + const int nEPSGCode = poFeature->GetFieldAsInteger(iProjEPSG); + OGRSpatialReference oSTACSRS; + oSTACSRS.SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER); + if (nEPSGCode > 0 && + oSTACSRS.importFromEPSG(nEPSGCode) == OGRERR_NONE) + { + int nTransformCount = 0; + const auto padfFeatureTransform = + poFeature->GetFieldAsDoubleList(iProjTransform, + &nTransformCount); + OGREnvelope sEnvelope; + if (nTransformCount >= 6 && m_poLayer->GetSpatialRef() && + m_poLayer->GetExtent(&sEnvelope, /* bForce = */ true) == + OGRERR_NONE) + { + const double dfResX = padfFeatureTransform[0]; + osResX = CPLSPrintf("%.17g", dfResX); + const double dfResY = std::fabs(padfFeatureTransform[4]); + osResY = CPLSPrintf("%.17g", dfResY); + + auto poCT = std::unique_ptr( + OGRCreateCoordinateTransformation( + m_poLayer->GetSpatialRef(), &oSTACSRS)); + auto poInvCT = std::unique_ptr( + poCT ? poCT->GetInverse() : nullptr); + double dfOutMinX = 0; + double dfOutMinY = 0; + double dfOutMaxX = 0; + double dfOutMaxY = 0; + if (dfResX > 0 && dfResY > 0 && poCT && poInvCT && + poCT->TransformBounds(sEnvelope.MinX, sEnvelope.MinY, + sEnvelope.MaxX, sEnvelope.MaxY, + &dfOutMinX, &dfOutMinY, + &dfOutMaxX, &dfOutMaxY, 21)) + { + constexpr double EPSILON = 1e-3; + const bool bTileAlignedOnRes = + (fmod(std::fabs(padfFeatureTransform[3]), dfResX) <= + EPSILON * dfResX && + fmod(std::fabs(padfFeatureTransform[5]), dfResY) <= + EPSILON * dfResY); + + osMinX = CPLSPrintf( + "%.17g", + !bTileAlignedOnRes + ? dfOutMinX + : std::floor(dfOutMinX / dfResX) * dfResX); + osMinY = CPLSPrintf( + "%.17g", + !bTileAlignedOnRes + ? dfOutMinY + : std::floor(dfOutMinY / dfResY) * dfResY); + osMaxX = CPLSPrintf( + "%.17g", + !bTileAlignedOnRes + ? dfOutMaxX + : std::ceil(dfOutMaxX / dfResX) * dfResX); + osMaxY = CPLSPrintf( + "%.17g", + !bTileAlignedOnRes + ? dfOutMaxY + : std::ceil(dfOutMaxY / dfResY) * dfResY); + + m_oSRS = oSTACSRS; + pszResX = osResX.c_str(); + pszResY = osResY.c_str(); + pszMinX = osMinX.c_str(); + pszMinY = osMinY.c_str(); + pszMaxX = osMaxX.c_str(); + pszMaxY = osMaxY.c_str(); + nCountMinMaxXY = 4; + + poFeature.reset(); + m_poLayer->ResetReading(); + + m_poWarpedLayerKeeper = + std::make_unique( + m_poLayer, /* iGeomField = */ 0, + /* bTakeOwnership = */ false, poCT.release(), + poInvCT.release()); + m_poLayer = m_poWarpedLayerKeeper.get(); + poLayerDefn = m_poLayer->GetLayerDefn(); + } + } + } + } + } + bool bHasMaskBand = false; if ((!pszBandCount && apoXMLNodeBands.empty()) || (!(pszResX && pszResY) && nCountXSizeYSizeGT == 0)) @@ -1093,8 +1215,8 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) CPLDebug("VRT", "Inspecting one feature due to missing metadata items"); m_bScannedOneFeatureAtOpening = true; - auto poFeature = - std::unique_ptr(m_poLayer->GetNextFeature()); + if (!poFeature) + poFeature.reset(m_poLayer->GetNextFeature()); if (!poFeature || !poFeature->IsFieldSetAndNotNull(m_nLocationFieldIndex)) { @@ -1618,6 +1740,66 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) nRasterYSize = static_cast(std::ceil(nRasterYSize / dfOvrFactor)); } + std::vector aosDescriptions; + if (bIsStacGeoParquet && poFeature) + { + const int nEOBandsIdx = + poLayerDefn->GetFieldIndex("assets.image.eo:bands"); + if (nEOBandsIdx >= 0 && + poLayerDefn->GetFieldDefn(nEOBandsIdx)->GetSubType() == OFSTJSON && + poFeature->IsFieldSet(nEOBandsIdx)) + { + const char *pszStr = poFeature->GetFieldAsString(nEOBandsIdx); + CPLJSONDocument oDoc; + if (oDoc.LoadMemory(pszStr) && + oDoc.GetRoot().GetType() == CPLJSONObject::Type::Array) + { + const auto oArray = oDoc.GetRoot().ToArray(); + if (oArray.Size() == nBandCount) + { + int i = 0; + aosDescriptions.resize(nBandCount); + static const std::map + oMapCommonName = { + {"red", GCI_RedBand}, + {"green", GCI_GreenBand}, + {"blue", GCI_BlueBand}, + {"alpha", GCI_AlphaBand}, + }; + for (const auto &oObj : oArray) + { + if (oObj.GetType() == CPLJSONObject::Type::Object) + { + const auto osCommonName = + oObj.GetString("common_name"); + const auto oIter = + oMapCommonName.find(osCommonName); + if (oIter != oMapCommonName.end()) + aeColorInterp[i] = oIter->second; + + const auto osName = oObj.GetString("name"); + aosDescriptions[i] = osName; + + const auto osDescription = + oObj.GetString("description"); + if (!osDescription.empty()) + { + if (aosDescriptions[i].empty()) + aosDescriptions[i] = osDescription; + else + aosDescriptions[i] + .append(" (") + .append(osDescription) + .append(")"); + } + } + ++i; + } + } + } + } + } + GDALTileIndexBand *poFirstBand = nullptr; for (int i = 0; i < nBandCount; ++i) { @@ -1644,6 +1826,10 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) m_bSameDataType = false; } + if (!aosDescriptions.empty() && !aosDescriptions[i].empty()) + { + poBand->GDALRasterBand::SetDescription(aosDescriptions[i].c_str()); + } if (!apoXMLNodeBands.empty()) { const char *pszVal = CPLGetXMLValue(