init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
+8
View File
@@ -0,0 +1,8 @@
## :dataframe-geo
This module, published as `dataframe-geo`, contains all logic and tests for DataFrame to be able to work
with geographical data.
**Experimental**.
This module targets Java 11 because of a restriction imposed by `org.jetbrains.kotlin.jupyter`.
+74
View File
@@ -0,0 +1,74 @@
import org.jetbrains.dataframe.gradle.DataSchemaVisibility
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
// Applies the project's Kotlin/JVM convention plugin and the publishing plugin.
// NOTE(review): the module README states that this module targets Java 11, while the convention
// alias applied here is `kotlinJvm8` — confirm which JVM target is intended.
plugins {
with(convention.plugins) {
alias(kotlinJvm8)
}
with(libs.plugins) {
alias(publisher)
}
}
// Maven group under which this module is published.
group = "org.jetbrains.kotlinx"
repositories {
// osgeo repository should come before Maven Central
maven(url = "https://repo.osgeo.org/repository/release")
mavenCentral()
mavenLocal()
}
// Adds the directory with the generated data-schema accessors to the main source set.
kotlin.sourceSets {
main {
kotlin.srcDir("src/generated-dataschema-accessors/main/kotlin/")
}
}
// The JAI core artifact pulled in transitively by GeoTools cannot be downloaded from Maven Central, see
// https://stackoverflow.com/questions/26993105/i-get-an-error-downloading-javax-media-jai-core1-1-3-from-maven-central
// It is therefore excluded from every GeoTools dependency and declared separately.
fun ExternalModuleDependency.excludeJaiCore() =
    exclude(group = "javax.media", module = "jai_core")
// Module dependencies. Every GeoTools artifact is declared with JAI core excluded
// (see excludeJaiCore); JAI core is then added as its own dependency.
dependencies {
// The core DataFrame module is exposed as part of this module's API
api(projects.core)
// Geotools
implementation(libs.geotools.main) { excludeJaiCore() }
implementation(libs.geotools.shapefile) { excludeJaiCore() }
implementation(libs.geotools.geojson) { excludeJaiCore() }
implementation(libs.geotools.referencing) { excludeJaiCore() }
implementation(libs.geotools.epsg.hsql) { excludeJaiCore() }
// JAI
implementation(libs.jai.core)
// JTS
implementation(libs.jts.core)
implementation(libs.jts.io.common)
// Ktor
implementation(libs.ktor.client.core)
implementation(libs.ktor.client.cio)
implementation(libs.ktor.client.content.negotiation)
implementation(libs.ktor.serialization.kotlinx.json)
// Test-only dependencies
testImplementation(kotlin("test"))
testImplementation(projects.dataframeJson)
}
// NOTE(review): presumably registers :core as a "friend" module so that its `internal`
// declarations are visible when compiling this module — confirm.
tasks.withType<KotlinCompile>().configureEach {
friendPaths.from(project(projects.core.path).projectDir)
}
// Publication metadata; the artifact id and the package name both follow the Gradle project name.
kotlinPublications {
publication {
publicationName = "dataframeGeo"
artifactId = project.name
description = "GeoDataFrame API"
packageName = artifactId
}
}
// Tests run on the JUnit Platform.
tasks.test {
useJUnitPlatform()
}
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithGeometry>.geometry: DataColumn<org.locationtech.jts.geom.Geometry>
    @JvmName("WithGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.Geometry>

/** The `geometry` value of a [DataRow] with the `WithGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithGeometry>.geometry: org.locationtech.jts.geom.Geometry
    @JvmName("WithGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.Geometry

/** The `geometry` column of a [ColumnsScope] with the nullable `WithGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.Geometry?>
    @JvmName("NullableWithGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.Geometry?>

/** The `geometry` value of a [DataRow] with the nullable `WithGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithGeometry?>.geometry: org.locationtech.jts.geom.Geometry?
    @JvmName("NullableWithGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.Geometry?
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithLineStringGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithLineStringGeometry>.geometry: DataColumn<org.locationtech.jts.geom.LineString>
    @JvmName("WithLineStringGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.LineString>

/** The `geometry` value of a [DataRow] with the `WithLineStringGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithLineStringGeometry>.geometry: org.locationtech.jts.geom.LineString
    @JvmName("WithLineStringGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.LineString

/** The `geometry` column of a [ColumnsScope] with the nullable `WithLineStringGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithLineStringGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.LineString?>
    @JvmName("NullableWithLineStringGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.LineString?>

/** The `geometry` value of a [DataRow] with the nullable `WithLineStringGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithLineStringGeometry?>.geometry: org.locationtech.jts.geom.LineString?
    @JvmName("NullableWithLineStringGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.LineString?
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithMultiLineStringGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithMultiLineStringGeometry>.geometry: DataColumn<org.locationtech.jts.geom.MultiLineString>
    @JvmName("WithMultiLineStringGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.MultiLineString>

/** The `geometry` value of a [DataRow] with the `WithMultiLineStringGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithMultiLineStringGeometry>.geometry: org.locationtech.jts.geom.MultiLineString
    @JvmName("WithMultiLineStringGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.MultiLineString

/** The `geometry` column of a [ColumnsScope] with the nullable `WithMultiLineStringGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithMultiLineStringGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.MultiLineString?>
    @JvmName("NullableWithMultiLineStringGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.MultiLineString?>

/** The `geometry` value of a [DataRow] with the nullable `WithMultiLineStringGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithMultiLineStringGeometry?>.geometry: org.locationtech.jts.geom.MultiLineString?
    @JvmName("NullableWithMultiLineStringGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.MultiLineString?
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithMultiPointGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithMultiPointGeometry>.geometry: DataColumn<org.locationtech.jts.geom.MultiPoint>
    @JvmName("WithMultiPointGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.MultiPoint>

/** The `geometry` value of a [DataRow] with the `WithMultiPointGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithMultiPointGeometry>.geometry: org.locationtech.jts.geom.MultiPoint
    @JvmName("WithMultiPointGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.MultiPoint

/** The `geometry` column of a [ColumnsScope] with the nullable `WithMultiPointGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithMultiPointGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.MultiPoint?>
    @JvmName("NullableWithMultiPointGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.MultiPoint?>

/** The `geometry` value of a [DataRow] with the nullable `WithMultiPointGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithMultiPointGeometry?>.geometry: org.locationtech.jts.geom.MultiPoint?
    @JvmName("NullableWithMultiPointGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.MultiPoint?
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithMultiPolygonGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithMultiPolygonGeometry>.geometry: DataColumn<org.locationtech.jts.geom.MultiPolygon>
    @JvmName("WithMultiPolygonGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.MultiPolygon>

/** The `geometry` value of a [DataRow] with the `WithMultiPolygonGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithMultiPolygonGeometry>.geometry: org.locationtech.jts.geom.MultiPolygon
    @JvmName("WithMultiPolygonGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.MultiPolygon

/** The `geometry` column of a [ColumnsScope] with the nullable `WithMultiPolygonGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithMultiPolygonGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.MultiPolygon?>
    @JvmName("NullableWithMultiPolygonGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.MultiPolygon?>

/** The `geometry` value of a [DataRow] with the nullable `WithMultiPolygonGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithMultiPolygonGeometry?>.geometry: org.locationtech.jts.geom.MultiPolygon?
    @JvmName("NullableWithMultiPolygonGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.MultiPolygon?
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithPointGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithPointGeometry>.geometry: DataColumn<org.locationtech.jts.geom.Point>
    @JvmName("WithPointGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.Point>

/** The `geometry` value of a [DataRow] with the `WithPointGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithPointGeometry>.geometry: org.locationtech.jts.geom.Point
    @JvmName("WithPointGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.Point

/** The `geometry` column of a [ColumnsScope] with the nullable `WithPointGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithPointGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.Point?>
    @JvmName("NullableWithPointGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.Point?>

/** The `geometry` value of a [DataRow] with the nullable `WithPointGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithPointGeometry?>.geometry: org.locationtech.jts.geom.Point?
    @JvmName("NullableWithPointGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.Point?
@@ -0,0 +1,13 @@
@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.ColumnsScope
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
/** The `geometry` column of a [ColumnsScope] with the `WithPolygonGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithPolygonGeometry>.geometry: DataColumn<org.locationtech.jts.geom.Polygon>
    @JvmName("WithPolygonGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.Polygon>

/** The `geometry` value of a [DataRow] with the `WithPolygonGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithPolygonGeometry>.geometry: org.locationtech.jts.geom.Polygon
    @JvmName("WithPolygonGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.Polygon

/** The `geometry` column of a [ColumnsScope] with the nullable `WithPolygonGeometry` schema. */
public val ColumnsScope<org.jetbrains.kotlinx.dataframe.geo.WithPolygonGeometry?>.geometry: DataColumn<org.locationtech.jts.geom.Polygon?>
    @JvmName("NullableWithPolygonGeometry_geometry")
    get() = this["geometry"] as DataColumn<org.locationtech.jts.geom.Polygon?>

/** The `geometry` value of a [DataRow] with the nullable `WithPolygonGeometry` schema. */
public val DataRow<org.jetbrains.kotlinx.dataframe.geo.WithPolygonGeometry?>.geometry: org.locationtech.jts.geom.Polygon?
    @JvmName("NullableWithPolygonGeometry_geometry")
    get() = this["geometry"] as org.locationtech.jts.geom.Polygon?
@@ -0,0 +1,81 @@
package org.jetbrains.kotlinx.dataframe.geo
import org.geotools.api.referencing.crs.CoordinateReferenceSystem
import org.geotools.geometry.jts.JTS
import org.geotools.referencing.CRS
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.update
import org.jetbrains.kotlinx.dataframe.api.with
/**
 * A data structure representing a geographical DataFrame, combining spatial data with
 * an optional Coordinate Reference System (CRS).
 *
 * Two instances are equal when their data frames are equal and their CRSs are either both absent
 * or equal according to [CRS.equalsIgnoreMetadata].
 *
 * @param T The type parameter extending `WithGeometry`, indicating the presence of a geometry column.
 * @property df The underlying `DataFrame` containing geometries.
 * @property crs The coordinate reference system associated with the data, if any.
 */
public class GeoDataFrame<T : WithGeometry>(public val df: DataFrame<T>, public val crs: CoordinateReferenceSystem?) {
    /**
     * Creates a new [GeoDataFrame] by applying transformations to the underlying [DataFrame].
     *
     * This function opens a modification scope where the current [DataFrame] can be transformed using
     * [Kotlin DataFrame operations](https://kotlin.github.io/dataframe/operations.html). The transformation block
     * receives the original [DataFrame] both as a receiver and as an explicit argument, allowing flexible
     * modifications.
     *
     * The Coordinate Reference System (CRS) remains unchanged.
     *
     * @param block A lambda defining the transformations to apply to the DataFrame.
     * @return A new [GeoDataFrame] instance with the modified DataFrame while preserving the original CRS.
     */
    public inline fun modify(block: DataFrame<T>.(DataFrame<T>) -> DataFrame<T>): GeoDataFrame<T> =
        GeoDataFrame(df.block(df), crs)

    /**
     * Transforms the geometries to a specified Coordinate Reference System (CRS).
     *
     * The geometry column is reprojected from the current CRS to [targetCrs]. If this `GeoDataFrame`
     * has no CRS, its geometries are treated as [DEFAULT_CRS] (WGS 84) coordinates. When [targetCrs]
     * equals the current CRS, this instance is returned unchanged.
     *
     * @param targetCrs The target CRS for transformation.
     * @return A `GeoDataFrame` with reprojected geometries and the specified CRS.
     */
    public fun applyCrs(targetCrs: CoordinateReferenceSystem): GeoDataFrame<T> {
        if (targetCrs == this.crs) return this
        // TODO: WGS 84 is assumed as the source CRS when none is set
        val sourceCRS: CoordinateReferenceSystem = this.crs ?: DEFAULT_CRS
        val transform = CRS.findMathTransform(sourceCRS, targetCrs, true)
        return GeoDataFrame(
            df.update { geometry }.with { JTS.transform(it, transform) },
            targetCrs,
        )
    }

    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (javaClass != other?.javaClass) return false

        other as GeoDataFrame<*>

        if (df != other.df) return false

        return when {
            crs == null && other.crs == null -> true
            crs == null || other.crs == null -> false
            else -> CRS.equalsIgnoreMetadata(crs, other.crs)
        }
    }

    override fun hashCode(): Int {
        // equals() compares CRSs ignoring their metadata, so crs.hashCode() — which is not
        // guaranteed to ignore metadata — must not contribute, otherwise equal instances could
        // hash differently. Only the presence of a CRS is mixed in.
        val crsMarker = if (crs == null) 0 else 1
        return 31 * df.hashCode() + crsMarker
    }

    override fun toString(): String = "GeoDataFrame(df=$df, crs=$crs)"

    public companion object {
        /** CRS assumed for data without an explicit CRS: EPSG:4326, decoded with longitude-first axis order. */
        public val DEFAULT_CRS: CoordinateReferenceSystem = CRS.decode("EPSG:4326", true)
    }
}
@@ -0,0 +1,45 @@
package org.jetbrains.kotlinx.dataframe.geo
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.geom.LineString
import org.locationtech.jts.geom.MultiLineString
import org.locationtech.jts.geom.MultiPoint
import org.locationtech.jts.geom.MultiPolygon
import org.locationtech.jts.geom.Point
import org.locationtech.jts.geom.Polygon
/**
 * Data schema with a `geometry` column holding JTS [Geometry] values.
 * It is the base of the more specific geometry schemas declared in this file.
 */
@DataSchema
public interface WithGeometry {
public val geometry: Geometry
}
/** Data schema whose `geometry` column is narrowed to [Polygon] values. */
@DataSchema
public interface WithPolygonGeometry : WithGeometry {
override val geometry: Polygon
}
/** Data schema whose `geometry` column is narrowed to [MultiPolygon] values. */
@DataSchema
public interface WithMultiPolygonGeometry : WithGeometry {
override val geometry: MultiPolygon
}
/** Data schema whose `geometry` column is narrowed to [Point] values. */
@DataSchema
public interface WithPointGeometry : WithGeometry {
override val geometry: Point
}
/** Data schema whose `geometry` column is narrowed to [MultiPoint] values. */
@DataSchema
public interface WithMultiPointGeometry : WithGeometry {
override val geometry: MultiPoint
}
/** Data schema whose `geometry` column is narrowed to [LineString] values. */
@DataSchema
public interface WithLineStringGeometry : WithGeometry {
override val geometry: LineString
}
/** Data schema whose `geometry` column is narrowed to [MultiLineString] values. */
@DataSchema
public interface WithMultiLineStringGeometry : WithGeometry {
override val geometry: MultiLineString
}
@@ -0,0 +1,16 @@
package org.jetbrains.kotlinx.dataframe.geo
import org.geotools.geometry.jts.ReferencedEnvelope
import org.jetbrains.kotlinx.dataframe.api.asIterable
import org.jetbrains.kotlinx.dataframe.geo.jts.computeBounds
/**
 * Calculates the envelope that encloses every geometry of this `GeoDataFrame`.
 *
 * @receiver The `GeoDataFrame` whose `geometry` column is measured.
 * @return A [ReferencedEnvelope] spanning all geometries and carrying the CRS of this `GeoDataFrame`
 * (which may be absent).
 */
public fun GeoDataFrame<*>.bounds(): ReferencedEnvelope {
    val enclosing = df.geometry.asIterable().computeBounds()
    return ReferencedEnvelope(enclosing, crs)
}
@@ -0,0 +1,131 @@
package org.jetbrains.kotlinx.dataframe.geo.geocode
import io.ktor.client.HttpClient
import io.ktor.client.engine.cio.CIO
import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.client.statement.bodyAsText
import io.ktor.http.ContentType
import io.ktor.http.contentType
import io.ktor.serialization.kotlinx.json.json
import kotlinx.coroutines.runBlocking
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import org.jetbrains.annotations.ApiStatus.Experimental
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.jetbrains.kotlinx.dataframe.geo.toGeo
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.geom.GeometryFactory
import org.locationtech.jts.io.geojson.GeoJsonReader
/**
 * Experimental geocoding utility.
 *
 * Country names are resolved through the HTTP geocoding service at the hard-coded [url]:
 * a first request maps the names to region ids, a second one fetches the boundaries of those ids.
 */
@Experimental
public object Geocoder {

    private val url = "https://geo2.datalore.jetbrains.com/map_data/geocoding"

    /**
     * Renders [value] as a quoted JSON string literal with all special characters escaped,
     * so that arbitrary text can be embedded into a hand-built request body safely.
     */
    private fun jsonString(value: String): String = kotlinx.serialization.json.JsonPrimitive(value).toString()

    /** Returns the value stored under [key], failing with a descriptive message when it is absent. */
    private fun <V : Any> Map<String, V>.required(key: String): V =
        this[key] ?: error("Geocoding response is missing the \"$key\" field")

    /** Builds the region query for a single [country] name. */
    private fun countryQuery(country: String) =
        """
        {
            "region_query_names" : [ ${jsonString(country)} ],
            "region_query_countries" : null,
            "region_query_states" : null,
            "region_query_counties" : null,
            "ambiguity_resolver" : {
                "ambiguity_resolver_ignoring_strategy" : null,
                "ambiguity_resolver_box" : null,
                "ambiguity_resolver_closest_coord" : null
            }
        }
        """.trimIndent()

    /** Builds the request that resolves [countries] to region ids on the country level. */
    private fun geocodeQuery(countries: List<String>) =
        """
        {
            "version" : 3,
            "mode" : "by_geocoding",
            "feature_options" : [ "limit", "position", "centroid" ],
            "resolution" : null,
            "view_box" : null,
            "fetched_ids" : null,
            "region_queries" : [
                ${countries.joinToString(",\n") { countryQuery(it) }}
            ],
            "scope" : [ ],
            "level" : "country",
            "namesake_example_limit" : 10,
            "allow_ambiguous" : false
        }
        """.trimIndent()

    /** Builds the request that fetches the boundaries of the regions with the given [ids]. */
    private fun idsQuery(ids: List<String>) =
        """
        {"version": 3,
        "mode": "by_id",
        "feature_options": ["boundary"],
        "resolution": 5,
        "view_box": null,
        "fetched_ids": null,
        "ids": [${ids.joinToString(", ") { jsonString(it) }}]}
        """.trimIndent()

    private val client = HttpClient(CIO) {
        install(ContentNegotiation) {
            json(
                Json {
                    prettyPrint = true
                    isLenient = true
                },
            )
        }
    }

    /** Posts [body] as JSON to the geocoding service and returns the raw response text. */
    private suspend fun postJson(body: String): String =
        client.post(url) {
            contentType(ContentType.Application.Json)
            // headers[HttpHeaders.AcceptEncoding] = "gzip"
            setBody(body)
        }.bodyAsText()

    /**
     * Parses a geocoding [response] and returns the feature object of every answer.
     * Each answer is expected to contain exactly one feature; `single()` fails otherwise.
     */
    private fun answerFeatures(response: String) =
        Json.parseToJsonElement(response)
            .jsonObject.required("data")
            .jsonObject.required("answers")
            .jsonArray
            .map { answer -> answer.jsonObject.required("features").jsonArray.single().jsonObject }

    /**
     * Geocodes [countries] and returns their boundaries.
     *
     * This call blocks the current thread while two HTTP requests are performed.
     * The result has the columns `country` (the requested names), `foundName` (the names reported
     * by the service) and `geometry` (the boundaries parsed from GeoJSON).
     * NOTE(review): rows are paired by position, which assumes the service answers in request
     * order — confirm.
     *
     * @param countries country names to look up.
     * @return a [GeoDataFrame] with one row per country.
     * @throws IllegalStateException if a response lacks an expected field.
     */
    public fun geocodeCountries(countries: List<String>): GeoDataFrame<*> {
        val (foundNames, geometries) = runBlocking {
            val matches = answerFeatures(postJson(geocodeQuery(countries)))
            val names: List<String> = matches.map { it.required("name").jsonPrimitive.content }
            val ids: List<String> = matches.map { it.required("id").jsonPrimitive.content }

            val geoJsonReader = GeoJsonReader(GeometryFactory())
            val boundaries: List<Geometry> = answerFeatures(postJson(idsQuery(ids)))
                .map { geoJsonReader.read(it.required("boundary").jsonPrimitive.content) }

            names to boundaries
        }
        return dataFrameOf(
            "country" to countries,
            "foundName" to foundNames,
            "geometry" to geometries,
        ).toGeo()
    }
}
@@ -0,0 +1,66 @@
package org.jetbrains.kotlinx.dataframe.geo.geotools
import org.geotools.api.feature.simple.SimpleFeature
import org.geotools.api.feature.simple.SimpleFeatureType
import org.geotools.api.feature.type.GeometryDescriptor
import org.geotools.api.referencing.crs.CoordinateReferenceSystem
import org.geotools.data.simple.SimpleFeatureCollection
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.jetbrains.kotlinx.dataframe.geo.WithGeometry
import org.locationtech.jts.geom.Geometry
/**
 * Builds a [GeoDataFrame] from the features of this [SimpleFeatureCollection].
 *
 * Every non-geometry attribute of the feature type becomes a column named after the attribute's
 * local name, and the value of the first geometry attribute of each feature goes to the
 * `geometry` column. The CRS of that geometry attribute, which may be absent, is attached to
 * the result.
 *
 * @return a [GeoDataFrame] holding the attribute columns, the `geometry` column and the CRS, if any.
 * @throws IllegalArgumentException if the schema has no geometry attribute or a feature's
 * geometry attribute value is not a [Geometry].
 */
public fun SimpleFeatureCollection.toGeoDataFrame(): GeoDataFrame<*> {
    require(schema is SimpleFeatureType) {
        "GeoTools: SimpleFeatureType expected but was: ${schema::class.simpleName}"
    }
    val descriptors = (schema as SimpleFeatureType).attributeDescriptors

    val plainAttributes = descriptors
        ?.filter { it !is GeometryDescriptor }
        ?.map { it!! }
        ?: emptyList()
    val geometryAttribute = descriptors?.find { it is GeometryDescriptor }
        ?: throw IllegalArgumentException("No geometry attribute")

    // In GeoJSON, the crs attribute is optional
    val geometryDescriptor = geometryAttribute as GeometryDescriptor
    val crs: CoordinateReferenceSystem? = geometryDescriptor.coordinateReferenceSystem

    // One value list per non-geometry attribute, keyed by the attribute's local name.
    val columnValues = plainAttributes.associate { attribute -> attribute.localName to ArrayList<Any?>() }
    val geometries = ArrayList<Geometry>()

    features().use { iterator ->
        while (iterator.hasNext()) {
            val feature = iterator.next()
            require(feature is SimpleFeature) {
                "GeoTools: SimpleFeature expected but was: ${feature::class.simpleName}"
            }
            val featureGeometry = feature.getAttribute(geometryAttribute.name)
            require(featureGeometry is Geometry) {
                "Not a geometry: [${geometryAttribute.name}] = ${featureGeometry?.javaClass?.simpleName} (feature id: ${feature.id})"
            }
            // TODO require(featureGeometry.isValid) { "Invalid geometry, feature id: ${feature.id}" }

            plainAttributes.forEach { attribute ->
                columnValues[attribute.localName]?.add(feature.getAttribute(attribute.name))
            }
            geometries += featureGeometry
        }
    }

    val geometryColumn = DataColumn.createByType("geometry", geometries, Infer.Type)

    @Suppress("UNCHECKED_CAST")
    return GeoDataFrame((columnValues.toDataFrame() + geometryColumn) as DataFrame<WithGeometry>, crs)
}
@@ -0,0 +1,57 @@
package org.jetbrains.kotlinx.dataframe.geo.geotools
import org.geotools.api.feature.simple.SimpleFeature
import org.geotools.data.collection.ListFeatureCollection
import org.geotools.data.simple.SimpleFeatureCollection
import org.geotools.feature.simple.SimpleFeatureBuilder
import org.geotools.feature.simple.SimpleFeatureTypeBuilder
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.single
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.locationtech.jts.geom.Geometry
/**
 * Converts the `GeoDataFrame` to a `SimpleFeatureCollection`.
 *
 * The `geometry` column becomes the `the_geom` attribute; every other column becomes an attribute
 * declared with a `String` binding. Features get zero-padded ids (`f0`, `f1`, … / `f00`, `f01`, …)
 * so that their lexicographic order matches the row order.
 *
 * @param name Optional name for the `SimpleFeatureCollection`. Defaults to "geodata" if not specified.
 * @param singleGeometryType Whether to enforce a single geometry type within the collection. Defaults to false.
 * When `true`, all geometries must be non-null and of the same class, otherwise the call fails.
 * @return A `SimpleFeatureCollection` representing the `GeoDataFrame`.
 */
public fun GeoDataFrame<*>.toSimpleFeatureCollection(
    name: String? = null,
    singleGeometryType: Boolean = false,
): SimpleFeatureCollection {
    // Non-geometry columns in data frame order; computed once instead of once per row.
    val attributeColumns = df.columnNames().filter { it != "geometry" }

    val typeBuilder = SimpleFeatureTypeBuilder()
    typeBuilder.name = name ?: "geodata"
    typeBuilder.setCRS(crs)
    val geometryClass = if (singleGeometryType) {
        // todo singleOrNull() ?: error()
        df["geometry"].map { it!!::class.java }.distinct().single()
    } else {
        Geometry::class.java
    }
    typeBuilder.add("the_geom", geometryClass)
    attributeColumns.forEach { colName ->
        typeBuilder.add(colName, String::class.java)
    }

    val featureType = typeBuilder.buildFeatureType()
    val featureCollection = ListFeatureCollection(featureType)
    val featureBuilder = SimpleFeatureBuilder(featureType)

    // if ID is present, SortedMap in DefaultFeatureCollection sorts rows by ID lexicographically
    // I couldn't disable writing it, so let's generate lexicographically sorted IDs.
    // padStart is used rather than String.format so the digits do not depend on the default locale.
    val idWidth = df.rowsCount().toString().length

    df.forEach { row ->
        // Values are added in the same order the attributes were declared: geometry first.
        featureBuilder.add(row["geometry"])
        attributeColumns.forEach { colName ->
            featureBuilder.add(row[colName])
        }
        val featureId = "f" + index().toString().padStart(idWidth, '0')
        val feature: SimpleFeature = featureBuilder.buildFeature(featureId)
        featureCollection.add(feature)
    }

    return featureCollection
}
@@ -0,0 +1,73 @@
package org.jetbrains.kotlinx.dataframe.geo.io
import org.geotools.data.shapefile.ShapefileDataStoreFactory
import org.geotools.data.simple.SimpleFeatureCollection
import org.geotools.geojson.feature.FeatureJSON
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.jetbrains.kotlinx.dataframe.geo.geotools.toGeoDataFrame
import org.jetbrains.kotlinx.dataframe.io.asUrl
import java.io.File
import java.net.URL
/**
 * Reads a GeoJSON document into a [GeoDataFrame].
 *
 * @param path location of the document; it is converted to a [URL] with `asUrl`.
 */
public fun GeoDataFrame.Companion.readGeoJson(path: String): GeoDataFrame<*> {
    val url = asUrl(path)
    return readGeoJson(url)
}
/**
 * Reads the GeoJSON feature collection served at [url] into a [GeoDataFrame].
 * The stream opened from [url] is closed once the conversion has finished.
 */
public fun GeoDataFrame.Companion.readGeoJson(url: URL): GeoDataFrame<*> {
    return url.openStream().use { stream ->
        val features = FeatureJSON().readFeatureCollection(stream) as SimpleFeatureCollection
        features.toGeoDataFrame()
    }
}
/** Shortcut for [GeoDataFrame.Companion.readGeoJson] taking a path, available on the `DataFrame` companion. */
public fun DataFrame.Companion.readGeoJson(path: String): GeoDataFrame<*> {
    return GeoDataFrame.readGeoJson(path)
}

/** Shortcut for [GeoDataFrame.Companion.readGeoJson] taking a [URL], available on the `DataFrame` companion. */
public fun DataFrame.Companion.readGeoJson(url: URL): GeoDataFrame<*> {
    return GeoDataFrame.readGeoJson(url)
}
/**
 * Reads a shapefile into a [GeoDataFrame].
 *
 * Examples:
 * ```
 * GeoDataFrame.readShapefile("simple_points")
 * GeoDataFrame.readShapefile("simple_points/simple_points.shp")
 * ```
 *
 * @param path path to *.shp or *.shp.gz file, or to a directory containing such a file
 */
public fun GeoDataFrame.Companion.readShapefile(path: String): GeoDataFrame<*> =
    readShapeFileImpl(resolveShapefileUrl(path))
/**
 * Reads the shapefile located at [url] into a [GeoDataFrame].
 *
 * A `file:` URL may point either to the shapefile itself or to a directory containing it,
 * in which case the shapefile is looked up inside that directory. URLs with any other protocol
 * are passed to the shapefile reader unchanged.
 */
public fun GeoDataFrame.Companion.readShapefile(url: URL): GeoDataFrame<*> {
    if (url.protocol != "file") return readShapeFileImpl(url)

    // `url.path` is still percent-encoded ("my%20data"), so using it directly as a file path breaks
    // for names with spaces or non-ASCII characters. Converting through a URI decodes it; the raw
    // path remains the fallback for URLs that are not valid URIs (e.g. with unescaped spaces).
    val localPath = runCatching { File(url.toURI()).path }.getOrElse { url.path }
    return readShapeFileImpl(resolveShapefileUrl(localPath))
}
/**
 * Opens a shapefile data store for [url], converts its features to a [GeoDataFrame]
 * and disposes of the store afterwards, whether or not the conversion succeeded.
 */
private fun readShapeFileImpl(url: URL): GeoDataFrame<*> {
    val store = ShapefileDataStoreFactory().createDataStore(url)
    return try {
        store.featureSource.features.toGeoDataFrame()
    } finally {
        store.dispose()
    }
}
/**
 * Turns [path] into the URL of a shapefile: a directory is replaced by the shapefile found
 * inside it, any other path is used as is.
 */
private fun resolveShapefileUrl(path: String): URL {
    val candidate = File(path)
    val shapefile = if (candidate.isDirectory) findShapefileInDirectory(candidate) else candidate
    return shapefile.toURI().toURL()
}
/**
 * Looks inside [dir] for a shapefile named after the directory itself: `<dir>.shp` is preferred,
 * `<dir>.shp.gz` is the fallback.
 *
 * @throws IllegalArgumentException if neither file exists.
 */
private fun findShapefileInDirectory(dir: File): File {
    val baseName = dir.name
    for (extension in listOf("shp", "shp.gz")) {
        val candidate = File(dir, "$baseName.$extension")
        if (candidate.exists()) return candidate
    }
    throw IllegalArgumentException("No shapefile found in directory: ${dir.absolutePath}")
}
/** Shortcut for [GeoDataFrame.Companion.readShapefile] taking a path, available on the `DataFrame` companion. */
public fun DataFrame.Companion.readShapefile(path: String): GeoDataFrame<*> {
    return GeoDataFrame.readShapefile(path)
}

/** Shortcut for [GeoDataFrame.Companion.readShapefile] taking a [URL], available on the `DataFrame` companion. */
public fun DataFrame.Companion.readShapefile(url: URL): GeoDataFrame<*> {
    return GeoDataFrame.readShapefile(url)
}
@@ -0,0 +1,68 @@
package org.jetbrains.kotlinx.dataframe.geo.io
import org.geotools.api.data.FileDataStoreFinder
import org.geotools.api.data.SimpleFeatureStore
import org.geotools.api.data.Transaction
import org.geotools.feature.simple.SimpleFeatureTypeBuilder
import org.geotools.geojson.feature.FeatureJSON
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.jetbrains.kotlinx.dataframe.geo.geotools.toSimpleFeatureCollection
import java.io.File
import java.nio.file.Path
import kotlin.io.path.createDirectories
import kotlin.io.path.notExists
import kotlin.io.path.outputStream
/** Writes this `GeoDataFrame` as GeoJSON to the file at [path]. */
public fun GeoDataFrame<*>.writeGeoJson(path: String) {
    writeGeoJson(File(path))
}
/**
 * Writes this `GeoDataFrame` as a GeoJSON feature collection to [path].
 * The output stream opened for [path] is closed when writing ends.
 */
public fun GeoDataFrame<*>.writeGeoJson(path: Path) {
    val writer = FeatureJSON()
    path.outputStream().use { sink ->
        val features = toSimpleFeatureCollection()
        writer.writeFeatureCollection(features, sink)
    }
}
/** Writes this `GeoDataFrame` as GeoJSON to [file]. */
public fun GeoDataFrame<*>.writeGeoJson(file: File): Unit = writeGeoJson(file.toPath())
/** Writes this `GeoDataFrame` as a shapefile into the directory at [directoryPath]. */
public fun GeoDataFrame<*>.writeShapefile(directoryPath: String) {
    writeShapefile(File(directoryPath))
}
/**
 * Writes this `GeoDataFrame` as a shapefile into [directory].
 *
 * The directory is created when it does not exist yet; the shapefile is named after the
 * directory (`<directory>/<directory name>.shp`). The schema is written with the CRS of this
 * `GeoDataFrame`, or with [GeoDataFrame.DEFAULT_CRS] when none is set. All geometries must be
 * of a single type, because the feature collection is built with a single geometry type enforced.
 *
 * Features are written in auto-commit mode; any failure is propagated to the caller, and the
 * data store is disposed of in every case.
 *
 * @param directory target directory of the shapefile.
 */
public fun GeoDataFrame<*>.writeShapefile(directory: Path) {
    if (directory.notExists()) {
        directory.createDirectories()
    }
    val fileName = directory.fileName.toString()
    val shp = directory.resolve("$fileName.shp")

    val creationParams = mutableMapOf<String, java.io.Serializable>()
    creationParams["url"] = shp.toUri().toURL()
    val factory = FileDataStoreFinder.getDataStoreFactory("shp")
    val dataStore = factory.createNewDataStore(creationParams)

    try {
        val featureCollection = toSimpleFeatureCollection(fileName, true)

        val schemaWithCrs = SimpleFeatureTypeBuilder.retype(
            featureCollection.schema,
            crs ?: GeoDataFrame.DEFAULT_CRS,
        )
        dataStore.createSchema(schemaWithCrs)

        // No explicit transaction handling: the previous code used Transaction.AUTO_COMMIT, on which
        // commit() does nothing and rollback() is not supported, so its catch block only replaced
        // the real cause of a failed write. The failure is now reported to the caller directly.
        val featureStore = dataStore.getFeatureSource(fileName) as SimpleFeatureStore
        featureStore.addFeatures(featureCollection)
    } finally {
        dataStore.dispose()
    }
}
/** Writes this `GeoDataFrame` as a shapefile into [directory]. */
public fun GeoDataFrame<*>.writeShapefile(directory: File): Unit = writeShapefile(directory.toPath())
@@ -0,0 +1,17 @@
package org.jetbrains.kotlinx.dataframe.geo.jts
import org.locationtech.jts.geom.Envelope
import org.locationtech.jts.geom.Geometry
/**
 * Calculates the envelope enclosing all geometries of this collection.
 *
 * Starting from a fresh [Envelope], the internal envelope of every geometry is merged in.
 *
 * @receiver The geometries to measure.
 * @return The envelope expanded to include every geometry of the collection.
 */
public fun Iterable<Geometry>.computeBounds(): Envelope =
    fold(Envelope()) { enclosing, geometry ->
        enclosing.expandToInclude(geometry.envelopeInternal)
        enclosing
    }
@@ -0,0 +1,72 @@
package org.jetbrains.kotlinx.dataframe.geo.jts
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.geom.util.AffineTransformation
/**
 * Scales the geometry uniformly around its centroid.
 *
 * @param factor The scaling factor applied to both the X and the Y axis.
 * @return A new geometry scaled around its center.
 */
public fun Geometry.scaleAroundCenter(factor: Double): Geometry {
    return scaleAroundCenter(xFactor = factor, yFactor = factor)
}
/**
 * Scales the geometry around its centroid with independent factors per axis.
 *
 * The transformation moves the centroid to the origin, scales, and moves the centroid back.
 *
 * @param xFactor The scaling factor for the X axis.
 * @param yFactor The scaling factor for the Y axis.
 * @return A new geometry scaled around its center.
 */
public fun Geometry.scaleAroundCenter(xFactor: Double, yFactor: Double): Geometry {
    val center = centroid.coordinate

    val toOrigin = AffineTransformation.translationInstance(-center.x, -center.y)
    val scaling = AffineTransformation.scaleInstance(xFactor, yFactor)
    val fromOrigin = AffineTransformation.translationInstance(center.x, center.y)

    return toOrigin.compose(scaling).compose(fromOrigin).transform(this)
}
/**
 * Moves the geometry by the given offsets along the X and Y axes.
 *
 * @param valueX Offset applied along the X axis.
 * @param valueY Offset applied along the Y axis.
 * @return A new geometry shifted by ([valueX], [valueY]).
 */
public fun Geometry.translate(valueX: Double, valueY: Double): Geometry {
    val shift = AffineTransformation().translate(valueX, valueY)
    return shift.transform(this)
}
/**
 * Rotates the geometry around its center by the specified angle in radians.
 *
 * The center is the geometry's centroid. The transformation moves the centroid to the
 * origin, applies the rotation, and moves the result back.
 *
 * An empty geometry has no centroid coordinate; in that case a copy of the geometry
 * is returned unchanged instead of failing with a `NullPointerException`.
 *
 * @param angleRadians The rotation angle in radians.
 * @return A new geometry rotated around its center.
 */
public fun Geometry.rotate(angleRadians: Double): Geometry {
    // JTS returns a null coordinate for the centroid of an empty geometry:
    // there is nothing to rotate, so hand back a copy to keep the "new geometry" contract.
    val center = centroid.coordinate ?: return copy()

    // compose() applies the receiver first and the argument afterwards,
    // so the chain reads in execution order: to origin -> rotate -> back.
    val transformation = AffineTransformation
        .translationInstance(-center.x, -center.y)
        .compose(AffineTransformation.rotationInstance(angleRadians))
        .compose(AffineTransformation.translationInstance(center.x, center.y))

    return transformation.transform(this)
}
/**
 * Mirrors the geometry horizontally around its center.
 *
 * Delegates to [scaleAroundCenter] with an X factor of `-1.0` and a Y factor of `1.0`,
 * so X offsets from the center are negated while Y coordinates stay as they are.
 *
 * @return A new geometry with its X coordinates flipped around the center.
 */
public fun Geometry.reflectX(): Geometry {
    return scaleAroundCenter(xFactor = -1.0, yFactor = 1.0)
}
/**
 * Mirrors the geometry vertically around its center.
 *
 * Delegates to [scaleAroundCenter] with an X factor of `1.0` and a Y factor of `-1.0`,
 * so Y offsets from the center are negated while X coordinates stay as they are.
 *
 * @return A new geometry with its Y coordinates flipped around the center.
 */
public fun Geometry.reflectY(): Geometry {
    return scaleAroundCenter(xFactor = 1.0, yFactor = -1.0)
}
@@ -0,0 +1,41 @@
package org.jetbrains.kotlinx.dataframe.geo.jts
import org.locationtech.jts.geom.LineString
import org.locationtech.jts.geom.MultiLineString
import org.locationtech.jts.geom.MultiPoint
import org.locationtech.jts.geom.MultiPolygon
import org.locationtech.jts.geom.Point
import org.locationtech.jts.geom.Polygon
/**
 * Wraps this [Polygon] into a single-element [MultiPolygon].
 *
 * The result is created through the polygon's own geometry factory.
 *
 * @receiver Polygon to wrap.
 * @return A [MultiPolygon] whose only member is the receiver.
 */
public fun Polygon.toMultiPolygon(): MultiPolygon = factory.createMultiPolygon(arrayOf(this))
/**
 * Wraps this [Point] into a single-element [MultiPoint].
 *
 * The result is created through the point's own geometry factory.
 *
 * @receiver Point to wrap.
 * @return A [MultiPoint] whose only member is the receiver.
 */
public fun Point.toMultiPoint(): MultiPoint = factory.createMultiPoint(arrayOf(this))
/**
 * Wraps this [LineString] into a single-element [MultiLineString].
 *
 * The result is created through the line string's own geometry factory.
 *
 * @receiver LineString to wrap.
 * @return A [MultiLineString] whose only member is the receiver.
 */
public fun LineString.toMultiLineString(): MultiLineString = factory.createMultiLineString(arrayOf(this))
@@ -0,0 +1,24 @@
package org.jetbrains.kotlinx.dataframe.geo
import org.geotools.api.referencing.crs.CoordinateReferenceSystem
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
/**
 * Wraps an [AnyFrame] into a [GeoDataFrame], optionally attaching a
 * Coordinate Reference System (CRS).
 *
 * The receiver is cast, unchecked, to `DataFrame<WithGeometry>`; nothing in this
 * function verifies that a `geometry` column is actually present, so callers are
 * expected to pass a frame that has one.
 *
 * @receiver The DataFrame to expose as a [GeoDataFrame].
 * @param crs The coordinate reference system passed to the [GeoDataFrame];
 * defaults to `null`.
 * @return A [GeoDataFrame] built from the receiver and [crs].
 */
@Suppress("UNCHECKED_CAST")
public fun AnyFrame.toGeo(crs: CoordinateReferenceSystem? = null): GeoDataFrame<*> {
    val geometryFrame = this as DataFrame<WithGeometry>
    return GeoDataFrame(geometryFrame, crs)
}
@@ -0,0 +1,87 @@
package org.jetbrains.kotlinx.dataframe.geo.io
import org.geotools.referencing.CRS
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.jetbrains.kotlinx.dataframe.geo.toGeo
import org.locationtech.jts.geom.Coordinate
import org.locationtech.jts.geom.GeometryFactory
import java.io.File
import java.nio.file.Files
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Tests reading and writing [GeoDataFrame]s as GeoJSON and shapefiles,
 * using the `simple_points` fixtures and temporary files.
 */
class IOTest {

    /** Plain DataFrame with two named points, mirroring the `simple_points` fixtures. */
    private val simplePointsDf = run {
        val geometryFactory = GeometryFactory()
        val point1 = geometryFactory.createPoint(Coordinate(30.5, 50.5))
        val point2 = geometryFactory.createPoint(Coordinate(31.5, 51.5))
        dataFrameOf("name", "geometry")(
            "Point 1",
            point1,
            "Point 2",
            point2,
        )
    }

    /** The same data wrapped as a [GeoDataFrame] with the default CRS. */
    private val simplePointsGeoDf = simplePointsDf.toGeo(GeoDataFrame.DEFAULT_CRS)

    // Class loader of the test class itself, so fixtures are resolved from the test classpath.
    private val classLoader = javaClass.classLoader

    @Test
    fun readGeoJson() {
        val jsonURL = classLoader.getResource("./simple_points.geojson")!!
        val geodf = GeoDataFrame.readGeoJson(jsonURL)

        assertEquals(simplePointsDf, geodf.df)
        // assertEquals instead of Kotlin's assert(): the latter is skipped when JVM assertions are disabled.
        assertEquals(null, geodf.crs)
    }

    @Test
    fun writeGeoJson() {
        val tempFile = Files.createTempFile("simple_points", ".json").toFile()
        try {
            simplePointsGeoDf.writeGeoJson(tempFile)

            val loadedGeoDataFrame = GeoDataFrame.readGeoJson(tempFile.toURI().toURL())
            assertEquals(simplePointsGeoDf.df, loadedGeoDataFrame.df)
            // TODO: Doesn't work because of how equality between CRS is checked by geotools
            // assertEquals(simplePointsGeoDf, loadedGeoDataFrame)
        } finally {
            // Clean up even when an assertion above fails.
            tempFile.delete()
        }
    }

    @Test
    fun readShapefile() {
        val shapefileURL = classLoader.getResource("./simple_points/simple_points.shp")!!
        val geodf = GeoDataFrame.readShapefile(shapefileURL)

        assertEquals(simplePointsDf, geodf.df)
        assertEquals(null, geodf.crs)
    }

    @Test
    fun writeShapefile() {
        val tempDir = Files.createTempDirectory("shapefiles").toFile()
        try {
            val tempShapefileDir = File(tempDir, "simple_points").also { it.mkdir() }
            simplePointsGeoDf.writeShapefile(tempShapefileDir)

            val shapefile = File(tempShapefileDir, "simple_points.shp")
            assertEquals(simplePointsGeoDf, GeoDataFrame.readShapefile(shapefile.toURI().toURL()))
        } finally {
            // deleteOnExit() cannot remove a non-empty directory, so delete the whole tree explicitly.
            tempDir.deleteRecursively()
        }
    }

    @Test
    fun readShapefileDirectory() {
        val shapefileURL = classLoader.getResource("./simple_points")!!
        val geodf = GeoDataFrame.readShapefile(shapefileURL)

        assertEquals(simplePointsDf, geodf.df)
        assertEquals(null, geodf.crs)
    }

    @Test
    fun readShapefileDirectoryFile() {
        val geodf = GeoDataFrame.readShapefile("src/test/resources/simple_points")

        assertEquals(simplePointsDf, geodf.df)
        assertEquals(null, geodf.crs)
    }
}
@@ -0,0 +1,25 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [30.5, 50.5]
},
"properties": {
"name": "Point 1"
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [31.5, 51.5]
},
"properties": {
"name": "Point 2"
}
}
]
}