Spatial aggregation functions

st_intersects_aggregate

st_intersects_aggregate(leftIndex, rightIndex)

Returns true if any of the leftIndex and rightIndex pairs intersect.

Parameters:

leftIndex (Column) – Geometry
rightIndex (Column) – Geometry

Return type:

Column

Example:

left_df = (
    spark.createDataFrame([{'geom': 'POLYGON ((0 0, 0 3, 3 3, 3 0))'}])
        .select(grid_tessellateexplode(col("geom"), lit(1)).alias("left_index"))
)
right_df = (
    spark.createDataFrame([{'geom': 'POLYGON ((2 2, 2 4, 4 4, 4 2))'}])
        .select(grid_tessellateexplode(col("geom"), lit(1)).alias("right_index"))
)
(
    left_df
        .join(right_df, col("left_index.index_id") == col("right_index.index_id"))
        .groupBy()
        .agg(st_intersects_aggregate(col("left_index"), col("right_index")))
).show(1, False)
+------------------------------------------------+
|st_intersects_aggregate(left_index, right_index)|
+------------------------------------------------+
|true                                            |
+------------------------------------------------+

val leftDf = List("POLYGON ((0 0, 0 3, 3 3, 3 0))").toDF("geom")
    .select(grid_tessellateexplode($"geom", lit(1)).alias("left_index"))
val rightDf = List("POLYGON ((2 2, 2 4, 4 4, 4 2))").toDF("geom")
    .select(grid_tessellateexplode($"geom", lit(1)).alias("right_index"))
leftDf
    .join(rightDf, $"left_index.index_id" === $"right_index.index_id")
    .groupBy()
    .agg(st_intersects_aggregate($"left_index", $"right_index"))
    .show(false)
+------------------------------------------------+
|st_intersects_aggregate(left_index, right_index)|
+------------------------------------------------+
|true                                            |
+------------------------------------------------+

WITH l AS (SELECT grid_tessellateexplode("POLYGON ((0 0, 0 3, 3 3, 3 0))", 1) AS left_index),
    r AS (SELECT grid_tessellateexplode("POLYGON ((2 2, 2 4, 4 4, 4 2))", 1) AS right_index)
SELECT st_intersects_aggregate(l.left_index, r.right_index)
FROM l INNER JOIN r on l.left_index.index_id = r.right_index.index_id
+------------------------------------------------+
|st_intersects_aggregate(left_index, right_index)|
+------------------------------------------------+
|true                                            |
+------------------------------------------------+

df.l <- select(
    createDataFrame(data.frame(geom = "POLYGON ((0 0, 0 3, 3 3, 3 0))")),
    alias(grid_tessellateexplode(column("geom"), lit(1L)), "left_index")
)
df.r <- select(
    createDataFrame(data.frame(geom = "POLYGON ((2 2, 2 4, 4 4, 4 2))")),
    alias(grid_tessellateexplode(column("geom"), lit(1L)), "right_index")
)
showDF(
    select(
        join(df.l, df.r, df.l$left_index.index_id == df.r$right_index.index_id),
        st_intersects_aggregate(column("left_index"), column("right_index"))
    ), truncate=F
)
+------------------------------------------------+
|st_intersects_aggregate(left_index, right_index)|
+------------------------------------------------+
|true                                            |
+------------------------------------------------+

st_intersection_aggregate

st_intersection_aggregate(leftIndex, rightIndex)

Computes the intersections of leftIndex and rightIndex and returns the union of these intersections.

Parameters:

leftIndex (Column) – Geometry
rightIndex (Column) – Geometry

Return type:

Column

Example:

left_df = (
    spark.createDataFrame([{'geom': 'POLYGON ((0 0, 0 3, 3 3, 3 0))'}])
        .select(grid_tessellateexplode(col("geom"), lit(1)).alias("left_index"))
)
right_df = (
    spark.createDataFrame([{'geom': 'POLYGON ((2 2, 2 4, 4 4, 4 2))'}])
        .select(grid_tessellateexplode(col("geom"), lit(1)).alias("right_index"))
)
(
    left_df
        .join(right_df, col("left_index.index_id") == col("right_index.index_id"))
        .groupBy()
        .agg(st_astext(st_intersection_aggregate(col("left_index"), col("right_index"))))
).show(1, False)
+--------------------------------------------------------------+
|convert_to(st_intersection_aggregate(left_index, right_index))|
+--------------------------------------------------------------+
|POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))                           |
+--------------------------------------------------------------+

val leftDf = List("POLYGON ((0 0, 0 3, 3 3, 3 0))").toDF("geom")
    .select(grid_tessellateexplode($"geom", lit(1)).alias("left_index"))
val rightDf = List("POLYGON ((2 2, 2 4, 4 4, 4 2))").toDF("geom")
    .select(grid_tessellateexplode($"geom", lit(1)).alias("right_index"))
leftDf
    .join(rightDf, $"left_index.index_id" === $"right_index.index_id")
    .groupBy()
    .agg(st_astext(st_intersection_aggregate($"left_index", $"right_index")))
    .show(false)
+--------------------------------------------------------------+
|convert_to(st_intersection_aggregate(left_index, right_index))|
+--------------------------------------------------------------+
|POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))                           |
+--------------------------------------------------------------+

WITH l AS (SELECT grid_tessellateexplode("POLYGON ((0 0, 0 3, 3 3, 3 0))", 1) AS left_index),
    r AS (SELECT grid_tessellateexplode("POLYGON ((2 2, 2 4, 4 4, 4 2))", 1) AS right_index)
SELECT st_astext(st_intersection_aggregate(l.left_index, r.right_index))
FROM l INNER JOIN r on l.left_index.index_id = r.right_index.index_id
+--------------------------------------------------------------+
|convert_to(st_intersection_aggregate(left_index, right_index))|
+--------------------------------------------------------------+
|POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))                           |
+--------------------------------------------------------------+

df.l <- select(
    createDataFrame(data.frame(geom = "POLYGON ((0 0, 0 3, 3 3, 3 0))")),
    alias(grid_tessellateexplode(column("geom"), lit(1L)), "left_index")
)
df.r <- select(
    createDataFrame(data.frame(geom = "POLYGON ((2 2, 2 4, 4 4, 4 2))")),
    alias(grid_tessellateexplode(column("geom"), lit(1L)), "right_index")
)
showDF(
    select(
        join(df.l, df.r, df.l$left_index.index_id == df.r$right_index.index_id),
        st_astext(st_intersection_aggregate(column("left_index"), column("right_index")))
    ), truncate=F
)
+--------------------------------------------------------------+
|convert_to(st_intersection_aggregate(left_index, right_index))|
+--------------------------------------------------------------+
|POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))                           |
+--------------------------------------------------------------+

st_union_agg

st_union_agg(geom)

Computes the union of the input geometries.

Parameters:: geom (Column) – Geometry
Return type:: Column
Example:

df = spark.createDataFrame([{'geom': 'POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))'}, {'geom': 'POLYGON ((15 15, 25 15, 25 25, 15 25, 15 15))'}])
df.select(st_astext(st_union_agg(col('geom')))).show()
+-------------------------------------------------------------------------+
| st_union_agg(geom)                                                      |
+-------------------------------------------------------------------------+
|POLYGON ((20 15, 20 10, 10 10, 10 20, 15 20, 15 25, 25 25, 25 15, 20 15))|
+-------------------------------------------------------------------------+

val df = List("POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))", "POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))").toDF("geom")
df.select(st_astext(st_union_agg(col('geom')))).show()
+-------------------------------------------------------------------------+
| st_union_agg(geom)                                                      |
+-------------------------------------------------------------------------+
|POLYGON ((20 15, 20 10, 10 10, 10 20, 15 20, 15 25, 25 25, 25 15, 20 15))|
+-------------------------------------------------------------------------+

WITH geoms ('geom') AS (VALUES ('POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))'), ('POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))'))
SELECT st_astext(st_union_agg(geoms));
+-------------------------------------------------------------------------+
| st_union_agg(geom)                                                      |
+-------------------------------------------------------------------------+
|POLYGON ((20 15, 20 10, 10 10, 10 20, 15 20, 15 25, 25 25, 25 15, 20 15))|
+-------------------------------------------------------------------------+

df.geom <- select(createDataFrame(data.frame(geom = c('POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))'), ('POLYGON ((10 10, 20 10, 20 20, 10 20, 10 10))'))))
showDF(select(st_astext(st_union_agg(column("geom")))), truncate=F)
+-------------------------------------------------------------------------+
| st_union_agg(geom)                                                      |
+-------------------------------------------------------------------------+
|POLYGON ((20 15, 20 10, 10 10, 10 20, 15 20, 15 25, 25 25, 25 15, 20 15))|
+-------------------------------------------------------------------------+

grid_cell_intersection_agg

grid_cell_intersection_agg(chips)

Computes the chip representing the intersection of the input chips.

Parameters:: chips (Column) – Chips
Return type:: Column
Example:

df = df.withColumn("chip", grid_tessellateexplode(...))
df.groupBy("chip.index_id").agg(grid_cell_intersection_agg("chip").alias("agg_chip")).limit(1).show()
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

val df = other_df.withColumn("chip", grid_tessellateexplode(...))
df.groupBy("chip.index_id").agg(grid_cell_intersection_agg("chip").alias("agg_chip")).limit(1).show()
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

WITH chips AS (SELECT grid_tessellateexplode(wkt) AS "chip" FROM ...)
SELECT grid_cell_intersection_agg(chips) AS agg_chip FROM chips GROUP BY chips.index_id;
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

showDF(select(grid_cell_intersection_agg(column("chip"))), truncate=F)
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

grid_cell_union_agg

grid_cell_union_agg(chips)

Computes the chip representing the union of the input chips.

Parameters:: chips (Column) – Chips
Return type:: Column
Example:

df = df.withColumn("chip", grid_tessellateexplode(...))
df.groupBy("chip.index_id").agg(grid_cell_union_agg("chip").alias("agg_chip")).limit(1).show()
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

val df = other_df.withColumn("chip", grid_tessellateexplode(...))
df.groupBy("chip.index_id").agg(grid_cell_union_agg("chip").alias("agg_chip")).limit(1).show()
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

WITH chips AS (SELECT grid_tessellateexplode(wkt) AS "chip" FROM ...)
SELECT grid_cell_union_agg(chips) AS agg_chip FROM chips GROUP BY chips.index_id;
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+

showDF(select(grid_cell_union_agg(column("chip"))), truncate=F)
+--------------------------------------------------------+
| agg_chip                                               |
+--------------------------------------------------------+
|{is_core: false, index_id: 590418571381702655, wkb: ...}|
+--------------------------------------------------------+