
Python API Reference

GeoBrix provides Python bindings through PySpark, offering Pythonic access to all GeoBrix functionality.

Import Patterns

RasterX

from databricks.labs.gbx.rasterx import functions as rx

# Register functions
rx.register(spark)

raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)

# Use functions
df = rasters.select(rx.rst_boundingbox("tile"))
df.limit(1).show(truncate=False)
Example output
+--------------------------------------------------+----------------------------------+
|path |bbox |
+--------------------------------------------------+----------------------------------+
|.../nyc_sentinel2/nyc_sentinel2_red.tif |POLYGON ((-74.26 40.49, ...)) |
+--------------------------------------------------+----------------------------------+

GridX (BNG)

from databricks.labs.gbx.gridx.bng import functions as bx

# Register functions
bx.register(spark)

# Use functions (gbx_bng_cellarea returns square kilometres)
df = spark.sql("SELECT gbx_bng_cellarea('TQ', 1000) as area_km2")
df.show()
Example output
+----------+
|area_km2 |
+----------+
|1.0 |
+----------+

VectorX

from databricks.labs.gbx.vectorx.jts.legacy import functions as vx

# Register functions
vx.register(spark)

# Load legacy data
legacy_data = spark.table("legacy_geometries")

# Use functions
df = legacy_data.select(vx.st_legacyaswkb("mosaic_geom"))
Example output
+-----------+
|wkb |
+-----------+
|[BINARY] |
+-----------+

RasterX Functions

Accessor Functions

Get raster properties and metadata.

rst_boundingbox(tile)

Get the bounding box of a raster.

from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
bbox_df = rasters.select(
"path",
rx.rst_boundingbox("tile").alias("bbox")
)
bbox_df.limit(3).show()
Example output
+----------------------------------------------------------+----------------------------------+
|path |bbox |
+----------------------------------------------------------+----------------------------------+
|.../nyc/sentinel2/nyc_sentinel2_red.tif |POLYGON ((-74.26 40.49, ...)) |
+----------------------------------------------------------+----------------------------------+

rst_width(tile)

Get raster width in pixels.

from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
width_df = rasters.select(rx.rst_width("tile").alias("width"))
width_df.limit(1).show()
Example output
+------+
|width |
+------+
|10980 |
+------+

rst_height(tile)

Get raster height in pixels.

from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
height_df = rasters.select(rx.rst_height("tile").alias("height"))
height_df.limit(1).show()
Example output
+------+
|height|
+------+
|10980 |
+------+

rst_numbands(tile)

Get number of bands in raster.

from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
bands_df = rasters.select(rx.rst_numbands("tile").alias("num_bands"))
bands_df.limit(1).show()
Example output
+---------+
|num_bands|
+---------+
|1 |
+---------+

rst_metadata(tile)

Get raster metadata.

from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
metadata_df = rasters.select(rx.rst_metadata("tile").alias("metadata"))
metadata_df.limit(1).show(truncate=False)
Example output
+------------------+
|metadata |
+------------------+
|{driver=GTiff,...}|
+------------------+

rst_srid(tile)

Get spatial reference identifier.

from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
srid_df = rasters.select(rx.rst_srid("tile").alias("srid"))
srid_df.limit(1).show()
Example output
+-----+
|srid |
+-----+
|32618|
+-----+

Transformation Functions

Transform and manipulate rasters.

rst_clip(tile, geometry, cutline_all_touched)

Clip raster by geometry. The geometry must be WKT (string) or WKB (binary); do not use st_geomfromtext() or other DBR native geometry, as GeoBrix does not accept it.

from databricks.labs.gbx.rasterx import functions as rx
from pyspark.sql.functions import lit

rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
# NYC-area polygon (lon/lat, matching the sample raster's bounding box) so the clip actually intersects
clip_wkt = "POLYGON((-74.05 40.70, -74.05 40.80, -73.90 40.80, -73.90 40.70, -74.05 40.70))"
clipped = rasters.select(
rx.rst_clip("tile", lit(clip_wkt), lit(True)).alias("clipped_tile")
)
clipped.limit(1).show(truncate=False)
Example output
+----------------------------------+
|clipped_tile |
+----------------------------------+
|[STRUCT cellid, raster, metadata] |
+----------------------------------+

Complete Example

from databricks.labs.gbx.rasterx import functions as rx

# Register functions
rx.register(spark)

raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)

# Extract metadata and process
result = rasters.select(
"path",
rx.rst_boundingbox("tile").alias("bbox"),
rx.rst_width("tile").alias("width"),
rx.rst_height("tile").alias("height"),
rx.rst_numbands("tile").alias("bands"),
rx.rst_metadata("tile").alias("metadata")
).filter(
"width > 1000 AND height > 1000"
)

result.limit(3).show()
Example output
+----------------------------------------------------------+-----+------+-----+------------------+
|path |bbox |width |... |metadata |
+----------------------------------------------------------+-----+------+-----+------------------+
|.../nyc/sentinel2/nyc_sentinel2_red.tif |... |10980 |... |{driver=GTiff,...}|
+----------------------------------------------------------+-----+------+-----+------------------+

GridX Functions

BNG Functions

British National Grid functions.

bng_cellarea(grid_letter, precision)

Calculate area of a BNG grid cell.

from databricks.labs.gbx.gridx.bng import functions as bx
bx.register(spark)

# Calculate cell area (result in km²)
area = spark.sql("SELECT gbx_bng_cellarea('TQ', 1000) as area_km2")
area.show()
Example output
+----------+
|area_km2 |
+----------+
|1.0 |
+----------+

bng_pointascell(point, resolution) (point to BNG cell)

Convert point geometry to BNG grid cell. The point must be WKT or WKB; do not use st_point() or other DBR native geometry.

from databricks.labs.gbx.gridx.bng import functions as bx
from pyspark.sql.functions import lit

bx.register(spark)
# Point in BNG coordinates (eastings, northings); resolution '1km' or integer 3
df = spark.range(1).select(
bx.bng_pointascell(lit("POINT(530000 180000)"), lit("1km")).alias("bng_cell")
)
df.show()
Example output
+----------+
|bng_cell |
+----------+
|TQ 30 80 |
+----------+

Complete Example

from databricks.labs.gbx.gridx.bng import functions as bx

# Register functions
bx.register(spark)

# Aggregate points by BNG cell (point as WKT in SQL)
result = spark.sql("""
SELECT
gbx_bng_pointascell(concat('POINT(', cast(longitude as string), ' ', cast(latitude as string), ')'), 1000) as bng_cell,
COUNT(*) as point_count,
AVG(value) as avg_value
FROM measurements
WHERE country = 'GB'
GROUP BY bng_cell
""")

result.write.mode("overwrite").saveAsTable("bng_aggregated")
Example output
+----------+-----------+---------+
|bng_cell |point_count|avg_value|
+----------+-----------+---------+
|TQ3080 |42 |15.3 |
+----------+-----------+---------+

VectorX Functions

Complete Example

Databricks Runtime integration

This example uses st_geomfromwkb, st_isvalid, and st_area and requires Databricks Runtime 17.1+ (or Databricks SQL with ST support). The example and its integration test live under docs/tests-dbr/.

from databricks.labs.gbx.vectorx.jts.legacy import functions as vx
from pyspark.sql.functions import col, expr

vx.register(spark)
legacy_table = spark.table("legacy_geometries")
migrated = legacy_table.select(
col("*"),
expr("st_geomfromwkb(gbx_st_legacyaswkb(mosaic_geom))").alias("geometry")
).select(
col("feature_id"),
col("properties"),
col("geometry"),
expr("st_isvalid(geometry)").alias("is_valid"),
expr("st_area(geometry)").alias("area")
).filter(col("is_valid") == True)
Example output
DataFrame migrated: feature_id, properties, geometry, is_valid, area (valid rows only).

Next Steps