Python API Reference
GeoBrix provides Python bindings through PySpark, offering Pythonic access to all GeoBrix functionality.
Import Patterns
RasterX
from databricks.labs.gbx.rasterx import functions as rx
# Register functions
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
# Use functions
df = rasters.select("path", rx.rst_boundingbox("tile").alias("bbox"))
df.limit(1).show(truncate=False)
+--------------------------------------------------+----------------------------------+
|path |bbox |
+--------------------------------------------------+----------------------------------+
|.../nyc_sentinel2/nyc_sentinel2_red.tif |POLYGON ((-74.26 40.49, ...)) |
+--------------------------------------------------+----------------------------------+
GridX (BNG)
from databricks.labs.gbx.gridx.bng import functions as bx
# Register functions
bx.register(spark)
# Use functions (gbx_bng_cellarea returns square kilometres)
df = spark.sql("SELECT gbx_bng_cellarea('TQ', 1000) as area_km2")
df.show()
+----------+
|area_km2 |
+----------+
|1.0 |
+----------+
VectorX
from databricks.labs.gbx.vectorx.jts.legacy import functions as vx
# Register functions
vx.register(spark)
# Load legacy data
legacy_data = spark.table("legacy_geometries")
# Use functions
df = legacy_data.select(vx.st_legacyaswkb("mosaic_geom").alias("wkb"))
df.show()
+-----------+
|wkb |
+-----------+
|[BINARY] |
+-----------+
RasterX Functions
Accessor Functions
Get raster properties and metadata.
rst_boundingbox(tile)
Get the bounding box of a raster.
from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
bbox_df = rasters.select(
"path",
rx.rst_boundingbox("tile").alias("bbox")
)
bbox_df.limit(3).show()
+----------------------------------------------------------+----------------------------------+
|path |bbox |
+----------------------------------------------------------+----------------------------------+
|.../nyc/sentinel2/nyc_sentinel2_red.tif |POLYGON ((-74.26 40.49, ...)) |
+----------------------------------------------------------+----------------------------------+
rst_width(tile)
Get raster width in pixels.
from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
width_df = rasters.select(rx.rst_width("tile").alias("width"))
width_df.limit(1).show()
+------+
|width |
+------+
|10980 |
+------+
rst_height(tile)
Get raster height in pixels.
from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
height_df = rasters.select(rx.rst_height("tile").alias("height"))
height_df.limit(1).show()
+------+
|height|
+------+
|10980 |
+------+
rst_numbands(tile)
Get number of bands in raster.
from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
bands_df = rasters.select(rx.rst_numbands("tile").alias("num_bands"))
bands_df.limit(1).show()
+---------+
|num_bands|
+---------+
|1 |
+---------+
rst_metadata(tile)
Get raster metadata.
from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
metadata_df = rasters.select(rx.rst_metadata("tile").alias("metadata"))
metadata_df.limit(1).show(truncate=False)
+------------------+
|metadata |
+------------------+
|{driver=GTiff,...}|
+------------------+
rst_srid(tile)
Get spatial reference identifier.
from databricks.labs.gbx.rasterx import functions as rx
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
srid_df = rasters.select(rx.rst_srid("tile").alias("srid"))
srid_df.limit(1).show()
+-----+
|srid |
+-----+
|32618|
+-----+
Transformation Functions
Transform and manipulate rasters.
rst_clip(tile, geometry, cutline_all_touched)
Clip raster by geometry. The geometry must be WKT (string) or WKB (binary); do not use st_geomfromtext() or other DBR native geometry, as GeoBrix does not accept it.
from databricks.labs.gbx.rasterx import functions as rx
from pyspark.sql.functions import lit
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
# Clip geometry must overlap the raster extent (NYC sample; lon/lat order)
clip_wkt = "POLYGON((-74.3 40.4, -74.3 41.0, -73.6 41.0, -73.6 40.4, -74.3 40.4))"
clipped = rasters.select(
rx.rst_clip("tile", lit(clip_wkt), lit(True)).alias("clipped_tile")
)
clipped.limit(1).show(truncate=False)
+----------------------------------+
|clipped_tile |
+----------------------------------+
|[STRUCT cellid, raster, metadata] |
+----------------------------------+
Complete Example
from databricks.labs.gbx.rasterx import functions as rx
from pyspark.sql.functions import expr
# Register functions
rx.register(spark)
raster_path = SAMPLE_NYC_RASTER
rasters = spark.read.format("gdal").load(raster_path)
# Extract metadata and process
result = rasters.select(
"path",
rx.rst_boundingbox("tile").alias("bbox"),
rx.rst_width("tile").alias("width"),
rx.rst_height("tile").alias("height"),
rx.rst_numbands("tile").alias("bands"),
rx.rst_metadata("tile").alias("metadata")
).filter(
"width > 1000 AND height > 1000"
)
result.limit(3).show()
+----------------------------------------------------------+-----+------+-----+------------------+
|path |bbox |width |... |metadata |
+----------------------------------------------------------+-----+------+-----+------------------+
|.../nyc/sentinel2/nyc_sentinel2_red.tif |... |10980 |... |{driver=GTiff,...}|
+----------------------------------------------------------+-----+------+-----+------------------+
GridX Functions
BNG Functions
British National Grid functions.
bng_cellarea(grid_letter, precision)
Calculate area of a BNG grid cell.
from databricks.labs.gbx.gridx.bng import functions as bx
bx.register(spark)
# Calculate cell area (result in km²)
area = spark.sql("SELECT gbx_bng_cellarea('TQ', 1000) as area_km2")
area.show()
+----------+
|area_km2 |
+----------+
|1.0 |
+----------+
bng_pointascell(point, resolution) (point to BNG cell)
Convert point geometry to BNG grid cell. The point must be WKT or WKB; do not use st_point() or other DBR native geometry.
from databricks.labs.gbx.gridx.bng import functions as bx
from pyspark.sql.functions import lit
bx.register(spark)
# Point in BNG coordinates (eastings, northings); resolution '1km' or integer 3
df = spark.range(1).select(
bx.bng_pointascell(lit("POINT(530000 180000)"), lit("1km")).alias("bng_cell")
)
df.show()
+----------+
|bng_cell |
+----------+
|TQ 30 80 |
+----------+
Complete Example
from databricks.labs.gbx.gridx.bng import functions as bx
from pyspark.sql.functions import count
# Register functions
bx.register(spark)
# Aggregate points by BNG cell (point as WKT in SQL)
result = spark.sql("""
SELECT
    gbx_bng_pointascell(concat('POINT(', cast(longitude as string), ' ', cast(latitude as string), ')'), '1km') as bng_cell,
COUNT(*) as point_count,
AVG(value) as avg_value
FROM measurements
WHERE country = 'GB'
GROUP BY bng_cell
""")
result.write.mode("overwrite").saveAsTable("bng_aggregated")
+----------+-----------+---------+
|bng_cell |point_count|avg_value|
+----------+-----------+---------+
|TQ3080 |42 |15.3 |
+----------+-----------+---------+
VectorX Functions
Complete Example
This example uses st_geomfromwkb, st_isvalid, and st_area and requires Databricks Runtime 17.1+ (or Databricks SQL with ST support). The example and its integration test live under docs/tests-dbr/.
from databricks.labs.gbx.vectorx.jts.legacy import functions as vx
from pyspark.sql.functions import col, expr
vx.register(spark)
legacy_table = spark.table("legacy_geometries")
migrated = legacy_table.select(
col("*"),
expr("st_geomfromwkb(gbx_st_legacyaswkb(mosaic_geom))").alias("geometry")
).select(
col("feature_id"),
col("properties"),
col("geometry"),
expr("st_isvalid(geometry)").alias("is_valid"),
expr("st_area(geometry)").alias("area")
).filter(col("is_valid"))
Table migrated_features: feature_id, properties, geometry, is_valid, area (valid rows only).