databricks.labs.dqx.profiler.profile_builder
make_null_or_empty_profile
@register_profile_builder("null_or_empty")
def make_null_or_empty_profile(
_: DataFrame, column_name: str, column_type: T.DataType,
profiler_metrics: dict[str, Any],
profiler_options: dict[str, Any]) -> DQProfile | None
Creates an 'is_not_null_or_empty', 'is_not_null', or 'is_not_empty' profile by checking the input column type, profiled metrics, and profiler options.
Arguments:
- column_name - Input column name
- column_type - Input column type
- profiler_metrics - Column-level statistics computed by the DQProfiler
- profiler_options - Configuration options for the DQProfiler
Returns:
A DQProfile if the correct conditions are met, otherwise None
make_is_in_profile
@register_profile_builder("is_in")
def make_is_in_profile(df: DataFrame, column_name: str,
column_type: T.DataType, profiler_metrics: dict[str,
Any],
profiler_options: dict[str, Any]) -> DQProfile | None
Creates an 'is_in' profile by checking the input column type, profiled metrics, and profiler options.
Arguments:
- df - Single-column DataFrame
- column_name - Input column name
- column_type - Input column type
- profiler_metrics - Column-level statistics computed by the DQProfiler
- profiler_options - Configuration options for the DQProfiler
Returns:
A DQProfile if the correct conditions are met, otherwise None
make_min_max_profile
@register_profile_builder("min_max")
def make_min_max_profile(df: DataFrame, column_name: str,
column_type: T.DataType, profiler_metrics: dict[str,
Any],
profiler_options: dict[str, Any]) -> DQProfile | None
Creates a 'min_max' profile by checking the input column type, profiled metrics, and profiler options.
Arguments:
- df - Single-column DataFrame
- column_name - Input column name (used for DQProfile output)
- column_type - Input column type
- profiler_metrics - Column-level statistics computed by the DQProfiler (includes summary stats)
- profiler_options - Configuration options for the DQProfiler
Returns:
A DQProfile if the correct conditions are met, otherwise None