databricks.labs.dqx.profiler.generator
DQGenerator Objects
class DQGenerator(DQEngineBase)
generate_dq_rules
def generate_dq_rules(profiles: list[DQProfile] | None = None,
level: str = "error") -> list[dict]
Generates a list of data quality rules based on the provided dq profiles.
Arguments:
- `profiles` - A list of data quality profiles to generate rules for.
- `level` - The criticality level of the rules (default is "error").
Returns:
A list of dictionaries representing the data quality rules.
dq_generate_is_in
@staticmethod
def dq_generate_is_in(column: str, level: str = "error", **params: dict)
Generates a data quality rule to check if a column's value is in a specified list.
Arguments:
- `column` - The name of the column to check.
- `level` - The criticality level of the rule (default is "error").
- `params` - Additional parameters, including the list of values to check against.
Returns:
A dictionary representing the data quality rule.
dq_generate_min_max
@staticmethod
def dq_generate_min_max(column: str, level: str = "error", **params: dict)
Generates a data quality rule to check if a column's value is within a specified range.
Arguments:
- `column` - The name of the column to check.
- `level` - The criticality level of the rule (default is "error").
- `params` - Additional parameters, including the minimum and maximum values.
Returns:
A dictionary representing the data quality rule, or None if no limits are provided.
dq_generate_is_not_null
@staticmethod
def dq_generate_is_not_null(column: str, level: str = "error", **params: dict)
Generates a data quality rule to check if a column's value is not null.
Arguments:
- `column` - The name of the column to check.
- `level` - The criticality level of the rule (default is "error").
- `params` - Additional parameters.
Returns:
A dictionary representing the data quality rule.
dq_generate_is_not_null_or_empty
@staticmethod
def dq_generate_is_not_null_or_empty(column: str,
level: str = "error",
**params: dict)
Generates a data quality rule to check that a column's value is neither null nor empty.
Arguments:
- `column` - The name of the column to check.
- `level` - The criticality level of the rule (default is "error").
- `params` - Additional parameters, including whether to trim strings.
Returns:
A dictionary representing the data quality rule.