databricks.labs.dqx.cli
open_remote_config
@dqx.command
def open_remote_config(w: WorkspaceClient,
*,
ctx: WorkspaceContext | None = None)
Opens remote configuration in the browser.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- ctx: The WorkspaceContext instance to use for accessing the workspace.
open_dashboards
@dqx.command
def open_dashboards(w: WorkspaceClient,
*,
ctx: WorkspaceContext | None = None)
Opens remote dashboard directory in the browser.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- ctx: The WorkspaceContext instance to use for accessing the workspace.
installations
@dqx.command
def installations(w: WorkspaceClient,
*,
product_name: str = "dqx") -> list[dict]
Show installations by different users on the same workspace.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- product_name: The name of the product to search for in the installation folder.
validate_checks
@dqx.command
def validate_checks(w: WorkspaceClient,
*,
run_config: str = "",
validate_custom_check_functions: bool = True,
ctx: WorkspaceContext | None = None) -> list[dict]
Validate checks stored in a workspace file or volume.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- run_config: The name of the run configuration to use. If not provided, run it for all run configs.
- validate_custom_check_functions: Whether to validate custom check functions (default is True).
- ctx: The WorkspaceContext instance to use for accessing the workspace.
profile
@dqx.command
def profile(w: WorkspaceClient,
*,
run_config: str = "",
patterns: str = "",
exclude_patterns: str = "",
timeout_minutes: int = 30,
ctx: WorkspaceContext | None = None) -> None
Profile input data and generate quality rule (checks) candidates.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- run_config: The name of the run configuration to use. If not provided, run it for all run configs.
- patterns: Semicolon-separated list of location patterns (with wildcards) to profile. If provided, location fields in the run config are ignored. Requires a run config to be provided which is used as a template for other fields.
- exclude_patterns: Semicolon-separated list of location patterns to exclude. Useful to skip existing output and quarantine tables based on suffixes.
- timeout_minutes: The timeout for the workflow run in minutes (default is 30).
- ctx: The WorkspaceContext instance to use for accessing the workspace.
apply_checks
@dqx.command
def apply_checks(w: WorkspaceClient,
*,
run_config: str = "",
patterns: str = "",
exclude_patterns: str = "",
output_table_suffix: str = "_dq_output",
quarantine_table_suffix: str = "_dq_quarantine",
timeout_minutes: int = 30,
ctx: WorkspaceContext | None = None) -> None
Apply data quality checks to the input data and save the results.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- run_config: The name of the run configuration to use. If not provided, run it for all run configs.
- patterns: Semicolon-separated list of location patterns (with wildcards) to profile. If provided, location fields in the run config are ignored. Requires a run config to be provided which is used as a template for other fields.
- exclude_patterns: Semicolon-separated list of location patterns to exclude. Useful to skip existing output and quarantine tables based on suffixes.
- output_table_suffix: Suffix to append to the output table names (default is "_dq_output").
- quarantine_table_suffix: Suffix to append to the quarantine table names (default is "_dq_quarantine").
- timeout_minutes: The timeout for the workflow run in minutes (default is 30).
- ctx: The WorkspaceContext instance to use for accessing the workspace.
e2e
@dqx.command
def e2e(w: WorkspaceClient,
*,
run_config: str = "",
patterns: str = "",
exclude_patterns: str = "",
output_table_suffix: str = "_dq_output",
quarantine_table_suffix: str = "_dq_quarantine",
timeout_minutes: int = 60,
ctx: WorkspaceContext | None = None) -> None
Run the end-to-end workflow to:
- profile input data and generate quality checks candidates
- apply the generated quality checks
- save the results to the output table and optionally quarantine table (based on the run config)
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- run_config: The name of the run configuration to use. If not provided, run it for all run configs.
- patterns: Semicolon-separated list of location patterns (with wildcards) to profile. If provided, location fields in the run config are ignored. Requires a run config to be provided which is used as a template for other fields.
- exclude_patterns: Semicolon-separated list of location patterns to exclude. Useful to skip existing output and quarantine tables based on suffixes.
- output_table_suffix: Suffix to append to the output table names (default is "_dq_output").
- quarantine_table_suffix: Suffix to append to the quarantine table names (default is "_dq_quarantine").
- timeout_minutes: The timeout for the workflow run in minutes (default is 60).
- ctx: The WorkspaceContext instance to use for accessing the workspace.
workflows
@dqx.command
def workflows(w: WorkspaceClient, *, ctx: WorkspaceContext | None = None)
Show deployed workflows and their state.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- ctx: The WorkspaceContext instance to use for accessing the workspace.
logs
@dqx.command
def logs(w: WorkspaceClient,
*,
workflow: str | None = None,
ctx: WorkspaceContext | None = None)
Show logs of the latest job run.
Arguments:
- w: The WorkspaceClient instance to use for accessing the workspace.
- workflow: The name of the workflow to show logs for.
- ctx: The WorkspaceContext instance to use for accessing the workspace.