Databricks Labs Data Generator
Getting Started
Get Started Here
Installation instructions
Generating column data
Using standard datasets
Using data ranges
Generating text data
Using data distributions
Options for column specification
Repeatable Data Generation
Revisiting the IOT data example
Using constraints to control data generation
Using streaming data
Generating JSON and structured column data
Generating synthetic data from existing data
Generating Change Data Capture (CDC) data
Using multiple tables
Extending text generation
Use with Delta Live Tables
Troubleshooting data generation
API
Quick API index
The dbldatagen package API
Development
Contributing to the Databricks Labs Data Generator
Building the code
Testing
Using the Databricks Labs data generator
Coding Style
Change log
Build requirements
License
License
Databricks Labs Data Generator
Index
Index
A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | R | S | T | U | W
A
addEntry() (Datasets.NavigatorNode method)
adjustForColumnDatatype() (DataRange method)
(DateRange method)
(NRange method)
ALLOWED_OPTIONS (BasicGeometriesProvider attribute)
(BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
(BenchmarkGroupByProvider attribute)
allowed_options() (DatasetProvider static method)
alpha (Beta property)
associatedDatasets (DatasetProvider.DatasetDefinition attribute)
autoComputePartitions() (DatasetProvider method)
B
baseColumn (ColumnGenerationSpec property)
baseColumns (ColumnGenerationSpec property)
BasicGeometriesProvider (class in dbldatagen.datasets.basic_geometries)
BasicProcessHistorianProvider (class in dbldatagen.datasets.basic_process_historian)
BasicTelematicsProvider (class in dbldatagen.datasets.basic_telematics)
BasicUserProvider (class in dbldatagen.datasets.basic_user)
begin (ColumnGenerationSpec property)
BenchmarkGroupByProvider (class in dbldatagen.datasets.benchmark_groupby)
beta (Beta property)
Beta (class in dbldatagen.distributions.beta)
beta_func() (Beta static method)
build() (DataGenerator method)
build_order (DataGenerator property)
C
ChainedRelation (class in dbldatagen.constraints.chained_relation)
checkBoolOption() (ColumnSpecOptions method)
checkExclusiveOptions() (ColumnSpecOptions method)
checkOptions() (DatasetProvider method)
checkOptionValues() (ColumnSpecOptions method)
checkValidColumnProperties() (ColumnSpecOptions method)
classicGenerateText() (ILText method)
(TemplateGenerator method)
clone() (DataGenerator method)
coalesce_values() (in module dbldatagen.utils)
COLUMN_COUNT (BasicGeometriesProvider attribute)
(BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
(BasicUserProvider attribute)
(BenchmarkGroupByProvider attribute)
ColumnGenerationSpec (class in dbldatagen.column_generation_spec)
ColumnGeneratorBuilder (class in dbldatagen.function_builder)
ColumnSpecOptions (class in dbldatagen.column_spec_options)
columnsReferencesFromSQLString() (SchemaParser class method)
columnTypeFromString() (SchemaParser class method)
compactNumpyTypeForValues() (TextGenerator static method)
computeBuildPlan() (DataGenerator method)
computeDateRange() (DateRange class method)
computeTimestampIntervals() (DateRange method)
computeTimestampRange() (DateRange class method)
Constraint (class in dbldatagen.constraints.constraint)
CUSTOMER_MIN_VALUE (MultiTableTelephonyProvider attribute)
D
DataAnalyzer (class in dbldatagen.data_analyzer)
DataDistribution (class in dbldatagen.distributions.data_distribution)
DataGenerator (class in dbldatagen.data_generator)
DataGenError
DataRange (class in dbldatagen.datarange)
dataset_definition() (in module dbldatagen.datasets.dataset_provider)
DatasetProvider (class in dbldatagen.datasets.dataset_provider)
DatasetProvider.DatasetDecoratorUtils (class in dbldatagen.datasets.dataset_provider)
DatasetProvider.DatasetDefinition (class in dbldatagen.datasets.dataset_provider)
DatasetProvider.NoAssociatedDatasetsMixin (class in dbldatagen.datasets.dataset_provider)
Datasets (class in dbldatagen.datasets_object)
Datasets.NavigatorNode (class in dbldatagen.datasets_object)
datatype (ColumnGenerationSpec property)
DateRange (class in dbldatagen.daterange)
dbldatagen
module
dbldatagen.column_generation_spec
module
dbldatagen.column_spec_options
module
dbldatagen.constraints
module
dbldatagen.constraints.chained_relation
module
dbldatagen.constraints.constraint
module
dbldatagen.constraints.literal_range_constraint
module
dbldatagen.constraints.literal_relation_constraint
module
dbldatagen.constraints.negative_values
module
dbldatagen.constraints.positive_values
module
dbldatagen.constraints.ranged_values_constraint
module
dbldatagen.constraints.sql_expr
module
dbldatagen.constraints.unique_combinations
module
dbldatagen.data_analyzer
module
dbldatagen.data_generator
module
dbldatagen.datagen_constants
module
dbldatagen.datarange
module
dbldatagen.datasets
module
dbldatagen.datasets.basic_geometries
module
dbldatagen.datasets.basic_process_historian
module
dbldatagen.datasets.basic_telematics
module
dbldatagen.datasets.basic_user
module
dbldatagen.datasets.benchmark_groupby
module
dbldatagen.datasets.dataset_provider
module
dbldatagen.datasets.multi_table_telephony_provider
module
dbldatagen.datasets_object
module
dbldatagen.daterange
module
dbldatagen.distributions
module
dbldatagen.distributions.beta
module
dbldatagen.distributions.data_distribution
module
dbldatagen.distributions.exponential_distribution
module
dbldatagen.distributions.gamma
module
dbldatagen.distributions.normal_distribution
module
dbldatagen.function_builder
module
dbldatagen.html_utils
module
dbldatagen.nrange
module
dbldatagen.schema_parser
module
dbldatagen.spark_singleton
module
dbldatagen.text_generator_plugins
module
dbldatagen.text_generators
module
dbldatagen.utils
module
DEFAULT_AVG_EVENTS_PER_CUSTOMER (MultiTableTelephonyProvider attribute)
DEFAULT_DATE_FORMAT (DateRange attribute)
DEFAULT_END_DATE (DateRange attribute)
DEFAULT_END_DATE_TIMESTAMP (DateRange attribute)
DEFAULT_END_TIMESTAMP (BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
(DateRange attribute)
DEFAULT_MAX_LAT (BasicTelematicsProvider attribute)
DEFAULT_MAX_LON (BasicTelematicsProvider attribute)
DEFAULT_MIN_LAT (BasicTelematicsProvider attribute)
DEFAULT_MIN_LON (BasicTelematicsProvider attribute)
DEFAULT_NUM_CUSTOMERS (MultiTableTelephonyProvider attribute)
DEFAULT_NUM_DAYS (MultiTableTelephonyProvider attribute)
DEFAULT_NUM_DEVICES (BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
DEFAULT_NUM_GROUPS (BenchmarkGroupByProvider attribute)
DEFAULT_NUM_PLANS (MultiTableTelephonyProvider attribute)
DEFAULT_NUM_PLANTS (BasicProcessHistorianProvider attribute)
DEFAULT_NUM_TAGS (BasicProcessHistorianProvider attribute)
DEFAULT_PARTITIONS (DatasetProvider attribute)
DEFAULT_PCT_NULLS (BenchmarkGroupByProvider attribute)
DEFAULT_ROWS (DatasetProvider attribute)
DEFAULT_START_DATE (DateRange attribute)
DEFAULT_START_DATE_TIMESTAMP (DateRange attribute)
DEFAULT_START_TIMESTAMP (BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
(DateRange attribute)
DEFAULT_TABLE_NAME (DatasetProvider attribute)
DEFAULT_UTC_TS_FORMAT (DateRange attribute)
deprecated() (in module dbldatagen.utils)
describe() (DataGenerator method)
(Datasets class method)
description (DatasetProvider.DatasetDefinition attribute)
DEVICE_MIN_VALUE (MultiTableTelephonyProvider attribute)
E
end (ColumnGenerationSpec property)
ensure() (in module dbldatagen.utils)
explain() (DataGenerator method)
Exponential (class in dbldatagen.distributions.exponential_distribution)
exponential_func() (Exponential static method)
expr (ColumnGenerationSpec property)
exprs (ColumnGenerationSpec property)
F
fakerText() (in module dbldatagen.text_generator_plugins)
FakerTextFactory (class in dbldatagen.text_generator_plugins)
filterExpression (Constraint property)
find() (Datasets.NavigatorNode method)
flatten() (DataGenerator static method)
formatCodeAsHtml() (HtmlUtils class method)
formatTextAsHtml() (HtmlUtils class method)
G
Gamma (class in dbldatagen.distributions.gamma)
gamma_func() (Gamma static method)
generateName() (DataGenerator class method)
generateNormalizedDistributionSample() (Beta method)
(DataDistribution method)
(Exponential method)
(Gamma method)
(Normal method)
generateText() (ILText method)
get() (Datasets method)
get_np_random_generator() (DataDistribution static method)
getAssociatedDataset() (DatasetProvider method)
(DatasetProvider.NoAssociatedDatasetsMixin method)
(Datasets method)
(MultiTableTelephonyProvider method)
getAsTupleOrElse() (TextGenerator static method)
getColumnSpec() (DataGenerator method)
getColumnType() (DataGenerator method)
getCombinedDataset() (Datasets method)
getContinuousRange() (DataRange method)
(DateRange method)
(NRange method)
getCustomers() (MultiTableTelephonyProvider method)
getDatasetDefinition() (DatasetProvider class method)
getDatasetTables() (DatasetProvider class method)
getDeviceEvents() (MultiTableTelephonyProvider method)
getDiscreteRange() (DataRange method)
(DateRange method)
(NRange method)
getEnrichedDataset() (Datasets method)
getInferredColumnNames() (DataGenerator method)
getInstance() (SparkSingleton class method)
getLocalInstance() (SparkSingleton class method)
getNames() (ColumnGenerationSpec method)
getNamesAndTypes() (ColumnGenerationSpec method)
getNPRandomGenerator() (TextGenerator method)
getOrElse() (ColumnGenerationSpec method)
(ColumnSpecOptions method)
getOutputColumnNames() (DataGenerator method)
getOutputColumnNamesAndTypes() (DataGenerator method)
getPlanEntry() (ColumnGenerationSpec method)
getPlans() (MultiTableTelephonyProvider method)
getProviderDefinitions() (Datasets class method)
getRegisteredDatasets() (DatasetProvider class method)
getRegisteredDatasetsVersion() (DatasetProvider class method)
getScale() (DataRange method)
(DateRange method)
(NRange method)
getSummaryDataset() (Datasets method)
getSupportingDataset() (Datasets method)
getTableGenerator() (BasicGeometriesProvider method)
(BasicProcessHistorianProvider method)
(BasicTelematicsProvider method)
(BasicUserProvider method)
(BenchmarkGroupByProvider method)
(DatasetProvider method)
(MultiTableTelephonyProvider method)
getTypeDefinitionParser() (SchemaParser class method)
H
hasColumnSpec() (DataGenerator method)
HtmlUtils (class in dbldatagen.html_utils)
I
ILText (class in dbldatagen.text_generators)
inferDatatype (ColumnGenerationSpec property)
inferredSchema (DataGenerator property)
interval (ColumnGenerationSpec property)
isEmpty() (DataRange method)
(DateRange method)
(NRange method)
isFieldExplicitlyDefined() (DataGenerator method)
isFieldOmitted (ColumnGenerationSpec property)
isFinal() (Datasets.NavigatorNode method)
isFullyPopulated() (DataRange method)
(DateRange method)
(NRange method)
isRandom (ColumnGenerationSpec property)
isValidDataProviderType() (DatasetProvider class method)
isWeightedValuesColumn (ColumnGenerationSpec property)
J
json_value_from_path() (in module dbldatagen.utils)
K
keys() (ColumnGenerationSpec method)
L
list() (Datasets class method)
LiteralRange (class in dbldatagen.constraints.literal_range_constraint)
LiteralRelation (class in dbldatagen.constraints.literal_relation_constraint)
M
makeGenerationExpressions() (ColumnGenerationSpec method)
max (ColumnGenerationSpec property)
(DataRange property)
MAX_DEVICE_ID (BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
MAX_LOCATION_ID (BasicGeometriesProvider attribute)
MAX_LONG (BasicUserProvider attribute)
(BenchmarkGroupByProvider attribute)
(MultiTableTelephonyProvider attribute)
MAX_PROPERTY_VALUE (BasicProcessHistorianProvider attribute)
min (ColumnGenerationSpec property)
(DataRange property)
MIN_DEVICE_ID (BasicProcessHistorianProvider attribute)
(BasicTelematicsProvider attribute)
MIN_LOCATION_ID (BasicGeometriesProvider attribute)
MIN_PROPERTY_VALUE (BasicProcessHistorianProvider attribute)
mkBoundsList() (in module dbldatagen.utils)
mkClass() (DatasetProvider.DatasetDecoratorUtils method)
mkCombinedConstraintExpression() (Constraint static method)
mkExprChoicesFn() (ColumnGeneratorBuilder class method)
module
dbldatagen
dbldatagen.column_generation_spec
dbldatagen.column_spec_options
dbldatagen.constraints
dbldatagen.constraints.chained_relation
dbldatagen.constraints.constraint
dbldatagen.constraints.literal_range_constraint
dbldatagen.constraints.literal_relation_constraint
dbldatagen.constraints.negative_values
dbldatagen.constraints.positive_values
dbldatagen.constraints.ranged_values_constraint
dbldatagen.constraints.sql_expr
dbldatagen.constraints.unique_combinations
dbldatagen.data_analyzer
dbldatagen.data_generator
dbldatagen.datagen_constants
dbldatagen.datarange
dbldatagen.datasets
dbldatagen.datasets.basic_geometries
dbldatagen.datasets.basic_process_historian
dbldatagen.datasets.basic_telematics
dbldatagen.datasets.basic_user
dbldatagen.datasets.benchmark_groupby
dbldatagen.datasets.dataset_provider
dbldatagen.datasets.multi_table_telephony_provider
dbldatagen.datasets_object
dbldatagen.daterange
dbldatagen.distributions
dbldatagen.distributions.beta
dbldatagen.distributions.data_distribution
dbldatagen.distributions.exponential_distribution
dbldatagen.distributions.gamma
dbldatagen.distributions.normal_distribution
dbldatagen.function_builder
dbldatagen.html_utils
dbldatagen.nrange
dbldatagen.schema_parser
dbldatagen.spark_singleton
dbldatagen.text_generator_plugins
dbldatagen.text_generators
dbldatagen.utils
MultiTableTelephonyProvider (class in dbldatagen.datasets.multi_table_telephony_provider)
N
name (DatasetProvider.DatasetDefinition attribute)
NegativeValues (class in dbldatagen.constraints.negative_values)
NoFilterMixin (class in dbldatagen.constraints.constraint)
NoPrepareTransformMixin (class in dbldatagen.constraints.constraint)
Normal (class in dbldatagen.distributions.normal_distribution)
normal_func() (Normal static method)
NRange (class in dbldatagen.nrange)
numColumns (ColumnGenerationSpec property)
numFeatures (ColumnGenerationSpec property)
O
option() (DataGenerator method)
options (ColumnSpecOptions property)
options() (DataGenerator method)
P
pandasGenerateText() (ILText method)
(PyfuncText method)
(TemplateGenerator method)
parse_time_interval() (in module dbldatagen.utils)
parseCreateTable() (SchemaParser class method)
parseInterval() (DateRange class method)
PLAN_MIN_VALUE (MultiTableTelephonyProvider attribute)
PositiveValues (class in dbldatagen.constraints.positive_values)
prefix (ColumnGenerationSpec property)
prepareDataGenerator() (Constraint method)
(NoPrepareTransformMixin method)
(UniqueCombinations method)
primaryTable (DatasetProvider.DatasetDefinition attribute)
providerClass (DatasetProvider.DatasetDefinition attribute)
PyfuncText (class in dbldatagen.text_generator_plugins)
PyfuncTextFactory (class in dbldatagen.text_generator_plugins)
R
random (DataGenerator property)
randomSeed (ColumnGenerationSpec property)
(DataDistribution property)
(DataGenerator property)
(TextGenerator property)
RangedValues (class in dbldatagen.constraints.ranged_values_constraint)
rate (Exponential property)
registerDataset() (DatasetProvider class method)
reset() (DataGenerator class method)
rounding (DataDistribution property)
rowCount (DataGenerator property)
S
scale (Exponential property)
(Gamma property)
schema (DataGenerator property)
schemaFields (DataGenerator property)
SchemaParser (class in dbldatagen.schema_parser)
scriptDataGeneratorFromData() (DataAnalyzer method)
scriptDataGeneratorFromSchema() (DataAnalyzer class method)
scriptMerge() (DataGenerator method)
scriptTable() (DataGenerator method)
seedColumnName (DataGenerator property)
setBaseColumnDatatypes() (ColumnGenerationSpec method)
setRowCount() (DataGenerator method)
shape (Gamma property)
SparkSingleton (class in dbldatagen.spark_singleton)
specOptions (ColumnGenerationSpec property)
split_list_matching_condition() (in module dbldatagen.utils)
SqlExpr (class in dbldatagen.constraints.sql_expr)
standardNormal() (Normal class method)
step (ColumnGenerationSpec property)
strip_margins() (in module dbldatagen.utils)
structType() (ColumnGenerationSpec method)
SUBSCRIBER_NUM_MIN_VALUE (MultiTableTelephonyProvider attribute)
suffix (ColumnGenerationSpec property)
summarize() (DataAnalyzer method)
summarizeToDF() (DataAnalyzer method)
summary (DatasetProvider.DatasetDefinition attribute)
SUPPORTED_OPERATORS (Constraint attribute)
supportsStreaming (Constraint property)
(DatasetProvider.DatasetDefinition attribute)
system_time_millis() (in module dbldatagen.utils)
T
tables (DatasetProvider.DatasetDefinition attribute)
TemplateGenerator (class in dbldatagen.text_generators)
templates (TemplateGenerator property)
text_separator (ColumnGenerationSpec property)
TextGenerator (class in dbldatagen.text_generators)
textGenerator (ColumnGenerationSpec property)
topologicalSort() (in module dbldatagen.utils)
transformDataframe() (Constraint method)
(NoPrepareTransformMixin method)
(UniqueCombinations method)
U
UniqueCombinations (class in dbldatagen.constraints.unique_combinations)
unregisterDataset() (DatasetProvider class method)
use_seed() (DataGenerator method)
useSeed() (DataGenerator class method)
W
withColumn() (DataGenerator method)
withColumnSpec() (DataGenerator method)
withColumnSpecs() (DataGenerator method)
withConstraint() (DataGenerator method)
withConstraints() (DataGenerator method)
withIdOutput() (DataGenerator method)
withInit() (PyfuncTextFactory method)
withInitPerBatch() (PyfuncTextFactory method)
withRandomSeed() (DataDistribution method)
(TextGenerator method)
withRootProperty() (PyfuncTextFactory method)
withRounding() (DataDistribution method)
withRowCount() (DataGenerator method)
withSchema() (DataGenerator method)
withSqlConstraint() (DataGenerator method)
withStructColumn() (DataGenerator method)