diff --git a/examples/example_gmf_titanic.json b/examples/example_gmf_titanic.json
index 2e4a8f4c..7981bfcc 100644
--- a/examples/example_gmf_titanic.json
+++ b/examples/example_gmf_titanic.json
@@ -4,9 +4,9 @@
"provenance": {
"created by": {
"name": "metasyn",
- "version": "0.7.1.dev15+g2ce8291.d20240308"
+ "version": "1.0.1"
},
- "creation time": "2024-03-08T10:54:42.702163"
+ "creation time": "2024-07-11T12:03:55.877693"
},
"vars": [
{
@@ -24,6 +24,9 @@
"lower": 1,
"upper": 892
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -42,6 +45,9 @@
"avg_sentences": 2.4691358024691357,
"avg_words": 4.093153759820426
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -65,6 +71,9 @@
0.6475869809203143
]
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -84,6 +93,9 @@
"mean": 28.403638823278087,
"sd": 15.862325051407092
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -103,6 +115,9 @@
"mean": -380.6440825743838,
"sd": 12.066012048277289
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -156,6 +171,9 @@
]
}
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -172,6 +190,9 @@
"parameters": {
"rate": 0.03052908440177665
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -221,6 +242,9 @@
]
}
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -246,6 +270,9 @@
0.7244094488188977
]
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -263,6 +290,9 @@
"lower": "1903-07-28",
"upper": "1940-05-27"
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -281,6 +311,9 @@
"upper": "18:39:28",
"precision": "seconds"
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -299,6 +332,9 @@
"upper": "2022-08-15T10:32:05",
"precision": "seconds"
}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
},
{
@@ -313,6 +349,9 @@
"class_name": "NADistribution",
"unique": false,
"parameters": {}
+ },
+ "creation_method": {
+ "created_by": "metasyn"
}
}
]
diff --git a/examples/getting_started.ipynb b/examples/getting_started.ipynb
index 9002bc95..4489256d 100644
--- a/examples/getting_started.ipynb
+++ b/examples/getting_started.ipynb
@@ -36,7 +36,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "e4fae59f",
"metadata": {},
"outputs": [],
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "2442cb34",
"metadata": {},
"outputs": [],
@@ -85,7 +85,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"id": "3c2a44b7",
"metadata": {},
"outputs": [
@@ -99,7 +99,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (5, 13)
PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | str |
1 | "Braund, Mr. Ow… | "male" | 22 | 0 | "A/5 21171" | 7.25 | null | "S" | 1937-10-28 | 15:53:04 | 2022-08-05 04:43:34 | null |
2 | "Cumings, Mrs. … | "female" | 38 | 0 | "PC 17599" | 71.2833 | "C85" | "C" | null | 12:26:00 | 2022-08-07 01:56:33 | null |
3 | "Heikkinen, Mis… | "female" | 26 | 0 | "STON/O2. 31012… | 7.925 | null | "S" | 1931-09-24 | 16:08:25 | 2022-08-04 20:27:37 | null |
4 | "Futrelle, Mrs.… | "female" | 35 | 0 | "113803" | 53.1 | "C123" | "S" | 1936-11-30 | null | 2022-08-07 07:05:55 | null |
5 | "Allen, Mr. Wil… | "male" | 35 | 0 | "373450" | 8.05 | null | "S" | 1918-11-07 | 10:59:08 | 2022-08-02 15:13:34 | null |
"
+ "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | str |
1 | "Braund, Mr. Owen Harris" | "male" | 22 | 0 | "A/5 21171" | 7.25 | null | "S" | 1937-10-28 | 15:53:04 | 2022-08-05 04:43:34 | null |
2 | "Cumings, Mrs. John Bradley (Fl… | "female" | 38 | 0 | "PC 17599" | 71.2833 | "C85" | "C" | null | 12:26:00 | 2022-08-07 01:56:33 | null |
3 | "Heikkinen, Miss. Laina" | "female" | 26 | 0 | "STON/O2. 3101282" | 7.925 | null | "S" | 1931-09-24 | 16:08:25 | 2022-08-04 20:27:37 | null |
4 | "Futrelle, Mrs. Jacques Heath (… | "female" | 35 | 0 | "113803" | 53.1 | "C123" | "S" | 1936-11-30 | null | 2022-08-07 07:05:55 | null |
5 | "Allen, Mr. William Henry" | "male" | 35 | 0 | "373450" | 8.05 | null | "S" | 1918-11-07 | 10:59:08 | 2022-08-02 15:13:34 | null |
"
],
"text/plain": [
"shape: (5, 13)\n",
@@ -113,18 +113,18 @@
"│ ┆ Owen Harris ┆ ┆ ┆ ┆ ┆ ┆ 04:43:34 ┆ │\n",
"│ 2 ┆ Cumings, Mrs. ┆ female ┆ 38 ┆ … ┆ null ┆ 12:26:00 ┆ 2022-08-07 ┆ null │\n",
"│ ┆ John Bradley ┆ ┆ ┆ ┆ ┆ ┆ 01:56:33 ┆ │\n",
- "│ ┆ (Flor… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ┆ (Fl… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"│ 3 ┆ Heikkinen, ┆ female ┆ 26 ┆ … ┆ 1931-09-24 ┆ 16:08:25 ┆ 2022-08-04 ┆ null │\n",
"│ ┆ Miss. Laina ┆ ┆ ┆ ┆ ┆ ┆ 20:27:37 ┆ │\n",
"│ 4 ┆ Futrelle, ┆ female ┆ 35 ┆ … ┆ 1936-11-30 ┆ null ┆ 2022-08-07 ┆ null │\n",
"│ ┆ Mrs. Jacques ┆ ┆ ┆ ┆ ┆ ┆ 07:05:55 ┆ │\n",
- "│ ┆ Heath (Li… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ┆ Heath (… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"│ 5 ┆ Allen, Mr. ┆ male ┆ 35 ┆ … ┆ 1918-11-07 ┆ 10:59:08 ┆ 2022-08-02 ┆ null │\n",
"│ ┆ William Henry ┆ ┆ ┆ ┆ ┆ ┆ 15:13:34 ┆ │\n",
"└─────────────┴───────────────┴────────┴─────┴───┴────────────┴────────────┴──────────────┴────────┘"
]
},
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -153,35 +153,35 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "0a5a1aa2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'PassengerId': Int64,\n",
- " 'Name': String,\n",
- " 'Sex': Categorical(ordering='physical'),\n",
- " 'Age': Int64,\n",
- " 'Parch': Int64,\n",
- " 'Ticket': String,\n",
- " 'Fare': Float64,\n",
- " 'Cabin': String,\n",
- " 'Embarked': Categorical(ordering='physical'),\n",
- " 'Birthday': Date,\n",
- " 'Board time': Time,\n",
- " 'Married since': Datetime(time_unit='us', time_zone=None),\n",
- " 'all_NA': String}"
+ "Schema([('PassengerId', Int64),\n",
+ " ('Name', String),\n",
+ " ('Sex', Categorical(ordering='physical')),\n",
+ " ('Age', Int64),\n",
+ " ('Parch', Int64),\n",
+ " ('Ticket', String),\n",
+ " ('Fare', Float64),\n",
+ " ('Cabin', String),\n",
+ " ('Embarked', Categorical(ordering='physical')),\n",
+ " ('Birthday', Date),\n",
+ " ('Board time', Time),\n",
+ " ('Married since', Datetime(time_unit='us', time_zone=None)),\n",
+ " ('all_NA', String)])"
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "dict(zip(df.columns, df.dtypes))"
+ "df.schema"
]
},
{
@@ -194,7 +194,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"id": "c72c2acb55fca193",
"metadata": {
"collapsed": false
@@ -210,7 +210,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (9, 14)statistic | PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
str | f64 | str | str | f64 | f64 | str | f64 | str | str | str | str | str | str |
"count" | 891.0 | "891" | "891" | 714.0 | 891.0 | "891" | 891.0 | "204" | "889" | "813" | "812" | "799" | "0" |
"null_count" | 0.0 | "0" | "0" | 177.0 | 0.0 | "0" | 0.0 | "687" | "2" | "78" | "79" | "92" | "891" |
"mean" | 446.0 | null | null | 29.693277 | 0.381594 | null | 32.204208 | null | null | "1921-07-28" | "14:38:10.01477… | "2022-07-31 03:… | null |
"std" | 257.353842 | null | null | 14.524527 | 0.806057 | null | 49.693429 | null | null | null | null | null | null |
"min" | 1.0 | "Abbing, Mr. An… | null | 0.0 | 0.0 | "110152" | 0.0 | "A10" | null | "1903-07-28" | "10:39:40" | "2022-07-15 12:… | null |
"25%" | 224.0 | null | null | 20.0 | 0.0 | null | 7.925 | null | null | "1911-09-18" | "12:39:02" | "2022-07-23 11:… | null |
"50%" | 446.0 | null | null | 28.0 | 0.0 | null | 14.4542 | null | null | "1922-03-26" | "14:29:34" | "2022-07-31 00:… | null |
"75%" | 669.0 | null | null | 38.0 | 0.0 | null | 31.0 | null | null | "1930-08-29" | "16:40:12" | "2022-08-08 03:… | null |
"max" | 891.0 | "van Melkebeke,… | null | 80.0 | 6.0 | "WE/P 5735" | 512.3292 | "T" | null | "1940-05-27" | "18:39:28" | "2022-08-15 10:… | null |
"
+ "shape: (9, 14)statistic | PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
str | f64 | str | str | f64 | f64 | str | f64 | str | str | str | str | str | str |
"count" | 891.0 | "891" | "891" | 714.0 | 891.0 | "891" | 891.0 | "204" | "889" | "813" | "812" | "799" | "0" |
"null_count" | 0.0 | "0" | "0" | 177.0 | 0.0 | "0" | 0.0 | "687" | "2" | "78" | "79" | "92" | "891" |
"mean" | 446.0 | null | null | 29.693277 | 0.381594 | null | 32.204208 | null | null | "1921-07-27 22:08:24.798000" | "14:38:10.014778" | "2022-07-31 03:43:48.767209" | null |
"std" | 257.353842 | null | null | 14.524527 | 0.806057 | null | 49.693429 | null | null | null | null | null | null |
"min" | 1.0 | "Abbing, Mr. Anthony" | null | 0.0 | 0.0 | "110152" | 0.0 | "A10" | null | "1903-07-28" | "10:39:40" | "2022-07-15 12:21:15" | null |
"25%" | 224.0 | null | null | 20.0 | 0.0 | null | 7.925 | null | null | "1911-09-18" | "12:39:02" | "2022-07-23 11:16:56" | null |
"50%" | 446.0 | null | null | 28.0 | 0.0 | null | 14.4542 | null | null | "1922-03-26" | "14:29:34" | "2022-07-31 00:36:56" | null |
"75%" | 669.0 | null | null | 38.0 | 0.0 | null | 31.0 | null | null | "1930-08-29" | "16:40:12" | "2022-08-08 03:35:52" | null |
"max" | 891.0 | "van Melkebeke, Mr. Philemon" | null | 80.0 | 6.0 | "WE/P 5735" | 512.3292 | "T" | null | "1940-05-27" | "18:39:28" | "2022-08-15 10:32:05" | null |
"
],
"text/plain": [
"shape: (9, 14)\n",
@@ -222,9 +222,9 @@
"╞════════════╪═════════════╪════════════╪══════╪═══╪════════════╪════════════╪════════════╪════════╡\n",
"│ count ┆ 891.0 ┆ 891 ┆ 891 ┆ … ┆ 813 ┆ 812 ┆ 799 ┆ 0 │\n",
"│ null_count ┆ 0.0 ┆ 0 ┆ 0 ┆ … ┆ 78 ┆ 79 ┆ 92 ┆ 891 │\n",
- "│ mean ┆ 446.0 ┆ null ┆ null ┆ … ┆ 1921-07-28 ┆ 14:38:10.0 ┆ 2022-07-31 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ 14778 ┆ 03:43:48.7 ┆ │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 67208 ┆ │\n",
+ "│ mean ┆ 446.0 ┆ null ┆ null ┆ … ┆ 1921-07-27 ┆ 14:38:10.0 ┆ 2022-07-31 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ 22:08:24.7 ┆ 14778 ┆ 03:43:48.7 ┆ │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ 98000 ┆ ┆ 67209 ┆ │\n",
"│ std ┆ 257.353842 ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n",
"│ min ┆ 1.0 ┆ Abbing, ┆ null ┆ … ┆ 1903-07-28 ┆ 10:39:40 ┆ 2022-07-15 ┆ null │\n",
"│ ┆ ┆ Mr. ┆ ┆ ┆ ┆ ┆ 12:21:15 ┆ │\n",
@@ -242,7 +242,7 @@
"└────────────┴─────────────┴────────────┴──────┴───┴────────────┴────────────┴────────────┴────────┘"
]
},
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -287,19 +287,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/qubix/Documents/repos/metasyn/metasyn/metasyn/provider.py:288: UserWarning: \n",
- "Variable PassengerId seems unique, but not set to be unique.\n",
- "Set the variable to be either unique or not unique to remove this warning.\n",
- "\n",
+ " 0%| | 0/13 [00:00, ?it/s]C:\\Users\\erikj\\surfdrive\\SoDa\\projects\\synthetic_data\\metasyn\\metasyn\\provider.py:288: UserWarning: \n",
+ "Variable 'PassengerId' was detected to be unique, but has not explicitly been set to unique.\n",
+ "To generate only unique values for column 'PassengerId', set unique to True.\n",
+ "To dismiss this warning, set unique to False.\n",
" warnings.warn(\n",
- "100%|██████████| 13/13 [00:04<00:00, 2.73it/s]\n"
+ "100%|██████████| 13/13 [00:13<00:00, 1.05s/it]\n"
]
}
],
@@ -563,9 +556,9 @@
" \"provenance\": {\n",
" \"created by\": {\n",
" \"name\": \"metasyn\",\n",
- " \"version\": \"0.7.1.dev15+g2ce8291.d20240308\"\n",
+ " \"version\": \"1.0.1\"\n",
" },\n",
- " \"creation time\": \"2024-03-08T10:54:43.429799\"\n",
+ " \"creation time\": \"2024-07-11T12:04:20.936256\"\n",
" },\n",
" \"vars\": [\n",
" {\n",
@@ -583,6 +576,9 @@
" \"lower\": 1,\n",
" \"upper\": 892\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -601,6 +597,9 @@
" \"avg_sentences\": 2.4691358024691357,\n",
" \"avg_words\": 4.093153759820426\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -624,6 +623,9 @@
" 0.6475869809203143\n",
" ]\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -643,6 +645,9 @@
" \"mean\": 28.403638823278087,\n",
" \"sd\": 15.862325051407092\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -662,6 +667,9 @@
" \"mean\": -380.6440825743838,\n",
" \"sd\": 12.066012048277289\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -715,6 +723,9 @@
" ]\n",
" }\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -731,6 +742,9 @@
" \"parameters\": {\n",
" \"rate\": 0.03052908440177665\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -780,6 +794,9 @@
" ]\n",
" }\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -805,6 +822,9 @@
" 0.7244094488188977\n",
" ]\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -822,6 +842,9 @@
" \"lower\": \"1903-07-28\",\n",
" \"upper\": \"1940-05-27\"\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -840,6 +863,9 @@
" \"upper\": \"18:39:28\",\n",
" \"precision\": \"seconds\"\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -858,6 +884,9 @@
" \"upper\": \"2022-08-15T10:32:05\",\n",
" \"precision\": \"seconds\"\n",
" }\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" },\n",
" {\n",
@@ -872,6 +901,9 @@
" \"class_name\": \"NADistribution\",\n",
" \"unique\": false,\n",
" \"parameters\": {}\n",
+ " },\n",
+ " \"creation_method\": {\n",
+ " \"created_by\": \"metasyn\"\n",
" }\n",
" }\n",
" ]\n",
@@ -966,7 +998,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
556 | "No. Professor.… | "male" | 19 | 0 | "59076" | 39.365498 | null | "S" | 1939-10-16 | 16:02:17 | null | null |
361 | "Writer." | "female" | null | 0 | "60563" | 36.274039 | "B K11" | "S" | 1926-06-24 | 14:56:12 | 2022-07-19 22:53:27 | null |
685 | "Condition. Acc… | "male" | 30 | 0 | "7749" | 7.960395 | null | "S" | 1913-11-26 | 18:34:43 | 2022-08-01 21:52:48 | null |
725 | "Water. Identif… | "male" | 40 | 0 | "72661" | 58.969772 | null | "S" | 1911-10-06 | 12:51:42 | 2022-07-27 23:05:31 | null |
317 | "Before news." | "male" | 46 | 0 | "539996" | 91.913069 | null | "C" | 1939-08-14 | 17:13:38 | 2022-07-20 19:03:40 | null |
"
+ "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
724 | "Street." | "female" | 43 | 0 | "86225" | 103.760852 | null | "C" | 1916-11-18 | 15:57:46 | 2022-08-15 06:43:52 | null |
389 | "Poor. Tend. Loss. Fill." | "male" | 5 | 1 | "167975" | 5.074021 | null | "S" | 1908-12-29 | 15:00:56 | 2022-08-04 01:26:06 | null |
128 | "Force." | "male" | 37 | 0 | "50570" | 1.347821 | null | "S" | 1930-08-31 | 14:44:02 | 2022-07-24 23:16:53 | null |
646 | "Line. Enter. Financial arrive.… | "female" | null | 0 | "8018" | 42.809746 | "E7" | "S" | 1934-09-21 | 16:26:08 | 2022-08-13 16:23:17 | null |
324 | "Tax sit. Benefit. Money." | "male" | 27 | 0 | "803170" | 22.745622 | null | "S" | 1914-05-19 | 18:07:53 | 2022-07-16 14:24:41 | null |
"
],
"text/plain": [
"shape: (5, 13)\n",
@@ -976,20 +1008,18 @@
"│ i64 ┆ str ┆ cat ┆ i64 ┆ ┆ date ┆ time ┆ --- ┆ null │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
"╞═════════════╪══════════════╪════════╪══════╪═══╪════════════╪════════════╪══════════════╪════════╡\n",
- "│ 556 ┆ No. ┆ male ┆ 19 ┆ … ┆ 1939-10-16 ┆ 16:02:17 ┆ null ┆ null │\n",
- "│ ┆ Professor. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ ┆ Open. Black. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ ┆ Buil… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 361 ┆ Writer. ┆ female ┆ null ┆ … ┆ 1926-06-24 ┆ 14:56:12 ┆ 2022-07-19 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 22:53:27 ┆ │\n",
- "│ 685 ┆ Condition. ┆ male ┆ 30 ┆ … ┆ 1913-11-26 ┆ 18:34:43 ┆ 2022-08-01 ┆ null │\n",
- "│ ┆ Accept. ┆ ┆ ┆ ┆ ┆ ┆ 21:52:48 ┆ │\n",
- "│ 725 ┆ Water. ┆ male ┆ 40 ┆ … ┆ 1911-10-06 ┆ 12:51:42 ┆ 2022-07-27 ┆ null │\n",
- "│ ┆ Identify. ┆ ┆ ┆ ┆ ┆ ┆ 23:05:31 ┆ │\n",
- "│ ┆ Still. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ ┆ Father. … ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 317 ┆ Before news. ┆ male ┆ 46 ┆ … ┆ 1939-08-14 ┆ 17:13:38 ┆ 2022-07-20 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 19:03:40 ┆ │\n",
+ "│ 724 ┆ Street. ┆ female ┆ 43 ┆ … ┆ 1916-11-18 ┆ 15:57:46 ┆ 2022-08-15 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 06:43:52 ┆ │\n",
+ "│ 389 ┆ Poor. Tend. ┆ male ┆ 5 ┆ … ┆ 1908-12-29 ┆ 15:00:56 ┆ 2022-08-04 ┆ null │\n",
+ "│ ┆ Loss. Fill. ┆ ┆ ┆ ┆ ┆ ┆ 01:26:06 ┆ │\n",
+ "│ 128 ┆ Force. ┆ male ┆ 37 ┆ … ┆ 1930-08-31 ┆ 14:44:02 ┆ 2022-07-24 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 23:16:53 ┆ │\n",
+ "│ 646 ┆ Line. Enter. ┆ female ┆ null ┆ … ┆ 1934-09-21 ┆ 16:26:08 ┆ 2022-08-13 ┆ null │\n",
+ "│ ┆ Financial ┆ ┆ ┆ ┆ ┆ ┆ 16:23:17 ┆ │\n",
+ "│ ┆ arrive.… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 324 ┆ Tax sit. ┆ male ┆ 27 ┆ … ┆ 1914-05-19 ┆ 18:07:53 ┆ 2022-07-16 ┆ null │\n",
+ "│ ┆ Benefit. ┆ ┆ ┆ ┆ ┆ ┆ 14:24:41 ┆ │\n",
+ "│ ┆ Money. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
"└─────────────┴──────────────┴────────┴──────┴───┴────────────┴────────────┴──────────────┴────────┘"
]
},
@@ -1054,14 +1084,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 13/13 [00:01<00:00, 8.55it/s]\n"
+ "100%|██████████| 13/13 [00:02<00:00, 4.93it/s]\n"
]
},
{
@@ -1076,7 +1099,8 @@
" 'provenance': 'builtin',\n",
" 'class_name': 'UniqueKeyDistribution',\n",
" 'unique': True,\n",
- " 'parameters': {'lower': 1, 'consecutive': True}}}"
+ " 'parameters': {'lower': 1, 'consecutive': True}},\n",
+ " 'creation_method': {'created_by': 'metasyn', 'unique': True}}"
]
},
"execution_count": 13,
@@ -1121,7 +1145,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Art type." | "male" | 55 | 1 | "96907" | 118.288268 | null | "S" | 1906-06-25 | 12:48:10 | 2022-08-10 14:43:26 | null |
2 | "Region." | "male" | null | 0 | "169332" | 33.813913 | null | "S" | 1939-10-29 | null | 2022-08-13 01:06:39 | null |
3 | "Street million… | "male" | 31 | 0 | "377319" | 4.272692 | null | "Q" | 1925-06-22 | 17:55:09 | 2022-07-29 12:36:18 | null |
4 | "Hair. Add." | "male" | 13 | 1 | "350677" | 46.150286 | "E8 " | "S" | 1937-01-16 | 14:20:12 | 2022-07-20 21:05:03 | null |
5 | "Enter performa… | "female" | 34 | 0 | "0465" | 31.544553 | null | "S" | 1937-04-09 | 12:48:26 | 2022-07-30 05:30:37 | null |
"
+ "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | null | cat | date | time | datetime[μs] | null |
1 | "Officer. Together. These." | "male" | 51 | 0 | "38742" | 15.804912 | null | "S" | 1932-06-20 | 18:20:22 | 2022-07-29 05:37:46 | null |
2 | "Seven claim. Assume statement.… | "male" | null | 0 | "3418" | 9.587488 | null | "S" | 1934-01-12 | 15:17:18 | 2022-07-24 03:51:51 | null |
3 | "Act seek and." | "male" | 17 | 0 | "8129" | 5.366794 | null | "S" | 1932-04-09 | 11:54:05 | 2022-07-24 05:24:06 | null |
4 | "Building. Occur. Century." | "male" | 11 | 0 | "121137" | 27.170373 | null | "C" | 1919-12-16 | 11:08:08 | null | null |
5 | "Big. Financial." | "female" | 50 | 0 | "506091" | 3.406364 | null | "S" | 1915-05-27 | null | 2022-08-01 16:27:03 | null |
"
],
"text/plain": [
"shape: (5, 13)\n",
@@ -1131,20 +1155,19 @@
"│ i64 ┆ str ┆ cat ┆ i64 ┆ ┆ date ┆ time ┆ --- ┆ null │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
"╞═════════════╪══════════════╪════════╪══════╪═══╪════════════╪════════════╪══════════════╪════════╡\n",
- "│ 1 ┆ Art type. ┆ male ┆ 55 ┆ … ┆ 1906-06-25 ┆ 12:48:10 ┆ 2022-08-10 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 14:43:26 ┆ │\n",
- "│ 2 ┆ Region. ┆ male ┆ null ┆ … ┆ 1939-10-29 ┆ null ┆ 2022-08-13 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 01:06:39 ┆ │\n",
- "│ 3 ┆ Street ┆ male ┆ 31 ┆ … ┆ 1925-06-22 ┆ 17:55:09 ┆ 2022-07-29 ┆ null │\n",
- "│ ┆ million ┆ ┆ ┆ ┆ ┆ ┆ 12:36:18 ┆ │\n",
- "│ ┆ foot. Tv ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ ┆ word rec… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 4 ┆ Hair. Add. ┆ male ┆ 13 ┆ … ┆ 1937-01-16 ┆ 14:20:12 ┆ 2022-07-20 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 21:05:03 ┆ │\n",
- "│ 5 ┆ Enter ┆ female ┆ 34 ┆ … ┆ 1937-04-09 ┆ 12:48:26 ┆ 2022-07-30 ┆ null │\n",
- "│ ┆ performance ┆ ┆ ┆ ┆ ┆ ┆ 05:30:37 ┆ │\n",
- "│ ┆ glass. Truth ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ ┆ h… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 1 ┆ Officer. ┆ male ┆ 51 ┆ … ┆ 1932-06-20 ┆ 18:20:22 ┆ 2022-07-29 ┆ null │\n",
+ "│ ┆ Together. ┆ ┆ ┆ ┆ ┆ ┆ 05:37:46 ┆ │\n",
+ "│ ┆ These. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 2 ┆ Seven claim. ┆ male ┆ null ┆ … ┆ 1934-01-12 ┆ 15:17:18 ┆ 2022-07-24 ┆ null │\n",
+ "│ ┆ Assume ┆ ┆ ┆ ┆ ┆ ┆ 03:51:51 ┆ │\n",
+ "│ ┆ statement.… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 3 ┆ Act seek ┆ male ┆ 17 ┆ … ┆ 1932-04-09 ┆ 11:54:05 ┆ 2022-07-24 ┆ null │\n",
+ "│ ┆ and. ┆ ┆ ┆ ┆ ┆ ┆ 05:24:06 ┆ │\n",
+ "│ 4 ┆ Building. ┆ male ┆ 11 ┆ … ┆ 1919-12-16 ┆ 11:08:08 ┆ null ┆ null │\n",
+ "│ ┆ Occur. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ┆ Century. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 5 ┆ Big. ┆ female ┆ 50 ┆ … ┆ 1915-05-27 ┆ null ┆ 2022-08-01 ┆ null │\n",
+ "│ ┆ Financial. ┆ ┆ ┆ ┆ ┆ ┆ 16:27:03 ┆ │\n",
"└─────────────┴──────────────┴────────┴──────┴───┴────────────┴────────────┴──────────────┴────────┘"
]
},
@@ -1187,14 +1210,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 13/13 [00:00<00:00, 15.39it/s]\n"
+ "100%|██████████| 13/13 [00:01<00:00, 8.14it/s]\n"
]
},
{
@@ -1207,7 +1223,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Elizabeth Shar… | "female" | null | 0 | "84752" | 6.280015 | null | "S" | 1914-03-29 | null | 2022-07-24 05:25:21 | null |
2 | "Charles Johnso… | "female" | 30 | 0 | "72623" | 16.018423 | null | "S" | null | 16:22:06 | 2022-07-25 16:04:20 | null |
3 | "Joel Morris" | "male" | 21 | 0 | "5712" | 8.486588 | "F98" | "C" | 1924-06-23 | 13:39:40 | 2022-07-31 21:44:26 | null |
4 | "Christopher La… | "male" | 49 | 1 | "34749" | 4.524145 | null | "S" | null | 13:27:56 | 2022-07-29 12:47:16 | null |
5 | "Gary Anderson" | "female" | 26 | 0 | "5421" | 41.586641 | null | "S" | 1939-06-02 | null | 2022-07-19 04:31:32 | null |
"
+ "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Laura Carter" | "male" | 23 | 0 | "802304" | 4.320629 | null | "S" | 1909-12-11 | 14:37:39 | 2022-07-27 09:10:07 | null |
2 | "Brian Meadows" | "male" | null | 0 | "9274" | 37.57741 | "F551" | "S" | 1904-02-24 | 13:10:50 | 2022-07-22 17:18:33 | null |
3 | "Matthew Leon" | "male" | null | 0 | "6217" | 149.902722 | null | "S" | 1932-12-09 | 15:11:17 | 2022-08-11 01:01:23 | null |
4 | "Edward Thomas" | "female" | 27 | 0 | "635722" | 31.938827 | null | "S" | 1909-03-25 | 16:47:52 | 2022-07-21 23:19:18 | null |
5 | "Richard Shepard III" | "male" | null | 0 | "2007" | 132.868876 | null | "S" | 1930-05-08 | 15:47:51 | 2022-07-25 02:54:31 | null |
"
],
"text/plain": [
"shape: (5, 13)\n",
@@ -1217,16 +1233,16 @@
"│ i64 ┆ str ┆ cat ┆ i64 ┆ ┆ date ┆ time ┆ --- ┆ null │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
"╞═════════════╪══════════════╪════════╪══════╪═══╪════════════╪════════════╪══════════════╪════════╡\n",
- "│ 1 ┆ Elizabeth ┆ female ┆ null ┆ … ┆ 1914-03-29 ┆ null ┆ 2022-07-24 ┆ null │\n",
- "│ ┆ Sharp ┆ ┆ ┆ ┆ ┆ ┆ 05:25:21 ┆ │\n",
- "│ 2 ┆ Charles ┆ female ┆ 30 ┆ … ┆ null ┆ 16:22:06 ┆ 2022-07-25 ┆ null │\n",
- "│ ┆ Johnson ┆ ┆ ┆ ┆ ┆ ┆ 16:04:20 ┆ │\n",
- "│ 3 ┆ Joel Morris ┆ male ┆ 21 ┆ … ┆ 1924-06-23 ┆ 13:39:40 ┆ 2022-07-31 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 21:44:26 ┆ │\n",
- "│ 4 ┆ Christopher ┆ male ┆ 49 ┆ … ┆ null ┆ 13:27:56 ┆ 2022-07-29 ┆ null │\n",
- "│ ┆ Lamb ┆ ┆ ┆ ┆ ┆ ┆ 12:47:16 ┆ │\n",
- "│ 5 ┆ Gary ┆ female ┆ 26 ┆ … ┆ 1939-06-02 ┆ null ┆ 2022-07-19 ┆ null │\n",
- "│ ┆ Anderson ┆ ┆ ┆ ┆ ┆ ┆ 04:31:32 ┆ │\n",
+ "│ 1 ┆ Laura Carter ┆ male ┆ 23 ┆ … ┆ 1909-12-11 ┆ 14:37:39 ┆ 2022-07-27 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 09:10:07 ┆ │\n",
+ "│ 2 ┆ Brian ┆ male ┆ null ┆ … ┆ 1904-02-24 ┆ 13:10:50 ┆ 2022-07-22 ┆ null │\n",
+ "│ ┆ Meadows ┆ ┆ ┆ ┆ ┆ ┆ 17:18:33 ┆ │\n",
+ "│ 3 ┆ Matthew Leon ┆ male ┆ null ┆ … ┆ 1932-12-09 ┆ 15:11:17 ┆ 2022-08-11 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 01:01:23 ┆ │\n",
+ "│ 4 ┆ Edward ┆ female ┆ 27 ┆ … ┆ 1909-03-25 ┆ 16:47:52 ┆ 2022-07-21 ┆ null │\n",
+ "│ ┆ Thomas ┆ ┆ ┆ ┆ ┆ ┆ 23:19:18 ┆ │\n",
+ "│ 5 ┆ Richard ┆ male ┆ null ┆ … ┆ 1930-05-08 ┆ 15:47:51 ┆ 2022-07-25 ┆ null │\n",
+ "│ ┆ Shepard III ┆ ┆ ┆ ┆ ┆ ┆ 02:54:31 ┆ │\n",
"└─────────────┴──────────────┴────────┴──────┴───┴────────────┴────────────┴──────────────┴────────┘"
]
},
@@ -1278,14 +1294,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 13/13 [00:00<00:00, 18.73it/s]\n"
+ "100%|██████████| 13/13 [00:01<00:00, 8.92it/s]\n"
]
},
{
@@ -1298,27 +1307,27 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Douglas Campbe… | "female" | 39 | 0 | "46561" | 2.161927 | null | "S" | 1922-07-19 | 12:56:27 | 2022-08-15 09:11:30 | null |
2 | "Richard Smith" | "female" | 33 | 1 | "4444" | 0.539809 | "D6B30" | "S" | 1919-01-06 | 17:07:51 | 2022-08-05 14:44:10 | null |
3 | "Cody Decker" | "male" | 35 | 0 | "708428" | 1.858225 | "C52" | "S" | 1918-08-17 | 17:53:13 | 2022-08-09 01:14:30 | null |
4 | "Kevin Williams… | "male" | 34 | 0 | "34651" | 0.585447 | null | "Q" | 1939-07-15 | 16:06:26 | 2022-08-04 13:08:57 | null |
5 | "Joseph Rhodes" | "female" | 32 | 0 | "1853" | 0.964045 | "F128" | "S" | 1933-11-28 | 11:31:23 | 2022-08-08 11:29:43 | null |
"
+ "shape: (5, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Nicole Sims" | "male" | null | 0 | "3805" | 1.348719 | null | "Q" | 1906-07-03 | 17:27:59 | 2022-07-21 23:26:20 | null |
2 | "Dr. David Taylor" | "female" | 27 | 2 | "581310" | 1.257062 | null | "S" | 1916-01-30 | 14:02:15 | 2022-07-28 03:25:30 | null |
3 | "Brian Kelly" | "female" | null | 0 | "285450" | 1.870011 | "G6 " | "C" | 1931-10-09 | 14:07:41 | 2022-08-14 14:23:07 | null |
4 | "Jennifer Flores" | "female" | 22 | 0 | "951612" | 0.843858 | null | "S" | 1934-12-08 | 16:12:26 | 2022-08-15 02:10:52 | null |
5 | "Grace Gutierrez" | "female" | 30 | 0 | "4978" | 1.145782 | null | "S" | 1917-02-28 | 12:19:20 | 2022-07-18 16:00:16 | null |
"
],
"text/plain": [
"shape: (5, 13)\n",
- "┌─────────────┬───────────────┬────────┬─────┬───┬────────────┬────────────┬──────────────┬────────┐\n",
- "│ PassengerId ┆ Name ┆ Sex ┆ Age ┆ … ┆ Birthday ┆ Board time ┆ Married ┆ all_NA │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ since ┆ --- │\n",
- "│ i64 ┆ str ┆ cat ┆ i64 ┆ ┆ date ┆ time ┆ --- ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
- "╞═════════════╪═══════════════╪════════╪═════╪═══╪════════════╪════════════╪══════════════╪════════╡\n",
- "│ 1 ┆ Douglas ┆ female ┆ 39 ┆ … ┆ 1922-07-19 ┆ 12:56:27 ┆ 2022-08-15 ┆ null │\n",
- "│ ┆ Campbell ┆ ┆ ┆ ┆ ┆ ┆ 09:11:30 ┆ │\n",
- "│ 2 ┆ Richard Smith ┆ female ┆ 33 ┆ … ┆ 1919-01-06 ┆ 17:07:51 ┆ 2022-08-05 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 14:44:10 ┆ │\n",
- "│ 3 ┆ Cody Decker ┆ male ┆ 35 ┆ … ┆ 1918-08-17 ┆ 17:53:13 ┆ 2022-08-09 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 01:14:30 ┆ │\n",
- "│ 4 ┆ Kevin ┆ male ┆ 34 ┆ … ┆ 1939-07-15 ┆ 16:06:26 ┆ 2022-08-04 ┆ null │\n",
- "│ ┆ Williams ┆ ┆ ┆ ┆ ┆ ┆ 13:08:57 ┆ │\n",
- "│ 5 ┆ Joseph Rhodes ┆ female ┆ 32 ┆ … ┆ 1933-11-28 ┆ 11:31:23 ┆ 2022-08-08 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 11:29:43 ┆ │\n",
- "└─────────────┴───────────────┴────────┴─────┴───┴────────────┴────────────┴──────────────┴────────┘"
+ "┌─────────────┬──────────────┬────────┬──────┬───┬────────────┬────────────┬──────────────┬────────┐\n",
+ "│ PassengerId ┆ Name ┆ Sex ┆ Age ┆ … ┆ Birthday ┆ Board time ┆ Married ┆ all_NA │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ since ┆ --- │\n",
+ "│ i64 ┆ str ┆ cat ┆ i64 ┆ ┆ date ┆ time ┆ --- ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
+ "╞═════════════╪══════════════╪════════╪══════╪═══╪════════════╪════════════╪══════════════╪════════╡\n",
+ "│ 1 ┆ Nicole Sims ┆ male ┆ null ┆ … ┆ 1906-07-03 ┆ 17:27:59 ┆ 2022-07-21 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 23:26:20 ┆ │\n",
+ "│ 2 ┆ Dr. David ┆ female ┆ 27 ┆ … ┆ 1916-01-30 ┆ 14:02:15 ┆ 2022-07-28 ┆ null │\n",
+ "│ ┆ Taylor ┆ ┆ ┆ ┆ ┆ ┆ 03:25:30 ┆ │\n",
+ "│ 3 ┆ Brian Kelly ┆ female ┆ null ┆ … ┆ 1931-10-09 ┆ 14:07:41 ┆ 2022-08-14 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 14:23:07 ┆ │\n",
+ "│ 4 ┆ Jennifer ┆ female ┆ 22 ┆ … ┆ 1934-12-08 ┆ 16:12:26 ┆ 2022-08-15 ┆ null │\n",
+ "│ ┆ Flores ┆ ┆ ┆ ┆ ┆ ┆ 02:10:52 ┆ │\n",
+ "│ 5 ┆ Grace ┆ female ┆ 30 ┆ … ┆ 1917-02-28 ┆ 12:19:20 ┆ 2022-07-18 ┆ null │\n",
+ "│ ┆ Gutierrez ┆ ┆ ┆ ┆ ┆ ┆ 16:00:16 ┆ │\n",
+ "└─────────────┴──────────────┴────────┴──────┴───┴────────────┴────────────┴──────────────┴────────┘"
]
},
"execution_count": 16,
@@ -1360,14 +1369,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 13/13 [00:00<00:00, 22.62it/s]\n"
+ "100%|██████████| 13/13 [00:00<00:00, 13.51it/s]\n"
]
},
{
@@ -1380,7 +1382,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (10, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Alicia Zuniga" | "male" | null | 0 | "71922" | 1.835239 | null | "Q" | 1905-04-18 | 15:21:04 | 2022-08-13 22:56:40 | null |
2 | "Julie Roberts" | "male" | 34 | 0 | "HFQ 129053" | 1.641983 | "D93" | "C" | 1926-05-05 | 16:24:45 | null | null |
3 | "Rebekah Copela… | "male" | 24 | 0 | "55625" | 2.940962 | null | "S" | null | 15:45:54 | null | null |
4 | "Jeffrey Gardne… | "male" | 21 | 0 | "72453" | 0.709158 | null | "S" | 1932-04-04 | 17:21:49 | null | null |
5 | "Heather Ward" | "female" | null | 0 | "49197" | 1.360882 | null | "S" | 1922-12-05 | 14:57:29 | 2022-07-27 01:17:00 | null |
6 | "Gwendolyn Rand… | "female" | 33 | 0 | "455533" | 2.591159 | "F344" | "S" | 1927-08-21 | 12:08:05 | 2022-07-26 07:56:55 | null |
7 | "Donna Crawford… | "male" | null | 0 | "20577" | 0.65255 | null | "S" | 1928-07-10 | null | 2022-08-09 20:23:45 | null |
8 | "Tamara Contrer… | "female" | 30 | 0 | "105811" | 4.251059 | null | "S" | 1922-11-19 | 16:28:20 | 2022-07-31 22:53:26 | null |
9 | "Stephanie Grif… | "male" | 38 | 0 | "991613" | 1.06173 | "A01" | "S" | 1914-02-21 | 16:43:33 | 2022-08-12 21:28:24 | null |
10 | "Krystal Kelly" | "female" | null | 1 | "697485" | 1.684701 | "D22" | "S" | 1933-11-13 | 16:46:48 | null | null |
"
+ "shape: (10, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
i64 | str | cat | i64 | i64 | str | f64 | str | cat | date | time | datetime[μs] | null |
1 | "Kathy Crawford" | "male" | 33 | 0 | "36003" | 1.797109 | null | "C" | 1928-07-19 | 17:40:17 | 2022-07-23 02:35:47 | null |
2 | "Toni Rodriguez" | "female" | 37 | 0 | "7260" | 1.832633 | null | "S" | 1915-01-17 | 17:06:54 | 2022-07-29 08:50:29 | null |
3 | "Jesse Lopez" | "female" | 26 | 0 | "265335" | 0.598759 | null | "S" | 1935-10-16 | 12:27:44 | 2022-08-09 13:10:57 | null |
4 | "Ashley Wheeler" | "female" | null | 0 | "41223" | 0.121366 | "F635" | "S" | 1935-08-27 | 14:44:39 | 2022-08-12 10:31:37 | null |
5 | "Edward Jones" | "female" | 27 | 0 | "4287" | 0.685862 | null | "S" | 1929-07-02 | 13:57:05 | 2022-07-23 09:33:00 | null |
6 | "Scott Abbott" | "female" | null | 0 | "5060" | 2.251193 | null | "C" | 1915-07-05 | 14:30:59 | 2022-08-07 17:07:11 | null |
7 | "Samantha Reyes" | "female" | 28 | 0 | "74697" | 0.813593 | "C06" | "S" | 1938-11-28 | 16:23:54 | 2022-07-27 06:23:42 | null |
8 | "Danielle Chaney" | "male" | 31 | 0 | "848674" | 1.765307 | null | "S" | 1922-12-13 | 15:33:10 | 2022-07-21 04:47:26 | null |
9 | "Rebecca Wade" | "female" | 31 | 0 | "OR 00654" | 0.398557 | "D488" | "S" | 1939-07-10 | 13:27:52 | null | null |
10 | "Steven Rice" | "male" | 23 | 0 | "223076" | 0.906104 | null | "S" | null | 12:59:28 | 2022-08-09 03:06:18 | null |
"
],
"text/plain": [
"shape: (10, 13)\n",
@@ -1390,26 +1392,25 @@
"│ i64 ┆ str ┆ cat ┆ i64 ┆ ┆ date ┆ time ┆ --- ┆ null │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
"╞═════════════╪══════════════╪════════╪══════╪═══╪════════════╪════════════╪══════════════╪════════╡\n",
- "│ 1 ┆ Alicia ┆ male ┆ null ┆ … ┆ 1905-04-18 ┆ 15:21:04 ┆ 2022-08-13 ┆ null │\n",
- "│ ┆ Zuniga ┆ ┆ ┆ ┆ ┆ ┆ 22:56:40 ┆ │\n",
- "│ 2 ┆ Julie ┆ male ┆ 34 ┆ … ┆ 1926-05-05 ┆ 16:24:45 ┆ null ┆ null │\n",
- "│ ┆ Roberts ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 3 ┆ Rebekah ┆ male ┆ 24 ┆ … ┆ null ┆ 15:45:54 ┆ null ┆ null │\n",
- "│ ┆ Copeland ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 4 ┆ Jeffrey ┆ male ┆ 21 ┆ … ┆ 1932-04-04 ┆ 17:21:49 ┆ null ┆ null │\n",
- "│ ┆ Gardner ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
- "│ 5 ┆ Heather Ward ┆ female ┆ null ┆ … ┆ 1922-12-05 ┆ 14:57:29 ┆ 2022-07-27 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 01:17:00 ┆ │\n",
- "│ 6 ┆ Gwendolyn ┆ female ┆ 33 ┆ … ┆ 1927-08-21 ┆ 12:08:05 ┆ 2022-07-26 ┆ null │\n",
- "│ ┆ Randall ┆ ┆ ┆ ┆ ┆ ┆ 07:56:55 ┆ │\n",
- "│ 7 ┆ Donna ┆ male ┆ null ┆ … ┆ 1928-07-10 ┆ null ┆ 2022-08-09 ┆ null │\n",
- "│ ┆ Crawford ┆ ┆ ┆ ┆ ┆ ┆ 20:23:45 ┆ │\n",
- "│ 8 ┆ Tamara ┆ female ┆ 30 ┆ … ┆ 1922-11-19 ┆ 16:28:20 ┆ 2022-07-31 ┆ null │\n",
- "│ ┆ Contreras ┆ ┆ ┆ ┆ ┆ ┆ 22:53:26 ┆ │\n",
- "│ 9 ┆ Stephanie ┆ male ┆ 38 ┆ … ┆ 1914-02-21 ┆ 16:43:33 ┆ 2022-08-12 ┆ null │\n",
- "│ ┆ Griffin MD ┆ ┆ ┆ ┆ ┆ ┆ 21:28:24 ┆ │\n",
- "│ 10 ┆ Krystal ┆ female ┆ null ┆ … ┆ 1933-11-13 ┆ 16:46:48 ┆ null ┆ null │\n",
- "│ ┆ Kelly ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 1 ┆ Kathy ┆ male ┆ 33 ┆ … ┆ 1928-07-19 ┆ 17:40:17 ┆ 2022-07-23 ┆ null │\n",
+ "│ ┆ Crawford ┆ ┆ ┆ ┆ ┆ ┆ 02:35:47 ┆ │\n",
+ "│ 2 ┆ Toni ┆ female ┆ 37 ┆ … ┆ 1915-01-17 ┆ 17:06:54 ┆ 2022-07-29 ┆ null │\n",
+ "│ ┆ Rodriguez ┆ ┆ ┆ ┆ ┆ ┆ 08:50:29 ┆ │\n",
+ "│ 3 ┆ Jesse Lopez ┆ female ┆ 26 ┆ … ┆ 1935-10-16 ┆ 12:27:44 ┆ 2022-08-09 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 13:10:57 ┆ │\n",
+ "│ 4 ┆ Ashley ┆ female ┆ null ┆ … ┆ 1935-08-27 ┆ 14:44:39 ┆ 2022-08-12 ┆ null │\n",
+ "│ ┆ Wheeler ┆ ┆ ┆ ┆ ┆ ┆ 10:31:37 ┆ │\n",
+ "│ 5 ┆ Edward Jones ┆ female ┆ 27 ┆ … ┆ 1929-07-02 ┆ 13:57:05 ┆ 2022-07-23 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 09:33:00 ┆ │\n",
+ "│ 6 ┆ Scott Abbott ┆ female ┆ null ┆ … ┆ 1915-07-05 ┆ 14:30:59 ┆ 2022-08-07 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 17:07:11 ┆ │\n",
+ "│ 7 ┆ Samantha ┆ female ┆ 28 ┆ … ┆ 1938-11-28 ┆ 16:23:54 ┆ 2022-07-27 ┆ null │\n",
+ "│ ┆ Reyes ┆ ┆ ┆ ┆ ┆ ┆ 06:23:42 ┆ │\n",
+ "│ 8 ┆ Danielle ┆ male ┆ 31 ┆ … ┆ 1922-12-13 ┆ 15:33:10 ┆ 2022-07-21 ┆ null │\n",
+ "│ ┆ Chaney ┆ ┆ ┆ ┆ ┆ ┆ 04:47:26 ┆ │\n",
+ "│ 9 ┆ Rebecca Wade ┆ female ┆ 31 ┆ … ┆ 1939-07-10 ┆ 13:27:52 ┆ null ┆ null │\n",
+ "│ 10 ┆ Steven Rice ┆ male ┆ 23 ┆ … ┆ null ┆ 12:59:28 ┆ 2022-08-09 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 03:06:18 ┆ │\n",
"└─────────────┴──────────────┴────────┴──────┴───┴────────────┴────────────┴──────────────┴────────┘"
]
},
@@ -1472,18 +1473,20 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (1, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
f64 | str | cat | f64 | f64 | str | f64 | str | cat | date | time | datetime[μs] | str |
446.0 | null | null | 29.693277 | 0.381594 | null | 32.204208 | null | null | null | null | 2022-07-31 03:43:48.767209 | null |
"
+ "shape: (1, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
f64 | str | cat | f64 | f64 | str | f64 | str | cat | datetime[ms] | time | datetime[μs] | str |
446.0 | null | null | 29.693277 | 0.381594 | null | 32.204208 | null | null | 1921-07-27 22:08:24.798 | 14:38:10.014778325 | 2022-07-31 03:43:48.767209 | null |
"
],
"text/plain": [
"shape: (1, 13)\n",
- "┌─────────────┬──────┬──────┬───────────┬───┬──────────┬────────────┬─────────────────┬────────┐\n",
- "│ PassengerId ┆ Name ┆ Sex ┆ Age ┆ … ┆ Birthday ┆ Board time ┆ Married since ┆ all_NA │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
- "│ f64 ┆ str ┆ cat ┆ f64 ┆ ┆ date ┆ time ┆ datetime[μs] ┆ str │\n",
- "╞═════════════╪══════╪══════╪═══════════╪═══╪══════════╪════════════╪═════════════════╪════════╡\n",
- "│ 446.0 ┆ null ┆ null ┆ 29.693277 ┆ … ┆ null ┆ null ┆ 2022-07-31 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 03:43:48.767209 ┆ │\n",
- "└─────────────┴──────┴──────┴───────────┴───┴──────────┴────────────┴─────────────────┴────────┘"
+ "┌─────────────┬──────┬──────┬───────────┬───┬───────────────┬──────────────┬──────────────┬────────┐\n",
+ "│ PassengerId ┆ Name ┆ Sex ┆ Age ┆ … ┆ Birthday ┆ Board time ┆ Married ┆ all_NA │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ since ┆ --- │\n",
+ "│ f64 ┆ str ┆ cat ┆ f64 ┆ ┆ datetime[ms] ┆ time ┆ --- ┆ str │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
+ "╞═════════════╪══════╪══════╪═══════════╪═══╪═══════════════╪══════════════╪══════════════╪════════╡\n",
+ "│ 446.0 ┆ null ┆ null ┆ 29.693277 ┆ … ┆ 1921-07-27 ┆ 14:38:10.014 ┆ 2022-07-31 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ 22:08:24.798 ┆ 778325 ┆ 03:43:48.767 ┆ │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 209 ┆ │\n",
+ "└─────────────┴──────┴──────┴───────────┴───┴───────────────┴──────────────┴──────────────┴────────┘"
]
},
"execution_count": 18,
@@ -1497,7 +1500,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 22,
"id": "0ba303f3",
"metadata": {},
"outputs": [
@@ -1511,27 +1514,29 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (1, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
f64 | str | cat | f64 | f64 | str | f64 | str | cat | date | time | datetime[μs] | null |
446.0 | null | null | 29.353521 | 0.08642 | null | 1.631487 | null | null | null | null | 2022-07-30 16:27:19.689393 | null |
"
+ "shape: (1, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
f64 | str | cat | f64 | f64 | str | f64 | str | cat | datetime[ms] | time | datetime[μs] | null |
446.0 | null | null | 29.61708 | 0.075196 | null | 1.728378 | null | null | 1921-10-25 02:59:33.068 | 14:37:11.317224287 | 2022-07-30 23:39:23.192874 | null |
"
],
"text/plain": [
"shape: (1, 13)\n",
- "┌─────────────┬──────┬──────┬───────────┬───┬──────────┬────────────┬─────────────────┬────────┐\n",
- "│ PassengerId ┆ Name ┆ Sex ┆ Age ┆ … ┆ Birthday ┆ Board time ┆ Married since ┆ all_NA │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
- "│ f64 ┆ str ┆ cat ┆ f64 ┆ ┆ date ┆ time ┆ datetime[μs] ┆ null │\n",
- "╞═════════════╪══════╪══════╪═══════════╪═══╪══════════╪════════════╪═════════════════╪════════╡\n",
- "│ 446.0 ┆ null ┆ null ┆ 29.353521 ┆ … ┆ null ┆ null ┆ 2022-07-30 ┆ null │\n",
- "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 16:27:19.689393 ┆ │\n",
- "└─────────────┴──────┴──────┴───────────┴───┴──────────┴────────────┴─────────────────┴────────┘"
+ "┌─────────────┬──────┬──────┬──────────┬───┬───────────────┬───────────────┬──────────────┬────────┐\n",
+ "│ PassengerId ┆ Name ┆ Sex ┆ Age ┆ … ┆ Birthday ┆ Board time ┆ Married ┆ all_NA │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ since ┆ --- │\n",
+ "│ f64 ┆ str ┆ cat ┆ f64 ┆ ┆ datetime[ms] ┆ time ┆ --- ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ │\n",
+ "╞═════════════╪══════╪══════╪══════════╪═══╪═══════════════╪═══════════════╪══════════════╪════════╡\n",
+ "│ 446.0 ┆ null ┆ null ┆ 29.61708 ┆ … ┆ 1921-10-25 ┆ 14:37:11.3172 ┆ 2022-07-30 ┆ null │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ 02:59:33.068 ┆ 24287 ┆ 23:39:23.192 ┆ │\n",
+ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 874 ┆ │\n",
+ "└─────────────┴──────┴──────┴──────────┴───┴───────────────┴───────────────┴──────────────┴────────┘"
]
},
- "execution_count": 19,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "mf.synthesize(len(df)).mean()"
+ "mf.synthesize().mean()"
]
},
{
@@ -1544,7 +1549,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 23,
"id": "c6ca2918",
"metadata": {},
"outputs": [
@@ -1571,7 +1576,7 @@
"└─────────────┴──────┴─────┴─────┴───┴──────────┴────────────┴───────────────┴────────┘"
]
},
- "execution_count": 20,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1582,7 +1587,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 24,
"id": "1dd596fb",
"metadata": {},
"outputs": [
@@ -1596,7 +1601,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (1, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 |
0 | 0 | 0 | 171 | 0 | 0 | 0 | 673 | 1 | 74 | 71 | 96 | 891 |
"
+ "shape: (1, 13)PassengerId | Name | Sex | Age | Parch | Ticket | Fare | Cabin | Embarked | Birthday | Board time | Married since | all_NA |
---|
u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 | u32 |
0 | 0 | 0 | 174 | 0 | 0 | 0 | 658 | 2 | 74 | 83 | 81 | 891 |
"
],
"text/plain": [
"shape: (1, 13)\n",
@@ -1605,17 +1610,17 @@
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ u32 ┆ u32 ┆ u32 ┆ ┆ u32 ┆ u32 ┆ u32 ┆ u32 │\n",
"╞═════════════╪══════╪═════╪═════╪═══╪══════════╪════════════╪═══════════════╪════════╡\n",
- "│ 0 ┆ 0 ┆ 0 ┆ 171 ┆ … ┆ 74 ┆ 71 ┆ 96 ┆ 891 │\n",
+ "│ 0 ┆ 0 ┆ 0 ┆ 174 ┆ … ┆ 74 ┆ 83 ┆ 81 ┆ 891 │\n",
"└─────────────┴──────┴─────┴─────┴───┴──────────┴────────────┴───────────────┴────────┘"
]
},
- "execution_count": 21,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "mf.synthesize(len(df)).null_count()"
+ "mf.synthesize().null_count()"
]
},
{
@@ -1642,7 +1647,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 25,
"id": "e37faad4df8ffde8",
"metadata": {
"collapsed": false
@@ -1652,14 +1657,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 13/13 [00:00<00:00, 22.52it/s]\n"
+ "100%|██████████| 13/13 [00:00<00:00, 14.49it/s]\n"
]
}
],
@@ -1696,7 +1694,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 26,
"id": "694d4474707f7950",
"metadata": {
"collapsed": false
@@ -1726,7 +1724,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 27,
"id": "34a8aafc95c0219f",
"metadata": {
"collapsed": false
@@ -1758,7 +1756,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 28,
"id": "3b2e873f7362160a",
"metadata": {
"collapsed": false
@@ -1794,7 +1792,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 29,
"id": "ab2b34ed6b11578c",
"metadata": {
"collapsed": false
@@ -1846,7 +1844,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.6"
+ "version": "3.11.3"
},
"vscode": {
"interpreter": {
diff --git a/metasyn/distribution/categorical.py b/metasyn/distribution/categorical.py
index c71a6c7d..bad52dfa 100644
--- a/metasyn/distribution/categorical.py
+++ b/metasyn/distribution/categorical.py
@@ -112,7 +112,7 @@ def _log_like_int(
https://en.wikipedia.org/wiki/Additive_smoothing
with parameter alpha = 1. This effectively says that all values in between the observed
- values have a probability of 1/N, where N is the number of total values. Hoewever, to
+ values have a probability of 1/N, where N is the number of total values. However, to
prevent confusion (and potentially improve the synthesis) is draw from these unobserved
values. That is why the information criterion for integers is not a true BIC, but a pseudo
information criterion.
diff --git a/pyproject.toml b/pyproject.toml
index 30ba16b1..e9449a2f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ classifiers = [
]
dependencies = [
- "polars>=0.20.31",
+ "polars>=1",
"tqdm",
"numpy>=1.20",
"pyarrow", # Dependency of polars since we're converting from pandas.
diff --git a/tests/test_var.py b/tests/test_var.py
index a5ecd111..b7b3f1fe 100644
--- a/tests/test_var.py
+++ b/tests/test_var.py
@@ -240,8 +240,8 @@ def test_na_two(series):
@mark.parametrize(
"series",
- [pd.Series(np.random.randint(0, 100000, size=1000)),
- pl.Series(np.random.randint(0, 100000, size=1000))]
+ [pd.Series(np.random.randint(0, 1000, size=500)),
+ pl.Series(np.random.randint(0, 1000, size=500))]
)
def test_manual_unique_integer(series):
var = MetaVar.fit(series)