Skip to content

Commit

Permalink
Remove stray print statement and add .toml general comment. (#343)
Browse files: browse the repository at this point in the history
  • Loading branch information
qubixes authored Nov 21, 2024
1 parent 04d3901 commit c423382
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
22 changes: 16 additions & 6 deletions metasyn/metaframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,18 @@ def save_toml(self, fp: Optional[Union[pathlib.Path, str]],

doc = tomlkit.loads(tomlkit.dumps(self_dict))
doc["n_rows"].comment("Number of rows")
doc["n_columns"].comment("Number of columns")
doc["n_columns"].comment("""Number of columns
# This is a metadata file with (limited) statistical information about each column separately in
# a dataset. No information about correlations or other relationships between columns is included.
# This file can be used to generate privacy-conscious synthetic data, which consequently has zero
# expected correlations and relationships between columns.
# For each column, the statistics can be either manually specified or estimated from real data.
# This information, including how the estimation was done, is shown in the metadata below.
#
# For more information, see https://github.com/sodascience/metasyn
"""
)
for i in range(self.n_columns):
var = self.meta_vars[i]
doc["vars"][i].comment(f"Metadata for column with name {var.name}")
Expand All @@ -388,10 +399,11 @@ def save_toml(self, fp: Optional[Union[pathlib.Path, str]],
and
np.all(var.distribution.labels == var.distribution.default_distribution().labels)
)
print(var.name, multi_default)
if "parameters" in var.creation_method:
parameter_comments.append(f"The above parameters for column '{var.name}' were "
"manually set by the user, no data was (directly) used.")
parameters = ", ".join(var.creation_method["parameters"])
parameter_comments.append(
f"The parameters {parameters} for column '{var.name}' were "
"manually set by the user, no data was (directly) used.")
elif (var.distribution.matches_name("multinoulli") and multi_default):
parameter_comments.append("This mulinoulli distribution is the default one, no data"
" was used.")
Expand All @@ -401,8 +413,6 @@ def save_toml(self, fp: Optional[Union[pathlib.Path, str]],
if (var.distribution.matches_name("multinoulli") and not multi_default):
counts = (var.distribution.probs*(1-var.prop_missing)*self.n_rows).round()
parameter_comments.append(f"Counts: {counts.astype(int)}\n")
# doc["vars"][i]["distribution"].add(tomlkit.comment(
# f"Counts: {counts.astype(int)}\n"))
par_comment = "\n# ".join(parameter_comments) + "\n\n"
doc["vars"][i]["distribution"]["parameters"].add(tomlkit.comment(par_comment))
if fp is None:
Expand Down
1 change: 1 addition & 0 deletions metasyn/privacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def comment(self, var): # noqa
"""
return "Above are the parameters for the column {var.name}"


class BasicPrivacy(BasePrivacy):
"""Class representing no privacy level.
Expand Down

0 comments on commit c423382

Please sign in to comment.