From 20652407e75e45e96f823b910cc14bc4728cb521 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Fri, 24 May 2024 17:00:03 +0200 Subject: [PATCH 01/17] Map fields to the keys of FOREIGN_KEY_DESCRIPTORS Each key map to a list of descriptors which themselves contain a "fields" attribute. We map the values under this "fields" attribute to the keys of FOREIGN_KEY_DESCRIPTORS --- src/oemof/tabular/config/config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/oemof/tabular/config/config.py b/src/oemof/tabular/config/config.py index 623eb59b..8d517540 100644 --- a/src/oemof/tabular/config/config.py +++ b/src/oemof/tabular/config/config.py @@ -18,6 +18,11 @@ with open(FOREIGN_KEY_DESCRIPTORS_FILE, "r") as fk_descriptors_file: FOREIGN_KEY_DESCRIPTORS = json.load(fk_descriptors_file) +SPECIAL_FIELD_NAMES = {} +for fk, descriptor in FOREIGN_KEY_DESCRIPTORS.items(): + for el in descriptor: + SPECIAL_FIELD_NAMES[el["fields"]] = fk + supported_oemof_tabular_versions = [ None, "0.0.1", From eb9a8b913064c401e4250952b2a709f3cd36b8ac Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Fri, 24 May 2024 17:07:00 +0200 Subject: [PATCH 02/17] Add function to infer datapackage foreign_keys The function infer_metadata_from_data can be run on a datapackage where only the data/elements and data/sequences have been defined by a user and will generate automatically the datapackage.json file containing the metadata. 
--- src/oemof/tabular/datapackage/building.py | 173 ++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index 1302fe99..992b0669 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -6,6 +6,7 @@ import sys import tarfile import urllib.request +import warnings import zipfile from ftplib import FTP from urllib.parse import urlparse @@ -59,6 +60,178 @@ def update_package_descriptor(): p.save("datapackage.json") +def map_sequence_profiles_to_resource_name( + p, excluded_profiles=("timeindex",) +): + """Look in every resource which is a sequence and map each of its fields to itself + + Within this process the unicity of the field names will be checked, with the exception of the field "timeindex" + + """ + + def check_sequences_labels_unicity(labels, new_labels): + intersect = set(labels).intersection(new_labels) + if len(intersect) == 1: + intersect = intersect.pop() + if not intersect == "timeindex": + answer = [intersect] + else: + answer = [] + else: + answer = list(intersect) + + if answer: + warnings.warn( + f"The labels of the profiles are not unique across all files within 'sequences' folder: '{','.join(intersect)}' used more than once" + ) + return answer + + sequences = {} + sequence_labels = [] + duplicated_labels = [] + for r in p.resources: + if "/sequences/" in r.descriptor["path"]: + field_labels = [ + f.name + for f in r.schema.fields + if f.name not in excluded_profiles + ] + sequences[r.descriptor["name"]] = field_labels + duplicated_labels += check_sequences_labels_unicity( + sequence_labels, field_labels + ) + sequence_labels += field_labels + + if duplicated_labels: + # write an error message here + pass + # map each profile to its resource name + sequences_mapping = { + value: key for (key, values) in sequences.items() for value in values + } + return sequences_mapping + + +def 
infer_resource_foreign_keys(resource, sequences_profiles_to_resource): + """Find out the foreign keys within a resource fields + + Look through all field of a resource which are of type 'string' if any of their values are matching a profile header in any of the sequences resources + + + Parameters + ---------- + resource: a :datapackage.Resource: instance + sequences_profiles_to_resource: the mapping of sequence profile headers to their resource name + + Returns + ------- + The :datapackage.Resource: instance with updated "foreignKeys" field + + """ + r = resource + data = pd.DataFrame.from_records(r.read(keyed=True)) + # TODO not sure this should be set here + r.descriptor["schema"]["primaryKey"] = "name" + if "foreignKeys" not in r.descriptor["schema"]: + r.descriptor["schema"]["foreignKeys"] = [] + + for field in r.schema.fields: + if field.type == "string": + for potential_fk in data.dropna()[field.name].unique(): + + if potential_fk in sequences_profiles_to_resource: + # this is actually a wrong format and should be with a "fields" field under the "reference" fields + + fk = { + "fields": field.name, + "reference": { + "resource": sequences_profiles_to_resource[ + potential_fk + ], + }, + } + + if fk not in r.descriptor["schema"]["foreignKeys"]: + r.descriptor["schema"]["foreignKeys"].append(fk) + r.commit() + return r + + +def infer_package_foreign_keys(package): + """Infer the foreign_keys from data/elements and data/sequences and update meta data + + Parameters + ---------- + package + + Returns + ------- + + """ + p = package + sequences_profiles_to_resource = map_sequence_profiles_to_resource_name(p) + + for r in p.resources: + if "/elements/" in r.descriptor["path"]: + r = infer_resource_foreign_keys(r, sequences_profiles_to_resource) + p.remove_resource(r.name) + p.add_resource(r.descriptor) + + +def infer_metadata_from_data( + package_name="default-name", + path=None, + metadata_filename="datapackage.json", +): + """ + + Returns + ------- + + """ + + # 
Infer the fields from the package data + path = os.path.abspath(path) + p0 = Package(base_path=path) + p0.infer(os.path.join(path, "**/*.csv")) + p0.commit() + p0.save(os.path.join(path, metadata_filename)) + + foreign_keys = {} + + def infer_resource_basic_foreign_keys(resource): + """insert resource foreign_key into a dict formatted for building.infer_metadata + + Compare the fields of a resource to a list of field names known to be foreign keys. If the field name is within the list, it is used to populate the dict 'foreign_keys' + """ + for field in resource.schema.fields: + if field.name in config.SPECIAL_FIELD_NAMES: + fk_descriptor = config.SPECIAL_FIELD_NAMES[field.name] + if fk_descriptor in foreign_keys: + if resource.name not in foreign_keys[fk_descriptor]: + foreign_keys[fk_descriptor].append(resource.name) + else: + foreign_keys[fk_descriptor] = [resource.name] + + for r in p0.resources: + if "/elements/" in r.descriptor["path"]: + infer_resource_basic_foreign_keys(r) + # this function saves the metadata of the package in json format + infer_metadata( + package_name=package_name, + path=path, + foreign_keys=foreign_keys, + metadata_filename=metadata_filename, + ) + + # reload the package from the saved json file + p = Package(os.path.join(path, metadata_filename)) + infer_package_foreign_keys(p) + p.descriptor["resources"].sort(key=lambda x: (x["path"], x["name"])) + p.commit() + p.save(os.path.join(path, metadata_filename)) + + def infer_metadata( package_name="default-name", keep_resources=False, From 5a89536fa106fcf879cd97375a98d3f0d543d608 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Fri, 24 May 2024 18:15:03 +0200 Subject: [PATCH 03/17] Update documentation --- docs/usage.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index cf8d9d9f..00f7d608 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -168,6 +168,18 @@ This can also be done for sequences and geometries. 
To create meta-data `json` file you can use the following code: +.. code-block:: python + + from datapackage_utilities import building + + building.infer_metadata_from_data( + package_name="my-datapackage", + path="/home/user/datpackages/my-datapackage" + ) + + +Or, if you want to specify manually the relation of the foreign keys, you can use this code: + .. code-block:: python from datapackage_utilities import building @@ -354,7 +366,8 @@ field names in the generators-profile resource. .. note:: This usage breaks with the datapackage standard and creates - non-valid resources.** + non-valid resources. + Scripting From c6c5dc8fa216d70e19068c83fb604be58321ed5d Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 28 May 2024 15:11:28 +0200 Subject: [PATCH 04/17] Update oemof.tabular version --- src/oemof/tabular/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oemof/tabular/__init__.py b/src/oemof/tabular/__init__.py index a44593e6..6cfe22c1 100644 --- a/src/oemof/tabular/__init__.py +++ b/src/oemof/tabular/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.5" +__version__ = "0.0.6dev" __project__ = "oemof.tabular" From e4209235cad645d4a4ec61ade058fc6876f86d6a Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 28 May 2024 22:12:11 +0200 Subject: [PATCH 05/17] Fix flake8 --- src/oemof/tabular/datapackage/building.py | 27 +++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index 992b0669..c11875d6 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -63,9 +63,10 @@ def update_package_descriptor(): def map_sequence_profiles_to_resource_name( p, excluded_profiles=("timeindex",) ): - """Look in every resource which is a sequence and map each of its fields to itself + """Look in every sequence resources and map each of its fields to itself - Within 
this process the unicity of the field names will be checked, with the exception of the field "timeindex" + Within this process the unicity of the field names will be checked, + with the exception of the field "timeindex" """ @@ -82,7 +83,9 @@ def check_sequences_labels_unicity(labels, new_labels): if answer: warnings.warn( - f"The labels of the profiles are not unique across all files within 'sequences' folder: '{','.join(intersect)}' used more than once" + f"The labels of the profiles are not unique across all" + f"files within 'sequences' folder: '{','.join(intersect)}' " + f"used more than once" ) return answer @@ -115,13 +118,16 @@ def check_sequences_labels_unicity(labels, new_labels): def infer_resource_foreign_keys(resource, sequences_profiles_to_resource): """Find out the foreign keys within a resource fields - Look through all field of a resource which are of type 'string' if any of their values are matching a profile header in any of the sequences resources + Look through all field of a resource which are of type 'string' + if any of their values are matching a profile header in any of + the sequences resources Parameters ---------- resource: a :datapackage.Resource: instance - sequences_profiles_to_resource: the mapping of sequence profile headers to their resource name + sequences_profiles_to_resource: the mapping of sequence profile + headers to their resource name Returns ------- @@ -140,7 +146,8 @@ def infer_resource_foreign_keys(resource, sequences_profiles_to_resource): for potential_fk in data.dropna()[field.name].unique(): if potential_fk in sequences_profiles_to_resource: - # this is actually a wrong format and should be with a "fields" field under the "reference" fields + # this is actually a wrong format and should be + # with a "fields" field under the "reference" fields fk = { "fields": field.name, @@ -158,7 +165,7 @@ def infer_resource_foreign_keys(resource, sequences_profiles_to_resource): def infer_package_foreign_keys(package): - 
"""Infer the foreign_keys from data/elements and data/sequences and update meta data + """Infer the foreign_keys from elements and sequences and update meta data Parameters ---------- @@ -200,9 +207,11 @@ def infer_metadata_from_data( foreign_keys = {} def infer_resource_basic_foreign_keys(resource): - """insert resource foreign_key into a dict formatted for building.infer_metadata + """Prepare foreign_keys dict for building.infer_metadata - Compare the fields of a resource to a list of field names known to be foreign keys. If the field name is within the list, it is used to populate the dict 'foreign_keys' + Compare the fields of a resource to a list of field names known + to be foreign keys. If the field name is within the list, it is + used to populate the dict 'foreign_keys' """ for field in resource.schema.fields: if field.name in config.SPECIAL_FIELD_NAMES: From 6dfb247187e7dbf811d64ca938514478428d57db Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 28 May 2024 22:20:56 +0200 Subject: [PATCH 06/17] Update AUTHORS.rst --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 7dc38191..761a1d08 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -14,3 +14,4 @@ Authors * Marie-Claire Gering * Julian Endres * Felix Maurer +* Pierre-Francois Duc From a7ee8635e75d973d5aac8e6e8c09a1c0defe6087 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 28 May 2024 22:21:08 +0200 Subject: [PATCH 07/17] Update CHANGELOG.rst --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fb6f999c..fbd89dfc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,7 @@ Unreleased ---------- Features +* Improve the function to infer package metadata `#173 `_ Fixes From 0c4279bc54abbc8fe113d3d0bb358bb7c178af58 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Mon, 3 Jun 2024 23:45:51 +0200 Subject: [PATCH 08/17] Make path argument mandatory --- 
src/oemof/tabular/datapackage/building.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index c11875d6..6724c2b6 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -186,15 +186,28 @@ def infer_package_foreign_keys(package): def infer_metadata_from_data( + path, package_name="default-name", - path=None, metadata_filename="datapackage.json", ): - """ + """Creates a metadata .json file at the root-folder of datapackage + + The foreign keys are inferred from the csv files within + "data/elements" and "data/sequences" resources. + + Parameters + ---------- + path: string + Absolute path to root-folder of the datapackage + package_name: string + Name of the data package + metadata_filename: basestring + Name of the inferred metadata string. Returns ------- - + Save a json metadata file at the root-folder of datapackage + under the provided path. 
""" # Infer the fields from the package data From 37c5ad564fc49a5ae7dc7e573df0c93e7717256f Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Mon, 3 Jun 2024 23:46:19 +0200 Subject: [PATCH 09/17] Fix path separator bug --- src/oemof/tabular/datapackage/building.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index 6724c2b6..c5317c49 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -179,7 +179,7 @@ def infer_package_foreign_keys(package): sequences_profiles_to_resource = map_sequence_profiles_to_resource_name(p) for r in p.resources: - if "/elements/" in r.descriptor["path"]: + if os.sep + "elements" + os.sep in r.descriptor["path"]: r = infer_resource_foreign_keys(r, sequences_profiles_to_resource) p.remove_resource(r.name) p.add_resource(r.descriptor) @@ -213,7 +213,7 @@ def infer_metadata_from_data( # Infer the fields from the package data path = os.path.abspath(path) p0 = Package(base_path=path) - p0.infer(os.path.join(path, "**/*.csv")) + p0.infer(os.path.join(path, "**" + os.sep + "*.csv")) p0.commit() p0.save(os.path.join(path, metadata_filename)) @@ -236,7 +236,7 @@ def infer_resource_basic_foreign_keys(resource): foreign_keys[fk_descriptor] = [resource.name] for r in p0.resources: - if "/elements/" in r.descriptor["path"]: + if os.sep + "elements" + os.sep in r.descriptor["path"]: infer_resource_basic_foreign_keys(r) # this function saves the metadata of the package in json format infer_metadata( From b8b4e64301ff21d3e13990db231d9ee3588160ba Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Mon, 3 Jun 2024 23:47:22 +0200 Subject: [PATCH 10/17] Raise error message in case of non unique sequences labels --- src/oemof/tabular/datapackage/building.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oemof/tabular/datapackage/building.py 
b/src/oemof/tabular/datapackage/building.py index c5317c49..4234a2b3 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -107,7 +107,7 @@ def check_sequences_labels_unicity(labels, new_labels): if duplicated_labels: # write an error message here - pass + raise ValueError(f"The following sequences labels are not unique across all sequences files: {', '.join(duplicated_labels)} ") # map each profile to its resource name sequences_mapping = { value: key for (key, values) in sequences.items() for value in values From 1f81196617ec64208023c1c6a2ef78f43fea5e85 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Mon, 3 Jun 2024 23:55:31 +0200 Subject: [PATCH 11/17] Lint with black --- src/oemof/tabular/datapackage/building.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index 4234a2b3..c24ec383 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -107,7 +107,9 @@ def check_sequences_labels_unicity(labels, new_labels): if duplicated_labels: # write an error message here - raise ValueError(f"The following sequences labels are not unique across all sequences files: {', '.join(duplicated_labels)} ") + raise ValueError( + f"The following sequences labels are not unique across all sequences files: {', '.join(duplicated_labels)} " + ) # map each profile to its resource name sequences_mapping = { value: key for (key, values) in sequences.items() for value in values From ef6ed5f7c0ee5dbfbab9458c706fc5680ccb6d22 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 4 Jun 2024 00:04:08 +0200 Subject: [PATCH 12/17] Fix flake error --- src/oemof/tabular/datapackage/building.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index c24ec383..412c17d1 
100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -108,7 +108,9 @@ def check_sequences_labels_unicity(labels, new_labels): if duplicated_labels: # write an error message here raise ValueError( - f"The following sequences labels are not unique across all sequences files: {', '.join(duplicated_labels)} " + f"The following sequences labels are not unique" + f" across all sequences files: " + f"{', '.join(duplicated_labels)}" ) # map each profile to its resource name sequences_mapping = { From c7ab4bfe106bba2ab1acc557e34ae705111f77ac Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 4 Jun 2024 00:28:32 +0200 Subject: [PATCH 13/17] Fix foreign key parsing bug --- src/oemof/tabular/datapackage/building.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index 412c17d1..0613c093 100644 --- a/src/oemof/tabular/datapackage/building.py +++ b/src/oemof/tabular/datapackage/building.py @@ -147,7 +147,7 @@ def infer_resource_foreign_keys(resource, sequences_profiles_to_resource): for field in r.schema.fields: if field.type == "string": - for potential_fk in data.dropna()[field.name].unique(): + for potential_fk in data[field.name].dropna().unique(): if potential_fk in sequences_profiles_to_resource: # this is actually a wrong format and should be From 634374959bbf83aea729f6e0f3a81380c07a4bfd Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 4 Jun 2024 00:30:05 +0200 Subject: [PATCH 14/17] Update example datapackages Those were generated by infer_metadata_from_data and had no differences except the package name --- .../tabular/examples/datapackages/dispatch/datapackage.json | 4 ++-- .../tabular/examples/datapackages/investment/datapackage.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/oemof/tabular/examples/datapackages/dispatch/datapackage.json 
b/src/oemof/tabular/examples/datapackages/dispatch/datapackage.json index ed8d4bda..0bc024de 100644 --- a/src/oemof/tabular/examples/datapackages/dispatch/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/dispatch/datapackage.json @@ -1,6 +1,6 @@ { "profile": "tabular-data-package", - "name": "oemof-tabular-dispatch-example", + "name": "dispatch-example", "oemof_tabular_version": "0.0.6dev", "resources": [ { @@ -435,4 +435,4 @@ } } ] -} +} \ No newline at end of file diff --git a/src/oemof/tabular/examples/datapackages/investment/datapackage.json b/src/oemof/tabular/examples/datapackages/investment/datapackage.json index 0b7e2300..a4c8ca2a 100644 --- a/src/oemof/tabular/examples/datapackages/investment/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/investment/datapackage.json @@ -1,6 +1,6 @@ { "profile": "tabular-data-package", - "name": "renpass-invest-example", + "name": "investment-example", "oemof_tabular_version": "0.0.6dev", "resources": [ { @@ -571,4 +571,4 @@ } } ] -} +} \ No newline at end of file From bb3fef52c305d7aa0423eabeafbabae432701f61 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 4 Jun 2024 00:31:24 +0200 Subject: [PATCH 15/17] Update example datapackages Those were generated by infer_metadata_from_data and had only a different order of resources --- .../dispatch_multi_period/datapackage.json | 38 +++++----- .../datapackage.json | 38 +++++----- .../emission_constraint/datapackage.json | 74 +++++++++---------- .../datapackages/foreignkeys/datapackage.json | 12 +-- .../investment_multi_period/datapackage.json | 46 ++++++------ 5 files changed, 104 insertions(+), 104 deletions(-) diff --git a/src/oemof/tabular/examples/datapackages/dispatch_multi_period/datapackage.json b/src/oemof/tabular/examples/datapackages/dispatch_multi_period/datapackage.json index ea126487..f0afb6f4 100644 --- a/src/oemof/tabular/examples/datapackages/dispatch_multi_period/datapackage.json +++ 
b/src/oemof/tabular/examples/datapackages/dispatch_multi_period/datapackage.json @@ -1,6 +1,6 @@ { "profile": "tabular-data-package", - "name": "oemof-tabular-dispatch-example", + "name": "dispatch_multi_period-example", "oemof_tabular_version": "0.0.6dev", "resources": [ { @@ -380,9 +380,9 @@ } }, { - "path": "data/sequences/load_profile.csv", + "path": "data/periods/periods.csv", "profile": "tabular-data-resource", - "name": "load_profile", + "name": "periods", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -394,8 +394,13 @@ "format": "default" }, { - "name": "electricity-load-profile", - "type": "number", + "name": "periods", + "type": "integer", + "format": "default" + }, + { + "name": "timeincrement", + "type": "integer", "format": "default" } ], @@ -405,9 +410,9 @@ } }, { - "path": "data/sequences/volatile_profile.csv", + "path": "data/sequences/load_profile.csv", "profile": "tabular-data-resource", - "name": "volatile_profile", + "name": "load_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -419,14 +424,9 @@ "format": "default" }, { - "name": "wind-profile", + "name": "electricity-load-profile", "type": "number", "format": "default" - }, - { - "name": "pv-profile", - "type": "integer", - "format": "default" } ], "missingValues": [ @@ -435,9 +435,9 @@ } }, { - "path": "data/periods/periods.csv", + "path": "data/sequences/volatile_profile.csv", "profile": "tabular-data-resource", - "name": "periods", + "name": "volatile_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -449,12 +449,12 @@ "format": "default" }, { - "name": "periods", - "type": "integer", + "name": "wind-profile", + "type": "number", "format": "default" }, { - "name": "timeincrement", + "name": "pv-profile", "type": "integer", "format": "default" } @@ -465,4 +465,4 @@ } } ] -} +} \ No newline at end of file diff --git a/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/datapackage.json 
b/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/datapackage.json index f6694575..ad493b29 100644 --- a/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/datapackage.json @@ -1,6 +1,6 @@ { "profile": "tabular-data-package", - "name": "oemof-tabular-dispatch-example", + "name": "dispatch_multi_period_periodic_values-example", "oemof_tabular_version": "0.0.6dev", "resources": [ { @@ -380,9 +380,9 @@ } }, { - "path": "data/sequences/load_profile.csv", + "path": "data/periods/periods.csv", "profile": "tabular-data-resource", - "name": "load_profile", + "name": "periods", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -394,8 +394,13 @@ "format": "default" }, { - "name": "electricity-load-profile", - "type": "number", + "name": "periods", + "type": "integer", + "format": "default" + }, + { + "name": "timeincrement", + "type": "integer", "format": "default" } ], @@ -405,9 +410,9 @@ } }, { - "path": "data/sequences/volatile_profile.csv", + "path": "data/sequences/load_profile.csv", "profile": "tabular-data-resource", - "name": "volatile_profile", + "name": "load_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -419,14 +424,9 @@ "format": "default" }, { - "name": "wind-profile", + "name": "electricity-load-profile", "type": "number", "format": "default" - }, - { - "name": "pv-profile", - "type": "integer", - "format": "default" } ], "missingValues": [ @@ -435,9 +435,9 @@ } }, { - "path": "data/periods/periods.csv", + "path": "data/sequences/volatile_profile.csv", "profile": "tabular-data-resource", - "name": "periods", + "name": "volatile_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -449,12 +449,12 @@ "format": "default" }, { - "name": "periods", - "type": "integer", + "name": "wind-profile", + "type": "number", "format": "default" }, { - 
"name": "timeincrement", + "name": "pv-profile", "type": "integer", "format": "default" } @@ -465,4 +465,4 @@ } } ] -} +} \ No newline at end of file diff --git a/src/oemof/tabular/examples/datapackages/emission_constraint/datapackage.json b/src/oemof/tabular/examples/datapackages/emission_constraint/datapackage.json index 63a61396..662b30fb 100644 --- a/src/oemof/tabular/examples/datapackages/emission_constraint/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/emission_constraint/datapackage.json @@ -1,8 +1,43 @@ { "profile": "tabular-data-package", - "name": "oemof-tabular-dispatch-example", + "name": "emission_constraint-example", "oemof_tabular_version": "0.0.6dev", "resources": [ + { + "path": "data/constraints/emission_constraint.csv", + "profile": "tabular-data-resource", + "name": "emission_constraint", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": { + "fields": [ + { + "name": "name", + "type": "string", + "format": "default" + }, + { + "name": "type", + "type": "string", + "format": "default" + }, + { + "name": "limit", + "type": "integer", + "format": "default" + }, + { + "name": "keyword", + "type": "string", + "format": "default" + } + ], + "missingValues": [ + "" + ] + } + }, { "path": "data/elements/bus.csv", "profile": "tabular-data-resource", @@ -401,41 +436,6 @@ "" ] } - }, - { - "path": "data/constraints/emission_constraint.csv", - "profile": "tabular-data-resource", - "name": "emission_constraint", - "format": "csv", - "mediatype": "text/csv", - "encoding": "utf-8", - "schema": { - "fields": [ - { - "name": "name", - "type": "string", - "format": "default" - }, - { - "name": "type", - "type": "string", - "format": "default" - }, - { - "name": "limit", - "type": "integer", - "format": "default" - }, - { - "name": "keyword", - "type": "string", - "format": "default" - } - ], - "missingValues": [ - "" - ] - } } ] -} +} \ No newline at end of file diff --git 
a/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json b/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json index 6bacb163..3f3b6a4e 100644 --- a/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json @@ -1,6 +1,6 @@ { "profile": "tabular-data-package", - "name": "oemof-tabular-foreignkeys-examples", + "name": "foreignkeys-example", "oemof_tabular_version": "0.0.6dev", "resources": [ { @@ -113,15 +113,15 @@ } }, { - "fields": "marginal_cost", + "fields": "profile", "reference": { - "resource": "marginal_cost_profile" + "resource": "component_profile" } }, { - "fields": "profile", + "fields": "marginal_cost", "reference": { - "resource": "component_profile" + "resource": "marginal_cost_profile" } } ] @@ -183,4 +183,4 @@ } } ] -} +} \ No newline at end of file diff --git a/src/oemof/tabular/examples/datapackages/investment_multi_period/datapackage.json b/src/oemof/tabular/examples/datapackages/investment_multi_period/datapackage.json index 2fa390c7..c17da4a8 100644 --- a/src/oemof/tabular/examples/datapackages/investment_multi_period/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/investment_multi_period/datapackage.json @@ -1,6 +1,6 @@ { "profile": "tabular-data-package", - "name": "renpass-invest-example", + "name": "investment_multi_period-example", "oemof_tabular_version": "0.0.6dev", "resources": [ { @@ -562,9 +562,9 @@ } }, { - "path": "data/sequences/capacity_cost_profile.csv", + "path": "data/periods/periods.csv", "profile": "tabular-data-resource", - "name": "capacity_cost_profile", + "name": "periods", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -576,7 +576,12 @@ "format": "default" }, { - "name": "cc_profile", + "name": "periods", + "type": "integer", + "format": "default" + }, + { + "name": "timeincrement", "type": "integer", "format": "default" } @@ -587,9 +592,9 @@ } }, { - "path": 
"data/sequences/load_profile.csv", + "path": "data/sequences/capacity_cost_profile.csv", "profile": "tabular-data-resource", - "name": "load_profile", + "name": "capacity_cost_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -601,8 +606,8 @@ "format": "default" }, { - "name": "electricity-load-profile", - "type": "number", + "name": "cc_profile", + "type": "integer", "format": "default" } ], @@ -612,9 +617,9 @@ } }, { - "path": "data/sequences/source_profile.csv", + "path": "data/sequences/load_profile.csv", "profile": "tabular-data-resource", - "name": "source_profile", + "name": "load_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -626,12 +631,7 @@ "format": "default" }, { - "name": "pv-profile", - "type": "number", - "format": "default" - }, - { - "name": "wind-profile", + "name": "electricity-load-profile", "type": "number", "format": "default" } @@ -642,9 +642,9 @@ } }, { - "path": "data/periods/periods.csv", + "path": "data/sequences/source_profile.csv", "profile": "tabular-data-resource", - "name": "periods", + "name": "source_profile", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", @@ -656,13 +656,13 @@ "format": "default" }, { - "name": "periods", - "type": "integer", + "name": "pv-profile", + "type": "number", "format": "default" }, { - "name": "timeincrement", - "type": "integer", + "name": "wind-profile", + "type": "number", "format": "default" } ], @@ -672,4 +672,4 @@ } } ] -} +} \ No newline at end of file From bbe05faa4e05dbd319324086a37409d2c551a721 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 4 Jun 2024 00:51:07 +0200 Subject: [PATCH 16/17] Sort resources alphabetically --- src/oemof/tabular/datapackage/building.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/oemof/tabular/datapackage/building.py b/src/oemof/tabular/datapackage/building.py index 0613c093..e74bff4e 100644 --- a/src/oemof/tabular/datapackage/building.py +++ 
b/src/oemof/tabular/datapackage/building.py @@ -185,6 +185,10 @@ def infer_package_foreign_keys(package): for r in p.resources: if os.sep + "elements" + os.sep in r.descriptor["path"]: r = infer_resource_foreign_keys(r, sequences_profiles_to_resource) + # sort foreign_key entries alphabetically by fields + r.descriptor["schema"]["foreignKeys"].sort( + key=lambda x: x["fields"] + ) p.remove_resource(r.name) p.add_resource(r.descriptor) @@ -430,6 +434,7 @@ def infer_metadata( ) p.add_resource(r.descriptor) + p.descriptor["resources"].sort(key=lambda x: (x["path"], x["name"])) p.commit() p.save(metadata_filename) From ed91085e06d44adf67825bfffec81beb39522616 Mon Sep 17 00:00:00 2001 From: "pierre-francois.duc" Date: Tue, 4 Jun 2024 00:51:26 +0200 Subject: [PATCH 17/17] Fix failing test --- .../examples/datapackages/dispatch/scripts/infer.py | 2 +- .../datapackages/dispatch_multi_period/scripts/infer.py | 2 +- .../scripts/infer.py | 2 +- .../datapackages/emission_constraint/scripts/infer.py | 2 +- .../examples/datapackages/foreignkeys/datapackage.json | 8 ++++---- .../examples/datapackages/foreignkeys/scripts/infer.py | 2 +- .../examples/datapackages/investment/scripts/infer.py | 2 +- .../datapackages/investment_multi_period/scripts/infer.py | 2 +- tests/test_examples.py | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/oemof/tabular/examples/datapackages/dispatch/scripts/infer.py b/src/oemof/tabular/examples/datapackages/dispatch/scripts/infer.py index f156383c..8854c38e 100644 --- a/src/oemof/tabular/examples/datapackages/dispatch/scripts/infer.py +++ b/src/oemof/tabular/examples/datapackages/dispatch/scripts/infer.py @@ -11,7 +11,7 @@ kwargs = {} building.infer_metadata( - package_name="oemof-tabular-dispatch-example", + package_name="dispatch-example", foreign_keys={ "bus": ["volatile", "dispatchable", "storage", "load"], "profile": ["load", "volatile"], diff --git
a/src/oemof/tabular/examples/datapackages/dispatch_multi_period/scripts/infer.py b/src/oemof/tabular/examples/datapackages/dispatch_multi_period/scripts/infer.py index f156383c..9754b154 100644 --- a/src/oemof/tabular/examples/datapackages/dispatch_multi_period/scripts/infer.py +++ b/src/oemof/tabular/examples/datapackages/dispatch_multi_period/scripts/infer.py @@ -11,7 +11,7 @@ kwargs = {} building.infer_metadata( - package_name="oemof-tabular-dispatch-example", + package_name="dispatch_multi_period-example", foreign_keys={ "bus": ["volatile", "dispatchable", "storage", "load"], "profile": ["load", "volatile"], diff --git a/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/scripts/infer.py b/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/scripts/infer.py index f156383c..905dff62 100644 --- a/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/scripts/infer.py +++ b/src/oemof/tabular/examples/datapackages/dispatch_multi_period_periodic_values/scripts/infer.py @@ -11,7 +11,7 @@ kwargs = {} building.infer_metadata( - package_name="oemof-tabular-dispatch-example", + package_name="dispatch_multi_period_periodic_values-example", foreign_keys={ "bus": ["volatile", "dispatchable", "storage", "load"], "profile": ["load", "volatile"], diff --git a/src/oemof/tabular/examples/datapackages/emission_constraint/scripts/infer.py b/src/oemof/tabular/examples/datapackages/emission_constraint/scripts/infer.py index 35967473..e6d85082 100644 --- a/src/oemof/tabular/examples/datapackages/emission_constraint/scripts/infer.py +++ b/src/oemof/tabular/examples/datapackages/emission_constraint/scripts/infer.py @@ -12,7 +12,7 @@ building.infer_metadata( - package_name="oemof-tabular-dispatch-example", + package_name="emission_constraint-example", foreign_keys={ "bus": ["volatile", "dispatchable", "storage", "load", "excess"], "profile": ["load", "volatile"], diff --git 
a/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json b/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json index 3f3b6a4e..ef715dc0 100644 --- a/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json +++ b/src/oemof/tabular/examples/datapackages/foreignkeys/datapackage.json @@ -113,15 +113,15 @@ } }, { - "fields": "profile", + "fields": "marginal_cost", "reference": { - "resource": "component_profile" + "resource": "marginal_cost_profile" } }, { - "fields": "marginal_cost", + "fields": "profile", "reference": { - "resource": "marginal_cost_profile" + "resource": "component_profile" } } ] diff --git a/src/oemof/tabular/examples/datapackages/foreignkeys/scripts/infer.py b/src/oemof/tabular/examples/datapackages/foreignkeys/scripts/infer.py index 41b4cd11..2a2c1ebc 100644 --- a/src/oemof/tabular/examples/datapackages/foreignkeys/scripts/infer.py +++ b/src/oemof/tabular/examples/datapackages/foreignkeys/scripts/infer.py @@ -11,7 +11,7 @@ kwargs = {} building.infer_metadata( - package_name="oemof-tabular-foreignkeys-examples", + package_name="foreignkeys-example", foreign_keys={ "bus": ["component"], "profile": ["component"], diff --git a/src/oemof/tabular/examples/datapackages/investment/scripts/infer.py b/src/oemof/tabular/examples/datapackages/investment/scripts/infer.py index 0a275eda..b5161ab6 100644 --- a/src/oemof/tabular/examples/datapackages/investment/scripts/infer.py +++ b/src/oemof/tabular/examples/datapackages/investment/scripts/infer.py @@ -6,7 +6,7 @@ kwargs = {} building.infer_metadata( - package_name="renpass-invest-example", + package_name="investment-example", foreign_keys={ "bus": [ "volatile", diff --git a/src/oemof/tabular/examples/datapackages/investment_multi_period/scripts/infer.py b/src/oemof/tabular/examples/datapackages/investment_multi_period/scripts/infer.py index 33595840..2283ab96 100644 --- a/src/oemof/tabular/examples/datapackages/investment_multi_period/scripts/infer.py +++ 
b/src/oemof/tabular/examples/datapackages/investment_multi_period/scripts/infer.py @@ -6,7 +6,7 @@ kwargs = {} building.infer_metadata( - package_name="renpass-invest-example", + package_name="investment_multi_period-example", foreign_keys={ "bus": [ "volatile", diff --git a/tests/test_examples.py b/tests/test_examples.py index 916678af..4e10e694 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -142,5 +142,5 @@ def test_custom_foreign_keys(monkeypatch): / "datapackages" / "foreignkeys" ), - package_name="oemof-tabular-foreignkeys-examples", + package_name="foreignkeys-example", )