From 386c300b3ed67ae7d5a5faec463fd25e56cc5f23 Mon Sep 17 00:00:00 2001 From: Jonathan Erikson Date: Thu, 2 Nov 2023 13:40:26 +0100 Subject: [PATCH 1/2] Merge organizations with same resource --- Px.Dcat/DataCollector.cs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/Px.Dcat/DataCollector.cs b/Px.Dcat/DataCollector.cs index 647a2b0..9dcf0bb 100644 --- a/Px.Dcat/DataCollector.cs +++ b/Px.Dcat/DataCollector.cs @@ -883,9 +883,9 @@ private Catalog getCatalog() c.License = _settings.License; c.Datasets = getDatasets(); c.Languages = convertLanguages(_settings.Languages); - setProducers(c.Datasets); c.Publisher = _publisher; setOrganizationResources(); + setProducers(c.Datasets); return c; } @@ -907,6 +907,37 @@ private void setOrganizationResources() _organizations[source].Resource = _organizationMapping[source]; } } + + Dictionary newOrgs = new Dictionary(); + + // Merge organizations with same resource + foreach (string source in _organizations.Keys) + { + Organization org1 = _organizations[source]; + string res1 = org1.Resource; + foreach (string source2 in _organizations.Keys) + { + if (source == source2) continue; + + Organization org2 = _organizations[source2]; + string res2 = org2.Resource; + + if (res1 == res2) + { + Organization newOrg = new Organization(); + newOrg.Resource = res1; + newOrg.Names = new HashSet<(string, string)>(org1.Names.Union(org2.Names)); + newOrgs[source] = newOrg; + newOrgs[source2] = newOrg; + } + + } + } + + foreach (string source in newOrgs.Keys) + { + _organizations[source] = newOrgs[source]; + } } /// From 83c077d83c360e522b8f5415c5d11947b7d83dab Mon Sep 17 00:00:00 2001 From: Jonathan Erikson Date: Thu, 2 Nov 2023 15:11:06 +0100 Subject: [PATCH 2/2] Add support for publisher in multiple languages --- Px.Dcat/DataCollector.cs | 139 +++++++++++++++++++-------------------- Px.Dcat/DcatSettings.cs | 2 +- TestApp/Main.cs | 21 +++--- 3 files changed, 80 insertions(+), 82 deletions(-) diff --git a/Px.Dcat/DataCollector.cs b/Px.Dcat/DataCollector.cs index 9dcf0bb..46925c4 100644 --- a/Px.Dcat/DataCollector.cs +++ b/Px.Dcat/DataCollector.cs @@ -511,75 +511,68 @@ private string getCategory(List path) return "http://publications.europa.eu/Resource/authority/data-theme/" + _themeMapping[category]; } - /// - /// Get producer of table - /// - /// Metadata of table - /// Organization with the producer info - private Organization getProducer(PXMeta meta, List langs) + private void addOrganization(HashSet<(string, string)> names) { - HashSet<(string, string)> names = new HashSet<(string, string)>(); List matchingOrgs = new List(); + HashSet<(string, string)> newNames = new HashSet<(string, string)>(names); - foreach (string lang in langs) + foreach ((string lang, string name) in names) { - meta.SetLanguage(lang); - string name = meta.Source; if (_organizations.ContainsKey(name)) { matchingOrgs.Add(_organizations[name]); } - names.Add((lang, name)); } Organization newOrg = new Organization(); + newOrg.Resource = Path.Combine(_settings.BaseUri, "organization", nextString()).Replace("\\", "/"); if (matchingOrgs.Count > 0) { - foreach (Organization org in matchingOrgs) + foreach (Organization o in matchingOrgs) { - names.UnionWith(org.Names); - } - - newOrg.Names = names; - newOrg.Resource = Path.Combine(_settings.BaseUri,"organization",nextString()).Replace("\\", "/"); - - foreach (string name in names.Select(x => x.Item2).Distinct()) - { - _organizations[name] = newOrg; + newNames.UnionWith(o.Names); } } - else + newOrg.Names = newNames; + + foreach (string name in newNames.Select(x => x.Item2).Distinct()) { - newOrg.Names = names; - newOrg.Resource = Path.Combine(_settings.BaseUri, "organization", nextString()).Replace("\\", "/"); + _organizations[name] = newOrg; + } + } - // Add a reference to the organization for each language - foreach (string name in names.Select(x => x.Item2).Distinct()) - { - _organizations.Add(name, newOrg); - } + /// + /// Get producer of table + /// + /// Metadata of table + /// Organization with the producer info + private void setProducer(PXMeta meta, List langs) + { + HashSet<(string, string)> names = new HashSet<(string, string)>(); + + foreach (string lang in langs) + { + meta.SetLanguage(lang); + string name = meta.Source; + names.Add((lang, name)); } - return newOrg; + addOrganization(names); } /// /// Get publisher from _settings /// /// Publisher - private Organization getPublisher() + private void setPublisher() { - string name = _settings.PublisherName; - Organization org; - if (!_organizations.TryGetValue(name, out org)) + HashSet<(string, string)> names = new HashSet<(string, string)>(); + foreach (KeyValuePair pair in _settings.PublisherNames) { - HashSet<(string, string)> names = new HashSet<(string, string)>(); - names.Add((null, name)); - org = new Organization { Names = names, Resource = Path.Combine(_settings.BaseUri, "organization", nextString()).Replace("\\", "/") }; - _organizations.Add(name, org); + names.Add((pair.Key, pair.Value)); } - return org; + addOrganization(names); } /// @@ -764,7 +757,7 @@ private Dataset getDataset(string selection, PXMeta meta, List path) dataset.Sources = getSources(meta, langs); - getProducer(meta, langs); // Wait until all organizations are created before assigning producer + setProducer(meta, langs); // Wait until all organizations are created before assigning producer return dataset; } @@ -855,12 +848,6 @@ private List getDatasets() Item baseItem = _fetcher.GetBaseItem("", "", _settings.MainLanguage, _settings.DatabaseId); addRecursive(baseItem, path, datasets); - - _publisher = getPublisher(); - foreach (Dataset d in datasets) - { - d.Publisher = _publisher; - } return datasets; } @@ -883,9 +870,16 @@ private Catalog getCatalog() c.License = _settings.License; c.Datasets = getDatasets(); c.Languages = convertLanguages(_settings.Languages); - c.Publisher = _publisher; + setPublisher(); setOrganizationResources(); setProducers(c.Datasets); + + Organization publisher = _organizations[_settings.PublisherNames.First().Value]; + c.Publisher = publisher; + foreach (Dataset d in c.Datasets) + { + d.Publisher = publisher; + } return c; } @@ -900,44 +894,47 @@ private void setProducers(List datasets) private void setOrganizationResources() { - foreach (string source in _organizations.Keys) + Dictionary> reverseResourceMapping = new Dictionary>(); + + foreach (string key in _organizationMapping.Keys) { - if (_organizationMapping.ContainsKey(source)) + string res = _organizationMapping[key]; + if (!reverseResourceMapping.ContainsKey(res)) { - _organizations[source].Resource = _organizationMapping[source]; + reverseResourceMapping[res] = new List(); } + reverseResourceMapping[res].Add(key); } - Dictionary newOrgs = new Dictionary(); - - // Merge organizations with same resource - foreach (string source in _organizations.Keys) + // Merge organizations mapped to same resource + foreach (string res in reverseResourceMapping.Keys) { - Organization org1 = _organizations[source]; - string res1 = org1.Resource; - foreach (string source2 in _organizations.Keys) + List sources = reverseResourceMapping[res]; + if (sources.Count > 1) { - if (source == source2) continue; - - Organization org2 = _organizations[source2]; - string res2 = org2.Resource; - - if (res1 == res2) + HashSet<(string, string)> names = new HashSet<(string, string)>(); + foreach (string name in sources) + { + names.UnionWith(_organizations[name].Names); + } + Organization newOrg = new Organization(); + newOrg.Names = names; + newOrg.Resource = res; + foreach ((string lang, string name) in names) { - Organization newOrg = new Organization(); - newOrg.Resource = res1; - newOrg.Names = new HashSet<(string, string)>(org1.Names.Union(org2.Names)); - newOrgs[source] = newOrg; - newOrgs[source2] = newOrg; + _organizations[name] = newOrg; } - } } - foreach (string source in newOrgs.Keys) + foreach (string mappedSource in _organizationMapping.Keys) { - _organizations[source] = newOrgs[source]; + if (_organizations.ContainsKey(mappedSource)) + { + _organizations[mappedSource].Resource = _organizationMapping[mappedSource]; + } } + } /// diff --git a/Px.Dcat/DcatSettings.cs b/Px.Dcat/DcatSettings.cs index 4286bdd..e2bf95d 100644 --- a/Px.Dcat/DcatSettings.cs +++ b/Px.Dcat/DcatSettings.cs @@ -13,8 +13,8 @@ public struct DcatSettings public List Languages; // Read from settings public List> CatalogTitles; public List> CatalogDescriptions; + public List> PublisherNames; - public string PublisherName; public string DatabaseId; public DatabaseType DatabaseType; public string LandingPageUrl; diff --git a/TestApp/Main.cs b/TestApp/Main.cs index a768d9f..5816ceb 100644 --- a/TestApp/Main.cs +++ b/TestApp/Main.cs @@ -13,21 +13,22 @@ public static void Main(string[] args) DcatSettings settings = new DcatSettings { BaseUri = "https://www.baseURI.se/", - BaseApiUrl = "http://api.scb.se/OV0104/v1/doris/", - Languages = new List { "sv", "en" }, + //BaseApiUrl = "http://api.scb.se/OV0104/v1/doris/", + BaseApiUrl = "http://localhost:56338/api/v1/", + Languages = new List { "en" }, CatalogTitles = new List> { new KeyValuePair("sv", "SCB Tabeller"), new KeyValuePair("en", "SCB Tables") }, CatalogDescriptions = new List> { new KeyValuePair("sv", "SCB - Beskrivning"), new KeyValuePair("en", "SCB - Description") }, - PublisherName = "Statistics Sweden", - DatabaseId = @"C:\Temp\Databases\Example\Menu.xml", - DatabaseType = DatabaseType.PX, - //DBid = @"C:\Temp\StatFin2018\StatFin\Menu.xml", - //Fetcher = new PXFetcher(@"C:\Temp\StatFin2018"), + PublisherNames = new List> { new KeyValuePair("en", "Statistics Sweden"), new KeyValuePair("sv", "SCB") }, + DatabaseId = @"C:\Temp\Databases\Example/Menu.xml", + //DatabaseId = @"C:\Temp\StatFin2018\StatFin\Menu.xml", //DatabaseId = "ssd", + DatabaseType = DatabaseType.PX, //DatabaseType = DatabaseType.CNMM, - LandingPageUrl = "http://www.statistikdatabasen.scb.se/goto/", + LandingPageUrl = "http://localhost:56338/goto/", + //LandingPageUrl = "http://www.statistikdatabasen.scb.se/goto/", License = "http://creativecommons.org/publicdomain/zero/1.0/", - ThemeMapping = @"C:\Temp\DataportalXML\Themes.json", - OrganizationMapping = @"C:\Temp\DataportalXML\Organizations.json", + ThemeMapping = @"C:\Temp\DataportalXML\TestApp\Themes.json", + OrganizationMapping = @"C:\Temp\DataportalXML\TestApp\Organizations.json", MainLanguage = "en", }; DcatWriter.WriteToFile("../../../test.xml", settings);