diff --git a/findthatcharity_import/spiders/casc.py b/findthatcharity_import/spiders/casc.py index b7ea98e..3351314 100644 --- a/findthatcharity_import/spiders/casc.py +++ b/findthatcharity_import/spiders/casc.py @@ -76,7 +76,8 @@ def parse(self, response): "alternateName": [], "email": None, "description": None, - "organisationType": ["Sports Club", "Community Amateur Sports Club"], + "organisationType": ["Community Amateur Sports Club", "Sports Club"], + "organisationTypePrimary": "Community Amateur Sports Club", "url": None, "location": [], "latestIncome": None, diff --git a/findthatcharity_import/spiders/ccew.py b/findthatcharity_import/spiders/ccew.py index 9b6133d..879886a 100644 --- a/findthatcharity_import/spiders/ccew.py +++ b/findthatcharity_import/spiders/ccew.py @@ -263,6 +263,7 @@ def process_charities(self): "email": record.get("email"), "description": self.get_objects(record), "organisationType": org_types, + "organisationTypePrimary": 'Registered Charity', "url": self.parse_url(record.get("web")), "location": self.get_locations(record), "latestIncome": int(record["income"]) if record.get("income") else None, diff --git a/findthatcharity_import/spiders/ccni.py b/findthatcharity_import/spiders/ccni.py index 9ea5c2b..2ad1832 100644 --- a/findthatcharity_import/spiders/ccni.py +++ b/findthatcharity_import/spiders/ccni.py @@ -94,6 +94,7 @@ def parse_row(self, record): "email": record.get("Email"), "description": None, "organisationType": org_types, + "organisationTypePrimary": 'Registered Charity', "url": self.parse_url(record.get("Website")), "location": [], "latestIncome": int(record["Total income"]) if record.get("Total income") else None, diff --git a/findthatcharity_import/spiders/companies.py b/findthatcharity_import/spiders/companies.py index ac8eb07..cb631e2 100644 --- a/findthatcharity_import/spiders/companies.py +++ b/findthatcharity_import/spiders/companies.py @@ -173,6 +173,7 @@ def parse_row(self, row): "email": None, "description": None, 
"organisationType": orgtypes, + "organisationTypePrimary": record.get("CompanyCategory", "Registered Company"), "url": None, "location": [], "latestIncome": None, diff --git a/findthatcharity_import/spiders/gor.py b/findthatcharity_import/spiders/gor.py index 72f9794..1dd8fe5 100644 --- a/findthatcharity_import/spiders/gor.py +++ b/findthatcharity_import/spiders/gor.py @@ -73,6 +73,7 @@ def parse_row(self, record): "email": None, "description": None, "organisationType": org_types, + "organisationTypePrimary": org_types[0], "url": record.get("website"), "location": [], "latestIncome": None, diff --git a/findthatcharity_import/spiders/hesa.py b/findthatcharity_import/spiders/hesa.py index 45d25f8..fe18b70 100644 --- a/findthatcharity_import/spiders/hesa.py +++ b/findthatcharity_import/spiders/hesa.py @@ -74,6 +74,7 @@ def get_rows(self, response): "email": None, "description": None, "organisationType": self.org_types.get(cells[4].strip(), cells[4].strip()), + "organisationTypePrimary": self.org_types.get(cells[4].strip(), ["Higher Education"])[0], "url": None, "location": [], "latestIncome": None, diff --git a/findthatcharity_import/spiders/lae.py b/findthatcharity_import/spiders/lae.py index 46203e1..fd39479 100644 --- a/findthatcharity_import/spiders/lae.py +++ b/findthatcharity_import/spiders/lae.py @@ -85,8 +85,8 @@ def parse_row(self, record): "alternateName": [], "email": None, "description": None, - "organisationTypePrimary": "Local Authority", "organisationType": org_types, + "organisationTypePrimary": "Local Authority", "url": None, "location": locations, "latestIncome": None, diff --git a/findthatcharity_import/spiders/lani.py b/findthatcharity_import/spiders/lani.py index 6f24674..0664c6a 100644 --- a/findthatcharity_import/spiders/lani.py +++ b/findthatcharity_import/spiders/lani.py @@ -76,6 +76,7 @@ def parse_row(self, record): "email": None, "description": None, "organisationType": org_types, + "organisationTypePrimary": "Local Authority", "url": 
record.get("website"), "location": locations, "latestIncome": None, diff --git a/findthatcharity_import/spiders/las.py b/findthatcharity_import/spiders/las.py index 984b9eb..d4e6bec 100644 --- a/findthatcharity_import/spiders/las.py +++ b/findthatcharity_import/spiders/las.py @@ -79,6 +79,7 @@ def parse_row(self, record): "email": None, "description": None, "organisationType": org_types, + "organisationTypePrimary": "Local Authority", "url": None, "location": locations, "latestIncome": None, diff --git a/findthatcharity_import/spiders/mutuals.py b/findthatcharity_import/spiders/mutuals.py index 7cc0ffe..cf28299 100644 --- a/findthatcharity_import/spiders/mutuals.py +++ b/findthatcharity_import/spiders/mutuals.py @@ -79,6 +79,7 @@ # "email": None, # "description": record.get("Registration Act"), # "organisationType": org_types, +# "organisationTypePrimary": record.get("Registered As"), # "url": None, # "location": [], # "latestIncome": None, diff --git a/findthatcharity_import/spiders/nhsods.py b/findthatcharity_import/spiders/nhsods.py index c491b03..6c4b14f 100644 --- a/findthatcharity_import/spiders/nhsods.py +++ b/findthatcharity_import/spiders/nhsods.py @@ -123,9 +123,10 @@ def parse_row(self, record, org_type=None): record = self.clean_fields(record) - org_types = ["Health"] + org_types = [] if org_type: org_types.append(org_type) + org_types.append("Health") address = { "streetAddress": record.get("Address Line 1"), @@ -159,6 +160,7 @@ def parse_row(self, record, org_type=None): "email": None, "description": None, "organisationType": org_types, + "organisationTypePrimary": org_types[0], "url": None, "location": [], "latestIncome": None, diff --git a/findthatcharity_import/spiders/oscr.py b/findthatcharity_import/spiders/oscr.py index 41833c2..f003431 100644 --- a/findthatcharity_import/spiders/oscr.py +++ b/findthatcharity_import/spiders/oscr.py @@ -107,6 +107,7 @@ def parse_row(self, record): "email": None, "description": record.get("Objectives"), 
"organisationType": org_types, + "organisationTypePrimary": "Registered Charity", "url": self.parse_url(record.get("Website")), "location": [], "latestIncome": int(record["Most recent year income"]) if record.get("Most recent year income") else None, diff --git a/findthatcharity_import/spiders/pla.py b/findthatcharity_import/spiders/pla.py index f1cc9d7..f4b2499 100644 --- a/findthatcharity_import/spiders/pla.py +++ b/findthatcharity_import/spiders/pla.py @@ -80,6 +80,7 @@ def parse_row(self, record): "email": None, "description": None, "organisationType": org_types, + "organisationTypePrimary": "Local Authority", "url": None, "location": locations, "latestIncome": None, diff --git a/findthatcharity_import/spiders/rsl.py b/findthatcharity_import/spiders/rsl.py index 09dfb38..8e2b271 100644 --- a/findthatcharity_import/spiders/rsl.py +++ b/findthatcharity_import/spiders/rsl.py @@ -129,8 +129,8 @@ def parse_row(self, record): "alternateName": [], "email": None, "description": None, - "organisationTypePrimary": "Registered Provider of Social Housing", "organisationType": org_types, + "organisationTypePrimary": "Registered Provider of Social Housing", "url": None, "location": locations, "latestIncome": None, diff --git a/findthatcharity_import/spiders/schools_gias.py b/findthatcharity_import/spiders/schools_gias.py index 52165c0..8b587fc 100644 --- a/findthatcharity_import/spiders/schools_gias.py +++ b/findthatcharity_import/spiders/schools_gias.py @@ -70,6 +70,17 @@ def find_csv(self, response): self.source["modified"] = datetime.datetime.now().isoformat() return [scrapy.Request(response.urljoin(link), callback=self.parse_csv)] + def depluralise(self, s): + if not isinstance(s, str): + return s + if s == 'Other types': + return "Other school" + if s.endswith("ies"): + return s[:-3] + "y" + if s.endswith("s"): + return s[:-1] + return s + def parse_row(self, record): record = self.clean_fields(record) @@ -90,9 +101,10 @@ def parse_row(self, record): description=None, 
organisationType=[ "Education", - record.get("EstablishmentTypeGroup (name)"), - record.get("TypeOfEstablishment (name)"), + self.depluralise(record.get("EstablishmentTypeGroup (name)")), + self.depluralise(record.get("TypeOfEstablishment (name)")), ], + organisationTypePrimary=self.depluralise(record.get("EstablishmentTypeGroup (name)")), url=self.parse_url(record.get("SchoolWebsite")), location=self.get_locations(record), latestIncome=None, diff --git a/findthatcharity_import/spiders/schools_ni.py b/findthatcharity_import/spiders/schools_ni.py index cca18e0..56266d4 100644 --- a/findthatcharity_import/spiders/schools_ni.py +++ b/findthatcharity_import/spiders/schools_ni.py @@ -113,6 +113,7 @@ def parse_row(self, record): record.get("Management"), record.get("Type", "") + " School", ], + "organisationTypePrimary": record.get("Type", "") + " School", "url": None, "location": [], "latestIncome": None, diff --git a/findthatcharity_import/spiders/schools_scotland.py b/findthatcharity_import/spiders/schools_scotland.py index 47b2af4..f824d7f 100644 --- a/findthatcharity_import/spiders/schools_scotland.py +++ b/findthatcharity_import/spiders/schools_scotland.py @@ -131,6 +131,7 @@ def parse(self, response): email=record.get("e_mail"), description=None, organisationType=self.get_org_types(record), + organisationTypePrimary=self.get_org_types(record)[1], url=None, location=self.get_locations(record), latestIncome=None, diff --git a/findthatcharity_import/spiders/schools_wales.py b/findthatcharity_import/spiders/schools_wales.py index 6ea145f..f37a68f 100644 --- a/findthatcharity_import/spiders/schools_wales.py +++ b/findthatcharity_import/spiders/schools_wales.py @@ -112,6 +112,7 @@ def parse_row(self, record): "email": None, "description": None, "organisationType": self.get_org_types(record), + "organisationTypePrimary": self.get_org_types(record)[0], "url": None, "location": self.get_locations(record), "latestIncome": None, @@ -125,15 +126,14 @@ def parse_row(self, 
record): }) def get_org_types(self, record): - org_types = [ - "Education", - ] + org_types = [] for f in ["Sector", "Governance - see notes", "Welsh Medium Type - see notes", "School Type", "type"]: if record.get(f): if record.get(f) == "PRU": org_types.append("Pupil Referral Unit") else: org_types.append(record[f] + " School") + org_types.append("Education") return org_types def get_locations(self, record):