From 2befe87c0820a55c388a6955aa2f4688de065182 Mon Sep 17 00:00:00 2001
From: Lilian <lilian@boulard.fr>
Date: Thu, 8 Jun 2023 17:43:00 +0200
Subject: [PATCH 1/3] Add `overload_job_titles` parameter to
 `fetch_employee_salaries`

---
 skrub/datasets/_fetching.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
diff --git a/skrub/datasets/_fetching.py b/skrub/datasets/_fetching.py
index d2d96f0cd..1659baf0b 100644
--- a/skrub/datasets/_fetching.py
+++ b/skrub/datasets/_fetching.py
@@ -638,6 +638,7 @@ def fetch_employee_salaries(
     load_dataframe: bool = True,
     drop_linked: bool = True,
     drop_irrelevant: bool = True,
+    overload_job_titles: bool = True,
     directory: Optional[Union[Path, str]] = None,
 ) -> Union[DatasetAll, DatasetInfoOnly]:
     """Fetches the employee salaries dataset (regression), available at https://openml.org/d/42125
@@ -657,6 +658,11 @@ def fetch_employee_salaries(
         Drops column "full_name", which is usually irrelevant to the
         statistical analysis.
 
+    overload_job_titles : bool, default=True
+        Replaces the values of `employee_position_title` with those of
+        `underfilled_job_title` wherever the latter is not missing (it holds
+        a more detailed job title), then drops `underfilled_job_title`.
+
     Returns
     -------
     :obj:`DatasetAll`
@@ -685,6 +691,13 @@ def fetch_employee_salaries(
             )
         if drop_irrelevant:
             dataset.X.drop(["full_name"], axis=1, inplace=True)
+        if overload_job_titles:
+            dataset.X["employee_position_title"] = dataset.X[
+                "underfilled_job_title"
+            ].fillna(dataset.X["employee_position_title"])
+            dataset.X.drop(
+                labels=["underfilled_job_title"], axis="columns", inplace=True
+            )
 
     return dataset
 

From 9c99531f5230a9aacb6bc2f02b80241fa5dfe3cb Mon Sep 17 00:00:00 2001
From: Lilian <lilian@boulard.fr>
Date: Mon, 24 Jul 2023 14:22:06 +0200
Subject: [PATCH 2/3] Add changelog entry

---
 CHANGES.rst | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 42db0d0ad..4c2da3902 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -10,7 +10,7 @@ Ongoing development
 =====================
 
 Skrub has not been released yet. It is currently undergoing fast
-development and backward compatability is not ensured.
+development and backward compatibility is not ensured.
 
 Major changes
 -------------
@@ -102,6 +102,12 @@ Minor changes
 * :class:`TableVectorizer` doesn't fail anymore if an infered type doesn't work during transform.
   The new entries not matching the type are replaced by missing values. :pr:`666` by :user:`Leo Grinsztajn <LeoGrin>`
 
+- Dataset fetcher :func:`fetch_employee_salaries` now has a parameter
+  `overload_job_titles` to allow overloading the job titles
+  (`employee_position_title`) with the column `underfilled_job_title`,
+  which provides some more information about the job title.
+  :pr:`581` by :user:`Lilian Boulard <LilianBoulard>`
+
 Before skrub: dirty_cat
 ========================
 

From a50294169bebcdd5a318a3a3f9b7475c2a74d65b Mon Sep 17 00:00:00 2001
From: Lilian <lilian@boulard.fr>
Date: Mon, 24 Jul 2023 15:05:51 +0200
Subject: [PATCH 3/3] Fix :func: cross-reference path in changelog entry

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 4c2da3902..f80608051 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -102,7 +102,7 @@ Minor changes
 * :class:`TableVectorizer` doesn't fail anymore if an infered type doesn't work during transform.
   The new entries not matching the type are replaced by missing values. :pr:`666` by :user:`Leo Grinsztajn <LeoGrin>`
 
-- Dataset fetcher :func:`fetch_employee_salaries` now has a parameter
+- Dataset fetcher :func:`datasets.fetch_employee_salaries` now has a parameter
   `overload_job_titles` to allow overloading the job titles
   (`employee_position_title`) with the column `underfilled_job_title`,
   which provides some more information about the job title.