From 8b3d6762ac71cc46ccd7d6a836d88b123eb64f84 Mon Sep 17 00:00:00 2001 From: Sardar <160326696+saius02@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:48:01 +0200 Subject: [PATCH 01/15] zwischenstand --- src/safeds/data/tabular/containers/_table.py | 6 ++ .../containers/_table/test_remove_columns.py | 62 +++++++++++++++---- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 6f6282673..a70c4cdca 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -642,6 +642,8 @@ def remove_columns( self, names: str | list[str], /, + *, + ignore_unknown_names: bool = False, ) -> Table: """ Return a new table without the specified columns. @@ -690,6 +692,10 @@ def remove_columns( """ if isinstance(names, str): names = [names] + + if not ignore_unknown_names: + existing_columns = _check_columns_exist(self, names) + return Table._from_polars_lazy_frame( self._lazy_frame.drop(names), diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index 3cc8a5d61..077457d1d 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -1,17 +1,52 @@ import pytest from safeds.data.tabular.containers import Table +from safeds.exceptions import ColumnNotFoundError +# @pytest.mark.parametrize( +# ("table", "expected", "columns", "ignore_unknown_names", "should_raise"), +# [ +# (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"]), +# (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"]), +# (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), []), +# (Table(), Table(), []), +# (Table(), Table(), ["col1"]), +# ], +# ids=[ +# "one column", +# "multiple columns", +# "no columns", +# "empty", +# "missing columns", +# ], +# ) +# def test_should_remove_table_columns(table: Table, expected: Table, columns: list[str]) -> None: +# table = table.remove_columns(columns) +# assert table.schema == expected.schema +# assert table == expected +# assert table.row_count == expected.row_count + @pytest.mark.parametrize( - ("table", "expected", "columns"), + ("table", "expected", "columns", "ignore_unknown_names", "should_raise"), [ - (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"]), - (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"]), - (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), []), - (Table(), Table(), []), - (Table(), Table(), ["col1"]), + (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], True, False), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], True, False), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], True, False), + (Table(), Table(), [], True, False), + (Table(), Table(), ["col1"], True, False), + (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], False, False), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], False, False), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], False, False), + (Table(), Table(), [], False, False), + (Table(), Table(), ["col1"], True, True), + (Table(), Table(), ["col12"], False, True) ], ids=[ + "one column, ignore unknown names", + "multiple columns, ignore unknown names", + "no columns, ignore unknown names", + "empty, ignore unknown names", + "missing columns, ignore unknown names", "one column", "multiple columns", "no columns", @@ -19,8 +54,13 @@ "missing columns", ], ) -def test_should_remove_table_columns(table: Table, expected: Table, columns: list[str]) -> None: - table = table.remove_columns(columns) - assert table.schema == expected.schema - assert table == expected - assert table.row_count == expected.row_count +def test_should_remove_table_columns(table: Table, expected: Table, columns: list[str], ignore_unknown_names: bool, should_raise: bool) -> None: + if should_raise: + with pytest.raises(ColumnNotFoundError): + table.remove_columns(columns) + else: + table = table.remove_columns(columns, ignore_unknown_names=ignore_unknown_names) + assert table.schema == expected.schema + assert table == expected + assert table.row_count == expected.row_count + From f2c0c7f4929722d89e5a3cc356276d2bf201ceae Mon Sep 17 00:00:00 2001 From: Sardar <160326696+saius02@users.noreply.github.com> Date: Fri, 21 Jun 2024 15:40:42 +0200 Subject: [PATCH 02/15] fertig --- .../containers/_table/test_remove_columns.py | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index 077457d1d..b4162bcc9 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -3,29 +3,6 @@ from safeds.exceptions import ColumnNotFoundError -# @pytest.mark.parametrize( -# ("table", "expected", "columns", "ignore_unknown_names", "should_raise"), -# [ -# (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"]), -# (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"]), -# (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), []), -# (Table(), Table(), []), -# (Table(), Table(), ["col1"]), -# ], -# ids=[ -# "one column", -# "multiple columns", -# "no columns", -# "empty", -# "missing columns", -# ], -# ) -# def test_should_remove_table_columns(table: Table, expected: Table, columns: list[str]) -> None: -# table = table.remove_columns(columns) -# assert table.schema == expected.schema -# assert table == expected -# assert table.row_count == expected.row_count - @pytest.mark.parametrize( ("table", "expected", "columns", "ignore_unknown_names", "should_raise"), [ From 0c471f3ee113194b5e1cbbb6b2d62c823b05c66d Mon Sep 17 00:00:00 2001 From: Sardar <160326696+saius02@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:04:29 +0200 Subject: [PATCH 03/15] Ruff Error fixed completely --- src/safeds/data/tabular/containers/_table.py | 4 +--- .../data/tabular/containers/_table/test_remove_columns.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index a70c4cdca..ab5a697ff 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -651,8 +651,6 @@ def remove_columns( **Notes:** - The original table is not modified. - - This method does not raise if a column does not exist. You can use it to ensure that the resulting table does - not contain certain columns. Parameters ---------- @@ -694,7 +692,7 @@ def remove_columns( names = [names] if not ignore_unknown_names: - existing_columns = _check_columns_exist(self, names) + _check_columns_exist(self, names) return Table._from_polars_lazy_frame( diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index b4162bcc9..c66c69bae 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -16,7 +16,7 @@ (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], False, False), (Table(), Table(), [], False, False), (Table(), Table(), ["col1"], True, True), - (Table(), Table(), ["col12"], False, True) + (Table(), Table(), ["col12"], False, True), ], ids=[ "one column, ignore unknown names", From 33b7d1e7431ae15d90fd76b62d51a3f3b705d441 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:06:03 +0000 Subject: [PATCH 04/15] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_table.py | 3 +-- .../containers/_table/test_remove_columns.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index ab5a697ff..8511da69b 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -690,11 +690,10 @@ def remove_columns( """ if isinstance(names, str): names = [names] - + if not ignore_unknown_names: _check_columns_exist(self, names) - return Table._from_polars_lazy_frame( self._lazy_frame.drop(names), ) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index c66c69bae..cad741141 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -13,7 +13,13 @@ (Table(), Table(), ["col1"], True, False), (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], False, False), (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], False, False), - (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], False, False), + ( + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + [], + False, + False, + ), (Table(), Table(), [], False, False), (Table(), Table(), ["col1"], True, True), (Table(), Table(), ["col12"], False, True), @@ -31,7 +37,9 @@ "missing columns", ], ) -def test_should_remove_table_columns(table: Table, expected: Table, columns: list[str], ignore_unknown_names: bool, should_raise: bool) -> None: +def test_should_remove_table_columns( + table: Table, expected: Table, columns: list[str], ignore_unknown_names: bool, should_raise: bool, +) -> None: if should_raise: with pytest.raises(ColumnNotFoundError): table.remove_columns(columns) @@ -40,4 +48,3 @@ def test_should_remove_table_columns(table: Table, expected: Table, columns: lis assert table.schema == expected.schema assert table == expected assert table.row_count == expected.row_count - From 8ebf36a3eed4404246f00ade707d5ad138560a3c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:07:34 +0000 Subject: [PATCH 05/15] style: apply automated linter fixes --- .../data/tabular/containers/_table/test_remove_columns.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index cad741141..e812c15c8 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -38,7 +38,11 @@ ], ) def test_should_remove_table_columns( - table: Table, expected: Table, columns: list[str], ignore_unknown_names: bool, should_raise: bool, + table: Table, + expected: Table, + columns: list[str], + ignore_unknown_names: bool, + should_raise: bool, ) -> None: if should_raise: with pytest.raises(ColumnNotFoundError): From cd3b50ae23b5706d01617c303b9f7815133bec86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20Pl=C3=BCcker?= Date: Sat, 22 Jun 2024 18:05:37 +0200 Subject: [PATCH 06/15] some refactoring --- .../containers/_table/test_remove_columns.py | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index e812c15c8..dad75f2ba 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -2,27 +2,24 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import ColumnNotFoundError - +# Test cases where no exception is expected @pytest.mark.parametrize( - ("table", "expected", "columns", "ignore_unknown_names", "should_raise"), + ("table", "expected", "columns", "ignore_unknown_names"), [ - (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], True, False), - (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], True, False), - (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], True, False), - (Table(), Table(), [], True, False), - (Table(), Table(), ["col1"], True, False), - (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], False, False), - (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], False, False), + (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], True), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], True), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], True), + (Table(), Table(), [], True), + (Table(), Table(), ["col1"], True), + (Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table({"col1": [1, 2, 1]}), ["col2"], False), + (Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table(), ["col1", "col2"], False), ( Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [], False, - False, ), - (Table(), Table(), [], False, False), - (Table(), Table(), ["col1"], True, True), - (Table(), Table(), ["col12"], False, True), + (Table(), Table(), [], False), ], ids=[ "one column, ignore unknown names", @@ -34,21 +31,35 @@ "multiple columns", "no columns", "empty", - "missing columns", ], ) -def test_should_remove_table_columns( +def test_should_remove_table_columns_no_exception( table: Table, expected: Table, columns: list[str], ignore_unknown_names: bool, - should_raise: bool, ) -> None: - if should_raise: - with pytest.raises(ColumnNotFoundError): - table.remove_columns(columns) - else: - table = table.remove_columns(columns, ignore_unknown_names=ignore_unknown_names) - assert table.schema == expected.schema - assert table == expected - assert table.row_count == expected.row_count + table = table.remove_columns(columns, ignore_unknown_names=ignore_unknown_names) + assert table.schema == expected.schema + assert table == expected + assert table.row_count == expected.row_count + +# Test cases where an exception is expected +@pytest.mark.parametrize( + ("table", "columns", "ignore_unknown_names"), + [ + (Table(), ["col1"], False), + (Table(), ["col12"], False), + ], + ids=[ + "missing columns", + "missing columns", + ], +) +def test_should_raise_error_for_unknown_columns( + table: Table, + columns: list[str], + ignore_unknown_names: bool, +) -> None: + with pytest.raises(ColumnNotFoundError): + table.remove_columns(columns, ignore_unknown_names=ignore_unknown_names) \ No newline at end of file From 7f1bfba99427dbef81aafc2db57e4fa3b68c131d Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sat, 22 Jun 2024 16:07:03 +0000 Subject: [PATCH 07/15] style: apply automated linter fixes --- .../data/tabular/containers/_table/test_remove_columns.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index dad75f2ba..f65c29692 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -2,6 +2,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import ColumnNotFoundError + # Test cases where no exception is expected @pytest.mark.parametrize( ("table", "expected", "columns", "ignore_unknown_names"), @@ -44,6 +45,7 @@ def test_should_remove_table_columns_no_exception( assert table == expected assert table.row_count == expected.row_count + # Test cases where an exception is expected @pytest.mark.parametrize( ("table", "columns", "ignore_unknown_names"), @@ -62,4 +64,4 @@ def test_should_raise_error_for_unknown_columns( ignore_unknown_names: bool, ) -> None: with pytest.raises(ColumnNotFoundError): - table.remove_columns(columns, ignore_unknown_names=ignore_unknown_names) \ No newline at end of file + table.remove_columns(columns, ignore_unknown_names=ignore_unknown_names) From e57c5e3b495af87852b25d1976bffbc26d787777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20Pl=C3=BCcker?= Date: Mon, 24 Jun 2024 16:40:38 +0200 Subject: [PATCH 08/15] Small details adjusted Update remove_columns function and related usage - Added documentation for the "ignore_unknown_names" parameter in the remove_columns function. - Updated the replace_column function to call remove_columns with "ignore_unknown_names=True" to maintain existing behavior. - Updated the examples in the documentation to include "ignore_unknown_names=True". --- src/safeds/data/tabular/containers/_table.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 8511da69b..06cc3e98b 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -656,6 +656,9 @@ def remove_columns( ---------- names: The names of the columns to remove. + ignore_unknown_names: + If set to True, columns that are not present in the table will be ignored. + If set to False, an error will be raised if any of the specified columns do not exist. Returns ------- @@ -666,7 +669,7 @@ def remove_columns( -------- >>> from safeds.data.tabular.containers import Table >>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> table.remove_columns("a") + >>> table.remove_columns("a", ignore_unknown_names=True) +-----+ | b | | --- | @@ -677,7 +680,7 @@ def remove_columns( | 6 | +-----+ - >>> table.remove_columns(["c"]) + >>> table.remove_columns(["c"], ignore_unknown_names=True) +-----+-----+ | a | b | | --- | --- | @@ -934,7 +937,7 @@ def replace_column( _check_columns_dont_exist(self, [column.name for column in new_columns], old_name=old_name) if len(new_columns) == 0: - return self.remove_columns(old_name) + return self.remove_columns(old_name, ignore_unknown_names=True) if len(new_columns) == 1: new_column = new_columns[0] From d6c7fdb52be96156f1cbe3bae45c3f9a697784bb Mon Sep 17 00:00:00 2001 From: Sardar <160326696+saius02@users.noreply.github.com> Date: Fri, 28 Jun 2024 10:06:40 +0200 Subject: [PATCH 09/15] removes empty line --- .../safeds/data/tabular/containers/_table/test_remove_columns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index f65c29692..4b9345683 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -45,7 +45,6 @@ def test_should_remove_table_columns_no_exception( assert table == expected assert table.row_count == expected.row_count - # Test cases where an exception is expected @pytest.mark.parametrize( ("table", "columns", "ignore_unknown_names"), From c26236209d445aaa09cb1d188f7f3faaa00ab893 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 08:08:40 +0000 Subject: [PATCH 10/15] style: apply automated linter fixes --- .../safeds/data/tabular/containers/_table/test_remove_columns.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py index 4b9345683..f65c29692 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_columns.py @@ -45,6 +45,7 @@ def test_should_remove_table_columns_no_exception( assert table == expected assert table.row_count == expected.row_count + # Test cases where an exception is expected @pytest.mark.parametrize( ("table", "columns", "ignore_unknown_names"), From ef6414cf05eedf2ee2d3e2c92e120c1da21a8bc9 Mon Sep 17 00:00:00 2001 From: Sardar <160326696+saius02@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:05:37 +0200 Subject: [PATCH 11/15] update classification.ipynb --- docs/tutorials/classification.ipynb | 282 ++++++++++++++++++++-------- 1 file changed, 201 insertions(+), 81 deletions(-) diff --git a/docs/tutorials/classification.ipynb b/docs/tutorials/classification.ipynb index 0bba08deb..9e4065fc0 100644 --- a/docs/tutorials/classification.ipynb +++ b/docs/tutorials/classification.ipynb @@ -2,195 +2,315 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "# Classification\n", "\n", "This tutorial uses safeds on **titanic passenger data** to predict who will survive and who will not, using sex as a feature for the prediction.\n" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "1. Load your data into a `Table`, the data is available under `docs/tutorials/data/titanic.csv`:\n" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "1. Load your data into a `Table`, the data is available under `docs/tutorials/data/titanic.csv`:\n" + ] }, { "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (15, 12)
idnamesexagesiblings_spousesparents_childrentickettravel_classfarecabinport_embarkedsurvived
i64strstrf64i64i64stri64f64strstri64
0"Abbing, Mr. Anthony""male"42.000"C.A. 5547"37.55null"Southampton"0
1"Abbott, Master. Eugene Joseph""male"13.002"C.A. 2673"320.25null"Southampton"0
2"Abbott, Mr. Rossmore Edward""male"16.011"C.A. 2673"320.25null"Southampton"0
3"Abbott, Mrs. Stanton (Rosa Hun…"female"35.011"C.A. 2673"320.25null"Southampton"1
4"Abelseth, Miss. Karen Marie""female"16.000"348125"37.65null"Southampton"1
10"Adahl, Mr. Mauritz Nils Martin""male"30.000"C 7076"37.25null"Southampton"0
11"Adams, Mr. John""male"26.000"341826"38.05null"Southampton"0
12"Ahlin, Mrs. Johan (Johanna Per…"female"40.010"7546"39.475null"Southampton"0
13"Aks, Master. Philip Frank""male"0.833301"392091"39.35null"Southampton"1
14"Aks, Mrs. Sam (Leah Rosen)""female"18.001"392091"39.35null"Southampton"1
" + ], + "text/plain": [ + "+-----+----------------------+--------+----------+---+----------+-------+---------------+----------+\n", + "| id | name | sex | age | … | fare | cabin | port_embarked | survived |\n", + "| --- | --- | --- | --- | | --- | --- | --- | --- |\n", + "| i64 | str | str | f64 | | f64 | str | str | i64 |\n", + "+==================================================================================================+\n", + "| 0 | Abbing, Mr. Anthony | male | 42.00000 | … | 7.55000 | null | Southampton | 0 |\n", + "| 1 | Abbott, Master. | male | 13.00000 | … | 20.25000 | null | Southampton | 0 |\n", + "| | Eugene Joseph | | | | | | | |\n", + "| 2 | Abbott, Mr. Rossmore | male | 16.00000 | … | 20.25000 | null | Southampton | 0 |\n", + "| | Edward | | | | | | | |\n", + "| 3 | Abbott, Mrs. Stanton | female | 35.00000 | … | 20.25000 | null | Southampton | 1 |\n", + "| | (Rosa Hun… | | | | | | | |\n", + "| 4 | Abelseth, Miss. | female | 16.00000 | … | 7.65000 | null | Southampton | 1 |\n", + "| | Karen Marie | | | | | | | |\n", + "| … | … | … | … | … | … | … | … | … |\n", + "| 10 | Adahl, Mr. Mauritz | male | 30.00000 | … | 7.25000 | null | Southampton | 0 |\n", + "| | Nils Martin | | | | | | | |\n", + "| 11 | Adams, Mr. John | male | 26.00000 | … | 8.05000 | null | Southampton | 0 |\n", + "| 12 | Ahlin, Mrs. Johan | female | 40.00000 | … | 9.47500 | null | Southampton | 0 |\n", + "| | (Johanna Per… | | | | | | | |\n", + "| 13 | Aks, Master. Philip | male | 0.83330 | … | 9.35000 | null | Southampton | 1 |\n", + "| | Frank | | | | | | | |\n", + "| 14 | Aks, Mrs. Sam (Leah | female | 18.00000 | … | 9.35000 | null | Southampton | 1 |\n", + "| | Rosen) | | | | | | | |\n", + "+-----+----------------------+--------+----------+---+----------+-------+---------------+----------+" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from safeds.data.tabular.containers import Table\n", "\n", "titanic = Table.from_csv_file(\"data/titanic.csv\")\n", "#For visualisation purposes we only print out the first 15 rows.\n", "titanic.slice_rows(0, 15)" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "2. Split the titanic dataset into two tables. A training set, that we will use later to implement a training model to predict the survival of passengers, containing 60% of the data, and a testing set containing the rest of the data.\n", "Delete the column `survived` from the test set, to be able to predict it later:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "source": [ - "train_table, testing_table = titanic.split_rows(0.6)\n", - "\n", - "test_table = testing_table.remove_columns([\"survived\"]).shuffle_rows()" - ], + "execution_count": 72, "metadata": { "collapsed": false }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "train_table, testing_table = titanic.split_rows(0.6)\n", + "\n", + "test_table = testing_table.remove_columns([\"fare\"]).shuffle_rows()" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "3. Use `OneHotEncoder` to create an encoder, that will be used later to transform the training table.\n", "* We use `OneHotEncoder` to transform non-numerical categorical values into numerical representations with values of zero or one. In this example we will transform the values of the sex column, hence they will be used in the model for predicting the surviving of passengers.\n", "* Use the `fit` function of the `OneHotEncoder` to pass the table and the column names, that will be used as features to predict who will survive to the encoder.\n", "* The names of the column before transformation need to be saved, because `OneHotEncoder` changes the names of the fitted `Column`s:\n" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "from safeds.data.tabular.transformation import OneHotEncoder\n", "\n", "encoder = OneHotEncoder(column_names=\"sex\").fit(train_table)" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "4. Transform the training table using the fitted encoder, and create a set with the new names of the fitted `Column`s:\n" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "4. Transform the training table using the fitted encoder, and create a set with the new names of the fitted `Column`s:\n" + ] }, { "cell_type": "code", - "source": "transformed_table = encoder.transform(train_table)", + "execution_count": 74, "metadata": { "collapsed": false }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "transformed_table = encoder.transform(train_table)" + ] }, { "cell_type": "markdown", - "source": "5. Mark the `survived` `Column` as the target variable to be predicted. Include some columns only as extra columns, which are completely ignored by the model:", "metadata": { "collapsed": false - } + }, + "source": [ + "5. Mark the `survived` `Column` as the target variable to be predicted. Include some columns only as extra columns, which are completely ignored by the model:" + ] }, { "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "extra_names = [\"id\", \"name\", \"ticket\", \"cabin\", \"port_embarked\", \"age\", \"fare\"]\n", "\n", "train_tabular_dataset = transformed_table.to_tabular_dataset(\"survived\", extra_names=extra_names)" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": "6. Use `RandomForest` classifier as a model for the classification. Pass the \"train_tabular_dataset\" table to the fit function of the model:", "metadata": { "collapsed": false - } + }, + "source": [ + "6. Use `RandomForest` classifier as a model for the classification. Pass the \"train_tabular_dataset\" table to the fit function of the model:" + ] }, { "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "from safeds.ml.classical.classification import RandomForestClassifier\n", "\n", "model = RandomForestClassifier()\n", "fitted_model= model.fit(train_tabular_dataset)" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "7. Use the fitted random forest model, that we trained on the training dataset to predict the survival rate of passengers in the test dataset.\n", "Transform the test data with `OneHotEncoder` first, to be able to pass it to the predict function, that uses our fitted random forest model for prediction:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (15, 12)
idnameagesiblings_spousesparents_childrentickettravel_classcabinport_embarkedsex__femalesex__malesurvived
i64strf64i64i64stri64strstru8u8i64
301"Danoff, Mr. Yoto"27.000"349219"3null"Southampton"010
477"Greenfield, Mr. William Bertra…23.001"PC 17759"1"D10 D12""Cherbourg"010
386"Farrell, Mr. James"40.500"367232"3null"Queenstown"010
864"Newell, Miss. Madeleine"31.010"35273"1"D36""Cherbourg"101
267"Cor, Mr. Ivan"27.000"349229"3null"Southampton"010
1044"Ryerson, Master. John Borie"13.022"PC 17608"1"B57 B59 B63 B66""Cherbourg"011
641"Keane, Mr. Daniel"35.000"233734"2null"Queenstown"010
222"Chambers, Mrs. Norman Campbell…33.010"113806"1"E8""Southampton"101
926"Palsson, Miss. Torborg Danira"8.031"349909"3null"Southampton"100
171"Burns, Miss. Elizabeth Margare…41.000"16966"1"E40""Cherbourg"101
" + ], + "text/plain": [ + "+------+-------------+----------+-------------+---+------------+------------+-----------+----------+\n", + "| id | name | age | siblings_sp | … | port_embar | sex__femal | sex__male | survived |\n", + "| --- | --- | --- | ouses | | ked | e | --- | --- |\n", + "| i64 | str | f64 | --- | | --- | --- | u8 | i64 |\n", + "| | | | i64 | | str | u8 | | |\n", + "+==================================================================================================+\n", + "| 301 | Danoff, Mr. | 27.00000 | 0 | … | Southampto | 0 | 1 | 0 |\n", + "| | Yoto | | | | n | | | |\n", + "| 477 | Greenfield, | 23.00000 | 0 | … | Cherbourg | 0 | 1 | 0 |\n", + "| | Mr. William | | | | | | | |\n", + "| | Bertra… | | | | | | | |\n", + "| 386 | Farrell, | 40.50000 | 0 | … | Queenstown | 0 | 1 | 0 |\n", + "| | Mr. James | | | | | | | |\n", + "| 864 | Newell, | 31.00000 | 1 | … | Cherbourg | 1 | 0 | 1 |\n", + "| | Miss. | | | | | | | |\n", + "| | Madeleine | | | | | | | |\n", + "| 267 | Cor, Mr. | 27.00000 | 0 | … | Southampto | 0 | 1 | 0 |\n", + "| | Ivan | | | | n | | | |\n", + "| … | … | … | … | … | … | … | … | … |\n", + "| 1044 | Ryerson, | 13.00000 | 2 | … | Cherbourg | 0 | 1 | 1 |\n", + "| | Master. | | | | | | | |\n", + "| | John Borie | | | | | | | |\n", + "| 641 | Keane, Mr. | 35.00000 | 0 | … | Queenstown | 0 | 1 | 0 |\n", + "| | Daniel | | | | | | | |\n", + "| 222 | Chambers, | 33.00000 | 1 | … | Southampto | 1 | 0 | 1 |\n", + "| | Mrs. Norman | | | | n | | | |\n", + "| | Campbell… | | | | | | | |\n", + "| 926 | Palsson, | 8.00000 | 3 | … | Southampto | 1 | 0 | 0 |\n", + "| | Miss. | | | | n | | | |\n", + "| | Torborg | | | | | | | |\n", + "| | Danira | | | | | | | |\n", + "| 171 | Burns, | 41.00000 | 0 | … | Cherbourg | 1 | 0 | 1 |\n", + "| | Miss. | | | | | | | |\n", + "| | Elizabeth | | | | | | | |\n", + "| | Margare… | | | | | | | |\n", + "+------+-------------+----------+-------------+---+------------+------------+-----------+----------+" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "transformed_test_table = encoder.transform(test_table)\n", "\n", "prediction = fitted_model.predict(\n", " transformed_test_table\n", ")\n", + "\n", + "\n", "#For visualisation purposes we only print out the first 15 rows.\n", "prediction.to_table().slice_rows(start=0, length=15)" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "8. You can test the accuracy of that model with the initial testing_table as follows:" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "8. You can test the accuracy of that model with the initial testing_table as follows:" + ] }, { "cell_type": "code", + "execution_count": 78, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7614503816793893" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "testing_table = encoder.transform(testing_table)\n", "\n", "test_tabular_dataset = testing_table.to_tabular_dataset(\"survived\", extra_names=extra_names)\n", "fitted_model.accuracy(test_tabular_dataset)\n" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null, - "outputs": [] + ] } ], "metadata": { @@ -202,14 +322,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.3" } }, "nbformat": 4, From 771dcccf33bf1847c21488cab14e7a5d764cc49d Mon Sep 17 00:00:00 2001 From: Sardar <160326696+saius02@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:31:52 +0200 Subject: [PATCH 12/15] remove_columns updated in other functions --- docs/tutorials/classification.ipynb | 134 ++---------------- src/safeds/ml/classical/_supervised_model.py | 2 +- src/safeds/ml/classical/regression/_arima.py | 2 +- .../classification/test_classifier.py | 2 +- .../ml/classical/regression/test_regressor.py | 2 +- tests/safeds/ml/nn/test_forward_workflow.py | 2 +- 6 files changed, 16 insertions(+), 128 deletions(-) diff --git a/docs/tutorials/classification.ipynb b/docs/tutorials/classification.ipynb index 9e4065fc0..ec700642b 100644 --- a/docs/tutorials/classification.ipynb +++ b/docs/tutorials/classification.ipynb @@ -22,56 +22,11 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (15, 12)
idnamesexagesiblings_spousesparents_childrentickettravel_classfarecabinport_embarkedsurvived
i64strstrf64i64i64stri64f64strstri64
0"Abbing, Mr. Anthony""male"42.000"C.A. 5547"37.55null"Southampton"0
1"Abbott, Master. Eugene Joseph""male"13.002"C.A. 2673"320.25null"Southampton"0
2"Abbott, Mr. Rossmore Edward""male"16.011"C.A. 2673"320.25null"Southampton"0
3"Abbott, Mrs. Stanton (Rosa Hun…"female"35.011"C.A. 2673"320.25null"Southampton"1
4"Abelseth, Miss. Karen Marie""female"16.000"348125"37.65null"Southampton"1
10"Adahl, Mr. Mauritz Nils Martin""male"30.000"C 7076"37.25null"Southampton"0
11"Adams, Mr. John""male"26.000"341826"38.05null"Southampton"0
12"Ahlin, Mrs. Johan (Johanna Per…"female"40.010"7546"39.475null"Southampton"0
13"Aks, Master. Philip Frank""male"0.833301"392091"39.35null"Southampton"1
14"Aks, Mrs. Sam (Leah Rosen)""female"18.001"392091"39.35null"Southampton"1
" - ], - "text/plain": [ - "+-----+----------------------+--------+----------+---+----------+-------+---------------+----------+\n", - "| id | name | sex | age | … | fare | cabin | port_embarked | survived |\n", - "| --- | --- | --- | --- | | --- | --- | --- | --- |\n", - "| i64 | str | str | f64 | | f64 | str | str | i64 |\n", - "+==================================================================================================+\n", - "| 0 | Abbing, Mr. Anthony | male | 42.00000 | … | 7.55000 | null | Southampton | 0 |\n", - "| 1 | Abbott, Master. | male | 13.00000 | … | 20.25000 | null | Southampton | 0 |\n", - "| | Eugene Joseph | | | | | | | |\n", - "| 2 | Abbott, Mr. Rossmore | male | 16.00000 | … | 20.25000 | null | Southampton | 0 |\n", - "| | Edward | | | | | | | |\n", - "| 3 | Abbott, Mrs. Stanton | female | 35.00000 | … | 20.25000 | null | Southampton | 1 |\n", - "| | (Rosa Hun… | | | | | | | |\n", - "| 4 | Abelseth, Miss. | female | 16.00000 | … | 7.65000 | null | Southampton | 1 |\n", - "| | Karen Marie | | | | | | | |\n", - "| … | … | … | … | … | … | … | … | … |\n", - "| 10 | Adahl, Mr. Mauritz | male | 30.00000 | … | 7.25000 | null | Southampton | 0 |\n", - "| | Nils Martin | | | | | | | |\n", - "| 11 | Adams, Mr. John | male | 26.00000 | … | 8.05000 | null | Southampton | 0 |\n", - "| 12 | Ahlin, Mrs. Johan | female | 40.00000 | … | 9.47500 | null | Southampton | 0 |\n", - "| | (Johanna Per… | | | | | | | |\n", - "| 13 | Aks, Master. Philip | male | 0.83330 | … | 9.35000 | null | Southampton | 1 |\n", - "| | Frank | | | | | | | |\n", - "| 14 | Aks, Mrs. Sam (Leah | female | 18.00000 | … | 9.35000 | null | Southampton | 1 |\n", - "| | Rosen) | | | | | | | |\n", - "+-----+----------------------+--------+----------+---+----------+-------+---------------+----------+" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from safeds.data.tabular.containers import Table\n", "\n", @@ -92,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": null, "metadata": { "collapsed": false }, @@ -117,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": null, "metadata": { "collapsed": false }, @@ -139,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": { "collapsed": false }, @@ -159,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "metadata": { "collapsed": false }, @@ -181,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": null, "metadata": { "collapsed": false }, @@ -205,67 +160,11 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (15, 12)
idnameagesiblings_spousesparents_childrentickettravel_classcabinport_embarkedsex__femalesex__malesurvived
i64strf64i64i64stri64strstru8u8i64
301"Danoff, Mr. Yoto"27.000"349219"3null"Southampton"010
477"Greenfield, Mr. William Bertra…23.001"PC 17759"1"D10 D12""Cherbourg"010
386"Farrell, Mr. James"40.500"367232"3null"Queenstown"010
864"Newell, Miss. Madeleine"31.010"35273"1"D36""Cherbourg"101
267"Cor, Mr. Ivan"27.000"349229"3null"Southampton"010
1044"Ryerson, Master. John Borie"13.022"PC 17608"1"B57 B59 B63 B66""Cherbourg"011
641"Keane, Mr. Daniel"35.000"233734"2null"Queenstown"010
222"Chambers, Mrs. Norman Campbell…33.010"113806"1"E8""Southampton"101
926"Palsson, Miss. Torborg Danira"8.031"349909"3null"Southampton"100
171"Burns, Miss. Elizabeth Margare…41.000"16966"1"E40""Cherbourg"101
" - ], - "text/plain": [ - "+------+-------------+----------+-------------+---+------------+------------+-----------+----------+\n", - "| id | name | age | siblings_sp | … | port_embar | sex__femal | sex__male | survived |\n", - "| --- | --- | --- | ouses | | ked | e | --- | --- |\n", - "| i64 | str | f64 | --- | | --- | --- | u8 | i64 |\n", - "| | | | i64 | | str | u8 | | |\n", - "+==================================================================================================+\n", - "| 301 | Danoff, Mr. | 27.00000 | 0 | … | Southampto | 0 | 1 | 0 |\n", - "| | Yoto | | | | n | | | |\n", - "| 477 | Greenfield, | 23.00000 | 0 | … | Cherbourg | 0 | 1 | 0 |\n", - "| | Mr. William | | | | | | | |\n", - "| | Bertra… | | | | | | | |\n", - "| 386 | Farrell, | 40.50000 | 0 | … | Queenstown | 0 | 1 | 0 |\n", - "| | Mr. James | | | | | | | |\n", - "| 864 | Newell, | 31.00000 | 1 | … | Cherbourg | 1 | 0 | 1 |\n", - "| | Miss. | | | | | | | |\n", - "| | Madeleine | | | | | | | |\n", - "| 267 | Cor, Mr. | 27.00000 | 0 | … | Southampto | 0 | 1 | 0 |\n", - "| | Ivan | | | | n | | | |\n", - "| … | … | … | … | … | … | … | … | … |\n", - "| 1044 | Ryerson, | 13.00000 | 2 | … | Cherbourg | 0 | 1 | 1 |\n", - "| | Master. | | | | | | | |\n", - "| | John Borie | | | | | | | |\n", - "| 641 | Keane, Mr. | 35.00000 | 0 | … | Queenstown | 0 | 1 | 0 |\n", - "| | Daniel | | | | | | | |\n", - "| 222 | Chambers, | 33.00000 | 1 | … | Southampto | 1 | 0 | 1 |\n", - "| | Mrs. Norman | | | | n | | | |\n", - "| | Campbell… | | | | | | | |\n", - "| 926 | Palsson, | 8.00000 | 3 | … | Southampto | 1 | 0 | 0 |\n", - "| | Miss. | | | | n | | | |\n", - "| | Torborg | | | | | | | |\n", - "| | Danira | | | | | | | |\n", - "| 171 | Burns, | 41.00000 | 0 | … | Cherbourg | 1 | 0 | 1 |\n", - "| | Miss. | | | | | | | |\n", - "| | Elizabeth | | | | | | | |\n", - "| | Margare… | | | | | | | |\n", - "+------+-------------+----------+-------------+---+------------+------------+-----------+----------+" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "transformed_test_table = encoder.transform(test_table)\n", "\n", @@ -289,22 +188,11 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7614503816793893" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "testing_table = encoder.transform(testing_table)\n", "\n", diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index a075f1855..9c946eca5 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -411,7 +411,7 @@ def _predict_with_sklearn_model( with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="X does not have valid feature names") predicted_target_vector = model.predict(features._data_frame) - output = dataset.remove_columns(target_name).add_columns( + output = dataset.remove_columns(target_name, ignore_unknown_names= True).add_columns( Column(target_name, predicted_target_vector), ) diff --git a/src/safeds/ml/classical/regression/_arima.py b/src/safeds/ml/classical/regression/_arima.py index ba9c7c7d4..d5c2ebc6f 100644 --- a/src/safeds/ml/classical/regression/_arima.py +++ b/src/safeds/ml/classical/regression/_arima.py @@ -136,7 +136,7 @@ def predict(self, time_series: TimeSeriesDataset) -> Table: # make a table without forecast_horizon = len(time_series.target._series.to_numpy()) result_table = time_series.to_table() - result_table = result_table.remove_columns([time_series.target.name]) + result_table = result_table.remove_columns([time_series.target.name], ignore_unknown_names= True) # Validation if not self.is_fitted or self._arima is None: raise ModelNotFittedError diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 52d6a926d..109228893 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -192,7 +192,7 @@ def test_should_raise_if_not_fitted(self, classifier: Classifier, valid_data: Ta def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, valid_data: TabularDataset) -> None: fitted_classifier = classifier.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_classifier.predict(valid_data.features.remove_columns(["feat1", "feat2"])) + fitted_classifier.predict(valid_data.features.remove_columns(["feat1", "feat2"], ignore_unknown_names= True)) @pytest.mark.parametrize( ("invalid_data", "expected_error", "expected_error_msg"), diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 829418b14..438c46c56 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -190,7 +190,7 @@ def test_should_raise_if_not_fitted(self, regressor: Regressor, valid_data: Tabu def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, valid_data: TabularDataset) -> None: fitted_regressor = regressor.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_regressor.predict(valid_data.features.remove_columns(["feat1", "feat2"])) + fitted_regressor.predict(valid_data.features.remove_columns(["feat1", "feat2"], ignore_unknown_names= True)) @pytest.mark.parametrize( ("invalid_data", "expected_error", "expected_error_msg"), diff --git a/tests/safeds/ml/nn/test_forward_workflow.py b/tests/safeds/ml/nn/test_forward_workflow.py index 763c39666..e56b3b464 100644 --- a/tests/safeds/ml/nn/test_forward_workflow.py +++ b/tests/safeds/ml/nn/test_forward_workflow.py @@ -25,7 +25,7 @@ def test_forward_model(device: Device) -> None: table_1 = Table.from_csv_file( path=resolve_resource_path(_inflation_path), ) - table_1 = table_1.remove_columns(["date"]) + table_1 = table_1.remove_columns(["date"], ignore_unknown_names= True) table_2 = table_1.slice_rows(start=0, length=table_1.row_count - 14) table_2 = table_2.add_columns([(table_1.slice_rows(start=14)).get_column("value").rename("target")]) train_table, test_table = table_2.split_rows(0.8) From b0da239458891cf269b51796c7eb8d6af1b28689 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 09:33:24 +0000 Subject: [PATCH 13/15] style: apply automated linter fixes --- src/safeds/ml/classical/_supervised_model.py | 2 +- src/safeds/ml/classical/regression/_arima.py | 2 +- tests/safeds/ml/classical/classification/test_classifier.py | 2 +- tests/safeds/ml/classical/regression/test_regressor.py | 2 +- tests/safeds/ml/nn/test_forward_workflow.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 9c946eca5..8c5e966f9 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -411,7 +411,7 @@ def _predict_with_sklearn_model( with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="X does not have valid feature names") predicted_target_vector = model.predict(features._data_frame) - output = dataset.remove_columns(target_name, ignore_unknown_names= True).add_columns( + output = dataset.remove_columns(target_name, ignore_unknown_names=True).add_columns( Column(target_name, predicted_target_vector), ) diff --git a/src/safeds/ml/classical/regression/_arima.py b/src/safeds/ml/classical/regression/_arima.py index d5c2ebc6f..01163250d 100644 --- a/src/safeds/ml/classical/regression/_arima.py +++ b/src/safeds/ml/classical/regression/_arima.py @@ -136,7 +136,7 @@ def predict(self, time_series: TimeSeriesDataset) -> Table: # make a table without forecast_horizon = len(time_series.target._series.to_numpy()) result_table = time_series.to_table() - result_table = result_table.remove_columns([time_series.target.name], ignore_unknown_names= True) + result_table = result_table.remove_columns([time_series.target.name], ignore_unknown_names=True) # Validation if not self.is_fitted or self._arima is None: raise ModelNotFittedError diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 109228893..66342f510 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -192,7 +192,7 @@ def test_should_raise_if_not_fitted(self, classifier: Classifier, valid_data: Ta def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, valid_data: TabularDataset) -> None: fitted_classifier = classifier.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_classifier.predict(valid_data.features.remove_columns(["feat1", "feat2"], ignore_unknown_names= True)) + fitted_classifier.predict(valid_data.features.remove_columns(["feat1", "feat2"], ignore_unknown_names=True)) @pytest.mark.parametrize( ("invalid_data", "expected_error", "expected_error_msg"), diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 438c46c56..07207c775 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -190,7 +190,7 @@ def test_should_raise_if_not_fitted(self, regressor: Regressor, valid_data: Tabu def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, valid_data: TabularDataset) -> None: fitted_regressor = regressor.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_regressor.predict(valid_data.features.remove_columns(["feat1", "feat2"], ignore_unknown_names= True)) + fitted_regressor.predict(valid_data.features.remove_columns(["feat1", "feat2"], ignore_unknown_names=True)) @pytest.mark.parametrize( ("invalid_data", "expected_error", "expected_error_msg"), diff --git a/tests/safeds/ml/nn/test_forward_workflow.py b/tests/safeds/ml/nn/test_forward_workflow.py index e56b3b464..acb8a1cd9 100644 --- a/tests/safeds/ml/nn/test_forward_workflow.py +++ b/tests/safeds/ml/nn/test_forward_workflow.py @@ -25,7 +25,7 @@ def test_forward_model(device: Device) -> None: table_1 = Table.from_csv_file( path=resolve_resource_path(_inflation_path), ) - table_1 = table_1.remove_columns(["date"], ignore_unknown_names= True) + table_1 = table_1.remove_columns(["date"], ignore_unknown_names=True) table_2 = table_1.slice_rows(start=0, length=table_1.row_count - 14) table_2 = table_2.add_columns([(table_1.slice_rows(start=14)).get_column("value").rename("target")]) train_table, test_table = table_2.split_rows(0.8) From f36fe4108b3d5b6e51fd9cc98c7d3aae76bc117f Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 28 Jun 2024 13:09:49 +0200 Subject: [PATCH 14/15] docs: undo changes to classification tutorial --- docs/tutorials/classification.ipynb | 170 +++++++++++++--------------- 1 file changed, 81 insertions(+), 89 deletions(-) diff --git a/docs/tutorials/classification.ipynb b/docs/tutorials/classification.ipynb index ec700642b..0bba08deb 100644 --- a/docs/tutorials/classification.ipynb +++ b/docs/tutorials/classification.ipynb @@ -2,203 +2,195 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "collapsed": false - }, "source": [ "# Classification\n", "\n", "This tutorial uses safeds on **titanic passenger data** to predict who will survive and who will not, using sex as a feature for the prediction.\n" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "markdown", - "metadata": { - "collapsed": false - }, "source": [ "1. Load your data into a `Table`, the data is available under `docs/tutorials/data/titanic.csv`:\n" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "from safeds.data.tabular.containers import Table\n", "\n", "titanic = Table.from_csv_file(\"data/titanic.csv\")\n", "#For visualisation purposes we only print out the first 15 rows.\n", "titanic.slice_rows(0, 15)" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "collapsed": false }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", "source": [ "2. Split the titanic dataset into two tables. A training set, that we will use later to implement a training model to predict the survival of passengers, containing 60% of the data, and a testing set containing the rest of the data.\n", "Delete the column `survived` from the test set, to be able to predict it later:" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "train_table, testing_table = titanic.split_rows(0.6)\n", "\n", - "test_table = testing_table.remove_columns([\"fare\"]).shuffle_rows()" - ] - }, - { - "cell_type": "markdown", + "test_table = testing_table.remove_columns([\"survived\"]).shuffle_rows()" + ], "metadata": { "collapsed": false }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", "source": [ "3. Use `OneHotEncoder` to create an encoder, that will be used later to transform the training table.\n", "* We use `OneHotEncoder` to transform non-numerical categorical values into numerical representations with values of zero or one. In this example we will transform the values of the sex column, hence they will be used in the model for predicting the surviving of passengers.\n", "* Use the `fit` function of the `OneHotEncoder` to pass the table and the column names, that will be used as features to predict who will survive to the encoder.\n", "* The names of the column before transformation need to be saved, because `OneHotEncoder` changes the names of the fitted `Column`s:\n" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "from safeds.data.tabular.transformation import OneHotEncoder\n", "\n", "encoder = OneHotEncoder(column_names=\"sex\").fit(train_table)" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "collapsed": false }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", "source": [ "4. Transform the training table using the fitted encoder, and create a set with the new names of the fitted `Column`s:\n" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": null, + "source": "transformed_table = encoder.transform(train_table)", "metadata": { "collapsed": false }, - "outputs": [], - "source": [ - "transformed_table = encoder.transform(train_table)" - ] + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", + "source": "5. Mark the `survived` `Column` as the target variable to be predicted. Include some columns only as extra columns, which are completely ignored by the model:", "metadata": { "collapsed": false - }, - "source": [ - "5. Mark the `survived` `Column` as the target variable to be predicted. Include some columns only as extra columns, which are completely ignored by the model:" - ] + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "extra_names = [\"id\", \"name\", \"ticket\", \"cabin\", \"port_embarked\", \"age\", \"fare\"]\n", "\n", "train_tabular_dataset = transformed_table.to_tabular_dataset(\"survived\", extra_names=extra_names)" - ] + ], + "metadata": { + "collapsed": false + }, + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", + "source": "6. Use `RandomForest` classifier as a model for the classification. Pass the \"train_tabular_dataset\" table to the fit function of the model:", "metadata": { "collapsed": false - }, - "source": [ - "6. Use `RandomForest` classifier as a model for the classification. Pass the \"train_tabular_dataset\" table to the fit function of the model:" - ] + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "from safeds.ml.classical.classification import RandomForestClassifier\n", "\n", "model = RandomForestClassifier()\n", "fitted_model= model.fit(train_tabular_dataset)" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "collapsed": false }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", "source": [ "7. Use the fitted random forest model, that we trained on the training dataset to predict the survival rate of passengers in the test dataset.\n", "Transform the test data with `OneHotEncoder` first, to be able to pass it to the predict function, that uses our fitted random forest model for prediction:" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "transformed_test_table = encoder.transform(test_table)\n", "\n", "prediction = fitted_model.predict(\n", " transformed_test_table\n", ")\n", - "\n", - "\n", "#For visualisation purposes we only print out the first 15 rows.\n", "prediction.to_table().slice_rows(start=0, length=15)" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "collapsed": false }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", "source": [ "8. You can test the accuracy of that model with the initial testing_table as follows:" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "testing_table = encoder.transform(testing_table)\n", "\n", "test_tabular_dataset = testing_table.to_tabular_dataset(\"survived\", extra_names=extra_names)\n", "fitted_model.accuracy(test_tabular_dataset)\n" - ] + ], + "metadata": { + "collapsed": false + }, + "execution_count": null, + "outputs": [] } ], "metadata": { @@ -210,14 +202,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" + "pygments_lexer": "ipython2", + "version": "2.7.6" } }, "nbformat": 4, From f69393ff863b383c66e7bc62e2cd2df03b411f7b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 28 Jun 2024 13:10:43 +0200 Subject: [PATCH 15/15] docs: example with default behavior --- src/safeds/data/tabular/containers/_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 06cc3e98b..b4be7f697 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -669,7 +669,7 @@ def remove_columns( -------- >>> from safeds.data.tabular.containers import Table >>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> table.remove_columns("a", ignore_unknown_names=True) + >>> table.remove_columns("a") +-----+ | b | | --- |