From 155b1c045754a349bd7b41fef758b68adc3ec42c Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 29 Mar 2024 18:51:55 +0100 Subject: [PATCH] feat: stubs for `safe-ds` library (#950) Closes #926 ### Summary of Changes Add a first version of stubs for the `safe-ds` library. --- .../data/image/containers/image.sdsstub | 262 +++++++ .../data/tabular/containers/column.sdsstub | 265 +++++++ .../data/tabular/containers/row.sdsstub | 118 +++ .../data/tabular/containers/table.sdsstub | 735 ++++++++++++++++++ .../tabular/containers/tagged_table.sdsstub | 347 +++++++++ .../tabular/containers/time_series.sdsstub | 371 +++++++++ .../transformation/discretizer.sdsstub | 29 + .../tabular/transformation/imputer.sdsstub | 53 ++ .../transformation/label_encoder.sdsstub | 25 + .../transformation/one_hot_encoder.sdsstub | 48 ++ .../transformation/range_scaler.sdsstub | 31 + .../transformation/standard_scaler.sdsstub | 25 + .../transformation/table_transformer.sdsstub | 125 +++ .../data/tabular/typing/column_type.sdsstub | 26 + .../safeds/data/tabular/typing/schema.sdsstub | 72 ++ .../classification/ada_boost.sdsstub | 47 ++ .../classification/classifier.sdsstub | 101 +++ .../classification/decision_tree.sdsstub | 24 + .../classification/gradient_boosting.sdsstub | 41 + .../k_nearest_neighbors.sdsstub | 34 + .../logistic_regression.sdsstub | 24 + .../classification/random_forest.sdsstub | 33 + .../support_vector_machine.sdsstub | 66 ++ .../ml/classical/regression/ada_boost.sdsstub | 47 ++ .../regression/decision_tree.sdsstub | 24 + .../regression/elastic_net_regression.sdsstub | 40 + .../regression/gradient_boosting.sdsstub | 41 + .../regression/k_nearest_neighbors.sdsstub | 34 + .../regression/lasso_regression.sdsstub | 33 + .../regression/linear_regression.sdsstub | 24 + .../regression/random_forest.sdsstub | 33 + .../ml/classical/regression/regressor.sdsstub | 69 ++ .../regression/ridge_regression.sdsstub | 33 + .../regression/support_vector_machine.sdsstub | 67 ++ 
.../builtins/safeds/ml/nn/classifier.sdsstub | 50 ++ .../builtins/safeds/ml/nn/fnn_layer.sdsstub | 15 + .../builtins/safeds/ml/nn/regressor.sdsstub | 50 ++ 37 files changed, 3462 insertions(+) create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/image/containers/image.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/column.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/row.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/table.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/tagged_table.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/time_series.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/discretizer.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/imputer.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/label_encoder.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/one_hot_encoder.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/range_scaler.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/standard_scaler.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/table_transformer.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/column_type.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/schema.sdsstub create mode 100644 
packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/ada_boost.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/classifier.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/decision_tree.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/gradient_boosting.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/k_nearest_neighbors.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/logistic_regression.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/random_forest.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/support_vector_machine.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ada_boost.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/decision_tree.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/elastic_net_regression.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/gradient_boosting.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/k_nearest_neighbors.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/lasso_regression.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/linear_regression.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/random_forest.sdsstub create mode 100644 
packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/regressor.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ridge_regression.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/support_vector_machine.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/classifier.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/fnn_layer.sdsstub create mode 100644 packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/regressor.sdsstub diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/image/containers/image.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/image/containers/image.sdsstub new file mode 100644 index 000000000..6a9107b98 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/image/containers/image.sdsstub @@ -0,0 +1,262 @@ +package safeds.data.image.containers + +/** + * A container for image data. + */ +class Image { + /** + * Get the width of the image in pixels. + */ + attr width: Int + /** + * Get the height of the image in pixels. + */ + attr height: Int + /** + * Get the number of channels of the image. + */ + attr channel: Int + + /** + * Create an image from a file. + * + * @param path The path to the image file. + * @param device The device where the tensor will be saved on. Defaults to the default device + * + * @result result1 The image. + */ + @Impure([ImpurityReason.FileReadFromParameterizedPath("path")]) + @PythonName("from_file") + static fun fromFile( + path: String + ) -> result1: Image + + /** + * Save the image as a JPEG file. + * + * @param path The path to the JPEG file. + */ + @Impure([ImpurityReason.FileWriteToParameterizedPath("path")]) + @PythonName("to_jpeg_file") + fun toJpegFile( + path: String + ) + + /** + * Save the image as a PNG file. + * + * @param path The path to the PNG file. 
+ */ + @Impure([ImpurityReason.FileWriteToParameterizedPath("path")]) + @PythonName("to_png_file") + fun toPngFile( + path: String + ) + + /** + * Return a new `Image` that has been resized to a given size. + * + * The original image is not modified. + * + * @result result1 The image with the given width and height. + */ + @Pure + fun resize( + @PythonName("new_width") newWidth: Int, + @PythonName("new_height") newHeight: Int + ) -> result1: Image + + /** + * Return a new `Image` that is converted to grayscale. + * + * The original image is not modified. + * + * @result result1 The grayscale image. + */ + @Pure + @PythonName("convert_to_grayscale") + fun convertToGrayscale() -> result1: Image + + /** + * Return a new `Image` that has been cropped to a given bounding rectangle. + * + * The original image is not modified. + * + * @result result1 The cropped image. + */ + @Pure + fun crop( + x: Int, + y: Int, + width: Int, + height: Int + ) -> result1: Image + + /** + * Return a new `Image` that is flipped vertically (horizontal axis, flips up-down and vice versa). + * + * The original image is not modified. + * + * @result result1 The flipped image. + */ + @Pure + @PythonName("flip_vertically") + fun flipVertically() -> result1: Image + + /** + * Return a new `Image` that is flipped horizontally (vertical axis, flips left-right and vice versa). + * + * The original image is not modified. + * + * @result result1 The flipped image. + */ + @Pure + @PythonName("flip_horizontally") + fun flipHorizontally() -> result1: Image + + /** + * Return a new `Image` with an adjusted brightness. + * + * The original image is not modified. + * + * @param factor The brightness factor. + * 1.0 will not change the brightness. + * Below 1.0 will result in a darker image. + * Above 1.0 will result in a brighter image. + * Has to be bigger than or equal to 0 (black). + * + * @result result1 The Image with adjusted brightness. 
+ */ + @Pure + @PythonName("adjust_brightness") + fun adjustBrightness( + factor: Float + ) -> result1: Image + + /** + * Return a new `Image` with noise added to the image. + * + * The original image is not modified. + * + * @param standardDeviation The standard deviation of the normal distribution. Has to be bigger than or equal to 0. + * + * @result result1 The image with added noise. + */ + @Pure + @PythonName("add_noise") + fun addNoise( + @PythonName("standard_deviation") standardDeviation: Float + ) -> result1: Image + + /** + * Return a new `Image` with adjusted contrast. + * + * The original image is not modified. + * + * @param factor If factor > 1, increase contrast of image. + * If factor = 1, no changes will be made. + * If factor < 1, make image greyer. + * Has to be bigger than or equal to 0 (gray). + * + * @result result1 New image with adjusted contrast. + */ + @Pure + @PythonName("adjust_contrast") + fun adjustContrast( + factor: Float + ) -> result1: Image + + /** + * Return a new `Image` with adjusted color balance. + * + * The original image is not modified. + * + * @param factor Has to be bigger than or equal to 0. + * If 0 <= factor < 1, make image greyer. + * If factor = 1, no changes will be made. + * If factor > 1, increase color balance of image. + * + * @result result1 The new, adjusted image. + */ + @Pure + @PythonName("adjust_color_balance") + fun adjustColorBalance( + factor: Float + ) -> result1: Image + + /** + * Return a blurred version of the image. + * + * The original image is not modified. + * + * @param radius Radius is directly proportional to the blur value. The radius is equal to the amount of pixels united in + * each direction. A radius of 1 will result in a united box of 9 pixels. + * + * @result result1 The blurred image. + */ + @Pure + fun blur( + radius: Int + ) -> result1: Image + + /** + * Return a sharpened version of the image. + * + * The original image is not modified. 
+ * + * @param factor If factor > 1, increase the sharpness of the image. + * If factor = 1, no changes will be made. + * If factor < 1, blur the image. + * Has to be bigger than or equal to 0 (blurred). + * + * @result result1 The image sharpened by the given factor. + */ + @Pure + fun sharpen( + factor: Float + ) -> result1: Image + + /** + * Return a new `Image` with colors inverted. + * + * The original image is not modified. + * + * @result result1 The image with inverted colors. + */ + @Pure + @PythonName("invert_colors") + fun invertColors() -> result1: Image + + /** + * Return a new `Image` that is rotated 90 degrees clockwise. + * + * The original image is not modified. + * + * @result result1 The image rotated 90 degrees clockwise. + */ + @Pure + @PythonName("rotate_right") + fun rotateRight() -> result1: Image + + /** + * Return a new `Image` that is rotated 90 degrees counter-clockwise. + * + * The original image is not modified. + * + * @result result1 The image rotated 90 degrees counter-clockwise. + */ + @Pure + @PythonName("rotate_left") + fun rotateLeft() -> result1: Image + + /** + * Return a grayscale version of the image with the edges highlighted. + * + * The original image is not modified. + * + * @result result1 The image with edges found. + */ + @Pure + @PythonName("find_edges") + fun findEdges() -> result1: Image +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/column.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/column.sdsstub new file mode 100644 index 000000000..1d2a67c4e --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/column.sdsstub @@ -0,0 +1,265 @@ +package safeds.data.tabular.containers + +from safeds.data.image.containers import Image +from safeds.data.tabular.typing import ColumnType + +/** + * A column is a named collection of values. + * + * @param name The name of the column. 
+ * @param data The data. + */ +class Column( + name: String, + data: List = [] +) { + /** + * Return the name of the column. + */ + attr name: String + /** + * Return the number of elements in the column. + */ + @PythonName("number_of_rows") attr numberOfRows: Int + /** + * Return the type of the column. + */ + attr type: ColumnType + + /** + * Return a list of all unique values in the column. + * + * @result result1 List of unique values in the column. + */ + @Pure + @PythonName("get_unique_values") + fun getUniqueValues() -> result1: List + + /** + * Return column value at specified index, starting at 0. + * + * @param index Index of requested element. + * + * @result result1 Value at index in column. + */ + @Pure + @PythonName("get_value") + fun getValue( + index: Int + ) -> result1: T + + /** + * Check if all values have a given property. + * + * @param predicate Callable that is used to find matches. + * + * @result result1 True if all match. + */ + @Pure + fun all( + predicate: (param1: T) -> param2: Boolean + ) -> result1: Boolean + + /** + * Check if any value has a given property. + * + * @param predicate Callable that is used to find matches. + * + * @result result1 True if any match. + */ + @Pure + fun any( + predicate: (param1: T) -> param2: Boolean + ) -> result1: Boolean + + /** + * Check if no value has a given property. + * + * @param predicate Callable that is used to find matches. + * + * @result result1 True if none match. + */ + @Pure + fun none( + predicate: (param1: T) -> param2: Boolean + ) -> result1: Boolean + + /** + * Return whether the column has missing values. + * + * @result result1 True if missing values exist. + */ + @Pure + @PythonName("has_missing_values") + fun hasMissingValues() -> result1: Boolean + + /** + * Return a new column with a new name. + * + * The original column is not modified. + * + * @param newName The new name of the column. + * + * @result result1 A new column with the new name. 
+ */ + @Pure + fun rename( + @PythonName("new_name") newName: String + ) -> result1: Column + + /** + * Apply a transform method to every data point. + * + * The original column is not modified. + * + * @param transformer Function that will be applied to all data points. + * + * @result result1 The transformed column. + */ + @Pure + fun transform( + transformer: (param1: T) -> param2: R + ) -> result1: Column + + /** + * Calculate Pearson correlation between this and another column. Both columns have to be numerical. + * + * @result result1 Correlation between the two columns. + */ + @Pure + @PythonName("correlation_with") + fun correlationWith( + @PythonName("other_column") otherColumn: Column + ) -> result1: Float + + /** + * Calculate the idness of this column. + * + * We define the idness as follows: + * + * $$ + * \frac{\text{number of different values}}{\text{number of rows}} + * $$ + * + * @result result1 The idness of the column. + */ + @Pure + fun idness() -> result1: Float + + /** + * Return the maximum value of the column. The column has to be numerical. + * + * @result result1 The maximum value. + */ + @Pure + fun maximum() -> result1: Float + + /** + * Return the mean value of the column. The column has to be numerical. + * + * @result result1 The mean value. + */ + @Pure + fun mean() -> result1: Float + + /** + * Return the median value of the column. The column has to be numerical. + * + * @result result1 The median value. + */ + @Pure + fun median() -> result1: Float + + /** + * Return the minimum value of the column. The column has to be numerical. + * + * @result result1 The minimum value. + */ + @Pure + fun minimum() -> result1: Float + + /** + * Return the ratio of missing values to the total number of elements in the column. + * + * @result result1 The ratio of missing values to the total number of elements in the column. 
+ */ + @Pure + @PythonName("missing_value_ratio") + fun missingValueRatio() -> result1: Float + + /** + * Return the mode of the column. + * + * @result result1 Returns a list with the most common values. + */ + @Pure + fun mode() -> result1: List + + /** + * Calculate the stability of this column. + * + * We define the stability as follows: + * + * $$ + * \frac{\text{number of occurrences of most common non-null value}}{\text{number of non-null values}} + * $$ + * + * The stability is not defined for a column with only null values. + * + * @result result1 The stability of the column. + */ + @Pure + fun stability() -> result1: Float + + /** + * Return the standard deviation of the column. The column has to be numerical. + * + * @result result1 The standard deviation of all values. + */ + @Pure + @PythonName("standard_deviation") + fun standardDeviation() -> result1: Float + + /** + * Return the sum of the column. The column has to be numerical. + * + * @result result1 The sum of all values. + */ + @Pure + fun sum() -> result1: Float + + /** + * Return the variance of the column. The column has to be numerical. + * + * @result result1 The variance of all values. + */ + @Pure + fun variance() -> result1: Float + + /** + * Plot this column in a boxplot. This function can only plot real numerical data. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_boxplot") + fun plotBoxplot() -> result1: Image + + /** + * Plot a column in a histogram. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_histogram") + fun plotHistogram() -> result1: Image + + /** + * Return an HTML representation of the column. + * + * @result result1 The generated HTML. 
+ */ + @Pure + @PythonName("to_html") + fun toHtml() -> result1: String +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/row.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/row.sdsstub new file mode 100644 index 000000000..518e78398 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/row.sdsstub @@ -0,0 +1,118 @@ +package safeds.data.tabular.containers + +from safeds.data.tabular.typing import ColumnType, Schema + +/** + * A row is a collection of named values. + * + * @param data The data. If None, an empty row is created. + */ +class Row( + data: Map>? = null // TODO: update default value to empty map +) { + /** + * Return a list of all column names in the row. + */ + @PythonName("column_names") attr columnNames: List + /** + * Return the number of columns in this row. + */ + @PythonName("number_of_column") attr numberOfColumn: Int + /** + * Return the schema of the row. + */ + attr `schema`: Schema + + /** + * Create a row from a dictionary that maps column names to column values. + * + * @param data The data. + * + * @result result1 The created row. + */ + @Pure + @PythonName("from_dict") + static fun fromDict( + data: Map + ) -> result1: Row + + /** + * Return the value of a specified column. + * + * @param columnName The column name. + * + * @result result1 The column value. + */ + @Pure + @PythonName("get_value") + fun getValue( + @PythonName("column_name") columnName: String + ) -> result1: Any + + /** + * Check whether the row contains a given column. + * + * @param columnName The column name. + * + * @result result1 True, if the row contains the column, False otherwise. + */ + @Pure + @PythonName("has_column") + fun hasColumn( + @PythonName("column_name") columnName: String + ) -> result1: Boolean + + /** + * Return the type of the specified column. + * + * @param columnName The column name. 
+ * + * @result result1 The type of the column. + */ + @Pure + @PythonName("get_column_type") + fun getColumnType( + @PythonName("column_name") columnName: String + ) -> result1: ColumnType + + // // TODO Safe-DS does not support tuple types. + // /** + // * Sort the columns of a `Row` with the given comparator and return a new `Row`. + // * + // * The original row is not modified. The comparator is a function that takes two tuples of (ColumnName, + // * Value) `col1` and `col2` and returns an integer: + // * + // * * If `col1` should be ordered before `col2`, the function should return a negative number. + // * * If `col1` should be ordered after `col2`, the function should return a positive number. + // * * If the original order of `col1` and `col2` should be kept, the function should return 0. + // * + // * If no comparator is given, the columns will be sorted alphabetically by their name. + // * + // * @param comparator The function used to compare two tuples of (ColumnName, Value). + // * + // * @result result1 A new row with sorted columns. + // */ + // @Pure + // @PythonName("sort_columns") + // fun sortColumns( + // comparator: (param1: Tuple, param2: Tuple) -> param3: Int + // ) -> result1: Row + + /** + * Return a dictionary that maps column names to column values. + * + * @result result1 Dictionary representation of the row. + */ + @Pure + @PythonName("to_dict") + fun toDict() -> result1: Map + + /** + * Return an HTML representation of the row. + * + * @result result1 The generated HTML. 
+ */ + @Pure + @PythonName("to_html") + fun toHtml() -> result1: String +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/table.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/table.sdsstub new file mode 100644 index 000000000..4800251ad --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/table.sdsstub @@ -0,0 +1,735 @@ +package safeds.data.tabular.containers + +from safeds.data.image.containers import Image +from safeds.data.tabular.containers import Column, Row, TaggedTable, TimeSeries +from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer +from safeds.data.tabular.typing import ColumnType, Schema + +/** + * A table is a two-dimensional collection of data. It can either be seen as a list of rows or as a list of columns. + * + * To create a `Table` call the constructor or use one of the following static methods: + * + * | Method | Description | + * | ---------------------------------------------------------------------------- | -------------------------------------- | + * | [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] | Create a table from a CSV file. | + * | [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] | Create a table from a JSON file. | + * | [from_dict][safeds.data.tabular.containers._table.Table.from_dict] | Create a table from a dictionary. | + * | [from_columns][safeds.data.tabular.containers._table.Table.from_columns] | Create a table from a list of columns. | + * | [from_rows][safeds.data.tabular.containers._table.Table.from_rows] | Create a table from a list of rows. | + * + * Note: When removing the last column of the table, the `number_of_columns` property will be set to 0. + * + * @param data The data. If None, an empty table is created. + */ +class Table( + data: Map>? 
= null +) { + /** + * Return a list of all column names in this table. + * + * Alias for self.schema.column_names -> list[str]. + */ + @PythonName("column_names") attr columnNames: List + /** + * Return the number of columns. + */ + @PythonName("number_of_columns") attr numberOfColumns: Int + /** + * Return the number of rows. + */ + @PythonName("number_of_rows") attr numberOfRows: Int + /** + * Return the schema of the table. + */ + attr `schema`: Schema + + /** + * Read data from a CSV file into a table. + * + * @param path The path to the CSV file. + * + * @result result1 The table created from the CSV file. + */ + @Impure([ImpurityReason.FileReadFromParameterizedPath("path")]) + @PythonName("from_csv_file") + static fun fromCsvFile( + path: String + ) -> result1: Table + + /** + * Read data from an Excel file into a table. + * + * Valid file extensions are `.xls`, `.xlsx`, `.xlsm`, `.xlsb`, `.odf`, `.ods` and `.odt`. + * + * @param path The path to the Excel file. + * + * @result result1 The table created from the Excel file. + */ + @Impure([ImpurityReason.FileReadFromParameterizedPath("path")]) + @PythonName("from_excel_file") + static fun fromExcelFile( + path: String + ) -> result1: Table + + /** + * Read data from a JSON file into a table. + * + * @param path The path to the JSON file. + * + * @result result1 The table created from the JSON file. + */ + @Impure([ImpurityReason.FileReadFromParameterizedPath("path")]) + @PythonName("from_json_file") + static fun fromJsonFile( + path: String + ) -> result1: Table + + /** + * Create a table from a dictionary that maps column names to column values. + * + * @param data The data. + * + * @result result1 The generated table. + */ + @Pure + @PythonName("from_dict") + static fun fromDict( + data: Map> + ) -> result1: Table + + /** + * Return a table created from a list of columns. + * + * @param columns The columns to be combined. They need to have the same size. + * + * @result result1 The generated table. 
+ */ + @Pure + @PythonName("from_columns") + static fun fromColumns( + columns: List + ) -> result1: Table + + /** + * Return a table created from a list of rows. + * + * @param rows The rows to be combined. They need to have a matching schema. + * + * @result result1 The generated table. + */ + @Pure + @PythonName("from_rows") + static fun fromRows( + rows: List + ) -> result1: Table + + /** + * Return a column with the data of the specified column. + * + * @param columnName The name of the column. + * + * @result result1 The column. + */ + @Pure + @PythonName("get_column") + fun getColumn( + @PythonName("column_name") columnName: String + ) -> result1: Column + + /** + * Return whether the table contains a given column. + * + * Alias for self.schema.hasColumn(column_name: str) -> bool. + * + * @param columnName The name of the column. + * + * @result result1 True if the column exists. + */ + @Pure + @PythonName("has_column") + fun hasColumn( + @PythonName("column_name") columnName: String + ) -> result1: Boolean + + /** + * Return the type of the given column. + * + * Alias for self.schema.get_type_of_column(column_name: str) -> ColumnType. + * + * @param columnName The name of the column to be queried. + * + * @result result1 The type of the column. + */ + @Pure + @PythonName("get_column_type") + fun getColumnType( + @PythonName("column_name") columnName: String + ) -> result1: ColumnType + + /** + * Return the row at a specified index. + * + * @param index The index. + * + * @result result1 The row of the table at the index. + */ + @Pure + @PythonName("get_row") + fun getRow( + index: Int + ) -> result1: Row + + /** + * Return a table with a number of statistical key values. + * + * The original table is not modified. + * + * @result result1 The table with statistics. + */ + @Pure + @PythonName("summarize_statistics") + fun summarizeStatistics() -> result1: Table + + /** + * Return a new table with the provided column attached at the end. 
+ * + * The original table is not modified. + * + * @result result1 The table with the column attached. + */ + @Pure + @PythonName("add_column") + fun addColumn( + column: Column + ) -> result1: Table + + /** + * Return a new `Table` with multiple added columns. + * + * The original table is not modified. + * + * @param columns The columns to be added. + * + * @result result1 A new table combining the original table and the given columns. + */ + @Pure + @PythonName("add_columns") + fun addColumns( + columns: union, Table> + ) -> result1: Table + + /** + * Return a new `Table` with an added Row attached. + * + * If the table happens to be empty beforehand, respective columns will be added automatically. + * + * The order of columns of the new row will be adjusted to the order of columns in the table. + * The new table will contain the merged schema. + * + * The original table is not modified. + * + * @param row The row to be added. + * + * @result result1 A new table with the added row at the end. + */ + @Pure + @PythonName("add_row") + fun addRow( + row: Row + ) -> result1: Table + + /** + * Return a new `Table` with multiple added Rows attached. + * + * The order of columns of the new rows will be adjusted to the order of columns in the table. + * The new table will contain the merged schema. + * + * The original table is not modified. + * + * @param rows The rows to be added. + * + * @result result1 A new table which combines the original table and the given rows. + */ + @Pure + @PythonName("add_rows") + fun addRows( + rows: union, Table> + ) -> result1: Table + + /** + * Return a new table with rows filtered by Callable (e.g. lambda function). + * + * The original table is not modified. + * + * @param query A Callable that is applied to all rows. + * + * @result result1 A table containing only the rows filtered by the query. 
+ */ + @Pure + @PythonName("filter_rows") + fun filterRows( + query: (param1: Row) -> param2: Boolean + ) -> result1: Table + + /** + * Return a dictionary with copies of the output tables as values and the keys from the key_selector. + * + * The original table is not modified. + * + * @param keySelector A Callable that is applied to all rows and returns the key of the group. + * + * @result result1 A dictionary containing the new tables as values and the selected keys as keys. + */ + @Pure + @PythonName("group_rows_by") + fun groupRowsBy( + @PythonName("key_selector") keySelector: (param1: Row) -> param2: T + ) -> result1: Map + + /** + * Return a new table with only the given column(s). + * + * The original table is not modified. + * + * Note: When removing the last column of the table, the `number_of_columns` property will be set to 0. + * + * @param columnNames A list containing only the columns to be kept. + * + * @result result1 A table containing only the given column(s). + */ + @Pure + @PythonName("keep_only_columns") + fun keepOnlyColumns( + @PythonName("column_names") columnNames: List + ) -> result1: Table + + /** + * Return a new table without the given column(s). + * + * The original table is not modified. + * + * Note: When removing the last column of the table, the `number_of_columns` property will be set to 0. + * + * @param columnNames A list containing all columns to be dropped. + * + * @result result1 A table without the given columns. + */ + @Pure + @PythonName("remove_columns") + fun removeColumns( + @PythonName("column_names") columnNames: List + ) -> result1: Table + + /** + * Return a new table without the columns that contain missing values. + * + * The original table is not modified. + * + * Note: When removing the last column of the table, the `number_of_columns` property will be set to 0. + * + * @result result1 A table without the columns that contain missing values. 
+ */ + @Pure + @PythonName("remove_columns_with_missing_values") + fun removeColumnsWithMissingValues() -> result1: Table + + /** + * Return a new table without the columns that contain non-numerical values. + * + * The original table is not modified. + * + * Note: When removing the last column of the table, the `number_of_columns` property will be set to 0. + * + * @result result1 A table without the columns that contain non-numerical values. + */ + @Pure + @PythonName("remove_columns_with_non_numerical_values") + fun removeColumnsWithNonNumericalValues() -> result1: Table + + /** + * Return a new table with every duplicate row removed. + * + * The original table is not modified. + * + * @result result1 The table with the duplicate rows removed. + */ + @Pure + @PythonName("remove_duplicate_rows") + fun removeDuplicateRows() -> result1: Table + + /** + * Return a new table without the rows that contain missing values. + * + * The original table is not modified. + * + * @result result1 A table without the rows that contain missing values. + */ + @Pure + @PythonName("remove_rows_with_missing_values") + fun removeRowsWithMissingValues() -> result1: Table + + /** + * Return a new table without those rows that contain at least one outlier. + * + * We define an outlier as a value that has a distance of more than 3 standard deviations from the column mean. + * Missing values are not considered outliers. They are also ignored during the calculation of the standard + * deviation. + * + * The original table is not modified. + * + * @result result1 A new table without rows containing outliers. + */ + @Pure + @PythonName("remove_rows_with_outliers") + fun removeRowsWithOutliers() -> result1: Table + + /** + * Return a new `Table` with a single column renamed. + * + * The original table is not modified. + * + * @param oldName The old name of the column to be renamed. + * @param newName The new name of the column. + * + * @result result1 The table with the renamed column.
+ */ + @Pure + @PythonName("rename_column") + fun renameColumn( + @PythonName("old_name") oldName: String, + @PythonName("new_name") newName: String + ) -> result1: Table + + /** + * Return a new table with the specified old column replaced by a list of new columns. + * + * The order of columns is kept. + * + * The original table is not modified. + * + * @param oldColumnName The name of the column to be replaced. + * @param newColumns The list of new columns replacing the old column. + * + * @result result1 A table with the old column replaced by the new columns. + */ + @Pure + @PythonName("replace_column") + fun replaceColumn( + @PythonName("old_column_name") oldColumnName: String, + @PythonName("new_columns") newColumns: List<Column> + ) -> result1: Table + + /** + * Return a new `Table` with randomly shuffled rows of this `Table`. + * + * The original table is not modified. + * + * @result result1 The shuffled table. + */ + @Pure + @PythonName("shuffle_rows") + fun shuffleRows() -> result1: Table + + /** + * Slice a part of the table into a new table. + * + * The original table is not modified. + * + * @param start The first index of the range to be copied into a new table, `null` by default. + * @param end The last index of the range to be copied into a new table, `null` by default. + * @param step The step size used to iterate through the table, 1 by default. + * + * @result result1 The resulting table. + */ + @Pure + @PythonName("slice_rows") + fun sliceRows( + start: Int? = null, + end: Int? = null, + step: Int = 1 + ) -> result1: Table + + /** + * Sort the columns of a `Table` with the given comparator and return a new `Table`. + * + * The comparator is a function that takes two columns `col1` and `col2` and + * returns an integer: + * + * * If `col1` should be ordered before `col2`, the function should return a negative number. + * * If `col1` should be ordered after `col2`, the function should return a positive number.
+ * * If the original order of `col1` and `col2` should be kept, the function should return 0. + * + * If no comparator is given, the columns will be sorted alphabetically by their name. NOTE(review): `comparator` is declared without a default here - confirm that it can actually be omitted. + * + * The original table is not modified. + * + * @param comparator The function used to compare two columns. + * + * @result result1 A new table with sorted columns. + */ + @Pure + @PythonName("sort_columns") + fun sortColumns( + comparator: (param1: Column, param2: Column) -> param3: Int + ) -> result1: Table + + /** + * Sort the rows of a `Table` with the given comparator and return a new `Table`. + * + * The comparator is a function that takes two rows `row1` and `row2` and + * returns an integer: + * + * * If `row1` should be ordered before `row2`, the function should return a negative number. + * * If `row1` should be ordered after `row2`, the function should return a positive number. + * * If the original order of `row1` and `row2` should be kept, the function should return 0. + * + * The original table is not modified. + * + * @param comparator The function used to compare two rows. + * + * @result result1 A new table with sorted rows. + */ + @Pure + @PythonName("sort_rows") + fun sortRows( + comparator: (param1: Row, param2: Row) -> param3: Int + ) -> result1: Table + + /** + * Split the table into two new tables. + * + * The original table is not modified. + * + * @param percentageInFirst The desired size of the first table as a fraction of the given table; must be between 0 and 1. + * + * @result result1 The first of the two resulting tables. Its size is the share of the given table that is + * specified by `percentageInFirst`. + * @result result2 The second of the two resulting tables. It contains the rest of the data, i.e. everything + * that is not part of the first table.
+ */ + @Pure + @PythonName("split_rows") + fun splitRows( + @PythonName("percentage_in_first") percentageInFirst: Float + ) -> (result1: Table, result2: Table) + + /** + * Return a new `TaggedTable` with columns marked as a target column or feature columns. + * + * The original table is not modified. + * + * @param targetName Name of the target column. + * @param featureNames Names of the feature columns. If `null`, all columns except the target column are used. + * + * @result result1 A new tagged table with the given target and feature names. + */ + @Pure + @PythonName("tag_columns") + fun tagColumns( + @PythonName("target_name") targetName: String, + @PythonName("feature_names") featureNames: List<String>? = null + ) -> result1: TaggedTable + + /** + * Return a new `TimeSeries` with columns marked as a target and time column or feature columns. + * + * The original table is not modified. + * + * @param targetName Name of the target column. + * @param timeName Name of the time column. + * @param featureNames Names of the feature columns. If `null`, all columns except the target and time columns are used. + * + * @result result1 A new time series with the given target, time and feature names. + */ + @Pure + @PythonName("time_columns") + fun timeColumns( + @PythonName("target_name") targetName: String, + @PythonName("time_name") timeName: String, + @PythonName("feature_names") featureNames: List<String>? = null + ) -> result1: TimeSeries + + /** + * Return a new `Table` in which the column called `name` is transformed by calling the provided `transformer`. + * + * The original table is not modified. + * + * @result result1 The table with the transformed column. + */ + @Pure + @PythonName("transform_column") + fun transformColumn( + name: String, + transformer: (param1: Row) -> param2: Any + ) -> result1: Table + + /** + * Return a new `Table` with a learned transformation applied to this table. + * + * The original table is not modified.
+ * + * @param transformer The transformer which transforms the given table. + * + * @result result1 The transformed table. + */ + @Pure + @PythonName("transform_table") + fun transformTable( + transformer: TableTransformer + ) -> result1: Table + + /** + * Return a new `Table` with the inverted transformation applied by the given transformer. + * + * The original table is not modified. + * + * @param transformer A transformer that was fitted with columns that are all present in the table. + * + * @result result1 A new table with the transformation inverted. + */ + @Pure + @PythonName("inverse_transform_table") + fun inverseTransformTable( + transformer: InvertibleTableTransformer + ) -> result1: Table + + /** + * Plot a correlation heatmap for all numerical columns of this `Table`. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_correlation_heatmap") + fun plotCorrelationHeatmap() -> result1: Image + + /** + * Plot two columns against each other in a lineplot. + * + * If there are multiple x-values for a y-value, the resulting plot will consist of a line representing the mean + * and the lower-transparency area around the line representing the 95% confidence interval. + * + * @param xColumnName The name of the column to be plotted on the x-axis. + * @param yColumnName The name of the column to be plotted on the y-axis. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_lineplot") + fun plotLineplot( + @PythonName("x_column_name") xColumnName: String, + @PythonName("y_column_name") yColumnName: String + ) -> result1: Image + + /** + * Plot two columns against each other in a scatterplot. + * + * @param xColumnName The name of the column to be plotted on the x-axis. + * @param yColumnName The name of the column to be plotted on the y-axis. + * + * @result result1 The plot as an image.
+ */ + @Pure + @PythonName("plot_scatterplot") + fun plotScatterplot( + @PythonName("x_column_name") xColumnName: String, + @PythonName("y_column_name") yColumnName: String + ) -> result1: Image + + /** + * Plot a boxplot for every numerical column. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_boxplots") + fun plotBoxplots() -> result1: Image + + /** + * Plot a histogram for every column. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_histograms") + fun plotHistograms() -> result1: Image + + /** + * Write the data from the table into a CSV file. + * + * If the file and/or the directories do not exist, they will be created. If the file already exists, it will be + * overwritten. + * + * @param path The path to the output file. + */ + @Impure([ImpurityReason.FileWriteToParameterizedPath("path")]) + @PythonName("to_csv_file") + fun toCsvFile( + path: String + ) + + /** + * Write the data from the table into an Excel file. + * + * Valid file extensions are `.xls`, `.xlsx`, `.xlsm`, `.xlsb`, `.odf`, `.ods` and `.odt`. + * If the file and/or the directories do not exist, they will be created. If the file already exists, it will be + * overwritten. + * + * @param path The path to the output file. + */ + @Impure([ImpurityReason.FileWriteToParameterizedPath("path")]) + @PythonName("to_excel_file") + fun toExcelFile( + path: String + ) + + /** + * Write the data from the table into a JSON file. + * + * If the file and/or the directories do not exist, they will be created. If the file already exists, it will be + * overwritten. + * + * @param path The path to the output file. + */ + @Impure([ImpurityReason.FileWriteToParameterizedPath("path")]) + @PythonName("to_json_file") + fun toJsonFile( + path: String + ) + + /** + * Return a dictionary that maps column names to column values. + * + * @result result1 Dictionary representation of the table.
+ */ + @Pure + @PythonName("to_dict") + fun toDict() -> result1: Map<String, List<Any>> + + /** + * Return an HTML representation of the table. + * + * @result result1 The generated HTML. + */ + @Pure + @PythonName("to_html") + fun toHtml() -> result1: String + + /** + * Return a list of the columns. + * + * @result result1 List of columns. + */ + @Pure + @PythonName("to_columns") + fun toColumns() -> result1: List<Column> + + /** + * Return a list of the rows. + * + * @result result1 List of rows. + */ + @Pure + @PythonName("to_rows") + fun toRows() -> result1: List<Row> +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/tagged_table.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/tagged_table.sdsstub new file mode 100644 index 000000000..a8c7a0a72 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/tagged_table.sdsstub @@ -0,0 +1,347 @@ +package safeds.data.tabular.containers + +from safeds.data.tabular.containers import Column, Row, Table + +/** + * A tagged table is a table that additionally knows which columns are features and which are the target to predict. + * + * @param data The data. + * @param targetName Name of the target column. + * @param featureNames Names of the feature columns. If `null`, all columns except the target column are used. + */ +class TaggedTable( + data: Map<String, List<Any>>, + @PythonName("target_name") targetName: String, + @PythonName("feature_names") featureNames: List<String>? = null +) sub Table { + /** + * Get the feature columns of the tagged table. + */ + attr features: Table + /** + * Get the target column of the tagged table. + */ + attr target: Column + + /** + * Return a new `TaggedTable` with the provided column attached at the end, as a feature column. + * + * The original table is not modified. + * + * @param column The column to be added. + * + * @result result1 The table with the attached feature column.
+ */ + @Pure + @PythonName("add_column_as_feature") + fun addColumnAsFeature( + column: Column + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with the provided columns attached at the end, as feature columns. + * + * The original table is not modified. + * + * @param columns The columns to be added as features. + * + * @result result1 The table with the attached feature columns. + */ + @Pure + @PythonName("add_columns_as_features") + fun addColumnsAsFeatures( + columns: union<List<Column>, Table> + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with the provided column attached at the end, as neither target nor feature column. + * + * The original table is not modified. + * + * @param column The column to be added. + * + * @result result1 The table with the column attached as neither target nor feature column. + */ + @Pure + @PythonName("add_column") + fun addColumn( + column: Column + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with multiple added columns, as neither target nor feature columns. + * + * The original table is not modified. + * + * @param columns The columns to be added. + * + * @result result1 A new table combining the original table and the given columns as neither target nor feature columns. + */ + @Pure + @PythonName("add_columns") + fun addColumns( + columns: union<List<Column>, Table> + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with an added Row attached. + * + * The original table is not modified. + * + * @param row The row to be added. + * + * @result result1 A new tagged table with the added row at the end. + */ + @Pure + @PythonName("add_row") + fun addRow( + row: Row + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with multiple added Rows attached. + * + * The original table is not modified. + * + * @param rows The rows to be added. + * + * @result result1 A new tagged table which combines the original table and the given rows.
+ */ + @Pure + @PythonName("add_rows") + fun addRows( + rows: union<List<Row>, Table> + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` containing only rows that match the given Callable (e.g. lambda function). + * + * The original tagged table is not modified. + * + * @param query A Callable that is applied to all rows. + * + * @result result1 A new tagged table containing only the rows that match the query. + */ + @Pure + @PythonName("filter_rows") + fun filterRows( + query: (param1: Row) -> param2: Boolean + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with only the given column(s). + * + * The original table is not modified. + * + * @param columnNames A list containing only the names of the columns to be kept. + * + * @result result1 A table containing only the given column(s). + */ + @Pure + @PythonName("keep_only_columns") + fun keepOnlyColumns( + @PythonName("column_names") columnNames: List<String> + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with the given column(s) removed from the table. + * + * The original table is not modified. + * + * @param columnNames The names of all columns to be dropped. + * + * @result result1 A table without the given columns. + */ + @Pure + @PythonName("remove_columns") + fun removeColumns( + @PythonName("column_names") columnNames: List<String> + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with every column that contains missing values removed. + * + * The original table is not modified. + * + * @result result1 A table without the columns that contain missing values. + */ + @Pure + @PythonName("remove_columns_with_missing_values") + fun removeColumnsWithMissingValues() -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with every column that contains non-numerical values removed. + * + * The original table is not modified. + * + * @result result1 A table without the columns that contain non-numerical values.
+ */ + @Pure + @PythonName("remove_columns_with_non_numerical_values") + fun removeColumnsWithNonNumericalValues() -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with all row duplicates removed. + * + * The original table is not modified. + * + * @result result1 The table with the duplicate rows removed. + */ + @Pure + @PythonName("remove_duplicate_rows") + fun removeDuplicateRows() -> result1: TaggedTable + + /** + * Return a new `TaggedTable` without the rows that contain missing values. + * + * The original table is not modified. + * + * @result result1 A table without the rows that contain missing values. + */ + @Pure + @PythonName("remove_rows_with_missing_values") + fun removeRowsWithMissingValues() -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with all rows that contain at least one outlier removed. + * + * We define an outlier as a value that has a distance of more than 3 standard deviations from the column mean. + * Missing values are not considered outliers. They are also ignored during the calculation of the standard + * deviation. + * + * The original table is not modified. + * + * @result result1 A new table without rows containing outliers. + */ + @Pure + @PythonName("remove_rows_with_outliers") + fun removeRowsWithOutliers() -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with a single column renamed. + * + * The original table is not modified. + * + * @param oldName The old name of the column to be renamed. + * @param newName The new name of the column. + * + * @result result1 The tagged table with the renamed column. + */ + @Pure + @PythonName("rename_column") + fun renameColumn( + @PythonName("old_name") oldName: String, + @PythonName("new_name") newName: String + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with the specified old column replaced by a list of new columns. + * + * If the column to be replaced is the target column, it must be replaced by exactly one column.
That column + * becomes the new target column. If the column to be replaced is a feature column, the new columns that replace it + * all become feature columns. + * + * The order of columns is kept. The original table is not modified. + * + * @param oldColumnName The name of the column to be replaced. + * @param newColumns The new columns replacing the old column. + * + * @result result1 A table with the old column replaced by the new columns. + */ + @Pure + @PythonName("replace_column") + fun replaceColumn( + @PythonName("old_column_name") oldColumnName: String, + @PythonName("new_columns") newColumns: List<Column> + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` with randomly shuffled rows of this table. + * + * The original table is not modified. + * + * @result result1 The shuffled table. + */ + @Pure + @PythonName("shuffle_rows") + fun shuffleRows() -> result1: TaggedTable + + /** + * Slice a part of the table into a new `TaggedTable`. + * + * The original table is not modified. + * + * @param start The first index of the range to be copied into a new table, `null` by default. + * @param end The last index of the range to be copied into a new table, `null` by default. + * @param step The step size used to iterate through the table, 1 by default. + * + * @result result1 The resulting table. + */ + @Pure + @PythonName("slice_rows") + fun sliceRows( + start: Int? = null, + end: Int? = null, + step: Int = 1 + ) -> result1: TaggedTable + + /** + * Sort the columns of a `TaggedTable` with the given comparator and return a new `TaggedTable`. + * + * The comparator is a function that takes two columns `col1` and `col2` and + * returns an integer: + * + * * If the function returns a negative number, `col1` will be ordered before `col2`. + * * If the function returns a positive number, `col1` will be ordered after `col2`. + * * If the function returns 0, the original order of `col1` and `col2` will be kept.
+ * + * If no comparator is given, the columns will be sorted alphabetically by their name. NOTE(review): `comparator` is declared without a default here - confirm that it can actually be omitted. + * + * The original table is not modified. + * + * @param comparator The function used to compare two columns. + * + * @result result1 A new table with sorted columns. + */ + @Pure + @PythonName("sort_columns") + fun sortColumns( + comparator: (param1: Column, param2: Column) -> param3: Int + ) -> result1: TaggedTable + + /** + * Sort the rows of a `TaggedTable` with the given comparator and return a new `TaggedTable`. + * + * The comparator is a function that takes two rows `row1` and `row2` and + * returns an integer: + * + * * If the function returns a negative number, `row1` will be ordered before `row2`. + * * If the function returns a positive number, `row1` will be ordered after `row2`. + * * If the function returns 0, the original order of `row1` and `row2` will be kept. + * + * The original table is not modified. + * + * @param comparator The function used to compare two rows. + * + * @result result1 A new table with sorted rows. + */ + @Pure + @PythonName("sort_rows") + fun sortRows( + comparator: (param1: Row, param2: Row) -> param3: Int + ) -> result1: TaggedTable + + /** + * Return a new `TaggedTable` in which the column called `name` is transformed by calling the provided `transformer`. + * + * The original table is not modified. + * + * @result result1 The table with the transformed column.
+ */ + @Pure + @PythonName("transform_column") + fun transformColumn( + name: String, + transformer: (param1: Row) -> param2: Any + ) -> result1: TaggedTable +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/time_series.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/time_series.sdsstub new file mode 100644 index 000000000..67864de6c --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/containers/time_series.sdsstub @@ -0,0 +1,371 @@ +package safeds.data.tabular.containers + +from safeds.data.image.containers import Image +from safeds.data.tabular.containers import Column, Row, Table + +/** + * @param data The data. + * @param targetName Name of the target column. + * @param timeName Name of the time column. + * @param featureNames Names of the feature columns. If `null`, all columns except the target and time columns are used. + */ +class TimeSeries( + data: Map<String, List<Any>>, + @PythonName("target_name") targetName: String, + @PythonName("time_name") timeName: String, + @PythonName("feature_names") featureNames: List<String>? = null +) sub Table { + /** + * Get the target column of the time series. + */ + attr target: Column + /** + * Get the feature columns of the time series. + */ + attr features: Table + /** + * Get the time column of the time series. + */ + attr time: Column + + /** + * Return a new `TimeSeries` with the provided column attached at the end, as neither target nor feature column. + * + * The original time series is not modified. + * + * @param column The column to be added. + * + * @result result1 The time series with the column attached as neither target nor feature column. + */ + @Pure + @PythonName("add_column") + fun addColumn( + column: Column + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with the provided column attached at the end, as a feature column. + * + * The original time series is not modified.
+ * + * @param column The column to be added. + * + * @result result1 The time series with the attached feature column. + */ + @Pure + @PythonName("add_column_as_feature") + fun addColumnAsFeature( + column: Column + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with the provided columns attached at the end, as feature columns. + * + * The original time series is not modified. + * + * @param columns The columns to be added as features. + * + * @result result1 The time series with the attached feature columns. + */ + @Pure + @PythonName("add_columns_as_features") + fun addColumnsAsFeatures( + columns: union<List<Column>, Table> + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with multiple added columns, as neither target nor feature columns. + * + * The original time series is not modified. + * + * @param columns The columns to be added. + * + * @result result1 A new time series combining the original time series and the given columns as neither target nor feature columns. + */ + @Pure + @PythonName("add_columns") + fun addColumns( + columns: union<List<Column>, Table> + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with an extra Row attached. + * + * The original time series is not modified. + * + * @param row The row to be added. + * + * @result result1 A new time series with the added row at the end. + */ + @Pure + @PythonName("add_row") + fun addRow( + row: Row + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with multiple extra Rows attached. + * + * The original time series is not modified. + * + * @param rows The rows to be added. + * + * @result result1 A new time series which combines the original time series and the given rows. + */ + @Pure + @PythonName("add_rows") + fun addRows( + rows: union<List<Row>, Table> + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` containing only rows that match the given Callable (e.g. lambda function). + * + * The original time series is not modified.
+ * + * @param query A Callable that is applied to all rows. + * + * @result result1 A time series containing only the rows that match the query. + */ + @Pure + @PythonName("filter_rows") + fun filterRows( + query: (param1: Row) -> param2: Boolean + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with only the given column(s). + * + * The original time series is not modified. + * + * @param columnNames A list containing the names of the columns to be kept. + * + * @result result1 A time series containing only the given column(s). + */ + @Pure + @PythonName("keep_only_columns") + fun keepOnlyColumns( + @PythonName("column_names") columnNames: List<String> + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with the given column(s) removed from the time series. + * + * The original time series is not modified. + * + * @param columnNames The names of all columns to be dropped. + * + * @result result1 A time series without the given columns. + */ + @Pure + @PythonName("remove_columns") + fun removeColumns( + @PythonName("column_names") columnNames: List<String> + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with every column that contains missing values removed. + * + * The original time series is not modified. + * + * @result result1 A time series without the columns that contain missing values. + */ + @Pure + @PythonName("remove_columns_with_missing_values") + fun removeColumnsWithMissingValues() -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with every column that contains non-numerical values removed. + * + * The original time series is not modified. + * + * @result result1 A time series without the columns that contain non-numerical values. + */ + @Pure + @PythonName("remove_columns_with_non_numerical_values") + fun removeColumnsWithNonNumericalValues() -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with all row duplicates removed. + * + * The original time series is not modified.
+ * + * @result result1 The time series with the duplicate rows removed. + */ + @Pure + @PythonName("remove_duplicate_rows") + fun removeDuplicateRows() -> result1: TimeSeries + + /** + * Return a new `TimeSeries` without the rows that contain missing values. + * + * The original time series is not modified. + * + * @result result1 A time series without the rows that contain missing values. + */ + @Pure + @PythonName("remove_rows_with_missing_values") + fun removeRowsWithMissingValues() -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with all rows that contain at least one outlier removed. + * + * We define an outlier as a value that has a distance of more than 3 standard deviations from the column mean. + * Missing values are not considered outliers. They are also ignored during the calculation of the standard + * deviation. + * + * The original time series is not modified. + * + * @result result1 A new time series without rows containing outliers. + */ + @Pure + @PythonName("remove_rows_with_outliers") + fun removeRowsWithOutliers() -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with a single column renamed. + * + * The original time series is not modified. + * + * @param oldName The old name of the column to be renamed. + * @param newName The new name of the column. + * + * @result result1 The time series with the renamed column. + */ + @Pure + @PythonName("rename_column") + fun renameColumn( + @PythonName("old_name") oldName: String, + @PythonName("new_name") newName: String + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with the specified old column replaced by a list of new columns. + * + * If the column to be replaced is the target or time column, it must be replaced by exactly one column. That column + * becomes the new target or time column, respectively. If the column to be replaced is a feature column, the new columns that replace it + * all become feature columns. + * + * The order of columns is kept.
The original time series is not modified. + * + * @param oldColumnName The name of the column to be replaced. + * @param newColumns The new columns replacing the old column. + * + * @result result1 A time series with the old column replaced by the new columns. + */ + @Pure + @PythonName("replace_column") + fun replaceColumn( + @PythonName("old_column_name") oldColumnName: String, + @PythonName("new_columns") newColumns: List<Column> + ) -> result1: TimeSeries + + /** + * Slice a part of the time series into a new `TimeSeries`. + * + * The original time series is not modified. + * + * @param start The first index of the range to be copied into a new time series, `null` by default. + * @param end The last index of the range to be copied into a new time series, `null` by default. + * @param step The step size used to iterate through the time series, 1 by default. + * + * @result result1 The resulting time series. + */ + @Pure + @PythonName("slice_rows") + fun sliceRows( + start: Int? = null, + end: Int? = null, + step: Int = 1 + ) -> result1: TimeSeries + + /** + * Sort the columns of a `TimeSeries` with the given comparator and return a new `TimeSeries`. + * + * The comparator is a function that takes two columns `col1` and `col2` and + * returns an integer: + * + * * If the function returns a negative number, `col1` will be ordered before `col2`. + * * If the function returns a positive number, `col1` will be ordered after `col2`. + * * If the function returns 0, the original order of `col1` and `col2` will be kept. + * + * If no comparator is given, the columns will be sorted alphabetically by their name. NOTE(review): `comparator` is declared without a default here - confirm that it can actually be omitted. + * + * The original time series is not modified. + * + * @param comparator The function used to compare two columns. + * + * @result result1 A new time series with sorted columns.
+ */ + @Pure + @PythonName("sort_columns") + fun sortColumns( + comparator: (param1: Column, param2: Column) -> param3: Int + ) -> result1: TimeSeries + + /** + * Return a new `TimeSeries` with the provided column transformed by calling the provided transformer. + * + * The original time series is not modified. + * + * @param name The name of the column to be transformed. + * @param transformer The transformer to apply to the given column. + * + * @result result1 The time series with the transformed column. + */ + @Pure + @PythonName("transform_column") + fun transformColumn( + name: String, + transformer: (param1: Row) -> param2: Any + ) -> result1: TimeSeries + + /** + * Plot a lagplot for the target column. + * + * @param lag The amount of lag used to plot. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_lagplot") + fun plotLagplot( + lag: Int + ) -> result1: Image + + /** + * Plot the time series target or the given column(s) as line plot. + * + * The function will take the time column as the default value for x_column_name and the target column as the + * default value for y_column_name. + * + * @param xColumnName The column name of the column to be plotted on the x-Axis, default is the time column. + * @param yColumnName The column name of the column to be plotted on the y-Axis, default is the target column. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_lineplot") + fun plotLineplot( + @PythonName("x_column_name") xColumnName: String? = null, + @PythonName("y_column_name") yColumnName: String? = null + ) -> result1: Image + + /** + * Plot the time series target or the given column(s) as scatter plot. + * + * The function will take the time column as the default value for x_column_name and the target column as the + * default value for y_column_name. + * + * @param xColumnName The column name of the column to be plotted on the x-Axis.
+ * @param yColumnName The column name of the column to be plotted on the y-Axis. + * + * @result result1 The plot as an image. + */ + @Pure + @PythonName("plot_scatterplot") + fun plotScatterplot( + @PythonName("x_column_name") xColumnName: String? = null, + @PythonName("y_column_name") yColumnName: String? = null + ) -> result1: Image +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/discretizer.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/discretizer.sdsstub new file mode 100644 index 000000000..b21a8d414 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/discretizer.sdsstub @@ -0,0 +1,29 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import TableTransformer + +/** + * The Discretizer bins continuous data into intervals. + * + * @param numberOfBins The number of bins to be created. + */ +class Discretizer( + @PythonName("number_of_bins") numberOfBins: Int = 5 +) sub TableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List?
+ ) -> result1: Discretizer +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/imputer.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/imputer.sdsstub new file mode 100644 index 000000000..e8aa85231 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/imputer.sdsstub @@ -0,0 +1,53 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import TableTransformer + +/** + * Replace missing values with the given strategy. + * + * @param strategy The strategy used to impute missing values. Use the variants of the `Imputer.Strategy` enum to specify it. + */ +class Imputer( + strategy: Imputer.Strategy +) sub TableTransformer { + enum Strategy { + /** + * An imputation strategy for imputing missing data with given constant values. + * + * @param value The given value to impute missing values. + */ + Constant(value: Any) + + /** + * An imputation strategy for imputing missing data with mean values. + */ + Mean + + /** + * An imputation strategy for imputing missing data with median values. + */ + Median + + /** + * An imputation strategy for imputing missing data with mode values. The lowest value will be used if there are multiple values with the same highest count. + */ + Mode + } + + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List?
+ ) -> result1: Imputer +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/label_encoder.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/label_encoder.sdsstub new file mode 100644 index 000000000..84bc57466 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/label_encoder.sdsstub @@ -0,0 +1,25 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import InvertibleTableTransformer + +/** + * The LabelEncoder encodes one or more given columns into labels. + */ +class LabelEncoder() sub InvertibleTableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List? + ) -> result1: LabelEncoder +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/one_hot_encoder.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/one_hot_encoder.sdsstub new file mode 100644 index 000000000..f6292bfe5 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/one_hot_encoder.sdsstub @@ -0,0 +1,48 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import InvertibleTableTransformer + +/** + * A way to deal with categorical features that is particularly useful for unordered (i.e. nominal) data.
+ * + * It replaces a column with a set of columns, each representing a unique value in the original column. The value of + * each new column is 1 if the original column had that value, and 0 otherwise. Take the following table as an + * example: + * + * | col1 | + * |------| + * | "a" | + * | "b" | + * | "c" | + * | "a" | + * + * The one-hot encoding of this table is: + * + * | col1__a | col1__b | col1__c | + * |---------|---------|---------| + * | 1 | 0 | 0 | + * | 0 | 1 | 0 | + * | 0 | 0 | 1 | + * | 1 | 0 | 0 | + * + * The name "one-hot" comes from the fact that each row has exactly one 1 in it, and the rest of the values are 0s. + * One-hot encoding is closely related to dummy variable / indicator variables, which are used in statistics. + */ +class OneHotEncoder() sub InvertibleTableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List? + ) -> result1: OneHotEncoder +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/range_scaler.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/range_scaler.sdsstub new file mode 100644 index 000000000..e74898117 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/range_scaler.sdsstub @@ -0,0 +1,31 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import InvertibleTableTransformer + +/** + * The RangeScaler transforms column values by scaling each value to a given range.
+ * + * @param minimum The minimum of the new range after the transformation + * @param maximum The maximum of the new range after the transformation + */ +class RangeScaler( + minimum: Float = 0.0, + maximum: Float = 1.0 +) sub InvertibleTableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List? + ) -> result1: RangeScaler +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/standard_scaler.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/standard_scaler.sdsstub new file mode 100644 index 000000000..e9f709132 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/standard_scaler.sdsstub @@ -0,0 +1,25 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import InvertibleTableTransformer + +/** + * The StandardScaler transforms column values to a range by removing the mean and scaling to unit variance. + */ +class StandardScaler() sub InvertibleTableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List?
+ ) -> result1: StandardScaler +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/table_transformer.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/table_transformer.sdsstub new file mode 100644 index 000000000..edf337a5a --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/transformation/table_transformer.sdsstub @@ -0,0 +1,125 @@ +package safeds.data.tabular.transformation + +from safeds.data.tabular.containers import Table + +/** + * Learn a transformation for a set of columns in a `Table` and transform another `Table` with the same columns. + */ +class TableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * This transformer is not modified. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List? + ) -> result1: TableTransformer + + /** + * Apply the learned transformation to a table. + * + * The table is not modified. + * + * @param table The table to which the learned transformation is applied. + * + * @result result1 The transformed table. + */ + @Pure + fun transform( + table: Table + ) -> result1: Table + + /** + * Get the names of all new columns that have been added by the transformer. + * + * @result result1 A list of names of the added columns, ordered as they will appear in the table. + */ + @Pure + @PythonName("get_names_of_added_columns") + fun getNamesOfAddedColumns() -> result1: List + + /** + * Get the names of all columns that have been changed by the transformer. + * + * @result result1 A list of names of changed columns, ordered as they appear in the table.
+ */ + @Pure + @PythonName("get_names_of_changed_columns") + fun getNamesOfChangedColumns() -> result1: List + + /** + * Get the names of all columns that have been removed by the transformer. + * + * @result result1 A list of names of the removed columns, ordered as they appear in the table the transformer was fitted on. + */ + @Pure + @PythonName("get_names_of_removed_columns") + fun getNamesOfRemovedColumns() -> result1: List + + /** + * Check if the transformer is fitted. + * + * @result result1 Whether the transformer is fitted. + */ + @Pure + @PythonName("is_fitted") + fun isFitted() -> result1: Boolean + + /** + * Learn a transformation for a set of columns in a table and apply the learned transformation to the same table. + * + * The table is not modified. If you also need the fitted transformer, use `fit` and `transform` separately. + * + * @param table The table used to fit the transformer. The transformer is then applied to this table. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The transformed table. + */ + @Pure + @PythonName("fit_and_transform") + fun fitAndTransform( + table: Table, + @PythonName("column_names") columnNames: List? = null + ) -> result1: Table +} + +/** + * A `TableTransformer` that can also undo the learned transformation after it has been applied. + */ +class InvertibleTableTransformer() sub TableTransformer { + /** + * Learn a transformation for a set of columns in a table. + * + * @param table The table used to fit the transformer. + * @param columnNames The list of columns from the table used to fit the transformer. If `null`, all columns are used. + * + * @result result1 The fitted transformer. + */ + @Pure + fun fit( + table: Table, + @PythonName("column_names") columnNames: List? + ) -> result1: InvertibleTableTransformer + + /** + * Undo the learned transformation. + * + * The table is not modified.
+ * + * @param transformedTable The table to be transformed back to the original version. + * + * @result result1 The original table. + */ + @Pure + @PythonName("inverse_transform") + fun inverseTransform( + @PythonName("transformed_table") transformedTable: Table + ) -> result1: Table +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/column_type.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/column_type.sdsstub new file mode 100644 index 000000000..856552432 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/column_type.sdsstub @@ -0,0 +1,26 @@ +package safeds.data.tabular.typing + +/** + * Abstract base class for column types. + * + * @param isNullable Whether the column type is nullable. + */ +class ColumnType { + /** + * Return whether the given column type is nullable. + * + * @result result1 True if the column is nullable. + */ + @Pure + @PythonName("is_nullable") + fun isNullable() -> result1: Boolean + + /** + * Return whether the given column type is numeric. + * + * @result result1 True if the column is numeric. + */ + @Pure + @PythonName("is_numeric") + fun isNumeric() -> result1: Boolean +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/schema.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/schema.sdsstub new file mode 100644 index 000000000..bad8f7ba0 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/data/tabular/typing/schema.sdsstub @@ -0,0 +1,72 @@ +package safeds.data.tabular.typing + +from safeds.data.tabular.typing import ColumnType + +/** + * Store column names and corresponding data types for a `Table` or `Row`. + * + * @param schema Map from column names to data types. + */ +class Schema { + /** + * Return a list of all column names saved in this schema.
+ */ + @PythonName("column_names") attr columnNames: List + + /** + * Return whether the schema contains a given column. + * + * @param columnName The name of the column. + * + * @result result1 True if the schema contains the column. + */ + @Pure + @PythonName("has_column") + fun hasColumn( + @PythonName("column_name") columnName: String + ) -> result1: Boolean + + /** + * Return the type of the given column. + * + * @param columnName The name of the column. + * + * @result result1 The type of the column. + */ + @Pure + @PythonName("get_column_type") + fun getColumnType( + @PythonName("column_name") columnName: String + ) -> result1: ColumnType + + /** + * Return a dictionary that maps column names to column types. + * + * @result result1 Dictionary representation of the schema. + */ + @Pure + @PythonName("to_dict") + fun toDict() -> result1: Map + + /** + * Merge multiple schemas into one. + * + * For each type mismatch the new schema will have the least common supertype. + * + * The type hierarchy is as follows: + * * Anything + * * RealNumber + * * Integer + * * Boolean + * * String + * + * @param schemas the list of schemas you want to merge + * + * @result result1 the new merged schema + */ + @Pure + @PythonName("merge_multiple_schemas") + static fun mergeMultipleSchemas( + schemas: List + ) -> result1: Schema +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/ada_boost.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/ada_boost.sdsstub new file mode 100644 index 000000000..e4c0507dd --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/ada_boost.sdsstub @@ -0,0 +1,47 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * Ada Boost classification.
+ * + * @param learner The learner from which the boosted ensemble is built. + * @param maximumNumberOfLearners The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure + * is stopped early. Has to be greater than 0. + * @param learningRate Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution + * of each classifier. Has to be greater than 0. + */ +@PythonName("AdaBoost") +class AdaBoostClassifier( + learner: Classifier? = null, + @PythonName("maximum_number_of_learners") maximumNumberOfLearners: Int = 50, + @PythonName("learning_rate") learningRate: Float = 1.0 +) sub Classifier { + /** + * Get the base learner used for training the ensemble. + */ + attr learner: Classifier? + /** + * Get the maximum number of learners in the ensemble. + */ + @PythonName("maximum_number_of_learners") attr maximumNumberOfLearners: Int + /** + * Get the learning rate. + */ + @PythonName("learning_rate") attr learningRate: Float + + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: AdaBoostClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/classifier.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/classifier.sdsstub new file mode 100644 index 000000000..9b0741c2c --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/classifier.sdsstub @@ -0,0 +1,101 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable + +/** + * Abstract base class for all classifiers. 
+ */ +class Classifier { + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: Classifier + + /** + * Predict a target vector using a dataset containing feature vectors. The model has to be trained first. + * + * @param dataset The dataset containing the feature vectors. + * + * @result prediction A dataset containing the given feature vectors and the predicted target vector. + */ + @Pure + fun predict( + dataset: Table + ) -> prediction: TaggedTable + + /** + * Check if the classifier is fitted. + * + * @result isFitted Whether the classifier is fitted. + */ + @Pure + @PythonName("is_fitted") + fun isFitted() -> isFitted: Boolean + + /** + * Compute the accuracy of the classifier on the given data. + * + * @param validationOrTestSet The validation or test set. + * + * @result accuracy The calculated accuracy score, i.e. the percentage of equal data. + */ + @Pure + fun accuracy( + @PythonName("validation_or_test_set") validationOrTestSet: TaggedTable + ) -> accuracy: Float + + /** + * Compute the classifier's precision on the given data. + * + * @param validationOrTestSet The validation or test set. + * @param positiveClass The class to be considered positive. All other classes are considered negative. + * + * @result precision The calculated precision score, i.e. the ratio of correctly predicted positives to all predicted positives. + * Return 1 if no positive predictions are made. + */ + @Pure + fun precision( + @PythonName("validation_or_test_set") validationOrTestSet: TaggedTable, + @PythonName("positive_class") positiveClass: Any + ) -> precision: Float + + /** + * Compute the classifier's recall on the given data. 
+ * + * @param validationOrTestSet The validation or test set. + * @param positiveClass The class to be considered positive. All other classes are considered negative. + * + * @result recall The calculated recall score, i.e. the ratio of correctly predicted positives to all expected positives. + * Return 1 if there are no positive expectations. + */ + @Pure + fun recall( + @PythonName("validation_or_test_set") validationOrTestSet: TaggedTable, + @PythonName("positive_class") positiveClass: Any + ) -> recall: Float + + /** + * Compute the classifier's $F_1$-score on the given data. + * + * @param validationOrTestSet The validation or test set. + * @param positiveClass The class to be considered positive. All other classes are considered negative. + * + * @result f1Score The calculated $F_1$-score, i.e. the harmonic mean between precision and recall. + * Return 1 if there are no positive expectations and predictions. + */ + @Pure + @PythonName("f1_score") + fun f1Score( + @PythonName("validation_or_test_set") validationOrTestSet: TaggedTable, + @PythonName("positive_class") positiveClass: Any + ) -> f1Score: Float +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/decision_tree.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/decision_tree.sdsstub new file mode 100644 index 000000000..cc88c824d --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/decision_tree.sdsstub @@ -0,0 +1,24 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * Decision tree classification. + */ +@PythonName("DecisionTree") +class DecisionTreeClassifier() sub Classifier { + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified.
+ * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: DecisionTreeClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/gradient_boosting.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/gradient_boosting.sdsstub new file mode 100644 index 000000000..d97a88dd1 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/gradient_boosting.sdsstub @@ -0,0 +1,41 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * Gradient boosting classification. + * + * @param numberOfTrees The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large + * number usually results in better performance. + * @param learningRate The larger the value, the more the model is influenced by each additional tree. If the learning rate is too + * low, the model might underfit. If the learning rate is too high, the model might overfit. + */ +@PythonName("GradientBoosting") +class GradientBoostingClassifier( + @PythonName("number_of_trees") numberOfTrees: Int = 100, + @PythonName("learning_rate") learningRate: Float = 0.1 +) sub Classifier { + /** + * Get the number of trees (estimators) in the ensemble. + */ + @PythonName("number_of_trees") attr numberOfTrees: Int + /** + * Get the learning rate. + */ + @PythonName("learning_rate") attr learningRate: Float + + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors.
+ * + * @result fittedClassifier The fitted classifier. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: GradientBoostingClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/k_nearest_neighbors.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/k_nearest_neighbors.sdsstub new file mode 100644 index 000000000..5c5de65be --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/k_nearest_neighbors.sdsstub @@ -0,0 +1,34 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * K-nearest-neighbors classification. + * + * @param numberOfNeighbors The number of neighbors to use for interpolation. Has to be greater than 0 (validated in the constructor) and + * less than or equal to the sample size (validated when calling `fit`). + */ +@PythonName("KNearestNeighbors") +class KNearestNeighborsClassifier( + @PythonName("number_of_neighbors") numberOfNeighbors: Int +) sub Classifier { + /** + * Get the number of neighbors used for interpolation. + */ + @PythonName("number_of_neighbors") attr numberOfNeighbors: Int + + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier.
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: KNearestNeighborsClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/logistic_regression.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/logistic_regression.sdsstub new file mode 100644 index 000000000..5fce47ffd --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/logistic_regression.sdsstub @@ -0,0 +1,24 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * Regularized logistic regression. + */ +@PythonName("LogisticRegression") +class LogisticRegressionClassifier() sub Classifier { + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: LogisticRegressionClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/random_forest.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/random_forest.sdsstub new file mode 100644 index 000000000..42d3609e3 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/random_forest.sdsstub @@ -0,0 +1,33 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * Random forest classification. + * + * @param numberOfTrees The number of trees to be used in the random forest. Has to be greater than 0.
+ */ +@PythonName("RandomForest") +class RandomForestClassifier( + @PythonName("number_of_trees") numberOfTrees: Int = 100 +) sub Classifier { + /** + * Get the number of trees used in the random forest. + */ + @PythonName("number_of_trees") attr numberOfTrees: Int + + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: RandomForestClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/support_vector_machine.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/support_vector_machine.sdsstub new file mode 100644 index 000000000..07a1a7a99 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/classification/support_vector_machine.sdsstub @@ -0,0 +1,66 @@ +package safeds.ml.classical.classification + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.classification import Classifier + +/** + * Support vector machine. + * + * @param c The strength of regularization. Must be strictly positive. + * @param kernel The type of kernel to be used. Defaults to `null`. + */ +@PythonName("SupportVectorMachine") +class SupportVectorMachineClassifier( + c: Float = 1.0, + kernel: SupportVectorMachineClassifier.Kernel? = null +) sub Classifier { + /** + * The kernel functions that can be used in the support vector machine. + */ + enum Kernel { + /** + * A linear kernel. + */ + Linear + + /** + * A polynomial kernel. + * + * @param degree The degree of the polynomial. + */ + Polynomial(degree: Int) + + /** + * A sigmoid kernel. + */ + Sigmoid + + /** + * A radial basis function kernel.
+ */ + RadialBasisFunction + } + + /** + * Get the regularization strength. + */ + attr c: Float + /** + * Get the type of kernel used. + */ + attr kernel: SupportVectorMachineClassifier.Kernel + + /** + * Create a copy of this classifier and fit it with the given training data. + * + * This classifier is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedClassifier The fitted classifier. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedClassifier: SupportVectorMachineClassifier +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ada_boost.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ada_boost.sdsstub new file mode 100644 index 000000000..fb5b32af3 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ada_boost.sdsstub @@ -0,0 +1,47 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Ada Boost regression. + * + * @param learner The learner from which the boosted ensemble is built. + * @param maximumNumberOfLearners The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure + * is stopped early. Has to be greater than 0. + * @param learningRate Weight applied to each regressor at each boosting iteration. A higher learning rate increases the contribution + * of each regressor. Has to be greater than 0. + */ +@PythonName("AdaBoost") +class AdaBoostRegressor( + learner: Regressor? = null, + @PythonName("maximum_number_of_learners") maximumNumberOfLearners: Int = 50, + @PythonName("learning_rate") learningRate: Float = 1.0 +) sub Regressor { + /** + * Get the base learner used for training the ensemble. + */ + attr learner: Regressor? 
+ /** + * Get the maximum number of learners in the ensemble. + */ + @PythonName("maximum_number_of_learners") attr maximumNumberOfLearners: Int + /** + * Get the learning rate. + */ + @PythonName("learning_rate") attr learningRate: Float + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: AdaBoostRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/decision_tree.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/decision_tree.sdsstub new file mode 100644 index 000000000..cb90c9ed3 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/decision_tree.sdsstub @@ -0,0 +1,24 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Decision tree regression. + */ +@PythonName("DecisionTree") +class DecisionTreeRegressor() sub Regressor { + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: DecisionTreeRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/elastic_net_regression.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/elastic_net_regression.sdsstub new file mode 100644 index 000000000..8ba2c482c --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/elastic_net_regression.sdsstub @@ -0,0 +1,40 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Elastic net regression. + * + * @param alpha Controls the regularization of the model. The higher the value, the more regularized it becomes. + * @param lassoRatio Number between 0 and 1 that controls the ratio between Lasso and Ridge regularization. If 0, only Ridge + * regularization is used. If 1, only Lasso regularization is used. + */ +@PythonName("ElasticNetRegression") +class ElasticNetRegressor( + alpha: Float = 1.0, + @PythonName("lasso_ratio") lassoRatio: Float = 0.5 +) sub Regressor { + /** + * Get the regularization of the model. + */ + attr alpha: Float + /** + * Get the ratio between Lasso and Ridge regularization. + */ + @PythonName("lasso_ratio") attr lassoRatio: Float + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: ElasticNetRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/gradient_boosting.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/gradient_boosting.sdsstub new file mode 100644 index 000000000..60af9f639 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/gradient_boosting.sdsstub @@ -0,0 +1,41 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Gradient boosting regression. + * + * @param numberOfTrees The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large + * number usually results in better performance. + * @param learningRate The larger the value, the more the model is influenced by each additional tree. If the learning rate is too + * low, the model might underfit. If the learning rate is too high, the model might overfit. + */ +@PythonName("GradientBoosting") +class GradientBoostingRegressor( + @PythonName("number_of_trees") numberOfTrees: Int = 100, + @PythonName("learning_rate") learningRate: Float = 0.1 +) sub Regressor { + /** + * Get the number of trees (estimators) in the ensemble. + */ + @PythonName("number_of_trees") attr numberOfTrees: Int + /** + * Get the learning rate. + */ + @PythonName("learning_rate") attr learningRate: Float + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: GradientBoostingRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/k_nearest_neighbors.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/k_nearest_neighbors.sdsstub new file mode 100644 index 000000000..235d07db2 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/k_nearest_neighbors.sdsstub @@ -0,0 +1,34 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * K-nearest-neighbors regression. + * + * @param numberOfNeighbors The number of neighbors to use for interpolation. Has to be greater than 0 (validated in the constructor) and + * less than or equal to the sample size (validated when calling `fit`). + */ +@PythonName("KNearestNeighbors") +class KNearestNeighborsRegressor( + @PythonName("number_of_neighbors") numberOfNeighbors: Int +) sub Regressor { + /** + * Get the number of neighbors used for interpolation. + */ + @PythonName("number_of_neighbors") attr numberOfNeighbors: Int + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: KNearestNeighborsRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/lasso_regression.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/lasso_regression.sdsstub new file mode 100644 index 000000000..867840c8a --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/lasso_regression.sdsstub @@ -0,0 +1,33 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Lasso regression. + * + * @param alpha Controls the regularization of the model. The higher the value, the more regularized it becomes. + */ +@PythonName("LassoRegression") +class LassoRegressor( + alpha: Float = 1.0 +) sub Regressor { + /** + * Get the regularization of the model. + */ + attr alpha: Float + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: LassoRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/linear_regression.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/linear_regression.sdsstub new file mode 100644 index 000000000..6afc14faf --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/linear_regression.sdsstub @@ -0,0 +1,24 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Linear regression. 
+ */ +@PythonName("LinearRegression") +class LinearRegressionRegressor() sub Regressor { + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: LinearRegressionRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/random_forest.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/random_forest.sdsstub new file mode 100644 index 000000000..19e9c5f28 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/random_forest.sdsstub @@ -0,0 +1,33 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Random forest regression. + * + * @param numberOfTrees The number of trees to be used in the random forest. Has to be greater than 0. + */ +@PythonName("RandomForest") +class RandomForestRegressor( + @PythonName("number_of_trees") numberOfTrees: Int = 100 +) sub Regressor { + /** + * Get the number of trees used in the random forest. + */ + @PythonName("number_of_trees") attr numberOfTrees: Int + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: RandomForestRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/regressor.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/regressor.sdsstub new file mode 100644 index 000000000..bda8bef78 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/regressor.sdsstub @@ -0,0 +1,69 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable + +/** + * Abstract base class for all regressors. + */ +class Regressor { + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. + */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: Regressor + + /** + * Predict a target vector using a dataset containing feature vectors. The model has to be trained first. + * + * @param dataset The dataset containing the feature vectors. + * + * @result prediction A dataset containing the given feature vectors and the predicted target vector. + */ + @Pure + fun predict( + dataset: Table + ) -> prediction: TaggedTable + + /** + * Check if the regressor is fitted. + * + * @result isFitted Whether the regressor is fitted. + */ + @Pure + @PythonName("is_fitted") + fun isFitted() -> isFitted: Boolean + + /** + * Compute the mean squared error (MSE) on the given data. + * + * @param validationOrTestSet The validation or test set. + * + * @result meanSquaredError The calculated mean squared error (the average of the distance of each individual row squared).
+ */ + @Pure + @PythonName("mean_squared_error") + fun meanSquaredError( + @PythonName("validation_or_test_set") validationOrTestSet: TaggedTable + ) -> meanSquaredError: Float + + /** + * Compute the mean absolute error (MAE) of the regressor on the given data. + * + * @param validationOrTestSet The validation or test set. + * + * @result meanAbsoluteError The calculated mean absolute error (the average of the distance of each individual row). + */ + @Pure + @PythonName("mean_absolute_error") + fun meanAbsoluteError( + @PythonName("validation_or_test_set") validationOrTestSet: TaggedTable + ) -> meanAbsoluteError: Float +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ridge_regression.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ridge_regression.sdsstub new file mode 100644 index 000000000..824c21b40 --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/ridge_regression.sdsstub @@ -0,0 +1,33 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + +/** + * Ridge regression. + * + * @param alpha Controls the regularization of the model. The higher the value, the more regularized it becomes. + */ +@PythonName("RidgeRegression") +class RidgeRegressor( + alpha: Float = 1.0 +) sub Regressor { + /** + * Get the regularization of the model. + */ + attr alpha: Float + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: RidgeRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/support_vector_machine.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/support_vector_machine.sdsstub new file mode 100644 index 000000000..36ce33d3f --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/classical/regression/support_vector_machine.sdsstub @@ -0,0 +1,67 @@ +package safeds.ml.classical.regression + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.classical.regression import Regressor + + +/** + * Support vector machine. + * + * @param c The strength of regularization. Must be strictly positive. + * @param kernel The type of kernel to be used. Defaults to None. + */ +@PythonName("SupportVectorMachine") +class SupportVectorMachineRegressor( + c: Float = 1.0, + kernel: SupportVectorMachineRegressor.Kernel? = null +) sub Regressor { + /** + * The kernel functions that can be used in the support vector machine. + */ + enum Kernel { + /** + * A linear kernel. + */ + Linear + + /** + * A polynomial kernel. + * + * @param degree The degree of the polynomial. + */ + Polynomial(degree: Int) + + /** + * A sigmoid kernel. + */ + Sigmoid + + /** + * A radial basis function kernel. + */ + RadialBasisFunction + } + + /** + * Get the regularization strength. + */ + attr c: Float + /** + * Get the type of kernel used. + */ + attr kernel: SupportVectorMachineRegressor.Kernel + + /** + * Create a copy of this regressor and fit it with the given training data. + * + * This regressor is not modified. + * + * @param trainingSet The training data containing the feature and target vectors. + * + * @result fittedRegressor The fitted regressor. 
+ */ + @Pure + fun fit( + @PythonName("training_set") trainingSet: TaggedTable + ) -> fittedRegressor: SupportVectorMachineRegressor +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/classifier.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/classifier.sdsstub new file mode 100644 index 000000000..5769938ef --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/classifier.sdsstub @@ -0,0 +1,50 @@ +package safeds.ml.nn + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.nn import FNNLayer + +@PythonName("ClassificationNeuralNetwork") +class NeuralNetworkClassifier( + layers: List<FNNLayer> +) { + /** + * Check if the model is fitted. + */ + @PythonName("is_fitted") attr isFitted: Boolean + + /** + * Train the neural network with given training data. + * + * The original model is not modified. + * + * @param trainData The data the network should be trained on. + * @param epochSize The number of times the training cycle should be done. + * @param batchSize The size of data batches that should be loaded at one time. + * @param callbackOnBatchCompletion Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average. + * @param callbackOnEpochCompletion Function used to view metrics while training. Gets called after an epoch is completed with the index of the last epoch and the overall loss average.
+ * + * @result fittedClassifier The trained Model + */ + @Pure + fun fit( + @PythonName("train_data") trainData: TaggedTable, + @PythonName("epoch_size") epochSize: Int = 25, + @PythonName("batch_size") batchSize: Int = 1, + @PythonName("callback_on_batch_completion") callbackOnBatchCompletion: (param1: Int, param2: Float) -> () = (param1, param2) {}, + @PythonName("callback_on_epoch_completion") callbackOnEpochCompletion: (param1: Int, param2: Float) -> () = (param1, param2) {} + ) -> fittedClassifier: NeuralNetworkClassifier + + /** + * Make a prediction for the given test data. + * + * The original Model is not modified. + * + * @param testData The data the network should predict. + * + * @result result1 The given test_data with an added "prediction" column at the end + */ + @Pure + fun predict( + @PythonName("test_data") testData: Table + ) -> result1: TaggedTable +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/fnn_layer.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/fnn_layer.sdsstub new file mode 100644 index 000000000..dff9e8e6e --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/fnn_layer.sdsstub @@ -0,0 +1,15 @@ +package safeds.ml.nn + +/** + * @param outputSize The number of neurons in this layer + * @param inputSize The number of neurons in the previous layer + */ +class FNNLayer( + @PythonName("output_size") outputSize: Int, + @PythonName("input_size") inputSize: Int? = null +) { + /** + * Get the output_size of this layer. 
+ */ + @PythonName("output_size") attr outputSize: Int +} diff --git a/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/regressor.sdsstub b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/regressor.sdsstub new file mode 100644 index 000000000..d7a80143a --- /dev/null +++ b/packages/safe-ds-lang/src/resources/builtins/safeds/ml/nn/regressor.sdsstub @@ -0,0 +1,50 @@ +package safeds.ml.nn + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.ml.nn import FNNLayer + +@PythonName("RegressionNeuralNetwork") +class NeuralNetworkRegressor( + layers: List<FNNLayer> +) { + /** + * Check if the model is fitted. + */ + @PythonName("is_fitted") attr isFitted: Boolean + + /** + * Train the neural network with given training data. + * + * The original model is not modified. + * + * @param trainData The data the network should be trained on. + * @param epochSize The number of times the training cycle should be done. + * @param batchSize The size of data batches that should be loaded at one time. + * @param callbackOnBatchCompletion Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average. + * @param callbackOnEpochCompletion Function used to view metrics while training. Gets called after an epoch is completed with the index of the last epoch and the overall loss average. + * + * @result trainedRegressor The trained Model + */ + @Pure + fun fit( + @PythonName("train_data") trainData: TaggedTable, + @PythonName("epoch_size") epochSize: Int = 25, + @PythonName("batch_size") batchSize: Int = 1, + @PythonName("callback_on_batch_completion") callbackOnBatchCompletion: (param1: Int, param2: Float) -> () = (param1, param2) {}, + @PythonName("callback_on_epoch_completion") callbackOnEpochCompletion: (param1: Int, param2: Float) -> () = (param1, param2) {} + ) -> trainedRegressor: NeuralNetworkRegressor + + /** + * Make a prediction for the given test data.
+ * + * The original Model is not modified. + * + * @param testData The data the network should predict. + * + * @result prediction The given test_data with an added "prediction" column at the end + */ + @Pure + fun predict( + @PythonName("test_data") testData: Table + ) -> prediction: TaggedTable +}