diff --git a/.gitignore b/.gitignore
index bf3e1b2..03b6134 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,3 +41,5 @@ bin/
### Mac OS ###
.DS_Store
/.idea/
+/.kotlin/
+/.idea/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/gradle.xml b/.idea/gradle.xml
new file mode 100644
index 0000000..2a65317
--- /dev/null
+++ b/.idea/gradle.xml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..46c1e48
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/build.gradle.kts b/build.gradle.kts
index ccc43ba..18940f0 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -1,22 +1,27 @@
import java.util.*
+
plugins {
kotlin("jvm") version "1.8.0"
- id("org.jetbrains.kotlinx.dataframe") version "0.13.1"
+ id("org.jetbrains.kotlinx.dataframe") version "0.15.0-dev"
}
group = "org.jetbrains.kotlinx.dataframe.examples"
version = "1.0-SNAPSHOT"
repositories {
+ mavenLocal()
mavenCentral()
}
dependencies {
- implementation ("org.jetbrains.kotlinx:dataframe:0.13.1")
- implementation ("org.jetbrains.kotlinx:kandy-lets-plot:0.6.0")
- implementation ("org.jetbrains.kotlinx:kandy-api:0.6.0")
+ implementation ("org.jetbrains.kotlin:kotlin-stdlib:1.8.0")
+ implementation ("org.jetbrains.kotlinx:dataframe:0.15.0-dev")
+ implementation ("org.jetbrains.kotlinx:kandy-lets-plot:0.7.1")
+ implementation ("org.jetbrains.kotlinx:kandy-api:0.7.1")
implementation ("org.mariadb.jdbc:mariadb-java-client:3.1.4")
+ implementation ("org.hsqldb:hsqldb:2.7.3")
+
testImplementation(kotlin("test"))
}
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index d2d6fd4..3474a50 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,6 @@
#Thu Jan 18 12:11:25 CET 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
diff --git a/gradlew b/gradlew
new file mode 100644
index 0000000..1b6c787
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,234 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+# Gradle start up script for POSIX generated by Gradle.
+#
+# Important for running:
+#
+# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+# noncompliant, but you have some other compliant shell such as ksh or
+# bash, then to run this script, type that shell name before the whole
+# command line, like:
+#
+# ksh Gradle
+#
+# Busybox and similar reduced shells will NOT work, because this script
+# requires all of these POSIX shell features:
+# * functions;
+# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+# * compound commands having a testable exit status, especially «case»;
+# * various built-in commands including «command», «set», and «ulimit».
+#
+# Important for patching:
+#
+# (2) This script targets any POSIX shell, so it avoids extensions provided
+# by Bash, Ksh, etc; in particular arrays are avoided.
+#
+# The "traditional" practice of packing multiple parameters into a
+# space-separated string is a well documented source of bugs and security
+# problems, so this is (mostly) avoided, by progressively accumulating
+# options in "$@", and eventually passing that to Java.
+#
+# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+# see the in-line comments for details.
+#
+# There are tweaks for specific operating systems such as AIX, CygWin,
+# Darwin, MinGW, and NonStop.
+#
+# (3) This script is generated from the Groovy template
+# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+# within the Gradle project.
+#
+# You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+ APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
+ [ -h "$app_path" ]
+do
+ ls=$( ls -ld "$app_path" )
+ link=${ls#*' -> '}
+ case $link in #(
+ /*) app_path=$link ;; #(
+ *) app_path=$APP_HOME$link ;;
+ esac
+done
+
+APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
+
+APP_NAME="Gradle"
+APP_BASE_NAME=${0##*/}
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+ echo "$*"
+} >&2
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in #(
+ CYGWIN* ) cygwin=true ;; #(
+ Darwin* ) darwin=true ;; #(
+ MSYS* | MINGW* ) msys=true ;; #(
+ NONSTOP* ) nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD=$JAVA_HOME/jre/sh/java
+ else
+ JAVACMD=$JAVA_HOME/bin/java
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD=java
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+ case $MAX_FD in #(
+ max*)
+ MAX_FD=$( ulimit -H -n ) ||
+ warn "Could not query maximum file descriptor limit"
+ esac
+ case $MAX_FD in #(
+ '' | soft) :;; #(
+ *)
+ ulimit -n "$MAX_FD" ||
+ warn "Could not set maximum file descriptor limit to $MAX_FD"
+ esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+# * args from the command line
+# * the main class name
+# * -classpath
+# * -D...appname settings
+# * --module-path (only if needed)
+# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+ APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+ CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+ JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ for arg do
+ if
+ case $arg in #(
+ -*) false ;; # don't mess with options #(
+ /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
+ [ -e "$t" ] ;; #(
+ *) false ;;
+ esac
+ then
+ arg=$( cygpath --path --ignore --mixed "$arg" )
+ fi
+ # Roll the args list around exactly as many times as the number of
+ # args, so each arg winds up back in the position where it started, but
+ # possibly modified.
+ #
+ # NB: a `for` loop captures its iteration list before it begins, so
+ # changing the positional parameters here affects neither the number of
+ # iterations, nor the values presented in `arg`.
+ shift # remove old arg
+ set -- "$@" "$arg" # push replacement arg
+ done
+fi
+
+# Collect all arguments for the java command;
+# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+# shell script including quotes and variable substitutions, so put them in
+# double quotes to make sure that they get re-expanded; and
+# * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+ "-Dorg.gradle.appname=$APP_BASE_NAME" \
+ -classpath "$CLASSPATH" \
+ org.gradle.wrapper.GradleWrapperMain \
+ "$@"
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+# set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor process substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+ printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+ xargs -n1 |
+ sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+ tr '\n' ' '
+ )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/gradlew.bat b/gradlew.bat
new file mode 100644
index 0000000..107acd3
--- /dev/null
+++ b/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/notebooks/customdb.ipynb b/notebooks/customdb.ipynb
new file mode 100644
index 0000000..a53d6c9
--- /dev/null
+++ b/notebooks/customdb.ipynb
@@ -0,0 +1,1538 @@
+{
+ "cells": [
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:25:44.552813200Z",
+ "start_time": "2024-10-10T16:25:44.472998200Z"
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "import org.jetbrains.kotlinx.dataframe.DataFrame\n",
+ "import org.jetbrains.kotlinx.dataframe.api.describe\n",
+ "import org.jetbrains.kotlinx.dataframe.api.print\n",
+ "import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig\n",
+ "import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlTable\n",
+ "import org.jetbrains.kotlinx.dataframe.io.readSqlTable\n",
+ "import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables\n",
+ "import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema\n",
+ "import java.sql.DriverManager\n",
+ "import java.util.*\n"
+ ],
+ "outputs": [],
+ "execution_count": 1
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:08.785509400Z",
+ "start_time": "2024-10-10T16:26:07.914085300Z"
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "USE {\n",
+ " dependencies(\"org.hsqldb:hsqldb:2.7.3\")\n",
+ "}"
+ ],
+ "outputs": [],
+ "execution_count": 2
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:10.797640700Z",
+ "start_time": "2024-10-10T16:26:10.156473400Z"
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "DriverManager.getConnection(org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.URL, org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.USER_NAME, org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.PASSWORD).use { con ->\n",
+ " org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.createAndPopulateTable(con)\n",
+ "}"
+ ],
+ "outputs": [],
+ "execution_count": 3
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**The Custom Database (HSQLDB) Exploration: printing schemas for all non-system tables**"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "val dbConfig = DbConnectionConfig(org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.URL, org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.USER_NAME, org.jetbrains.kotlinx.dataframe.examples.jdbc.customdb.PASSWORD)\n",
+ "\n",
+ "val dataschemas = DataFrame.getSchemaForAllSqlTables(dbConfig, dbType = customdb.HSQLDB)\n",
+ "\n",
+ "dataschemas.forEach { \n",
+ " println(\"--- Schema for Table ${it.key} ---\")\n",
+ " println(it.value)\n",
+ " println()\n",
+ "}"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:13.466682400Z",
+ "start_time": "2024-10-10T16:26:12.948497700Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Schema for Table ORDERS ---\r\n",
+ "ID: Int\n",
+ "ITEM: String\n",
+ "PRICE: Double\n",
+ "ORDER_DATE: java.util.Date?\r\n",
+ "\r\n"
+ ]
+ }
+ ],
+ "execution_count": 4
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**The Custom Database (HSQLDB) Quick Exploration: printing summary statistics and the first 5 rows of each non-system table**"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "val dfs = DataFrame.readAllSqlTables(dbConfig, dbType = customdb.HSQLDB).values\n",
+ "\n",
+ "dfs.forEach {\n",
+ " it.describe().print()\n",
+ " it.print(5)\n",
+ "}"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:16.231380700Z",
+ "start_time": "2024-10-10T16:26:15.617997500Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " name type count unique nulls top freq mean std min median max\n",
+ " 0 ID Int 4 4 0 0 1 1,5 1,290994 0 1 3\n",
+ " 1 ITEM String 4 2 0 Laptop 2 null null Laptop Laptop Smartphone\n",
+ " 2 PRICE Double 4 2 0 1500 2 1100,0 461,880215 700 1100 1500\n",
+ " 3 ORDER_DATE java.util.Date 4 1 0 2024-10-10 4 null null 2024-10-10 2024-10-10 2024-10-10\n",
+ "\r\n",
+ " ID ITEM PRICE ORDER_DATE\n",
+ " 0 0 Laptop 1500,0 2024-10-10\n",
+ " 1 1 Smartphone 700,0 2024-10-10\n",
+ " 2 2 Laptop 1500,0 2024-10-10\n",
+ " 3 3 Smartphone 700,0 2024-10-10\n",
+ "\r\n"
+ ]
+ }
+ ],
+ "execution_count": 5
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:22.186464800Z",
+ "start_time": "2024-10-10T16:26:22.096726600Z"
+ }
+ },
+ "cell_type": "code",
+ "source": "dbConfig",
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DbConnectionConfig(url=jdbc:hsqldb:hsql://localhost/testdb, user=SA, password=)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 6
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "val ordersDf = DataFrame.readSqlTable(dbConfig, \"orders\", dbType = customdb.HSQLDB)\n",
+ "ordersDf"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:25.160902600Z",
+ "start_time": "2024-10-10T16:26:24.267113100Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " ID | ITEM | PRICE | ORDER_DATE |
---|
0 | Laptop | 1500,000000 | 2024-10-10 |
1 | Smartphone | 700,000000 | 2024-10-10 |
2 | Laptop | 1500,000000 | 2024-10-10 |
3 | Smartphone | 700,000000 | 2024-10-10 |
\n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"ID\",\"ITEM\",\"PRICE\",\"ORDER_DATE\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"java.util.Date\"}],\"nrow\":4,\"ncol\":4},\"kotlin_dataframe\":[{\"ID\":0,\"ITEM\":\"Laptop\",\"PRICE\":1500.0,\"ORDER_DATE\":\"2024-10-10\"},{\"ID\":1,\"ITEM\":\"Smartphone\",\"PRICE\":700.0,\"ORDER_DATE\":\"2024-10-10\"},{\"ID\":2,\"ITEM\":\"Laptop\",\"PRICE\":1500.0,\"ORDER_DATE\":\"2024-10-10\"},{\"ID\":3,\"ITEM\":\"Smartphone\",\"PRICE\":700.0,\"ORDER_DATE\":\"2024-10-10\"}]}"
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 7
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-10-10T16:26:53.415292100Z",
+ "start_time": "2024-10-10T16:26:52.945269200Z"
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "val updatedDf = ordersDf.add(\"TAX\") { it[\"PRICE\"] as Double * 0.1 }\n",
+ "updatedDf"
+ ],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " ID | ITEM | PRICE | ORDER_DATE | TAX |
---|
0 | Laptop | 1500,000000 | 2024-10-10 | 150,000000 |
1 | Smartphone | 700,000000 | 2024-10-10 | 70,000000 |
2 | Laptop | 1500,000000 | 2024-10-10 | 150,000000 |
3 | Smartphone | 700,000000 | 2024-10-10 | 70,000000 |
\n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"ID\",\"ITEM\",\"PRICE\",\"ORDER_DATE\",\"TAX\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"java.util.Date\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"}],\"nrow\":4,\"ncol\":5},\"kotlin_dataframe\":[{\"ID\":0,\"ITEM\":\"Laptop\",\"PRICE\":1500.0,\"ORDER_DATE\":\"2024-10-10\",\"TAX\":150.0},{\"ID\":1,\"ITEM\":\"Smartphone\",\"PRICE\":700.0,\"ORDER_DATE\":\"2024-10-10\",\"TAX\":70.0},{\"ID\":2,\"ITEM\":\"Laptop\",\"PRICE\":1500.0,\"ORDER_DATE\":\"2024-10-10\",\"TAX\":150.0},{\"ID\":3,\"ITEM\":\"Smartphone\",\"PRICE\":700.0,\"ORDER_DATE\":\"2024-10-10\",\"TAX\":70.0}]}"
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 8
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Kotlin",
+ "language": "kotlin",
+ "name": "kotlin"
+ },
+ "language_info": {
+ "name": "kotlin",
+ "version": "1.8.20",
+ "mimetype": "text/x-kotlin",
+ "file_extension": ".kt",
+ "pygments_lexer": "kotlin",
+ "codemirror_mode": "text/x-kotlin",
+ "nbconvert_exporter": ""
+ },
+ "ktnbPluginMetadata": {
+ "projectDependencies": true
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/imdb.ipynb b/notebooks/imdb.ipynb
index b516102..58b4c0c 100644
--- a/notebooks/imdb.ipynb
+++ b/notebooks/imdb.ipynb
@@ -3,8 +3,8 @@
{
"metadata": {
"ExecuteTime": {
- "end_time": "2024-03-25T15:58:40.085023800Z",
- "start_time": "2024-03-25T15:58:39.481638700Z"
+ "end_time": "2024-10-10T15:57:31.648930600Z",
+ "start_time": "2024-10-10T15:57:30.197064200Z"
}
},
"cell_type": "code",
@@ -14,25 +14,25 @@
"}"
],
"outputs": [],
- "execution_count": 5
+ "execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
- "end_time": "2024-03-25T15:58:48.248747200Z",
- "start_time": "2024-03-25T15:58:40.094999200Z"
+ "end_time": "2024-10-10T15:57:31.748663400Z",
+ "start_time": "2024-10-10T15:57:31.667881400Z"
}
},
"cell_type": "code",
- "source": "%use dataframe, kandy",
+ "source": "//%use dataframe, kandy",
"outputs": [],
- "execution_count": 6
+ "execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
- "end_time": "2024-03-25T15:58:48.396541400Z",
- "start_time": "2024-03-25T15:58:48.256723300Z"
+ "end_time": "2024-10-10T15:57:31.866348600Z",
+ "start_time": "2024-10-10T15:57:31.756642Z"
}
},
"cell_type": "code",
@@ -40,7 +40,7 @@
"import org.jetbrains.kotlinx.dataframe.DataFrame\n",
"import org.jetbrains.kotlinx.dataframe.api.describe\n",
"import org.jetbrains.kotlinx.dataframe.api.print\n",
- "import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration\n",
+ "import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig\n",
"import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlTable\n",
"import org.jetbrains.kotlinx.dataframe.io.readSqlTable\n",
"import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables\n",
@@ -49,7 +49,7 @@
"import java.util.*\n"
],
"outputs": [],
- "execution_count": 7
+ "execution_count": 3
},
{
"cell_type": "markdown",
@@ -67,21 +67,21 @@
"val USER_NAME = \"root\"\n",
"val PASSWORD = \"pass\"\n",
"\n",
- "val dbConfig = DatabaseConfiguration(URL, USER_NAME, PASSWORD)\n",
+ "val dbConfig = DbConnectionConfig(URL, USER_NAME, PASSWORD)\n",
"\n",
"val dataschemas = DataFrame.getSchemaForAllSqlTables(dbConfig)\n",
"\n",
"dataschemas.forEach { \n",
- " println(\"---Yet another table schema---\")\n",
- " println(it)\n",
+ " println(\"--- Schema for Table ${it.key} ---\")\n",
+ " println(it.value)\n",
" println()\n",
"}"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-03-25T15:58:50.320926400Z",
- "start_time": "2024-03-25T15:58:48.406706100Z"
+ "end_time": "2024-10-10T15:57:33.024637300Z",
+ "start_time": "2024-10-10T15:57:31.879315800Z"
}
},
"outputs": [
@@ -89,37 +89,37 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "---Yet another table schema---\r\n",
+ "--- Schema for Table actors ---\r\n",
"id: Int\n",
"first_name: String?\n",
"last_name: String?\n",
- "gender: Char?\r\n",
+ "gender: String?\r\n",
"\r\n",
- "---Yet another table schema---\r\n",
+ "--- Schema for Table directors ---\r\n",
"id: Int\n",
"first_name: String?\n",
"last_name: String?\r\n",
"\r\n",
- "---Yet another table schema---\r\n",
+ "--- Schema for Table directors_genres ---\r\n",
"director_id: Int\n",
"genre: String\n",
"prob: Float?\r\n",
"\r\n",
- "---Yet another table schema---\r\n",
+ "--- Schema for Table movies ---\r\n",
"id: Int\n",
"name: String?\n",
"year: Int?\n",
"rank: Float?\r\n",
"\r\n",
- "---Yet another table schema---\r\n",
+ "--- Schema for Table movies_directors ---\r\n",
"director_id: Int\n",
"movie_id: Int\r\n",
"\r\n",
- "---Yet another table schema---\r\n",
+ "--- Schema for Table movies_genres ---\r\n",
"movie_id: Int\n",
"genre: String\r\n",
"\r\n",
- "---Yet another table schema---\r\n",
+ "--- Schema for Table roles ---\r\n",
"actor_id: Int\n",
"movie_id: Int\n",
"role: String\r\n",
@@ -127,7 +127,7 @@
]
}
],
- "execution_count": 8
+ "execution_count": 4
},
{
"cell_type": "markdown",
@@ -141,7 +141,7 @@
{
"cell_type": "code",
"source": [
- "val dfs = DataFrame.readAllSqlTables(dbConfig, limit = 100)\n",
+ "val dfs = DataFrame.readAllSqlTables(dbConfig, limit = 100).values\n",
"\n",
"dfs.forEach {\n",
" it.describe().print()\n",
@@ -151,8 +151,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-03-25T15:58:52.840668900Z",
- "start_time": "2024-03-25T15:58:50.328181600Z"
+ "end_time": "2024-10-10T15:57:34.151814700Z",
+ "start_time": "2024-10-10T15:57:33.039598200Z"
}
},
"outputs": [
@@ -160,11 +160,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
- " name type count unique nulls top freq mean std min median max\n",
- " 0 id Int 100 100 0 2 1 53,37 30,245679 2 54 106\n",
- " 1 first_name String? 100 93 0 Antonio 3 null null Ahmed Krishna Yussuf Abed\n",
- " 2 last_name String? 100 81 0 A. 5 null null 'Chincheta' A. a'Hiller\n",
- " 3 gender Char? 100 1 0 M 100 null null M M M\n",
+ " name type count unique nulls top freq mean std min median max\n",
+ " 0 id Int 100 100 0 2 1 53,37 30,245679 2 54 106\n",
+ " 1 first_name String 100 93 0 Antonio 3 null null Ahmed Krishna Yussuf Abed\n",
+ " 2 last_name String 100 81 0 A. 5 null null 'Chincheta' A. a'Hiller\n",
+ " 3 gender String 100 1 0 M 100 null null M M M\n",
"\r\n",
" id first_name last_name gender\n",
" 0 2 Michael 'babeepower' Viera M\n",
@@ -174,10 +174,10 @@
" 4 6 José 'El Francés' M\n",
"...\n",
"\r\n",
- " name type count unique nulls top freq mean std min median max\n",
- " 0 id Int 100 100 0 1 1 53,22 30,099022 1 53 104\n",
- " 1 first_name String? 100 97 0 Mohamed 3 null null A.C. Lauri Zaki\n",
- " 2 last_name String? 100 65 0 Abbott 15 null null 1 Abashidze a'Hiller\n",
+ " name type count unique nulls top freq mean std min median max\n",
+ " 0 id Int 100 100 0 1 1 53,22 30,099022 1 53 104\n",
+ " 1 first_name String 100 97 0 Mohamed 3 null null A.C. Lauri Zaki\n",
+ " 2 last_name String 100 65 0 Abbott 15 null null 1 Abashidze a'Hiller\n",
"\r\n",
" id first_name last_name\n",
" 0 1 Todd 1\n",
@@ -190,7 +190,7 @@
" name type count unique nulls top freq mean std min median max\n",
" 0 director_id Int 100 41 0 8 8 35,340000 17,446148 2 37 61\n",
" 1 genre String 100 15 0 Drama 21 null null Action Drama War\n",
- " 2 prob Float? 100 27 0 1 40 0,623749 0,347447 0 1 1\n",
+ " 2 prob Float 100 27 0 1 40 0,623749 0,347447 0 1 1\n",
"\r\n",
" director_id genre prob\n",
" 0 2 Short 1,0\n",
@@ -200,11 +200,11 @@
" 4 6 Short 1,0\n",
"...\n",
"\r\n",
- " name type count unique nulls top freq mean std min median max\n",
- " 0 id Int 100 100 0 0 1 51,320000 30,230054 0 51 103\n",
- " 1 name String? 100 98 0 $1,000 Reward 3 null null #28 'A' 'burbs, The\n",
- " 2 year Int? 100 48 0 1901 9 1951,500000 41,555488 1897 1955 2004\n",
- " 3 rank Float? 100 22 74 7 2 6,150000 1,508178 3 6 10\n",
+ " name type count unique nulls top freq mean std min median max\n",
+ " 0 id Int 100 100 0 0 1 51,320000 30,230054 0 51 103\n",
+ " 1 name String 100 98 0 $1,000 Reward 3 null null #28 'A' 'burbs, The\n",
+ " 2 year Int 100 48 0 1901 9 1951,500000 41,555488 1897 1955 2004\n",
+ " 3 rank Float? 100 22 74 7 2 6,150000 1,508178 3 6 10\n",
"\r\n",
" id name year rank\n",
" 0 0 #28 2002 null\n",
@@ -254,7 +254,7 @@
]
}
],
- "execution_count": 9
+ "execution_count": 5
},
{
"cell_type": "markdown",
@@ -274,8 +274,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2024-03-25T15:58:56.151655600Z",
- "start_time": "2024-03-25T15:58:52.869437600Z"
+ "end_time": "2024-10-10T15:57:35.054917200Z",
+ "start_time": "2024-10-10T15:57:34.211654300Z"
}
},
"outputs": [
@@ -460,7 +460,7 @@
" </style>\n",
" </head>\n",
" <body>\n",
- " <table class="dataframe" id="df_956301312"></table>\n",
+ " <table class="dataframe" id="df_-2097152000"></table>\n",
"\n",
"<p class="dataframe_description">... showing only top 20 of 10000 rows</p><p class="dataframe_description">DataFrame: rowsCount = 10000, columnsCount = 4</p>\n",
"\n",
@@ -742,13 +742,13 @@
"\n",
"/*<!--*/\n",
"call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"id: Int\">id</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">2</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">3</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">4</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">5</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">6</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">7</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">8</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">9</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">10</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">11</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">12</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">13</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">14</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">15</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">16</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">17</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">18</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">19</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">20</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">21</span></span>"] }, \n",
- "{ name: "<span title=\"first_name: String?\">first_name</span>", children: [], rightAlign: false, values: ["Michael","Eloy","Dieguito","Antonio","José","Félix","Marcial","José","Francisco","Víctor","Antonio","Luis","Janny","Antonio","Baltazar","Luis Roberto","Murray the","Néstor","Tony","Pollino"] }, \n",
- "{ name: "<span title=\"last_name: String?\">last_name</span>", children: [], rightAlign: false, values: ["'babeepower' Viera","'Chincheta'","'El Cigala'","'El de Chipiona'","'El Francés'","'El Gato'","'El Jalisco'","'El Morito'","'El Niño de la Manola'","'El Payaso'","'El Pescaíto'","'El Plojo'","'el Portugues'","'El Rilete'","'El Toro'","'Formiga'","'K'","'Kick Boxer'","'La Chispa'","'Romero'"] }, \n",
- "{ name: "<span title=\"gender: Char?\">gender</span>", children: [], rightAlign: false, values: ["M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M"] }, \n",
- "], id: 956301312, rootId: 956301312, totalRows: 10000 } ) });\n",
+ "{ name: "<span title=\"first_name: String\">first_name</span>", children: [], rightAlign: false, values: ["Michael","Eloy","Dieguito","Antonio","José","Félix","Marcial","José","Francisco","Víctor","Antonio","Luis","Janny","Antonio","Baltazar","Luis Roberto","Murray the","Néstor","Tony","Pollino"] }, \n",
+ "{ name: "<span title=\"last_name: String\">last_name</span>", children: [], rightAlign: false, values: ["'babeepower' Viera","'Chincheta'","'El Cigala'","'El de Chipiona'","'El Francés'","'El Gato'","'El Jalisco'","'El Morito'","'El Niño de la Manola'","'El Payaso'","'El Pescaíto'","'El Plojo'","'el Portugues'","'El Rilete'","'El Toro'","'Formiga'","'K'","'Kick Boxer'","'La Chispa'","'Romero'"] }, \n",
+ "{ name: "<span title=\"gender: String\">gender</span>", children: [], rightAlign: false, values: ["M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M","M"] }, \n",
+ "], id: -2097152000, rootId: -2097152000, totalRows: 10000 } ) });\n",
"/*-->*/\n",
"\n",
- "call_DataFrame(function() { DataFrame.renderTable(956301312) });\n",
+ "call_DataFrame(function() { DataFrame.renderTable(-2097152000) });\n",
"\n",
"\n",
" </script>\n",
@@ -920,21 +920,21 @@
" \n",
" \n",
" \n",
- " id | first_name | last_name | gender |
---|
2 | Michael | 'babeepower' Viera | M |
3 | Eloy | 'Chincheta' | M |
4 | Dieguito | 'El Cigala' | M |
5 | Antonio | 'El de Chipiona' | M |
6 | José | 'El Francés' | M |
7 | Félix | 'El Gato' | M |
8 | Marcial | 'El Jalisco' | M |
9 | José | 'El Morito' | M |
10 | Francisco | 'El Niño de la Manola' | M |
11 | Víctor | 'El Payaso' | M |
12 | Antonio | 'El Pescaíto' | M |
13 | Luis | 'El Plojo' | M |
14 | Janny | 'el Portugues' | M |
15 | Antonio | 'El Rilete' | M |
16 | Baltazar | 'El Toro' | M |
17 | Luis Roberto | 'Formiga' | M |
18 | Murray the | 'K' | M |
19 | Néstor | 'Kick Boxer' | M |
20 | Tony | 'La Chispa' | M |
21 | Pollino | 'Romero' | M |
\n",
+ " id | first_name | last_name | gender |
---|
2 | Michael | 'babeepower' Viera | M |
3 | Eloy | 'Chincheta' | M |
4 | Dieguito | 'El Cigala' | M |
5 | Antonio | 'El de Chipiona' | M |
6 | José | 'El Francés' | M |
7 | Félix | 'El Gato' | M |
8 | Marcial | 'El Jalisco' | M |
9 | José | 'El Morito' | M |
10 | Francisco | 'El Niño de la Manola' | M |
11 | Víctor | 'El Payaso' | M |
12 | Antonio | 'El Pescaíto' | M |
13 | Luis | 'El Plojo' | M |
14 | Janny | 'el Portugues' | M |
15 | Antonio | 'El Rilete' | M |
16 | Baltazar | 'El Toro' | M |
17 | Luis Roberto | 'Formiga' | M |
18 | Murray the | 'K' | M |
19 | Néstor | 'Kick Boxer' | M |
20 | Tony | 'La Chispa' | M |
21 | Pollino | 'Romero' | M |
\n",
" \n",
" \n",
"