diff --git a/.releaserc.js b/.releaserc.js index b9c5dd05502b..3ec1f0494203 100644 --- a/.releaserc.js +++ b/.releaserc.js @@ -35,8 +35,8 @@ module.exports = { [ "@semantic-release/changelog", { - changelogTitle: "Release notes\n---", - changelogFile: "docs/release_notes.md" + changelogTitle: "---\n---", + changelogFile: "docs/release_notes_generated.qmd" } ], [ @@ -80,7 +80,11 @@ module.exports = { [ "@semantic-release/git", { - assets: ["pyproject.toml", "docs/release_notes.md", "ibis/__init__.py"], + assets: [ + "pyproject.toml", + "docs/release_notes_generated.qmd", + "ibis/__init__.py" + ], message: "chore(release): ${nextRelease.version}" } ] diff --git a/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json b/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json index 01cedd17c694..7868be8583d4 100644 --- a/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json +++ b/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json @@ -1,14 +1,15 @@ { - "hash": "11a33fda6aef0f256ced3d8488292de6", + "hash": "08808129893bcfdb11cb5903d9ae1ef1", "result": { - "markdown": "---\ntitle: Ibis v6.0.0\nauthor: \"Ibis team\"\ndate: \"2023-07-03\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.0.0 adds the Oracle backend, revamped UDF support, and many new features. This release also includes a number of refactors, bug fixes, and performance improvements. You can view the full changelog in [the release notes](../../../release_notes.md).\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the getting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6,<7'`. 
First, we'll setup Ibis and fetch some sample data to use.\n\n::: {#df4d0de1 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#4967a305 .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n## Breaking changes\n\n### Join duplicate column names\n\nPreviously when joining tables with duplicate column names, `_x` and `_y` suffixes would be appended by default to the left and right tables respectively. You could override this with the `suffix` argument, which is now removed in favor of `lname` and `rname` arguments. The default is changed to no suffix for the left table and `_right` for the right table.\n\n::: {#63b558d3 .cell execution_count=3}\n``` {.python .cell-code}\nt.join(t, \"island\").select(s.startswith(\"species\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ species  species_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ stringstring        │\n├─────────┼───────────────┤\n│ Adelie Adelie        │\n│ Adelie Adelie        │\n│ Adelie Adelie        │\n│              │\n└─────────┴───────────────┘\n
\n```\n:::\n:::\n\n\nTo replicate the previous behavior:\n\n::: {#8870c8c1 .cell execution_count=4}\n``` {.python .cell-code}\nt.join(t, \"island\", lname=\"{name}_x\", rname=\"{name}_y\").select(\n s.startswith(\"species\")\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃ species_x  species_y ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ stringstring    │\n├───────────┼───────────┤\n│ Adelie   Adelie    │\n│ Adelie   Adelie    │\n│ Adelie   Adelie    │\n│          │\n└───────────┴───────────┘\n
\n```\n:::\n:::\n\n\n### `.count()` column names no longer named `count` automatically\n\nColumns created with the `.count()` aggregation are no longer automatically named `count`. This is to follow convention with other aggregations and reduce the likelihood of name collisions.\n\n::: {#07597e82 .cell execution_count=5}\n``` {.python .cell-code}\nt.group_by(\"species\").agg(ibis._.species.count())\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ species    Count(species) ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ stringint64          │\n├───────────┼────────────────┤\n│ Adelie   152 │\n│ Gentoo   124 │\n│ Chinstrap68 │\n└───────────┴────────────────┘\n
\n```\n:::\n:::\n\n\nTo reproduce the old behavior, you can rename the column to `count` with:\n\n\n```python\nt.group_by(\"species\").agg(count=ibis._.species.count())\n```\n\n## Backends\n\n### Oracle\n\nThe Oracle backend was added! See the [Voltron Data blog](https://voltrondata.com/resources/ibis-6-0-oracle-backend-support) for more details.\n\n```python\nibis.connect(f\"oracle://user:password@host\")\n```\n\n### DuckDB\n\nThere were various DuckDB improvements, but one notable new feature is the ability to attach to a SQLite database through DuckDB. This allows you to run OLAP queries via DuckDB significantly faster on source data from SQLite.\n\nFirst we'll create a DuckDB connection and show it has no tables:\n\n::: {#8ba6caec .cell execution_count=6}\n``` {.python .cell-code}\nduckdb_con = ibis.connect(\"duckdb://\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\n[]\n```\n:::\n:::\n\n\nThen create a SQLite database with a table:\n\n::: {#7a85bcc6 .cell execution_count=7}\n``` {.python .cell-code}\nsqlite_con = ibis.connect(\"sqlite://penguins.db\")\nsqlite_con.create_table(\"penguins\", t.to_pandas(), overwrite=True)\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64float64float64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.7181.03750.0male  2007 │\n│ Adelie Torgersen39.517.4186.03800.0female2007 │\n│ Adelie Torgersen40.318.0195.03250.0female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nAnd attach it:\n\n::: {#e75c55eb .cell execution_count=8}\n``` {.python .cell-code}\nduckdb_con.attach_sqlite(\"./penguins.db\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\n['penguins']\n```\n:::\n:::\n\n\n#### MotherDuck support!\n\nMotherDuck launched recently and is now supported in Ibis!\n\nSimply connect with the DuckDB backend using `md:` or `motherduck:` as the database.\n\n\n```python\nibis.connect(\"duckdb://md:\")\n```\n\n### Polars\n\nThe Polars backend received many improvements from community members [@alexander-beedie](https://github.com/alexander-beedie) and [@mesejo](https://github.com/mesejo), with plenty of operations now supported.\n\nSome additions in this version include:\n\n- `any` and `all` reductions\n- `argmin` and `argmax`\n- `identical_to`\n- `corr`\n- support for `.sql()`\n\nGive it a try by setting your backend to Polars with `ibis.set_backend(\"polars\")`.\n\n## Functionality\n\n### UDFs\n\nUser-defined functions (UDFs) have been revamped with a new syntax and new backends added. To get started, import the decorator:\n\n::: {#c85c3821 .cell execution_count=9}\n``` {.python .cell-code}\nfrom ibis import udf\n```\n:::\n\n\nDefine a UDF:\n\n::: {#1cf149ea .cell execution_count=10}\n``` {.python .cell-code}\n@udf.scalar.python\ndef num_vowels(s: str, include_y: bool = False) -> int:\n return sum(map(s.lower().count, \"aeiou\" + (\"y\" * include_y)))\n```\n:::\n\n\nAnd call it:\n\n::: {#158fdd6a .cell execution_count=11}\n``` {.python .cell-code}\nnum_vowels(t[:1].species.execute()[0])\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
4
\n```\n:::\n\n:::\n:::\n\n\n::: {#aab3553e .cell execution_count=12}\n``` {.python .cell-code}\nt.group_by(num_vowels=num_vowels(t.species)).agg(\n num_vowels_island_count=t.island.count()\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ num_vowels  num_vowels_island_count ┃\n┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ int64int64                   │\n├────────────┼─────────────────────────┤\n│          4152 │\n│          3124 │\n│          268 │\n└────────────┴─────────────────────────┘\n
\n```\n:::\n:::\n\n\n::: {#9307cb71 .cell execution_count=13}\n``` {.python .cell-code}\nt.filter(num_vowels(t.species) < 4)\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Gentoo Biscoe46.113.22114500female2007 │\n│ Gentoo Biscoe50.016.32305700male  2007 │\n│ Gentoo Biscoe48.714.12104450female2007 │\n│  │\n└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### `to_torch` API\n\nA new `to_torch` output method was added. Combined with UDFs, this brings powerful ML capabilities into Ibis. See a complete example in the [Ibis + DuckDB + PyTorch blog](../torch).\n\n::: {#33e48456 .cell execution_count=14}\n``` {.python .cell-code}\nimport torch\n\ntorch.set_printoptions(threshold=10)\n```\n:::\n\n\n::: {#867cc0d3 .cell execution_count=15}\n``` {.python .cell-code}\nt.select(s.numeric()).to_torch()\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```\n{'bill_length_mm': tensor([39.1000, 39.5000, 40.3000, ..., 49.6000, 50.8000, 50.2000],\n dtype=torch.float64),\n 'bill_depth_mm': tensor([18.7000, 17.4000, 18.0000, ..., 18.2000, 19.0000, 18.7000],\n dtype=torch.float64),\n 'flipper_length_mm': tensor([181, 186, 195, ..., 193, 210, 198]),\n 'body_mass_g': tensor([3750, 3800, 3250, ..., 3775, 4100, 3775]),\n 'year': tensor([2007, 2007, 2007, ..., 2009, 2009, 2009])}\n```\n:::\n:::\n\n\n### Array zip support\n\nA new zip operation was added on array data types, allowing you to zip together multiple arrays.\n\n::: {#f0880cfc .cell execution_count=16}\n``` {.python .cell-code}\narrays = ibis.memtable(\n {\"numbers\": [[3, 2], [], None], \"strings\": [[\"a\", \"c\"], None, [\"e\"]]}\n)\narrays\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ numbers       strings       ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ array<int64>array<string> │\n├──────────────┼───────────────┤\n│ [3, 2]['a', 'c']    │\n│ []NULL          │\n│ NULL['e']         │\n└──────────────┴───────────────┘\n
\n```\n:::\n:::\n\n\n::: {#cac84c5c .cell execution_count=17}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings)\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip()                           ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ array<struct<f1: int64, f2: string>> │\n├──────────────────────────────────────┤\n│ [{...}, {...}]                       │\n│ []                                   │\n│ [{...}]                              │\n└──────────────────────────────────────┘\n
\n```\n:::\n:::\n\n\n::: {#4e961ab3 .cell execution_count=18}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings).unnest()\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip()                    ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ struct<f1: int64, f2: string> │\n├───────────────────────────────┤\n│ {'f1': 3, 'f2': 'a'}          │\n│ {'f1': 2, 'f2': 'c'}          │\n│ {'f1': None, 'f2': 'e'}       │\n└───────────────────────────────┘\n
\n```\n:::\n:::\n\n\n### Try cast support\n\nA new `try_cast()` operation was added that allows you to cast a column to a type, but return null if the cast fails.\n\n::: {#d8e748e5 .cell execution_count=19}\n``` {.python .cell-code}\nibis.literal(\"a\").try_cast(\"int\")\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
None
\n```\n:::\n\n:::\n:::\n\n\n```python\nibis.literal(0).try_cast(\"float\")\n```\n\n### `__dataframe__` support\n\nIbis now supports the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), allowing Ibis expressions to be used in any framework that supports it. Adoption of the protocol is still in its early stages, but we expect this to enable Ibis to be used in many new places going forward.\n\n::: {#418f5bdb .cell execution_count=20}\n``` {.python .cell-code}\nt.__dataframe__()\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n\n```\n:::\n:::\n\n\n### Streamlit experimental connection interface\n\nA new experimental connection interface was added for Streamlit. See [how-to write a Streamlit app with Ibis](../../../how-to/visualization/streamlit.qmd).\n\n### SQL dialect parameter\n\nIn SQL methods, you can now pass the `dialect` parameter to specify the SQL dialect used. This leverages [`sqlglot`](https://github.com/tobymao/sqlglot) under the hood.\n\n::: {#da81bb20 .cell execution_count=21}\n``` {.python .cell-code}\nbigquery_sql = \"\"\"\nSELECT\n t0.`species`,\n COUNT(t0.`species`) AS `count`,\n CAST(COUNT(DISTINCT t0.`island`) AS FLOAT64) AS `islands`\nFROM penguins AS t0\nGROUP BY\n 1\n\"\"\"\n\nduckdb_con.sql(bigquery_sql, dialect=\"bigquery\")\n```\n\n::: {.cell-output .cell-output-display execution_count=21}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓\n┃ species    count  islands ┃\n┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩\n│ stringint64float64 │\n├───────────┼───────┼─────────┤\n│ Adelie   1523.0 │\n│ Gentoo   1241.0 │\n│ Chinstrap681.0 │\n└───────────┴───────┴─────────┘\n
\n```\n:::\n:::\n\n\n### Delta Lake read/write support for some backends\n\n[Delta Lake tables](https://delta-io) are supported through the [`deltalake` package](https://github.com/delta-io/delta-rs) with `read_delta()` implemented for DuckDB, Polars, and DataFusion.\n\n::: {#202f5c55 .cell execution_count=22}\n``` {.python .cell-code}\nt.to_delta(\"penguins.delta\", mode=\"overwrite\")\n```\n:::\n\n\n::: {#4e3e2b24 .cell execution_count=23}\n``` {.python .cell-code}\nt = ibis.read_delta(\"penguins.delta\")\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### Selectors\n\nSome minor selectors improvements were added including the ability to use abstract type names and lists of strings.\n\n::: {#4b1a65d9 .cell execution_count=24}\n``` {.python .cell-code}\nt.select(s.of_type(\"string\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┓\n┃ species  island     sex    ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━┩\n│ stringstringstring │\n├─────────┼───────────┼────────┤\n│ Adelie Torgersenmale   │\n│ Adelie Torgersenfemale │\n│ Adelie Torgersenfemale │\n│       │\n└─────────┴───────────┴────────┘\n
\n```\n:::\n:::\n\n\n::: {#7a7f62d3 .cell execution_count=25}\n``` {.python .cell-code}\nt.agg(s.across([\"species\", \"island\"], ibis._.count()))\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┓\n┃ species  island ┃\n┡━━━━━━━━━╇━━━━━━━━┩\n│ int64int64  │\n├─────────┼────────┤\n│     344344 │\n└─────────┴────────┘\n
\n```\n:::\n:::\n\n\n## Refactors\n\nSeveral internal refactors that shouldn't affect normal usage were made. See [the release notes](../../release_notes.md) for more details.\n\n## Wrapping up\n\nIbis v6.0.0 brings exciting new features that enable future support for ML and streaming workloads.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting started](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an issue](https://github.com/ibis-project/issues/new/choose) or [discussion](https://github.com/ibis-project/discussions/new/choose) and let us know!\n\n", + "engine": "jupyter", + "markdown": "---\ntitle: Ibis v6.0.0\nauthor: \"Ibis team\"\ndate: \"2023-07-03\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.0.0 adds the Oracle backend, revamped UDF support, and many new features. This release also includes a number of refactors, bug fixes, and performance improvements. You can view the full changelog in [the release notes](../../../release_notes.qmd).\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the getting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6,<7'`. First, we'll setup Ibis and fetch some sample data to use.\n\n::: {#4e54cc04 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#ac13f836 .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n## Breaking changes\n\n### Join duplicate column names\n\nPreviously when joining tables with duplicate column names, `_x` and `_y` suffixes would be appended by default to the left and right tables respectively. You could override this with the `suffix` argument, which is now removed in favor of `lname` and `rname` arguments. The default is changed to no suffix for the left table and `_right` for the right table.\n\n::: {#7e19ee04 .cell execution_count=3}\n``` {.python .cell-code}\nt.join(t, \"island\").select(s.startswith(\"species\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ species  species_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ stringstring        │\n├─────────┼───────────────┤\n│ Adelie Adelie        │\n│ Adelie Adelie        │\n│ Adelie Adelie        │\n│              │\n└─────────┴───────────────┘\n
\n```\n:::\n:::\n\n\nTo replicate the previous behavior:\n\n::: {#98f400d2 .cell execution_count=4}\n``` {.python .cell-code}\nt.join(t, \"island\", lname=\"{name}_x\", rname=\"{name}_y\").select(\n s.startswith(\"species\")\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃ species_x  species_y ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ stringstring    │\n├───────────┼───────────┤\n│ Adelie   Adelie    │\n│ Adelie   Adelie    │\n│ Adelie   Adelie    │\n│          │\n└───────────┴───────────┘\n
\n```\n:::\n:::\n\n\n### `.count()` column names no longer named `count` automatically\n\nColumns created with the `.count()` aggregation are no longer automatically named `count`. This is to follow convention with other aggregations and reduce the likelihood of name collisions.\n\n::: {#f67e1084 .cell execution_count=5}\n``` {.python .cell-code}\nt.group_by(\"species\").agg(ibis._.species.count())\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ species    Count(species) ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ stringint64          │\n├───────────┼────────────────┤\n│ Adelie   152 │\n│ Chinstrap68 │\n│ Gentoo   124 │\n└───────────┴────────────────┘\n
\n```\n:::\n:::\n\n\nTo reproduce the old behavior, you can rename the column to `count` with:\n\n\n```python\nt.group_by(\"species\").agg(count=ibis._.species.count())\n```\n\n## Backends\n\n### Oracle\n\nThe Oracle backend was added! See the [Voltron Data blog](https://voltrondata.com/resources/ibis-6-0-oracle-backend-support) for more details.\n\n```python\nibis.connect(f\"oracle://user:password@host\")\n```\n\n### DuckDB\n\nThere were various DuckDB improvements, but one notable new feature is the ability to attach to a SQLite database through DuckDB. This allows you to run OLAP queries via DuckDB significantly faster on source data from SQLite.\n\nFirst we'll create a DuckDB connection and show it has no tables:\n\n::: {#b5da11e4 .cell execution_count=6}\n``` {.python .cell-code}\nduckdb_con = ibis.connect(\"duckdb://\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\n[]\n```\n:::\n:::\n\n\nThen create a SQLite database with a table:\n\n::: {#e6967133 .cell execution_count=7}\n``` {.python .cell-code}\nsqlite_con = ibis.connect(\"sqlite://penguins.db\")\nsqlite_con.create_table(\"penguins\", t.to_pandas(), overwrite=True)\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64float64float64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.7181.03750.0male  2007 │\n│ Adelie Torgersen39.517.4186.03800.0female2007 │\n│ Adelie Torgersen40.318.0195.03250.0female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nAnd attach it:\n\n::: {#52786da4 .cell execution_count=8}\n``` {.python .cell-code}\nduckdb_con.attach_sqlite(\"./penguins.db\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\n['penguins']\n```\n:::\n:::\n\n\n#### MotherDuck support!\n\nMotherDuck launched recently and is now supported in Ibis!\n\nSimply connect with the DuckDB backend using `md:` or `motherduck:` as the database.\n\n\n```python\nibis.connect(\"duckdb://md:\")\n```\n\n### Polars\n\nThe Polars backend received many improvements from community members [@alexander-beedie](https://github.com/alexander-beedie) and [@mesejo](https://github.com/mesejo), with plenty of operations now supported.\n\nSome additions in this version include:\n\n- `any` and `all` reductions\n- `argmin` and `argmax`\n- `identical_to`\n- `corr`\n- support for `.sql()`\n\nGive it a try by setting your backend to Polars with `ibis.set_backend(\"polars\")`.\n\n## Functionality\n\n### UDFs\n\nUser-defined functions (UDFs) have been revamped with a new syntax and new backends added. To get started, import the decorator:\n\n::: {#cc7dcdf3 .cell execution_count=9}\n``` {.python .cell-code}\nfrom ibis import udf\n```\n:::\n\n\nDefine a UDF:\n\n::: {#ff6c82af .cell execution_count=10}\n``` {.python .cell-code}\n@udf.scalar.python\ndef num_vowels(s: str, include_y: bool = False) -> int:\n return sum(map(s.lower().count, \"aeiou\" + (\"y\" * include_y)))\n```\n:::\n\n\nAnd call it:\n\n::: {#399c788e .cell execution_count=11}\n``` {.python .cell-code}\nnum_vowels(t[:1].species.execute()[0])\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
4
\n```\n:::\n\n:::\n:::\n\n\n::: {#419610eb .cell execution_count=12}\n``` {.python .cell-code}\nt.group_by(num_vowels=num_vowels(t.species)).agg(\n num_vowels_island_count=t.island.count()\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ num_vowels  num_vowels_island_count ┃\n┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ int64int64                   │\n├────────────┼─────────────────────────┤\n│          268 │\n│          4152 │\n│          3124 │\n└────────────┴─────────────────────────┘\n
\n```\n:::\n:::\n\n\n::: {#a5d5fcae .cell execution_count=13}\n``` {.python .cell-code}\nt.filter(num_vowels(t.species) < 4)\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Gentoo Biscoe46.113.22114500female2007 │\n│ Gentoo Biscoe50.016.32305700male  2007 │\n│ Gentoo Biscoe48.714.12104450female2007 │\n│  │\n└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### `to_torch` API\n\nA new `to_torch` output method was added. Combined with UDFs, this brings powerful ML capabilities into Ibis. See a complete example in the [Ibis + DuckDB + PyTorch blog](../torch).\n\n::: {#39eca15c .cell execution_count=14}\n``` {.python .cell-code}\nimport torch\n\ntorch.set_printoptions(threshold=10)\n```\n:::\n\n\n::: {#21955f50 .cell execution_count=15}\n``` {.python .cell-code}\nt.select(s.numeric()).to_torch()\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```\n{'bill_length_mm': tensor([39.1000, 39.5000, 40.3000, ..., 49.6000, 50.8000, 50.2000],\n dtype=torch.float64),\n 'bill_depth_mm': tensor([18.7000, 17.4000, 18.0000, ..., 18.2000, 19.0000, 18.7000],\n dtype=torch.float64),\n 'flipper_length_mm': tensor([181, 186, 195, ..., 193, 210, 198]),\n 'body_mass_g': tensor([3750, 3800, 3250, ..., 3775, 4100, 3775]),\n 'year': tensor([2007, 2007, 2007, ..., 2009, 2009, 2009])}\n```\n:::\n:::\n\n\n### Array zip support\n\nA new zip operation was added on array data types, allowing you to zip together multiple arrays.\n\n::: {#9598a79a .cell execution_count=16}\n``` {.python .cell-code}\narrays = ibis.memtable(\n {\"numbers\": [[3, 2], [], None], \"strings\": [[\"a\", \"c\"], None, [\"e\"]]}\n)\narrays\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ numbers       strings       ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ array<int64>array<string> │\n├──────────────┼───────────────┤\n│ [3, 2]['a', 'c']    │\n│ []NULL          │\n│ NULL['e']         │\n└──────────────┴───────────────┘\n
\n```\n:::\n:::\n\n\n::: {#229cffc2 .cell execution_count=17}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings)\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip()                           ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ array<struct<f1: int64, f2: string>> │\n├──────────────────────────────────────┤\n│ [{...}, {...}]                       │\n│ NULL                                 │\n│ NULL                                 │\n└──────────────────────────────────────┘\n
\n```\n:::\n:::\n\n\n::: {#16919245 .cell execution_count=18}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings).unnest()\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip()                    ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ struct<f1: int64, f2: string> │\n├───────────────────────────────┤\n│ {'f1': 3, 'f2': 'a'}          │\n│ {'f1': 2, 'f2': 'c'}          │\n└───────────────────────────────┘\n
\n```\n:::\n:::\n\n\n### Try cast support\n\nA new `try_cast()` operation was added that allows you to cast a column to a type, but return null if the cast fails.\n\n::: {#6c7989a4 .cell execution_count=19}\n``` {.python .cell-code}\nibis.literal(\"a\").try_cast(\"int\")\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
None
\n```\n:::\n\n:::\n:::\n\n\n```python\nibis.literal(0).try_cast(\"float\")\n```\n\n### `__dataframe__` support\n\nIbis now supports the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), allowing Ibis expressions to be used in any framework that supports it. Adoption of the protocol is still in its early stages, but we expect this to enable Ibis to be used in many new places going forward.\n\n::: {#0b8c5822 .cell execution_count=20}\n``` {.python .cell-code}\nt.__dataframe__()\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n\n```\n:::\n:::\n\n\n### Streamlit experimental connection interface\n\nA new experimental connection interface was added for Streamlit. See [how-to write a Streamlit app with Ibis](../../../how-to/visualization/streamlit.qmd).\n\n### SQL dialect parameter\n\nIn SQL methods, you can now pass the `dialect` parameter to specify the SQL dialect used. This leverages [`sqlglot`](https://github.com/tobymao/sqlglot) under the hood.\n\n::: {#35649010 .cell execution_count=21}\n``` {.python .cell-code}\nbigquery_sql = \"\"\"\nSELECT\n t0.`species`,\n COUNT(t0.`species`) AS `count`,\n CAST(COUNT(DISTINCT t0.`island`) AS FLOAT64) AS `islands`\nFROM penguins AS t0\nGROUP BY\n 1\n\"\"\"\n\nduckdb_con.sql(bigquery_sql, dialect=\"bigquery\")\n```\n\n::: {.cell-output .cell-output-display execution_count=21}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓\n┃ species    count  islands ┃\n┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩\n│ stringint64float64 │\n├───────────┼───────┼─────────┤\n│ Chinstrap681.0 │\n│ Adelie   1523.0 │\n│ Gentoo   1241.0 │\n└───────────┴───────┴─────────┘\n
\n```\n:::\n:::\n\n\n### Delta Lake read/write support for some backends\n\n[Delta Lake tables](https://delta.io) are supported through the [`deltalake` package](https://github.com/delta-io/delta-rs) with `read_delta()` implemented for DuckDB, Polars, and DataFusion.\n\n::: {#92c9acb4 .cell execution_count=22}\n``` {.python .cell-code}\nt.to_delta(\"penguins.delta\", mode=\"overwrite\")\n```\n:::\n\n\n::: {#3fae3024 .cell execution_count=23}\n``` {.python .cell-code}\nt = ibis.read_delta(\"penguins.delta\")\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### Selectors\n\nSome minor selectors improvements were added including the ability to use abstract type names and lists of strings.\n\n::: {#8778b857 .cell execution_count=24}\n``` {.python .cell-code}\nt.select(s.of_type(\"string\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┓\n┃ species  island     sex    ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━┩\n│ stringstringstring │\n├─────────┼───────────┼────────┤\n│ Adelie Torgersenmale   │\n│ Adelie Torgersenfemale │\n│ Adelie Torgersenfemale │\n│       │\n└─────────┴───────────┴────────┘\n
\n```\n:::\n:::\n\n\n::: {#c1074422 .cell execution_count=25}\n``` {.python .cell-code}\nt.agg(s.across([\"species\", \"island\"], ibis._.count()))\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┓\n┃ species  island ┃\n┡━━━━━━━━━╇━━━━━━━━┩\n│ int64int64  │\n├─────────┼────────┤\n│     344344 │\n└─────────┴────────┘\n
\n```\n:::\n:::\n\n\n## Refactors\n\nSeveral internal refactors that shouldn't affect normal usage were made. See [the release notes](../../release_notes.qmd) for more details.\n\n## Wrapping up\n\nIbis v6.0.0 brings exciting new features that enable future support for ML and streaming workloads.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting started](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an issue](https://github.com/ibis-project/issues/new/choose) or [discussion](https://github.com/ibis-project/discussions/new/choose) and let us know!\n\n", "supporting": [ - "index_files/figure-html" + "index_files" ], "filters": [], "includes": { "include-in-header": [ - "\n\n\n" + "\n\n\n" ] } } diff --git a/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json b/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json index b42ab052b335..bb236043dec8 100644 --- a/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json +++ b/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json @@ -1,10 +1,10 @@ { - "hash": "d4946fbf96b8f1af3a7612d86f7e8ba0", + "hash": "75cf791184119f27c7ac27691e49c36e", "result": { "engine": "jupyter", - "markdown": "---\ntitle: \"Ibis 8.0: streaming and more!\"\nauthor: \"Ibis team\"\ndate: \"2024-02-12\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 8.0 marks the first release of stream processing backends in Ibis! 
This\nenhances [the composable data ecosystem\nvision](../../concepts/composable-ecosystem.qmd) by allowing users to implement\ndata transformation logic in a standard Python dataframe API and execute it\nagainst either batch or streaming systems.\n\nThis release includes [Apache Flink](https://flink.apache.org/), a streaming\nbackend, and [RisingWave](https://risingwave.com), a streaming database backend.\nWe've also added a new batch backend with [Exasol](https://exasol.com), bringing\nthe total number of backends Ibis supports to 20.\n\n[Most geospatial operations are now supported in the DuckDB\nbackend](#geospatial-operations-in-duckdb), making Ibis a great local option for\ngeospatial analytics.\n\n### What is stream processing?\n\nStream processing systems are designed to handle high-throughput, low-latency\ndata processing with time semantics. They are used to process data in real-time\nwith minimum latency and are often used in applications such as fraud detection,\nreal-time analytics, and IoT. Systems using stream processing are increasingly\ncommon in modern data applications.\n\nApache Flink is the most popular open-source stream processing framework, with\nnumerous cloud options. RisingWave is an open-source Postgres-compatible\nstreaming database with a cloud offering that is gaining popularity and\nsimplifies the streaming experience.\n\nIbis now supports both and going forward can add more streaming backends to\nunify the Python user experience across batch and streaming systems.\n\n### Unifying batch and streaming UX in Python\n\nWhether you're using a batch or streaming data platform -- and the lines are\ncontinually blurring between them -- you'll need a frontend to interact with as\na data engineer, analyst, or scientist. If you're using Python, that frontend is\nlikely a dataframe API.\n\nStandards benefit individual users by reducing the cognitive load of learning\nand understanding new data systems. 
Organizations benefit from this in the form\nof lower onboarding costs, easier collaboration between teams, and better\ninterfaces for data systems.\n\nWe saw in the recent [one billion row challenge post how even CSV reader keyword\narguments can differ greatly between APIs](../1brc/index.qmd#setup-1). This is\ncompounded by tightly coupling a dataframe API to every query engine, whether\nbatch or streaming.\n\nIbis aims to solve this dilemma by providing a standard dataframe API that can\nwork across data systems, whether batch or streaming. This is a long-term vision\nand we're excited to take the first steps toward it in Ibis 8.0 with the launch\nof **two** streaming backends (and one more batch backend).\n\nThis allows a user to leverage DuckDB or Polars or DataFusion locally, then\nscale out batch processing to Snowflake or BigQuery or ClickHouse in the cloud,\nthen switch from batch to stream processing with Apache Flink or RisingWave, all\nwithout changing their dataframe code. As Ibis [adds new\nfeatures](#functionality) and implements them across backends, users can take\nadvantage of these features without needing to learn new APIs.\n\n## Backends\n\nThree new backends were added in this release.\n\n### Apache Flink\n\nIn collaboration with Claypot AI ([recently acquired by Voltron\nData](https://voltrondata.com/resources/voltron-data-acquires-claypot-ai)),\nwe've added the [first streaming backend with Apache\nFlink](../../backends/flink.qmd). You can check out the [blog\npost](../flink-announcement/index.qmd) and\n[tutorial](../../tutorials/open-source-software/apache-flink/0_setup.qmd) to get\nstarted with this new backend.\n\n### RisingWave\n\n[RisingWave](https://risingwave.com) has contributed [second streaming backend\nwith RisingWave](../../backends/risingwave.qmd). 
This backend is earlier in\ndevelopment, but we're excited to have it in Ibis and it will continue to\nimprove it.\n\n### Exasol\n\n[Exasol](https://exasol.com) has contributed the [Exasol\nbackend](../../backends/exasol.qmd). This is a traditional batch backend and\nbrings another great option for fast batch analytics to Ibis.\n\n## Breaking changes\n\nYou can view the [full changelog](../../release_notes.md) for additional\nbreaking changes. There have been few that we expect to affect most users.\n\n:::{.callout-note}\nThe PM for the team was distracted playing with LLMs and didn't write a v7 blog\npost, so we're covering breaking changes and features from both below.\n:::\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the\ngetting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=8,<9'`.\nFirst, we’ll setup Ibis and fetch some sample data to use.\n\n::: {#9b47e9e4 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#d091eb9a .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### rename\n\nThe largest breaking change in Ibis 7/8 is the deprecation of `relabel` in\nfavor of `rename`, swapping the order of the arguments. This change was made to\nbe consistent with the rest of the Ibis API. We apologize for any inconvenience\nthis may cause, but we believe this change will make Ibis a better and more\nconsistent dataframe standard going forward.\n\nIn the past, you would use `relabel` like this:\n\n::: {#72d0f6b3 .cell execution_count=3}\n``` {.python .cell-code}\nt.relabel({\"species\": \"SPECIES\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nNow, you would use `rename` like this:\n\n::: {#bd87d11d .cell execution_count=4}\n``` {.python .cell-code}\nt.rename({\"SPECIES\": \"species\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nor this:\n\n::: {#81d7bb14 .cell execution_count=5}\n``` {.python .cell-code}\nt.rename(SPECIES=\"species\")\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n## Functionality\n\nA lot of new functionality has been added in Ibis 7/8.\n\n### pandas batches\n\nThe `.to_pandas_batches()` method can be used to output batches of pandas\ndataframes:\n\n::: {#926ded0a .cell execution_count=6}\n``` {.python .cell-code}\nbatches = t.to_pandas_batches(chunk_size=200)\nfor df in batches:\n print(df.shape)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n(200, 8)\n(144, 8)\n```\n:::\n:::\n\n\n### range\n\nThe `range()` function can be used to create a monotonic sequence of integers:\n\n::: {#67ec4b7e .cell execution_count=7}\n``` {.python .cell-code}\ns = ibis.range(10)\ns\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=7}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[0, 1, ... +8]
\n```\n:::\n\n:::\n:::\n\n\nYou can turn it into a table:\n\n::: {#999d0db3 .cell execution_count=8}\n``` {.python .cell-code}\ns.unnest().name(\"index\").as_table()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n
┏━━━━━━━┓\n┃ index ┃\n┡━━━━━━━┩\n│ int8  │\n├───────┤\n│     0 │\n│     1 │\n│     2 │\n│      │\n└───────┘\n
\n```\n:::\n:::\n\n\nThis can be useful for [creating synthetic\ndata](../1brc/index.qmd#bonus-more-billion-row-data-generation) and other use\ncases.\n\n### relocate\n\nThe `.relocate()` method can be used to move columns to the beginning of a\ntable, which is very useful for interactive data exploration with wide tables:\n\n::: {#b0225c66 .cell execution_count=9}\n``` {.python .cell-code}\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nThen:\n\n::: {#a501242b .cell execution_count=10}\n``` {.python .cell-code}\nt.relocate(\"sex\", \"year\")\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n
┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃ sex     year   species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g ┃\n┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ stringint64stringstringfloat64float64int64int64       │\n├────────┼───────┼─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┤\n│ male  2007Adelie Torgersen39.118.71813750 │\n│ female2007Adelie Torgersen39.517.41863800 │\n│ female2007Adelie Torgersen40.318.01953250 │\n│  │\n└────────┴───────┴─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┘\n
\n```\n:::\n:::\n\n\n### sample\n\nThe `.sample()` method can be used to sample rows from a table:\n\n:::{.callout-info}\nNumber of rows returned may vary by invocation.\n:::\n\n::: {#3d554fd1 .cell execution_count=11}\n``` {.python .cell-code}\nt.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
344
\n```\n:::\n\n:::\n:::\n\n\n::: {#bcff524b .cell execution_count=12}\n``` {.python .cell-code}\nt.sample(fraction=0.1).count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n
36
\n```\n:::\n\n:::\n:::\n\n\n### negative slicing\n\nMore Pythonic slicing is now supported:\n\n::: {#2feb70c9 .cell execution_count=13}\n``` {.python .cell-code}\nt[:3]\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n::: {#f06293de .cell execution_count=14}\n``` {.python .cell-code}\nt[-3:]\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species    island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ ChinstrapDream 49.618.21933775male  2009 │\n│ ChinstrapDream 50.819.02104100male  2009 │\n│ ChinstrapDream 50.218.71983775female2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n::: {#44af84d2 .cell execution_count=15}\n``` {.python .cell-code}\nt[-6:-3]\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species    island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ ChinstrapDream 45.717.01953650female2009 │\n│ ChinstrapDream 55.819.82074000male  2009 │\n│ ChinstrapDream 43.518.12023400female2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### geospatial operations in DuckDB\n\nIbis supports over 50 geospatial operations, with many being recently added to\nDuckDB backend. While backend-specific, this is worth calling out because it\nbrings a great local option for geospatial analytics to Ibis. [Read the first\ngeospatial blog](../ibis-duckdb-geospatial/index.qmd) or [the second geospatial\nblog](../ibis-duckdb-geospatial-dev-guru/index.qmd) to learn more.\n\nA new `zones` example dataset with a geometric datatype has been added for a\nquick demonstration:\n\n::: {#2b0c05ae .cell execution_count=16}\n``` {.python .cell-code}\nz = ibis.examples.zones.fetch()\nz = z.relocate(\"geom\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ geom                                                                              OBJECTID  Shape_Leng  Shape_Area  zone                     LocationID  borough  x_cent        y_cent        ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ geospatial:geometryint32float64float64stringint32stringfloat64float64       │\n├──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ <POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...>10.1163570.000782Newark Airport         1EWR    9.359968e+05191376.749531 │\n│ <MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...>20.4334700.004866Jamaica Bay            2Queens 1.031086e+06164018.754403 │\n│ <POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....>30.0843410.000314Allerton/Pelham Gardens3Bronx  1.026453e+06254265.478659 │\n│  │\n└──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n
\n```\n:::\n:::\n\n\nWe can use geospatial operations on that column:\n\n::: {#01537cf4 .cell execution_count=17}\n``` {.python .cell-code}\nz = z.mutate(\n area=z.geom.area(),\n centroid=z.geom.centroid(),\n).relocate(\"area\", \"centroid\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ area          centroid                          geom                                                                              OBJECTID  Shape_Leng  Shape_Area  zone                     LocationID  borough  x_cent        y_cent        ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ float64pointgeospatial:geometryint32float64float64stringint32stringfloat64float64       │\n├──────────────┼──────────────────────────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ 7.903953e+07<POINT (935996.821 191376.75)><POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...>10.1163570.000782Newark Airport         1EWR    9.359968e+05191376.749531 │\n│ 1.439095e+08<POINT (1031085.719 164018.754)><MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...>20.4334700.004866Jamaica Bay            2Queens 1.031086e+06164018.754403 │\n│ 3.168508e+07<POINT (1026452.617 254265.479)><POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....>30.0843410.000314Allerton/Pelham Gardens3Bronx  1.026453e+06254265.478659 │\n│             │\n└──────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n
\n```\n:::\n:::\n\n\n## Wrapping up\n\nIbis 8.0 brings exciting new features and the first streaming backends into Ibis!\nWe hope you're excited as we are about breaking down barriers between batch and\nstreaming systems with a standard Python dataframe API.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting\nstarted](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an\nissue](https://github.com/ibis-project/issues/new/choose) or\n[discussion](https://github.com/ibis-project/discussions/new/choose) and let us\nknow!\n\n", + "markdown": "---\ntitle: \"Ibis 8.0: streaming and more!\"\nauthor: \"Ibis team\"\ndate: \"2024-02-12\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 8.0 marks the first release of stream processing backends in Ibis! This\nenhances [the composable data ecosystem\nvision](../../concepts/composable-ecosystem.qmd) by allowing users to implement\ndata transformation logic in a standard Python dataframe API and execute it\nagainst either batch or streaming systems.\n\nThis release includes [Apache Flink](https://flink.apache.org/), a streaming\nbackend, and [RisingWave](https://risingwave.com), a streaming database backend.\nWe've also added a new batch backend with [Exasol](https://exasol.com), bringing\nthe total number of backends Ibis supports to 20.\n\n[Most geospatial operations are now supported in the DuckDB\nbackend](#geospatial-operations-in-duckdb), making Ibis a great local option for\ngeospatial analytics.\n\n### What is stream processing?\n\nStream processing systems are designed to handle high-throughput, low-latency\ndata processing with time semantics. They are used to process data in real-time\nwith minimum latency and are often used in applications such as fraud detection,\nreal-time analytics, and IoT. 
Systems using stream processing are increasingly\ncommon in modern data applications.\n\nApache Flink is the most popular open-source stream processing framework, with\nnumerous cloud options. RisingWave is an open-source Postgres-compatible\nstreaming database with a cloud offering that is gaining popularity and\nsimplifies the streaming experience.\n\nIbis now supports both and going forward can add more streaming backends to\nunify the Python user experience across batch and streaming systems.\n\n### Unifying batch and streaming UX in Python\n\nWhether you're using a batch or streaming data platform -- and the lines are\ncontinually blurring between them -- you'll need a frontend to interact with as\na data engineer, analyst, or scientist. If you're using Python, that frontend is\nlikely a dataframe API.\n\nStandards benefit individual users by reducing the cognitive load of learning\nand understanding new data systems. Organizations benefit from this in the form\nof lower onboarding costs, easier collaboration between teams, and better\ninterfaces for data systems.\n\nWe saw in the recent [one billion row challenge post how even CSV reader keyword\narguments can differ greatly between APIs](../1brc/index.qmd#setup-1). This is\ncompounded by tightly coupling a dataframe API to every query engine, whether\nbatch or streaming.\n\nIbis aims to solve this dilemma by providing a standard dataframe API that can\nwork across data systems, whether batch or streaming. This is a long-term vision\nand we're excited to take the first steps toward it in Ibis 8.0 with the launch\nof **two** streaming backends (and one more batch backend).\n\nThis allows a user to leverage DuckDB or Polars or DataFusion locally, then\nscale out batch processing to Snowflake or BigQuery or ClickHouse in the cloud,\nthen switch from batch to stream processing with Apache Flink or RisingWave, all\nwithout changing their dataframe code. 
As Ibis [adds new\nfeatures](#functionality) and implements them across backends, users can take\nadvantage of these features without needing to learn new APIs.\n\n## Backends\n\nThree new backends were added in this release.\n\n### Apache Flink\n\nIn collaboration with Claypot AI ([recently acquired by Voltron\nData](https://voltrondata.com/resources/voltron-data-acquires-claypot-ai)),\nwe've added the [first streaming backend with Apache\nFlink](../../backends/flink.qmd). You can check out the [blog\npost](../flink-announcement/index.qmd) and\n[tutorial](../../tutorials/open-source-software/apache-flink/0_setup.qmd) to get\nstarted with this new backend.\n\n### RisingWave\n\n[RisingWave](https://risingwave.com) has contributed the [second streaming backend\nwith RisingWave](../../backends/risingwave.qmd). This backend is earlier in\ndevelopment, but we're excited to have it in Ibis and will continue to\nimprove it.\n\n### Exasol\n\n[Exasol](https://exasol.com) has contributed the [Exasol\nbackend](../../backends/exasol.qmd). This is a traditional batch backend and\nbrings another great option for fast batch analytics to Ibis.\n\n## Breaking changes\n\nYou can view the [full changelog](../../release_notes.qmd) for additional\nbreaking changes. 
There have been few that we expect to affect most users.\n\n:::{.callout-note}\nThe PM for the team was distracted playing with LLMs and didn't write a v7 blog\npost, so we're covering breaking changes and features from both below.\n:::\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the\ngetting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=8,<9'`.\nFirst, we’ll setup Ibis and fetch some sample data to use.\n\n::: {#8662595c .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#8466e108 .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### rename\n\nThe largest breaking change in Ibis 7/8 is the deprecation of `relabel` in\nfavor of `rename`, swapping the order of the arguments. This change was made to\nbe consistent with the rest of the Ibis API. We apologize for any inconvenience\nthis may cause, but we believe this change will make Ibis a better and more\nconsistent dataframe standard going forward.\n\nIn the past, you would use `relabel` like this:\n\n::: {#fcb662b6 .cell execution_count=3}\n``` {.python .cell-code}\nt.relabel({\"species\": \"SPECIES\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nNow, you would use `rename` like this:\n\n::: {#69c6ea26 .cell execution_count=4}\n``` {.python .cell-code}\nt.rename({\"SPECIES\": \"species\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nor this:\n\n::: {#89e73ac8 .cell execution_count=5}\n``` {.python .cell-code}\nt.rename(SPECIES=\"species\")\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n## Functionality\n\nA lot of new functionality has been added in Ibis 7/8.\n\n### pandas batches\n\nThe `.to_pandas_batches()` method can be used to output batches of pandas\ndataframes:\n\n::: {#033ec3b9 .cell execution_count=6}\n``` {.python .cell-code}\nbatches = t.to_pandas_batches(chunk_size=200)\nfor df in batches:\n print(df.shape)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n(200, 8)\n(144, 8)\n```\n:::\n:::\n\n\n### range\n\nThe `range()` function can be used to create a monotonic sequence of integers:\n\n::: {#0cdd59d9 .cell execution_count=7}\n``` {.python .cell-code}\ns = ibis.range(10)\ns\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=7}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[0, 1, ... +8]
\n```\n:::\n\n:::\n:::\n\n\nYou can turn it into a table:\n\n::: {#b8aabc3d .cell execution_count=8}\n``` {.python .cell-code}\ns.unnest().name(\"index\").as_table()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n
┏━━━━━━━┓\n┃ index ┃\n┡━━━━━━━┩\n│ int8  │\n├───────┤\n│     0 │\n│     1 │\n│     2 │\n│      │\n└───────┘\n
\n```\n:::\n:::\n\n\nThis can be useful for [creating synthetic\ndata](../1brc/index.qmd#bonus-more-billion-row-data-generation) and other use\ncases.\n\n### relocate\n\nThe `.relocate()` method can be used to move columns to the beginning of a\ntable, which is very useful for interactive data exploration with wide tables:\n\n::: {#1fd81977 .cell execution_count=9}\n``` {.python .cell-code}\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\nThen:\n\n::: {#b6d79ebc .cell execution_count=10}\n``` {.python .cell-code}\nt.relocate(\"sex\", \"year\")\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n
┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃ sex     year   species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g ┃\n┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ stringint64stringstringfloat64float64int64int64       │\n├────────┼───────┼─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┤\n│ male  2007Adelie Torgersen39.118.71813750 │\n│ female2007Adelie Torgersen39.517.41863800 │\n│ female2007Adelie Torgersen40.318.01953250 │\n│  │\n└────────┴───────┴─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┘\n
\n```\n:::\n:::\n\n\n### sample\n\nThe `.sample()` method can be used to sample rows from a table:\n\n:::{.callout-note}\nNumber of rows returned may vary by invocation.\n:::\n\n::: {#26326136 .cell execution_count=11}\n``` {.python .cell-code}\nt.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
344
\n```\n:::\n\n:::\n:::\n\n\n::: {#8fc06700 .cell execution_count=12}\n``` {.python .cell-code}\nt.sample(fraction=0.1).count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n
28
\n```\n:::\n\n:::\n:::\n\n\n### negative slicing\n\nMore Pythonic slicing is now supported:\n\n::: {#5ce21814 .cell execution_count=13}\n``` {.python .cell-code}\nt[:3]\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n::: {#e9394dd6 .cell execution_count=14}\n``` {.python .cell-code}\nt[-3:]\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species    island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ ChinstrapDream 49.618.21933775male  2009 │\n│ ChinstrapDream 50.819.02104100male  2009 │\n│ ChinstrapDream 50.218.71983775female2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n::: {#9675f198 .cell execution_count=15}\n``` {.python .cell-code}\nt[-6:-3]\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species    island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ stringstringfloat64float64int64int64stringint64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ ChinstrapDream 45.717.01953650female2009 │\n│ ChinstrapDream 55.819.82074000male  2009 │\n│ ChinstrapDream 43.518.12023400female2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n
\n```\n:::\n:::\n\n\n### geospatial operations in DuckDB\n\nIbis supports over 50 geospatial operations, with many being recently added to the\nDuckDB backend. While backend-specific, this is worth calling out because it\nbrings a great local option for geospatial analytics to Ibis. [Read the first\ngeospatial blog](../ibis-duckdb-geospatial/index.qmd) or [the second geospatial\nblog](../ibis-duckdb-geospatial-dev-guru/index.qmd) to learn more.\n\nA new `zones` example dataset with a geometric datatype has been added for a\nquick demonstration:\n\n::: {#333aa45d .cell execution_count=16}\n``` {.python .cell-code}\nz = ibis.examples.zones.fetch()\nz = z.relocate(\"geom\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ geom                                                                              OBJECTID  Shape_Leng  Shape_Area  zone                     LocationID  borough  x_cent        y_cent        ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ geospatial:geometryint32float64float64stringint32stringfloat64float64       │\n├──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ <POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...>10.1163570.000782Newark Airport         1EWR    9.359968e+05191376.749531 │\n│ <MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...>20.4334700.004866Jamaica Bay            2Queens 1.031086e+06164018.754403 │\n│ <POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....>30.0843410.000314Allerton/Pelham Gardens3Bronx  1.026453e+06254265.478659 │\n│  │\n└──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n
\n```\n:::\n:::\n\n\nWe can use geospatial operations on that column:\n\n::: {#9f15bd3f .cell execution_count=17}\n``` {.python .cell-code}\nz = z.mutate(\n area=z.geom.area(),\n centroid=z.geom.centroid(),\n).relocate(\"area\", \"centroid\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ area          centroid                          geom                                                                              OBJECTID  Shape_Leng  Shape_Area  zone                     LocationID  borough  x_cent        y_cent        ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ float64pointgeospatial:geometryint32float64float64stringint32stringfloat64float64       │\n├──────────────┼──────────────────────────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ 7.903953e+07<POINT (935996.821 191376.75)><POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...>10.1163570.000782Newark Airport         1EWR    9.359968e+05191376.749531 │\n│ 1.439095e+08<POINT (1031085.719 164018.754)><MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...>20.4334700.004866Jamaica Bay            2Queens 1.031086e+06164018.754403 │\n│ 3.168508e+07<POINT (1026452.617 254265.479)><POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....>30.0843410.000314Allerton/Pelham Gardens3Bronx  1.026453e+06254265.478659 │\n│             │\n└──────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n
\n```\n:::\n:::\n\n\n## Wrapping up\n\nIbis 8.0 brings exciting new features and the first streaming backends into Ibis!\nWe hope you're as excited as we are about breaking down barriers between batch and\nstreaming systems with a standard Python dataframe API.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting\nstarted](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an\nissue](https://github.com/ibis-project/ibis/issues/new/choose) or\n[discussion](https://github.com/ibis-project/ibis/discussions/new/choose) and let us\nknow!\n\n", "supporting": [ - "index_files" + "index_files/figure-html" ], "filters": [], "includes": { diff --git a/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json b/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json index 5c57f4f143a0..7e2ae3fe10ca 100644 --- a/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json +++ b/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json @@ -1,14 +1,15 @@ { - "hash": "cd79329d01e545ae82277e61a0330c34", + "hash": "87b787b52bf610df4ec240803f3db6fa", "result": { - "markdown": "---\ntitle: Ibis v6.1.0\nauthor: \"Ibis team\"\ndate: \"2023-08-02\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.1.0 is a minor release that includes new features, backend improvements, bug fixes, documentation improvements, and refactors. We are excited to see further adoption of the dataframe interchange protocol enabling visualization and other libraries to be used more easily with Ibis.\n\nYou can view the full changelog in [the release notes](../../release_notes.md).\n\nIf you're new to Ibis, see [how to install](../../install.qmd) and [the getting started tutorial](../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6.1,<7'`. 
First, we\\'ll setup Ibis and fetch some\nsample data to use.\n\n::: {#24a6b50e .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.__version__\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\n'6.1.0'\n```\n:::\n:::\n\n\n::: {#4e2c9e69 .cell execution_count=2}\n``` {.python .cell-code}\n# interactive mode for demo purposes\nibis.options.interactive = True\n```\n:::\n\n\n::: {#866d8631 .cell execution_count=3}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt = t.mutate(year=t[\"year\"].cast(\"str\"))\nt.limit(3)\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year   ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstring │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┤\n│ Adelie Torgersen39.118.71813750male  2007   │\n│ Adelie Torgersen39.517.41863800female2007   │\n│ Adelie Torgersen40.318.01953250female2007   │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┘\n
\n```\n:::\n:::\n\n\n## Ecosystem integrations\n\nWith the introduction of `__dataframe__` support in v6.0.0 and efficiency improvements in this release, Ibis now works with [Altair](https://altair-viz.github.io/index.html), [Plotly](https://plotly.com/python/), [plotnine](https://plotnine.readthedocs.io/en/stable/), and any other visualization library that implements the protocol. This enables passing Ibis tables directly to visualization libraries without a `.to_pandas()` or `to_pyarrow()` call for any of the 15+ backends supported, with data efficiently transferred through Apache Arrow.\n\n::: {#cc0a2a10 .cell execution_count=4}\n``` {.python .cell-code code-fold=\"true\"}\nwidth = 640 # <1>\nheight = 480 # <1>\n```\n:::\n\n\n1. Set the width and height of the plots.\n\n::: {#71cadaa6 .cell execution_count=5}\n``` {.python .cell-code}\ngrouped = ( # <1>\n t.group_by(\"species\")\n .aggregate(count=ibis._.count())\n .order_by(ibis.desc(\"count\"))\n) # <1>\ngrouped # <2>\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━┓\n┃ species    count ┃\n┡━━━━━━━━━━━╇━━━━━━━┩\n│ stringint64 │\n├───────────┼───────┤\n│ Adelie   152 │\n│ Gentoo   124 │\n│ Chinstrap68 │\n└───────────┴───────┘\n
\n```\n:::\n:::\n\n\n1. Setup data to plot.\n2. Display the table.\n\n::: {.panel-tabset}\n\n## Altair\n\n```{.bash}\npip install altair\n```\n\n::: {#d7bb57e9 .cell execution_count=6}\n``` {.python .cell-code}\nimport altair as alt # <1>\n\nchart = (\n alt.Chart(grouped)\n .mark_bar()\n .encode(\n x=\"species\",\n y=\"count\",\n )\n .properties(width=width, height=height)\n)\nchart\n```\n\n::: {.cell-output .cell-output-error}\n```\nExpressionError: Use .count() instead\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\nalt.Chart(...)\n```\n:::\n:::\n\n\n## Plotly\n\n```{.bash}\npip install plotly\n```\n\n::: {#bedde087 .cell execution_count=7}\n``` {.python .cell-code}\nimport plotly.express as px # <1>\n\npx.bar(\n grouped.to_pandas(),\n x=\"species\",\n y=\"count\",\n width=width,\n height=height,\n)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n:::\n\n\n## plotnine\n\n```{.bash}\npip install plotnine\n```\n\n::: {#e04b7abe .cell execution_count=8}\n``` {.python .cell-code}\nfrom plotnine import ggplot, aes, geom_bar, theme\n\n(\n ggplot(\n grouped,\n aes(x=\"species\", y=\"count\"),\n )\n + geom_bar(stat=\"identity\")\n + theme(figure_size=(width / 100, height / 100))\n)\n```\n\n::: {.cell-output .cell-output-display}\n![](index_files/figure-html/cell-9-output-1.png){}\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\n
\n```\n:::\n:::\n\n\n:::\n\nA more modular, composable, and scalable way of working with data is taking shape with `__dataframe__` and `__array__` support in Ibis and increasingly the Python data ecosystem. Let\\'s combine the above with PCA after some preprocessing in Ibis to visualize all numeric columns in 2D.\n\n::: {#7810cfc2 .cell execution_count=9}\n``` {.python .cell-code}\nimport ibis.selectors as s # <1>\n\n\ndef transform(t): # <2>\n t = t.mutate( # <2>\n s.across(s.numeric(), {\"zscore\": lambda x: (x - x.mean()) / x.std()}) # <2>\n ).dropna() # <2>\n return t # <2>\n\n\nf = transform(t) # <3>\nf # <4>\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    bill_length_mm_zscore  bill_depth_mm_zscore  flipper_length_mm_zscore  body_mass_g_zscore ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringfloat64float64float64float64            │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────────────┼──────────────────────┼──────────────────────────┼────────────────────┤\n│ Adelie Torgersen39.118.71813750male  2007  -0.8832050.784300-1.416272-0.563317 │\n│ Adelie Torgersen39.517.41863800female2007  -0.8099390.126003-1.060696-0.500969 │\n│ Adelie Torgersen40.318.01953250female2007  -0.6634080.429833-0.420660-1.186793 │\n│ Adelie Torgersen36.719.31933450female2007  -1.3227991.088129-0.562890-0.937403 │\n│ Adelie Torgersen39.320.61903650male  2007  -0.8465721.746426-0.776236-0.688012 │\n│ Adelie Torgersen38.917.81813625female2007  -0.9198370.328556-1.416272-0.719186 │\n│ Adelie Torgersen39.219.61954675male  2007  -0.8648881.240044-0.4206600.590115 │\n│ Adelie Torgersen41.117.61823200female2007  -0.5168760.227280-1.345156-1.249141 │\n│ Adelie Torgersen38.621.21913800male  2007  -0.9747872.050255-0.705121-0.500969 │\n│ Adelie Torgersen34.621.11984400male  2007  -1.7074431.999617-0.2073150.247203 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────────────┴──────────────────────┴──────────────────────────┴────────────────────┘\n
\n```\n:::\n:::\n\n\n1. Import the selectors module.\n2. Define a function to transform the table for code reuse (compute z-scores on numeric columns).\n3. Apply the function to the table and assign it to a new variable.\n4. Display the transformed table.\n\n```bash\npip install scikit-learn\n```\n\n::: {#7812e59f .cell execution_count=10}\n``` {.python .cell-code}\nimport plotly.express as px # <1>\nfrom sklearn.decomposition import PCA # <1>\n\nX = f.select(s.contains(\"zscore\")) # <2>\n\nn_components = 3 # <3>\npca = PCA(n_components=n_components).fit(X) # <3>\n\nt_pca = ibis.memtable(pca.transform(X)).relabel( # <4>\n {\"col0\": \"pc1\", \"col1\": \"pc2\", \"col2\": \"pc3\"} # <4>\n) # <4>\n\nf = f.mutate(row_number=ibis.row_number().over()).join( # <5>\n t_pca.mutate(row_number=ibis.row_number().over()), \"row_number\" # <5>\n) # <5>\n\npx.scatter_3d( # <6>\n f.to_pandas(), # <6>\n x=\"pc1\", # <6>\n y=\"pc2\", # <6>\n z=\"pc3\", # <6>\n color=\"species\", # <6>\n) # <6>\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n:::\n\n\n1. Import data science libraries\n2. Select \"features\" (numeric columns) as X\n3. Compute PCA\n4. Create a table from the PCA results\n5. Join the PCA results to the original table\n6. Plot the results\n\n## Backends\n\nNumerous backends received improvements. See the [release notes](../../release_notes.md) for more details.\n\n::: {.panel-tabset}\n\n## DataFusion\n\nThe DataFusion backend (and a few others) received several improvements from community member [\\@mesejo](https://github.com/mesejo) with memtables and many new operations now supported. Some highlights include:\n\n::: {#c24f725c .cell execution_count=11}\n``` {.python .cell-code}\nurl = ibis.literal(\"https://ibis-project.org/concepts/why_ibis\")\ncon = ibis.datafusion.connect()\n\ncon.execute(url.host())\n```\n\n::: {.cell-output .cell-output-display execution_count=11}\n```\n'ibis-project.org'\n```\n:::\n:::\n\n\n::: {#16d76af6 .cell execution_count=12}\n``` {.python .cell-code}\ncon.execute(url.path())\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```\n'/concepts/why_ibis'\n```\n:::\n:::\n\n\n::: {#f5ee0679 .cell execution_count=13}\n``` {.python .cell-code}\ncon.execute(ibis.literal(\"aaabbbaaa\").re_search(\"bbb\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```\nTrue\n```\n:::\n:::\n\n\n::: {#a192e678 .cell execution_count=14}\n``` {.python .cell-code}\ncon.execute(ibis.literal(5.56).ln())\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```\n1.715598108262491\n```\n:::\n:::\n\n\n::: {#87803f17 .cell execution_count=15}\n``` {.python .cell-code}\ncon.execute(ibis.literal(5.56).log10())\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```\n0.7450747915820575\n```\n:::\n:::\n\n\n::: {#ec7d20c1 .cell execution_count=16}\n``` {.python .cell-code}\ncon.execute(ibis.literal(5.56).radians())\n```\n\n::: {.cell-output .cell-output-display 
execution_count=16}\n```\n0.09704030641088471\n```\n:::\n:::\n\n\n## BigQuery\n\nSome remaining gaps in `CREATE TABLE` DDL options for BigQuery have been filled in, including the ability to pass in `overwrite=True` for table creation.\n\n## PySpark\n\nThe PySpark backend now supports reading/writing Delta Lake tables. Your PySpark session must be configured to use the Delta Lake package and you must have the `delta` package installed in your environment.\n\n```python\nt = ibis.read_delta(\"/path/to/delta\")\n\n...\n\nt.to_delta(\"/path/to/delta\", mode=\"overwrite\")\n```\n\n## Trino\n\nThe `.sql` API is now supported in Trino, enabling you to chain Ibis and SQL together.\n\n## SQLite\n\nScalar Python UDFs are now supported in SQLite.\n\nAdditionally, URL parsing has been added:\n\n::: {#18842920 .cell execution_count=17}\n``` {.python .cell-code}\ncon = ibis.sqlite.connect()\n\ncon.execute(url.host())\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```\n'ibis-project.org'\n```\n:::\n:::\n\n\n::: {#1953136c .cell execution_count=18}\n``` {.python .cell-code}\ncon.execute(url.path())\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```\n'/concepts/why_ibis'\n```\n:::\n:::\n\n\n## pandas\n\nURL parsing support was added.\n\n::: {#b2c845b3 .cell execution_count=19}\n``` {.python .cell-code}\ncon = ibis.pandas.connect()\n\ncon.execute(url.host())\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```\n'ibis-project.org'\n```\n:::\n:::\n\n\n::: {#8462ce32 .cell execution_count=20}\n``` {.python .cell-code}\ncon.execute(url.path())\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n'/concepts/why_ibis'\n```\n:::\n:::\n\n\n:::\n\n## Functionality\n\nVarious new features were added.\n\n### `.nunique()` supported on tables\n\nYou can now call `.nunique()` on tables to get the number of unique\nrows.\n\n::: {#a4b7830c .cell execution_count=21}\n``` {.python .cell-code}\n# how many unique rows 
are there? equivalent to `.count()` in this case\nt.nunique()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=21}\n\n::: {.ansi-escaped-output}\n```{=html}\n
344
\n```\n:::\n\n:::\n:::\n\n\n::: {#09761376 .cell execution_count=22}\n``` {.python .cell-code}\n# how many unique species/island/year combinations are there?\nt.select(\"species\", \"island\", \"year\").nunique()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=22}\n\n::: {.ansi-escaped-output}\n```{=html}\n
15
\n```\n:::\n\n:::\n:::\n\n\n### `to_sql` returns a `str` type\n\nThe `ibis.expr.sql.SQLString` type resulting from `to_sql` is now a proper `str` subclass, enabling use without casting to `str` first.\n\n::: {#9684a5ea .cell execution_count=23}\n``` {.python .cell-code}\ntype(ibis.to_sql(t))\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```\nibis.expr.sql.SQLString\n```\n:::\n:::\n\n\n::: {#8bade411 .cell execution_count=24}\n``` {.python .cell-code}\nissubclass(type(ibis.to_sql(t)), str)\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```\nTrue\n```\n:::\n:::\n\n\n### Allow mixing literals and columns in `ibis.array` {#allow-mixing-literals-and-columns-in-ibisarray}\n\nNote that arrays must still be of a single type.\n\n::: {#6bfcdc09 .cell execution_count=25}\n``` {.python .cell-code}\nibis.array([t[\"species\"], \"hello\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayColumn()       ┃\n┡━━━━━━━━━━━━━━━━━━━━━┩\n│ array<string>       │\n├─────────────────────┤\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│                    │\n└─────────────────────┘\n
\n```\n:::\n:::\n\n\n::: {#bbe78626 .cell execution_count=26}\n``` {.python .cell-code}\nibis.array([t[\"flipper_length_mm\"], 42])\n```\n\n::: {.cell-output .cell-output-display execution_count=26}\n```{=html}\n
┏━━━━━━━━━━━━━━━┓\n┃ ArrayColumn() ┃\n┡━━━━━━━━━━━━━━━┩\n│ array<int64>  │\n├───────────────┤\n│ [181, 42]     │\n│ [186, 42]     │\n│ [195, 42]     │\n│ [None, 42]    │\n│ [193, 42]     │\n│ [190, 42]     │\n│ [181, 42]     │\n│ [195, 42]     │\n│ [193, 42]     │\n│ [190, 42]     │\n│              │\n└───────────────┘\n
\n```\n:::\n:::\n\n\n### Array `concat` and `repeat` methods\n\nYou can still use `+` or `*` in typical Python fashion, with new and more explicit `concat` and `repeat` methods added in this release.\n\n::: {#5b21260b .cell execution_count=27}\n``` {.python .cell-code}\na = ibis.array([1, 2, 3])\nb = ibis.array([4, 5])\n\nc = a.concat(b)\nc\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=27}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[1, 2, 3, 4, 5]
\n```\n:::\n\n:::\n:::\n\n\n::: {#909aca5a .cell execution_count=28}\n``` {.python .cell-code}\nc = a + b\nc\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=28}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[1, 2, 3, 4, 5]
\n```\n:::\n\n:::\n:::\n\n\n::: {#22351df0 .cell execution_count=29}\n``` {.python .cell-code}\nb.repeat(2)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=29}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[4, 5, 4, 5]
\n```\n:::\n\n:::\n:::\n\n\n::: {#9818ea14 .cell execution_count=30}\n``` {.python .cell-code}\nb * 2\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=30}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[4, 5, 4, 5]
\n```\n:::\n\n:::\n:::\n\n\n### Support boolean literals in the join API\n\nThis allows for joins with boolean predicates.\n\n::: {#247f723b .cell execution_count=31}\n``` {.python .cell-code}\nt.join(t, True)\n```\n\n::: {.cell-output .cell-output-display execution_count=31}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    species_right  island_right  bill_length_mm_right  bill_depth_mm_right  flipper_length_mm_right  body_mass_g_right  sex_right  year_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringstringstringfloat64float64int64int64stringstring     │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────┼──────────────┼──────────────────────┼─────────────────────┼─────────────────────────┼───────────────────┼───────────┼────────────┤\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.118.71813750male     2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.517.41863800female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   40.318.01953250female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   nannanNULLNULLNULL2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   36.719.31933450female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.320.61903650male     2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   38.917.81813625female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.219.61954675male     2007       │\n│ Adelie 
Torgersen39.118.71813750male  2007  Adelie       Torgersen   34.118.11933475NULL2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   42.020.21904250NULL2007       │\n│           │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘\n
\n```\n:::\n:::\n\n\n::: {#1540b3c9 .cell execution_count=32}\n``` {.python .cell-code}\nt.join(t, False)\n```\n\n::: {.cell-output .cell-output-display execution_count=32}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃ species  island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    species_right  island_right  bill_length_mm_right  bill_depth_mm_right  flipper_length_mm_right  body_mass_g_right  sex_right  year_right ┃\n┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringstringstringfloat64float64int64int64stringstring     │\n└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘\n
\n```\n:::\n:::\n\n\n::: {#f6412d33 .cell execution_count=33}\n``` {.python .cell-code}\nt.join(t, False, how=\"outer\")\n```\n\n::: {.cell-output .cell-output-display execution_count=33}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    species_right  island_right  bill_length_mm_right  bill_depth_mm_right  flipper_length_mm_right  body_mass_g_right  sex_right  year_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringstringstringfloat64float64int64int64stringstring     │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────┼──────────────┼──────────────────────┼─────────────────────┼─────────────────────────┼───────────────────┼───────────┼────────────┤\n│ Adelie Torgersen39.118.71813750male  2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen39.517.41863800female2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen40.318.01953250female2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie TorgersennannanNULLNULLNULL2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen36.719.31933450female2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen39.320.61903650male  2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen38.917.81813625female2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen39.219.61954675male  2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen34.118.11933475NULL2007  NULLNULLnannanNULLNULLNULLNULL       │\n│ Adelie Torgersen42.020.21904250NULL2007  NULLNULLnannanNULLNULLNULLNULL       │\n│           
│\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘\n
\n```\n:::\n:::\n\n\n## Refactors\n\nSeveral internal refactors that shouldn\\'t affect normal usage were made. See [the release notes](../../release_notes.md) for more details.\n\n## Wrapping up\n\nIbis v6.1.0 brings exciting enhancements to the library that enable broader ecosystem adoption of Python standards.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting started](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an issue](https://github.com/ibis-project/issues/new/choose) or [discussion](https://github.com/ibis-project/discussions/new/choose) and let us know!\n\n", + "engine": "jupyter", + "markdown": "---\ntitle: Ibis v6.1.0\nauthor: \"Ibis team\"\ndate: \"2023-08-02\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.1.0 is a minor release that includes new features, backend improvements, bug fixes, documentation improvements, and refactors. We are excited to see further adoption of the dataframe interchange protocol enabling visualization and other libraries to be used more easily with Ibis.\n\nYou can view the full changelog in [the release notes](../../release_notes.qmd).\n\nIf you're new to Ibis, see [how to install](../../install.qmd) and [the getting started tutorial](../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6.1,<7'`. 
First, we\\'ll setup Ibis and fetch some\nsample data to use.\n\n::: {#d82f3327 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.__version__\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\n'8.0.0'\n```\n:::\n:::\n\n\n::: {#fc1b29eb .cell execution_count=2}\n``` {.python .cell-code}\n# interactive mode for demo purposes\nibis.options.interactive = True\n```\n:::\n\n\n::: {#9b5855fe .cell execution_count=3}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt = t.mutate(year=t[\"year\"].cast(\"str\"))\nt.limit(3)\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year   ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstring │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┤\n│ Adelie Torgersen39.118.71813750male  2007   │\n│ Adelie Torgersen39.517.41863800female2007   │\n│ Adelie Torgersen40.318.01953250female2007   │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┘\n
\n```\n:::\n:::\n\n\n## Ecosystem integrations\n\nWith the introduction of `__dataframe__` support in v6.0.0 and efficiency improvements in this release, Ibis now works with [Altair](https://altair-viz.github.io/index.html), [Plotly](https://plotly.com/python/), [plotnine](https://plotnine.readthedocs.io/en/stable/), and any other visualization library that implements the protocol. This enables passing Ibis tables directly to visualization libraries without a `.to_pandas()` or `to_pyarrow()` call for any of the 15+ backends supported, with data efficiently transferred through Apache Arrow.\n\n::: {#a13cda2b .cell execution_count=4}\n``` {.python .cell-code code-fold=\"true\"}\nwidth = 640 # <1>\nheight = 480 # <1>\n```\n:::\n\n\n1. Set the width and height of the plots.\n\n::: {#89842486 .cell execution_count=5}\n``` {.python .cell-code}\ngrouped = ( # <1>\n t.group_by(\"species\")\n .aggregate(count=ibis._.count())\n .order_by(ibis.desc(\"count\"))\n) # <1>\ngrouped # <2>\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━┓\n┃ species    count ┃\n┡━━━━━━━━━━━╇━━━━━━━┩\n│ stringint64 │\n├───────────┼───────┤\n│ Adelie   152 │\n│ Gentoo   124 │\n│ Chinstrap68 │\n└───────────┴───────┘\n
\n```\n:::\n:::\n\n\n1. Setup data to plot.\n2. Display the table.\n\n::: {.panel-tabset}\n\n## Altair\n\n```{.bash}\npip install altair\n```\n\n::: {#a5c8b9ba .cell execution_count=6}\n``` {.python .cell-code}\nimport altair as alt # <1>\n\nchart = (\n alt.Chart(grouped)\n .mark_bar()\n .encode(\n x=\"species\",\n y=\"count\",\n )\n .properties(width=width, height=height)\n)\nchart\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```{=html}\n\n\n
\n\n```\n:::\n:::\n\n\n## Plotly\n\n```{.bash}\npip install plotly\n```\n\n::: {#2d0dd198 .cell execution_count=7}\n``` {.python .cell-code}\nimport plotly.express as px # <1>\n\npx.bar(\n grouped.to_pandas(),\n x=\"species\",\n y=\"count\",\n width=width,\n height=height,\n)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n:::\n\n\n## plotnine\n\n```{.bash}\npip install plotnine\n```\n\n::: {#1b64ad84 .cell execution_count=8}\n``` {.python .cell-code}\nfrom plotnine import ggplot, aes, geom_bar, theme\n\n(\n ggplot(\n grouped,\n aes(x=\"species\", y=\"count\"),\n )\n + geom_bar(stat=\"identity\")\n + theme(figure_size=(width / 100, height / 100))\n)\n```\n\n::: {.cell-output .cell-output-display}\n![](index_files/figure-html/cell-9-output-1.png){}\n:::\n:::\n\n\n:::\n\nA more modular, composable, and scalable way of working with data is taking shape with `__dataframe__` and `__array__` support in Ibis and increasingly the Python data ecosystem. Let\\'s combine the above with PCA after some preprocessing in Ibis to visualize all numeric columns in 2D.\n\n::: {#500043e6 .cell execution_count=9}\n``` {.python .cell-code}\nimport ibis.selectors as s # <1>\n\n\ndef transform(t): # <2>\n t = t.mutate( # <2>\n s.across(s.numeric(), {\"zscore\": lambda x: (x - x.mean()) / x.std()}) # <2>\n ).dropna() # <2>\n return t # <2>\n\n\nf = transform(t) # <3>\nf # <4>\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    bill_length_mm_zscore  bill_depth_mm_zscore  flipper_length_mm_zscore  body_mass_g_zscore ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringfloat64float64float64float64            │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────────────┼──────────────────────┼──────────────────────────┼────────────────────┤\n│ Adelie Torgersen39.118.71813750male  2007  -0.8832050.784300-1.416272-0.563317 │\n│ Adelie Torgersen39.517.41863800female2007  -0.8099390.126003-1.060696-0.500969 │\n│ Adelie Torgersen40.318.01953250female2007  -0.6634080.429833-0.420660-1.186793 │\n│ Adelie Torgersen36.719.31933450female2007  -1.3227991.088129-0.562890-0.937403 │\n│ Adelie Torgersen39.320.61903650male  2007  -0.8465721.746426-0.776236-0.688012 │\n│ Adelie Torgersen38.917.81813625female2007  -0.9198370.328556-1.416272-0.719186 │\n│ Adelie Torgersen39.219.61954675male  2007  -0.8648881.240044-0.4206600.590115 │\n│ Adelie Torgersen41.117.61823200female2007  -0.5168760.227280-1.345156-1.249141 │\n│ Adelie Torgersen38.621.21913800male  2007  -0.9747872.050255-0.705121-0.500969 │\n│ Adelie Torgersen34.621.11984400male  2007  -1.7074431.999617-0.2073150.247203 │\n│  │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────────────┴──────────────────────┴──────────────────────────┴────────────────────┘\n
\n```\n:::\n:::\n\n\n1. Import the selectors module.\n2. Define a function to transform the table for code reuse (compute z-scores on numeric columns).\n3. Apply the function to the table and assign it to a new variable.\n4. Display the transformed table.\n\n```bash\npip install scikit-learn\n```\n\n::: {#b834ef6d .cell execution_count=10}\n``` {.python .cell-code}\nimport plotly.express as px # <1>\nfrom sklearn.decomposition import PCA # <1>\n\nX = f.select(s.contains(\"zscore\")) # <2>\n\nn_components = 3 # <3>\npca = PCA(n_components=n_components).fit(X) # <3>\n\nt_pca = ibis.memtable(pca.transform(X)).relabel( # <4>\n {\"col0\": \"pc1\", \"col1\": \"pc2\", \"col2\": \"pc3\"} # <4>\n) # <4>\n\nf = f.mutate(row_number=ibis.row_number().over()).join( # <5>\n t_pca.mutate(row_number=ibis.row_number().over()), \"row_number\" # <5>\n) # <5>\n\npx.scatter_3d( # <6>\n f.to_pandas(), # <6>\n x=\"pc1\", # <6>\n y=\"pc2\", # <6>\n z=\"pc3\", # <6>\n color=\"species\", # <6>\n) # <6>\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n:::\n\n\n1. Import data science libraries\n2. Select \"features\" (numeric columns) as X\n3. Compute PCA\n4. Create a table from the PCA results\n5. Join the PCA results to the original table\n6. Plot the results\n\n## Backends\n\nNumerous backends received improvements. See the [release notes](../../release_notes.qmd) for more details.\n\n::: {.panel-tabset}\n\n## DataFusion\n\nThe DataFusion backend (and a few others) received several improvements from community member [\\@mesejo](https://github.com/mesejo) with memtables and many new operations now supported. Some highlights include:\n\n::: {#ca412f33 .cell execution_count=11}\n``` {.python .cell-code}\nurl = ibis.literal(\"https://ibis-project.org/concepts/why_ibis\")\ncon = ibis.datafusion.connect()\n\ncon.execute(url.host())\n```\n\n::: {.cell-output .cell-output-display execution_count=11}\n```\n'ibis-project.org'\n```\n:::\n:::\n\n\n::: {#eb585003 .cell execution_count=12}\n``` {.python .cell-code}\ncon.execute(url.path())\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```\n'/concepts/why_ibis'\n```\n:::\n:::\n\n\n::: {#e99f1c36 .cell execution_count=13}\n``` {.python .cell-code}\ncon.execute(ibis.literal(\"aaabbbaaa\").re_search(\"bbb\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```\nTrue\n```\n:::\n:::\n\n\n::: {#c0a1f81c .cell execution_count=14}\n``` {.python .cell-code}\ncon.execute(ibis.literal(5.56).ln())\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```\n1.715598108262491\n```\n:::\n:::\n\n\n::: {#fc777ec0 .cell execution_count=15}\n``` {.python .cell-code}\ncon.execute(ibis.literal(5.56).log10())\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```\n0.7450747915820574\n```\n:::\n:::\n\n\n::: {#b977977a .cell execution_count=16}\n``` {.python .cell-code}\ncon.execute(ibis.literal(5.56).radians())\n```\n\n::: {.cell-output .cell-output-display 
execution_count=16}\n```\n0.09704030641088472\n```\n:::\n:::\n\n\n## BigQuery\n\nSome remaining gaps in `CREATE TABLE` DDL options for BigQuery have been filled in, including the ability to pass in `overwrite=True` for table creation.\n\n## PySpark\n\nThe PySpark backend now supports reading/writing Delta Lake tables. Your PySpark session must be configured to use the Delta Lake package and you must have the `delta` package installed in your environment.\n\n```python\nt = ibis.read_delta(\"/path/to/delta\")\n\n...\n\nt.to_delta(\"/path/to/delta\", mode=\"overwrite\")\n```\n\n## Trino\n\nThe `.sql` API is now supported in Trino, enabling you to chain Ibis and SQL together.\n\n## SQLite\n\nScalar Python UDFs are now supported in SQLite.\n\nAdditionally, URL parsing has been added:\n\n::: {#c5f4bc44 .cell execution_count=17}\n``` {.python .cell-code}\ncon = ibis.sqlite.connect()\n\ncon.execute(url.host())\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```\n'ibis-project.org'\n```\n:::\n:::\n\n\n::: {#ba060ef6 .cell execution_count=18}\n``` {.python .cell-code}\ncon.execute(url.path())\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```\n'/concepts/why_ibis'\n```\n:::\n:::\n\n\n## pandas\n\nURL parsing support was added.\n\n::: {#1ac6b736 .cell execution_count=19}\n``` {.python .cell-code}\ncon = ibis.pandas.connect()\n\ncon.execute(url.host())\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```\n'ibis-project.org'\n```\n:::\n:::\n\n\n::: {#c5fc624b .cell execution_count=20}\n``` {.python .cell-code}\ncon.execute(url.path())\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n'/concepts/why_ibis'\n```\n:::\n:::\n\n\n:::\n\n## Functionality\n\nVarious new features and were added.\n\n### `.nunique()` supported on tables\n\nYou can now call `.nunique()` on tables to get the number of unique\nrows.\n\n::: {#5c78dba7 .cell execution_count=21}\n``` {.python .cell-code}\n# how many unique rows 
are there? equivalent to `.count()` in this case\nt.nunique()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=21}\n\n::: {.ansi-escaped-output}\n```{=html}\n
344
\n```\n:::\n\n:::\n:::\n\n\n::: {#54a61dca .cell execution_count=22}\n``` {.python .cell-code}\n# how many unique species/island/year combinations are there?\nt.select(\"species\", \"island\", \"year\").nunique()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=22}\n\n::: {.ansi-escaped-output}\n```{=html}\n
15
\n```\n:::\n\n:::\n:::\n\n\n### `to_sql` returns a `str` type\n\nThe `ibis.expr.sql.SQLString` type resulting from `to_sql` is now a proper `str` subclass, enabling use without casting to `str` first.\n\n::: {#dd0fd73c .cell execution_count=23}\n``` {.python .cell-code}\ntype(ibis.to_sql(t))\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```\nibis.expr.sql.SQLString\n```\n:::\n:::\n\n\n::: {#a8361226 .cell execution_count=24}\n``` {.python .cell-code}\nissubclass(type(ibis.to_sql(t)), str)\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```\nTrue\n```\n:::\n:::\n\n\n### Allow mixing literals and columns in `ibis.array` {#allow-mixing-literals-and-columns-in-ibisarray}\n\nNote that arrays must still be of a single type.\n\n::: {#af89df72 .cell execution_count=25}\n``` {.python .cell-code}\nibis.array([t[\"species\"], \"hello\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━┓\n┃ Array()             ┃\n┡━━━━━━━━━━━━━━━━━━━━━┩\n│ array<string>       │\n├─────────────────────┤\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│ ['Adelie', 'hello'] │\n│                    │\n└─────────────────────┘\n
\n```\n:::\n:::\n\n\n::: {#26de295f .cell execution_count=26}\n``` {.python .cell-code}\nibis.array([t[\"flipper_length_mm\"], 42])\n```\n\n::: {.cell-output .cell-output-display execution_count=26}\n```{=html}\n
┏━━━━━━━━━━━━━━┓\n┃ Array()      ┃\n┡━━━━━━━━━━━━━━┩\n│ array<int64> │\n├──────────────┤\n│ [181, 42]    │\n│ [186, 42]    │\n│ [195, 42]    │\n│ [None, 42]   │\n│ [193, 42]    │\n│ [190, 42]    │\n│ [181, 42]    │\n│ [195, 42]    │\n│ [193, 42]    │\n│ [190, 42]    │\n│             │\n└──────────────┘\n
\n```\n:::\n:::\n\n\n### Array `concat` and `repeat` methods\n\nYou can still use `+` or `*` in typical Python fashion, with new and more explicit `concat` and `repeat` methods added in this release.\n\n::: {#5da0e7f9 .cell execution_count=27}\n``` {.python .cell-code}\na = ibis.array([1, 2, 3])\nb = ibis.array([4, 5])\n\nc = a.concat(b)\nc\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=27}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[1, 2, ... +3]
\n```\n:::\n\n:::\n:::\n\n\n::: {#83bed364 .cell execution_count=28}\n``` {.python .cell-code}\nc = a + b\nc\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=28}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[1, 2, ... +3]
\n```\n:::\n\n:::\n:::\n\n\n::: {#40f2ae67 .cell execution_count=29}\n``` {.python .cell-code}\nb.repeat(2)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=29}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[4, 5, ... +2]
\n```\n:::\n\n:::\n:::\n\n\n::: {#83e139a4 .cell execution_count=30}\n``` {.python .cell-code}\nb * 2\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=30}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[4, 5, ... +2]
\n```\n:::\n\n:::\n:::\n\n\n### Support boolean literals in the join API\n\nThis allows for joins with boolean predicates.\n\n::: {#51d8d59e .cell execution_count=31}\n``` {.python .cell-code}\nt.join(t, True)\n```\n\n::: {.cell-output .cell-output-display execution_count=31}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    species_right  island_right  bill_length_mm_right  bill_depth_mm_right  flipper_length_mm_right  body_mass_g_right  sex_right  year_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringstringstringfloat64float64int64int64stringstring     │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────┼──────────────┼──────────────────────┼─────────────────────┼─────────────────────────┼───────────────────┼───────────┼────────────┤\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.118.71813750male     2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.517.41863800female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   40.318.01953250female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   NULLNULLNULLNULLNULL2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   36.719.31933450female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.320.61903650male     2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   38.917.81813625female   2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   39.219.61954675male     2007       │\n│ Adelie 
Torgersen39.118.71813750male  2007  Adelie       Torgersen   34.118.11933475NULL2007       │\n│ Adelie Torgersen39.118.71813750male  2007  Adelie       Torgersen   42.020.21904250NULL2007       │\n│           │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘\n
\n```\n:::\n:::\n\n\n::: {#be427419 .cell execution_count=32}\n``` {.python .cell-code}\nt.join(t, False)\n```\n\n::: {.cell-output .cell-output-display execution_count=32}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃ species  island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    species_right  island_right  bill_length_mm_right  bill_depth_mm_right  flipper_length_mm_right  body_mass_g_right  sex_right  year_right ┃\n┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringstringstringfloat64float64int64int64stringstring     │\n└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘\n
\n```\n:::\n:::\n\n\n::: {#da5e38d7 .cell execution_count=33}\n``` {.python .cell-code}\nt.join(t, False, how=\"outer\")\n```\n\n::: {.cell-output .cell-output-display execution_count=33}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year    species_right  island_right  bill_length_mm_right  bill_depth_mm_right  flipper_length_mm_right  body_mass_g_right  sex_right  year_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ stringstringfloat64float64int64int64stringstringstringstringfloat64float64int64int64stringstring     │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┼───────────────┼──────────────┼──────────────────────┼─────────────────────┼─────────────────────────┼───────────────────┼───────────┼────────────┤\n│ Adelie Torgersen39.118.71813750male  2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen39.517.41863800female2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen40.318.01953250female2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie TorgersenNULLNULLNULLNULLNULL2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen36.719.31933450female2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen39.320.61903650male  2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen38.917.81813625female2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen39.219.61954675male  2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen34.118.11933475NULL2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│ Adelie Torgersen42.020.21904250NULL2007  NULLNULLNULLNULLNULLNULLNULLNULL       │\n│           
│\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┴───────────────┴──────────────┴──────────────────────┴─────────────────────┴─────────────────────────┴───────────────────┴───────────┴────────────┘\n
\n```\n:::\n:::\n\n\n## Refactors\n\nSeveral internal refactors that shouldn\\'t affect normal usage were made. See [the release notes](../../release_notes.qmd) for more details.\n\n## Wrapping up\n\nIbis v6.1.0 brings exciting enhancements to the library that enable broader ecosystem adoption of Python standards.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting started](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an issue](https://github.com/ibis-project/issues/new/choose) or [discussion](https://github.com/ibis-project/discussions/new/choose) and let us know!\n\n", "supporting": [ - "index_files/figure-html" + "index_files" ], "filters": [], "includes": { "include-in-header": [ - "\n\n\n\n\n" + "\n\n\n\n\n" ] } } diff --git a/docs/_freeze/posts/v6.1.0-release/index/figure-html/cell-9-output-1.png b/docs/_freeze/posts/v6.1.0-release/index/figure-html/cell-9-output-1.png index e5bad1688589..798b6aa09563 100644 Binary files a/docs/_freeze/posts/v6.1.0-release/index/figure-html/cell-9-output-1.png and b/docs/_freeze/posts/v6.1.0-release/index/figure-html/cell-9-output-1.png differ diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 1f48f122efd3..c323d20b7337 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -94,7 +94,7 @@ website: - sidebar:reference right: - posts.qmd - - release_notes.md + - release_notes.qmd - sidebar:contribute sidebar: diff --git a/docs/fix-version-href.lua b/docs/fix-version-href.lua new file mode 100644 index 000000000000..d7aa4d466543 --- /dev/null +++ b/docs/fix-version-href.lua @@ -0,0 +1,22 @@ +function Header(el) + -- Walk the pandoc AST and find all links in Header elements + -- Pluck out the version string and set it if it hasn't been set already + local version = nil; + + el = el:walk({ + Link = function(link) + return link:walk({ + Str = function(str) + if version == nil then + version = str.text + end + return str.text + end, + 
}) + end, + }) + + el.attr.identifier = version + + return el +end diff --git a/docs/posts/Ibis-version-3.1.0-release/index.qmd b/docs/posts/Ibis-version-3.1.0-release/index.qmd index a4ba7de94b1d..924562c9ca41 100644 --- a/docs/posts/Ibis-version-3.1.0-release/index.qmd +++ b/docs/posts/Ibis-version-3.1.0-release/index.qmd @@ -11,7 +11,7 @@ categories: Ibis 3.1 has officially been released as the latest version of the package. With this release comes new convenience features, increased backend operation coverage and a plethora of bug fixes. -As usual, a full list of the changes can be found in the project release notes [here](../../release_notes.md) Let’s talk about some of the new changes 3.1 brings for Ibis users. +As usual, a full list of the changes can be found in the project release notes [here](../../release_notes.qmd) Let’s talk about some of the new changes 3.1 brings for Ibis users. ## `ibis.connect` @@ -226,7 +226,7 @@ Along with these changes, the operation matrix has had a few more holes filled. Contributors should note that backend test data is now loaded dynamically. Most users won’t be exposed to this update, but it should make contribution a bit more streamlined. -To see the full patch notes, go to the [patch notes page](../../release_notes.md) +To see the full patch notes, go to the [patch notes page](../../release_notes.qmd) As always, Ibis is free and open source. Contributions are welcome and encouraged–drop into the discussions, raise an issue, or put in a pull request. diff --git a/docs/posts/ibis-version-4.0.0-release/index.qmd b/docs/posts/ibis-version-4.0.0-release/index.qmd index 5847c913d14b..ef9cfd7e318e 100644 --- a/docs/posts/ibis-version-4.0.0-release/index.qmd +++ b/docs/posts/ibis-version-4.0.0-release/index.qmd @@ -11,7 +11,7 @@ categories: Ibis 4.0 has officially been released as the latest version of the package. This release includes several new backends, improved functionality, and some major internal refactors. 
-A full list of the changes can be found in the [project release notes](../../release_notes.md). +A full list of the changes can be found in the [project release notes](../../release_notes.qmd). Let’s talk about some of the new changes 4.0 brings for Ibis users. ## Backends diff --git a/docs/posts/ibis-version-6.0.0-release/index.qmd b/docs/posts/ibis-version-6.0.0-release/index.qmd index 8837eca194cd..172eadfa0272 100644 --- a/docs/posts/ibis-version-6.0.0-release/index.qmd +++ b/docs/posts/ibis-version-6.0.0-release/index.qmd @@ -8,7 +8,7 @@ categories: --- ## Overview -Ibis 6.0.0 adds the Oracle backend, revamped UDF support, and many new features. This release also includes a number of refactors, bug fixes, and performance improvements. You can view the full changelog in [the release notes](../../../release_notes.md). +Ibis 6.0.0 adds the Oracle backend, revamped UDF support, and many new features. This release also includes a number of refactors, bug fixes, and performance improvements. You can view the full changelog in [the release notes](../../../release_notes.qmd). If you're new to Ibis, see [how to install](../../../install.qmd) and [the getting started tutorial](../../../tutorials/getting_started.qmd). @@ -273,7 +273,7 @@ t.agg(s.across(["species", "island"], ibis._.count())) ## Refactors -Several internal refactors that shouldn't affect normal usage were made. See [the release notes](../../release_notes.md) for more details. +Several internal refactors that shouldn't affect normal usage were made. See [the release notes](../../release_notes.qmd) for more details. ## Wrapping up diff --git a/docs/posts/ibis-version-8.0.0-release/index.qmd b/docs/posts/ibis-version-8.0.0-release/index.qmd index 6abc59affef9..2391273d5185 100644 --- a/docs/posts/ibis-version-8.0.0-release/index.qmd +++ b/docs/posts/ibis-version-8.0.0-release/index.qmd @@ -98,7 +98,7 @@ brings another great option for fast batch analytics to Ibis. 
## Breaking changes -You can view the [full changelog](../../release_notes.md) for additional +You can view the [full changelog](../../release_notes.qmd) for additional breaking changes. There have been few that we expect to affect most users. :::{.callout-note} diff --git a/docs/posts/v6.1.0-release/index.qmd b/docs/posts/v6.1.0-release/index.qmd index 80c33c290c90..8de8c03d9d73 100644 --- a/docs/posts/v6.1.0-release/index.qmd +++ b/docs/posts/v6.1.0-release/index.qmd @@ -11,7 +11,7 @@ categories: Ibis 6.1.0 is a minor release that includes new features, backend improvements, bug fixes, documentation improvements, and refactors. We are excited to see further adoption of the dataframe interchange protocol enabling visualization and other libraries to be used more easily with Ibis. -You can view the full changelog in [the release notes](../../release_notes.md). +You can view the full changelog in [the release notes](../../release_notes.qmd). If you're new to Ibis, see [how to install](../../install.qmd) and [the getting started tutorial](../../tutorials/getting_started.qmd). @@ -182,7 +182,7 @@ px.scatter_3d( # <6> ## Backends -Numerous backends received improvements. See the [release notes](../../release_notes.md) for more details. +Numerous backends received improvements. See the [release notes](../../release_notes.qmd) for more details. ::: {.panel-tabset} @@ -355,7 +355,7 @@ t.join(t, False, how="outer") ## Refactors -Several internal refactors that shouldn\'t affect normal usage were made. See [the release notes](../../release_notes.md) for more details. +Several internal refactors that shouldn\'t affect normal usage were made. See [the release notes](../../release_notes.qmd) for more details. 
## Wrapping up diff --git a/docs/release_notes.qmd b/docs/release_notes.qmd new file mode 100644 index 000000000000..c304ea2c3c96 --- /dev/null +++ b/docs/release_notes.qmd @@ -0,0 +1,6 @@ +--- +title: "Release notes" +filters: + - fix-version-href.lua +--- +{{< include ./release_notes_generated.qmd >}} diff --git a/docs/release_notes.md b/docs/release_notes_generated.qmd similarity index 99% rename from docs/release_notes.md rename to docs/release_notes_generated.qmd index f8def1134763..c9314cc3cca9 100644 --- a/docs/release_notes.md +++ b/docs/release_notes_generated.qmd @@ -1,4 +1,4 @@ -Release notes +--- --- ## [8.0.0](https://github.com/ibis-project/ibis/compare/7.2.0...8.0.0) (2024-02-05) @@ -2923,7 +2923,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * **setup.py:** set the correct version number for 2.1.0 ([f3d267b](https://github.com/ibis-project/ibis/commit/f3d267b96b9f14d3616c17b8f7bdeb8d0a6fc2cf)) -# [2.1.0](https://github.com/ibis-project/ibis/compare/2.0.0...2.1.0) (2022-01-12) +## [2.1.0](https://github.com/ibis-project/ibis/compare/2.0.0...2.1.0) (2022-01-12) ### Bug Fixes @@ -2956,9 +2956,9 @@ behavior from UUID datatypes will need to add an explicit cast first. * "ci: install gdal for fiona" ([8503361](https://github.com/ibis-project/ibis/commit/850336100a271ee2b6043b92a1ceeb1d1d7b30f2)) -# [2.0.0](https://github.com/ibis-project/ibis/releases/tag/2.0.0) (2021-10-06) +## [2.0.0](https://github.com/ibis-project/ibis/compare/1.4.0...2.0.0) (2021-10-06) -## Features +### Features * Serialization-deserialization of Node via pickle is now byte compatible between different processes ([#2938](https://github.com/ibis-project/ibis/issues/2938)) * Support joining on different columns in ClickHouse backend ([#2916](https://github.com/ibis-project/ibis/issues/2916)) @@ -2984,7 +2984,7 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Added support for case/when in PySpark backend ([#2610](https://github.com/ibis-project/ibis/issues/2610)) * Add support for np.array as literals for backends that already support lists as literals ([#2603](https://github.com/ibis-project/ibis/issues/2603)) -## Bugs +### Bugs * Fix data races in impala connection pool accounting ([#2991](https://github.com/ibis-project/ibis/issues/2991)) * Fix null literal compilation in the Clickhouse backend ([#2985](https://github.com/ibis-project/ibis/issues/2985)) @@ -3010,7 +3010,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Fixes binary data type translation into BigQuery bytes data type ([#2354](https://github.com/ibis-project/ibis/issues/2354)) * Make StructValue picklable ([#2577](https://github.com/ibis-project/ibis/issues/2577)) -## Support +### Support * Improvement of the backend API. The former `Client` subclasses have been replaced by a `Backend` class that must subclass `ibis.backends.base.BaseBackend`. The `BaseBackend` class contains abstract methods for the minimum subset of methods that backends must implement, and their signatures have been standardized across backends. The Ibis compiler has been refactored, and backends don't need to implement all compiler classes anymore if the default works for them. Only a subclass of `ibis.backends.base.sql.compiler.Compiler` is now required. Backends now need to register themselves as entry points. ([#2678](https://github.com/ibis-project/ibis/issues/2678)) * Deprecate `exists_table(table)` in favor of `table in list_tables()` ([#2905](https://github.com/ibis-project/ibis/issues/2905)) @@ -3028,9 +3028,9 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Remove deprecated `ibis.HDFS`, `ibis.WebHDFS` and `ibis.hdfs_connect` ([#2505](https://github.com/ibis-project/ibis/issues/2505)) -# [1.4.0](https://github.com/ibis-project/ibis/releases/tag/1.4.0) (2020-11-07) +## [1.4.0](https://github.com/ibis-project/ibis/compare/1.3.0...1.4.0) (2020-11-07) -## Features +### Features * Add Struct.from_dict ([#2514](https://github.com/ibis-project/ibis/issues/2514)) * Add hash and hashbytes support for BigQuery backend ([#2310](https://github.com/ibis-project/ibis/issues/2310)) @@ -3073,7 +3073,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Create ExtractDayOfYear operation and add its support to Clickhouse, CSV, MySQL, OmniSciDB, pandas, Parquet, PostgreSQL, PySpark, SQLite and Spark ([#2173](https://github.com/ibis-project/ibis/issues/2173)) * Implementations of Log Log2 Log10 for OmniSciDB backend ([#2095](https://github.com/ibis-project/ibis/issues/2095)) -## Bugs +### Bugs * Table expressions do not recognize inet datatype (Postgres backend) ([#2462](https://github.com/ibis-project/ibis/issues/2462)) * Table expressions do not recognize macaddr datatype (Postgres backend) ([#2461](https://github.com/ibis-project/ibis/issues/2461)) @@ -3097,7 +3097,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Fix millisecond issue for OmniSciDB :issue:`2167`, MySQL :issue:`2169`, PostgreSQL :issue:`2166`, pandas :issue:`2168`, BigQuery :issue:`2273` backends ([#2170](https://github.com/ibis-project/ibis/issues/2170)) * [OmniSciDB] Fix TopK when used as filter ([#2134](https://github.com/ibis-project/ibis/issues/2134)) -## Support +### Support * Move `ibis.HDFS`, `ibis.WebHDFS` and `ibis.hdfs_connect` to `ibis.impala.*` ([#2497](https://github.com/ibis-project/ibis/issues/2497)) * Drop support to Python 3.6 ([#2288](https://github.com/ibis-project/ibis/issues/2288)) @@ -3110,9 +3110,9 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Enabled cumulative ops support for OmniSciDB ([#2113](https://github.com/ibis-project/ibis/issues/2113)) -# [1.3.0](https://github.com/ibis-project/ibis/releases/tag/1.3.0) (2020-02-27) +## [1.3.0](https://github.com/ibis-project/ibis/compare/1.2.0...1.3.0) (2020-02-27) -## Features +### Features * Improve many arguments UDF performance in pandas backend. ([#2071](https://github.com/ibis-project/ibis/issues/2071)) * Add DenseRank, RowNumber, MinRank, Count, PercentRank/CumeDist window operations to OmniSciDB ([#1976](https://github.com/ibis-project/ibis/issues/1976)) @@ -3144,7 +3144,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Spark client ([#1807](https://github.com/ibis-project/ibis/issues/1807)) * Use pandas rolling apply to implement rows_with_max_lookback ([#1868](https://github.com/ibis-project/ibis/issues/1868)) -## Bugs +### Bugs * Pin "clickhouse-driver" to ">=0.1.3" ([#2089](https://github.com/ibis-project/ibis/issues/2089)) * Fix load data stage for Linux CI ([#2069](https://github.com/ibis-project/ibis/issues/2069)) @@ -3172,7 +3172,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Remove -x from pytest call in linux CI ([#1869](https://github.com/ibis-project/ibis/issues/1869)) * Fix return type of Struct.from_tuples ([#1867](https://github.com/ibis-project/ibis/issues/1867)) -## Support +### Support * Add support to Python 3.8 ([#2066](https://github.com/ibis-project/ibis/issues/2066)) * Pin back version of isort ([#2079](https://github.com/ibis-project/ibis/issues/2079)) @@ -3224,16 +3224,16 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Move CI pipelines to Azure ([#1856](https://github.com/ibis-project/ibis/issues/1856)) -# [1.2.0](https://github.com/ibis-project/ibis/releases/tag/1.2.0) (2019-06-24) +## [1.2.0](https://github.com/ibis-project/ibis/compare/1.1.0...1.2.0) (2019-06-24) -## Features +### Features * Add new geospatial functions to OmniSciDB backend ([#1836](https://github.com/ibis-project/ibis/issues/1836)) * allow pandas timedelta in rows_with_max_lookback ([#1838](https://github.com/ibis-project/ibis/issues/1838)) * Accept rows-with-max-lookback as preceding parameter ([#1825](https://github.com/ibis-project/ibis/issues/1825)) * PostGIS support ([#1787](https://github.com/ibis-project/ibis/issues/1787)) -## Bugs +### Bugs * Fix call to psql causing failing CI ([#1855](https://github.com/ibis-project/ibis/issues/1855)) * Fix nested array literal repr ([#1851](https://github.com/ibis-project/ibis/issues/1851)) @@ -3241,7 +3241,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Add max_lookback to window replace and combine functions ([#1843](https://github.com/ibis-project/ibis/issues/1843)) * Partially revert #1758 ([#1837](https://github.com/ibis-project/ibis/issues/1837)) -## Support +### Support * Skip SQLAlchemy backend tests in connect method in backends.py ([#1847](https://github.com/ibis-project/ibis/issues/1847)) * Validate order_by when using rows_with_max_lookback window ([#1848](https://github.com/ibis-project/ibis/issues/1848)) @@ -3251,9 +3251,9 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Allow passing a branch to ci/feedstock.py ([#1826](https://github.com/ibis-project/ibis/issues/1826)) -# [1.1.0](https://github.com/ibis-project/ibis/releases/tag/1.1.0) (2019-06-09) +## [1.1.0](https://github.com/ibis-project/ibis/compare/1.0.0...1.1.0) (2019-06-09) -## Features +### Features * Conslidate trailing window functions ([#1809](https://github.com/ibis-project/ibis/issues/1809)) * Call to_interval when casting integers to intervals ([#1766](https://github.com/ibis-project/ibis/issues/1766)) @@ -3268,7 +3268,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * is_computable_arg dispatcher ([#1743](https://github.com/ibis-project/ibis/issues/1743)) * Added float32 and geospatial types for create table from schema ([#1753](https://github.com/ibis-project/ibis/issues/1753)) -## Bugs +### Bugs * Fix group_concat test and implementations ([#1819](https://github.com/ibis-project/ibis/issues/1819)) * Fix failing strftime tests on Python 3.7 ([#1818](https://github.com/ibis-project/ibis/issues/1818)) @@ -3283,7 +3283,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Fix preceding and following with ``None`` ([#1765](https://github.com/ibis-project/ibis/issues/1765)) * PostgreSQL interval type not recognized ([#1661](https://github.com/ibis-project/ibis/issues/1661)) -## Support +### Support * Remove decorator hacks and add custom markers ([#1820](https://github.com/ibis-project/ibis/issues/1820)) * Add development deps to setup.py ([#1814](https://github.com/ibis-project/ibis/issues/1814)) @@ -3301,10 +3301,9 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Re-enable CI for building step ([#1700](https://github.com/ibis-project/ibis/issues/1700)) * Update README reference to MapD to say OmniSci ([#1749](https://github.com/ibis-project/ibis/issues/1749)) +## [1.0.0](https://github.com/ibis-project/ibis/compare/v0.14.0...1.0.0) (2019-03-26) -# [1.0.0](https://github.com/ibis-project/ibis/releases/tag/1.0.0) (2019-03-26) - -## Features +### Features * Add black as a pre-commit hook ([#1735](https://github.com/ibis-project/ibis/issues/1735)) * Add support for the arbitrary aggregate in the mapd backend ([#1680](https://github.com/ibis-project/ibis/issues/1680)) @@ -3333,7 +3332,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Make ibis node instances hashable ([#1611](https://github.com/ibis-project/ibis/issues/1611)) * Add ``range_window`` and ``trailing_range_window`` to docs ([#1608](https://github.com/ibis-project/ibis/issues/1608)) -## Bugs +### Bugs * Make ``dev/merge-pr.py`` script handle PR branches ([#1745](https://github.com/ibis-project/ibis/issues/1745)) * Fix ``NULLIF`` implementation for the pandas backend ([#1742](https://github.com/ibis-project/ibis/issues/1742)) @@ -3354,7 +3353,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * Make sure we cast when NULL is else in CASE expressions ([#1651](https://github.com/ibis-project/ibis/issues/1651)) * Fix equality ([#1600](https://github.com/ibis-project/ibis/issues/1600)) -## Support +### Support * Do not build universal wheels ([#1748](https://github.com/ibis-project/ibis/issues/1748)) * Remove tag prefix from versioneer ([#1747](https://github.com/ibis-project/ibis/issues/1747)) @@ -3377,13 +3376,13 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* Set up CI with Azure Pipelines ([#1640](https://github.com/ibis-project/ibis/issues/1640)) * Fix conda builds ([#1609](https://github.com/ibis-project/ibis/issues/1609)) -# v0.14.0 (2018-08-23) +## [0.14](https://github.com/ibis-project/ibis/compare/v0.13.0...v0.14.0) (2018-08-23) This release brings refactored, more composable core components and rule system to ibis. We also focused quite heavily on the BigQuery backend this release. -## New Features +### New Features - Allow keyword arguments in Node subclasses ([#968](https://github.com/ibis-project/ibis/issues/968)) - Splat args into Node subclasses instead of requiring a list @@ -3418,7 +3417,7 @@ this release. - Implement `TimestampNow` for BigQuery and pandas ([#1575](https://github.com/ibis-project/ibis/issues/1575)) -## Bug Fixes +### Bug Fixes - Nullable property is now propagated through value types ([#1289](https://github.com/ibis-project/ibis/issues/1289)) @@ -3437,7 +3436,7 @@ this release. - Fix output type for some decimal operations ([#1541](https://github.com/ibis-project/ibis/issues/1541)) -## API Changes +### API Changes - The previous, private rules API has been rewritten ([#1366](https://github.com/ibis-project/ibis/issues/1366)) @@ -3449,21 +3448,21 @@ this release. ([#1390](https://github.com/ibis-project/ibis/issues/1390)) - Add `tolerance` parameter to `asof_join` ([#1443](https://github.com/ibis-project/ibis/issues/1443)) -# v0.13.0 (2018-03-30) +## [0.13](https://github.com/ibis-project/ibis/compare/v0.12.0...v0.13.0) (2018-03-30) This release brings new backends, including support for executing against files, MySQL, pandas user defined scalar and aggregations along with a number of bug fixes and reliability enhancements. We recommend that all users upgrade from earlier versions of Ibis. 
-## New Backends +### New Backends - File Support for CSV & HDF5 ([#1165](https://github.com/ibis-project/ibis/issues/1165), [#1194](https://github.com/ibis-project/ibis/issues/1194)) - File Support for Parquet Format ([#1175](https://github.com/ibis-project/ibis/issues/1175), [#1194](https://github.com/ibis-project/ibis/issues/1194)) - Experimental support for `MySQL` thanks to \@kszucs ([#1224](https://github.com/ibis-project/ibis/issues/1224)) -## New Features +### New Features - Support for Unsigned Integer Types ([#1194](https://github.com/ibis-project/ibis/issues/1194)) - Support for Interval types and expressions with support for @@ -3512,7 +3511,7 @@ that all users upgrade from earlier versions of Ibis. [#1047](https://github.com/ibis-project/ibis/issues/1047)) - Design documentation for ibis ([#1351](https://github.com/ibis-project/ibis/issues/1351)) -## Bug Fixes +### Bug Fixes - Unbound parameters were failing in the simple case of a `ibis.expr.types.TableExpr.mutate` @@ -3527,27 +3526,27 @@ that all users upgrade from earlier versions of Ibis. `ibis.expr.operations.NotContains`. - Support `REGEX_EXTRACT` in PostgreSQL 10 ([#1276](https://github.com/ibis-project/ibis/issues/1276), [#1278](https://github.com/ibis-project/ibis/issues/1278)) -## API Changes +### API Changes - Fixing [#1378](https://github.com/ibis-project/ibis/issues/1378) required the removal of the `name` parameter to the `ibis.param` function. Use the `ibis.expr.types.Expr.name` method instead. -# v0.12.0 (2017-10-28) +## [0.12](https://github.com/ibis-project/ibis/compare/v0.11.0...v0.12.0) (2017-10-28) This release brings Clickhouse and BigQuery SQL support along with a number of bug fixes and reliability enhancements. We recommend that all users upgrade from earlier versions of Ibis. -## New Backends +### New Backends - BigQuery backend ([#1170](https://github.com/ibis-project/ibis/issues/1170)), thanks to \@tsdlovell. 
- Clickhouse backend ([#1127](https://github.com/ibis-project/ibis/issues/1127)), thanks to \@kszucs. -## New Features +### New Features - Add support for `Binary` data type ([#1183](https://github.com/ibis-project/ibis/issues/1183)) - Allow users of the BigQuery client to define their own API proxy @@ -3584,7 +3583,7 @@ users upgrade from earlier versions of Ibis. ([#1061](https://github.com/ibis-project/ibis/issues/1061)) - Add a pre_execute step in pandas backend ([#1189](https://github.com/ibis-project/ibis/issues/1189)) -## Bug Fixes +### Bug Fixes - Remove global expression caching to ensure repeatable code generation ([#1179](https://github.com/ibis-project/ibis/issues/1179), @@ -3613,7 +3612,7 @@ users upgrade from earlier versions of Ibis. - Memoization should include expression name where available ([#1080](https://github.com/ibis-project/ibis/issues/1080)) -## Performance Enhancements +### Performance Enhancements - Speed up imports ([#1074](https://github.com/ibis-project/ibis/issues/1074)) - Fix execution perf of groupby and selection @@ -3622,7 +3621,7 @@ users upgrade from earlier versions of Ibis. ([#1070](https://github.com/ibis-project/ibis/issues/1070)) - Speed up pandas groupby ([#1067](https://github.com/ibis-project/ibis/issues/1067)) -## Contributors +### Contributors The following people contributed to the 0.12.0 release : @@ -3636,13 +3635,13 @@ The following people contributed to the 0.12.0 release : 1 dlovell 1 kwangin -# 0.11.0 (2017-06-28) +## [0.11](https://github.com/ibis-project/ibis/compare/v0.10.0...v0.11.0) (2017-06-28) This release brings initial pandas backend support along with a number of bug fixes and reliability enhancements. We recommend that all users upgrade from earlier versions of Ibis. -## New Features +### New Features - Experimental pandas backend to allow execution of ibis expression against pandas DataFrames @@ -3660,7 +3659,7 @@ upgrade from earlier versions of Ibis. 
- Added a `type` parameter to `ibis.literal` to allow user specification of literal types -## Bug Fixes +### Bug Fixes - Fix broken conda recipe - Fix incorrectly typed fillna operation @@ -3669,14 +3668,14 @@ upgrade from earlier versions of Ibis. - Fix equality of nested types and construction of nested types when the value type is specified as a string -## API Changes +### API Changes - Deprecate passing integer values to the `ibis.timestamp` literal constructor, this will be removed in 0.12.0 - Added the `admin_timeout` parameter to the kudu client `connect` function -## Contributors +### Contributors $ git shortlog --summary --numbered v0.10.0..v0.11.0 @@ -3686,19 +3685,19 @@ upgrade from earlier versions of Ibis. 1 Tarun Gogineni 1 Wes McKinney -# 0.8 (2016-05-19) +## [0.8](https://github.com/ibis-project/ibis/compare/v0.7.0...v0.8.0) (2016-05-19) This release brings initial PostgreSQL backend support along with a number of critical bug fixes and usability improvements. As several correctness bugs with the SQL compiler were fixed, we recommend that all users upgrade from earlier versions of Ibis. -## New Features +### New Features - Initial PostgreSQL backend contributed by Phillip Cloud. - Add `groupby` as an alias for `group_by` to table expressions -## Bug Fixes +### Bug Fixes - Fix an expression error when filtering based on a new field - Fix Impala\'s SQL compilation of using `OR` with compound filters @@ -3707,17 +3706,17 @@ users upgrade from earlier versions of Ibis. - Fix CTE (`WITH`) extraction inside `UNION ALL` expressions. - Fix `ImportError` on Python 2 when `mock` library not installed -## API Changes +### API Changes - The deprecated `ibis.impala_connect` and `ibis.make_client` APIs have been removed -# 0.7 (2016-03-16) +## [0.7](https://github.com/ibis-project/ibis/compare/v0.6.0...v0.7.0) (2016-03-16) This release brings initial Kudu-Impala integration and improved Impala and SQLite support, along with several critical bug fixes. 
-## New Features +### New Features - Apache Kudu (incubating) integration for Impala users. Will add some documentation here when possible. @@ -3758,7 +3757,7 @@ FROM ( ) t1 ``` -## Bug Fixes +### Bug Fixes - `CHAR(n)` and `VARCHAR(n)` Impala types now correctly map to Ibis string expressions @@ -3776,7 +3775,7 @@ FROM ( - Fix handling of fully-materialized joins (aka `select *` joins) in SQLAlchemy / SQLite. -## Contributors +### Contributors Thank you to all who contributed patches to this release. @@ -3785,7 +3784,7 @@ Thank you to all who contributed patches to this release. 1 Uri Laserson 1 Kristopher Overholt -# 0.6 (2015-12-01) +## [0.6](https://github.com/ibis-project/ibis/compare/v0.5.0...v0.6.0) (2015-12-01) This release brings expanded pandas and Impala integration, including support for managing partitioned tables in Impala. See the new @@ -3798,7 +3797,7 @@ also was written since the 0.5 release. This release also includes bug fixes affecting generated SQL correctness. All users should upgrade as soon as possible. -## New Features +### New Features - New integrated Impala functionality. See `Ibis for Impala Users` for more details on these things. @@ -3837,11 +3836,11 @@ correctness. All users should upgrade as soon as possible. - Passing `limit=None` to the `execute` method on expressions disables any default row limits. -## API Changes +### API Changes - `ImpalaTable.rename` no longer mutates the calling table expression. -## Contributors +### Contributors $ git log v0.5.0..v0.6.0 --pretty=format:%aN | sort | uniq -c | sort -rn 46 Wes McKinney @@ -3850,13 +3849,13 @@ correctness. All users should upgrade as soon as possible. 1 mariusvniekerk 1 Kristopher Overholt -# 0.5 (2015-09-10) +## [0.5](https://github.com/ibis-project/ibis/compare/v0.4.0...v0.5.0) (2015-09-10) Highlights in this release are the SQLite, Python 3, Impala UDA support, and an asynchronous execution API. There are also many usability improvements, bug fixes, and other new features. 
-## New Features +### New Features - SQLite client and built-in function support - Ibis now supports Python 3.4 as well as 2.6 and 2.7 @@ -3879,21 +3878,21 @@ improvements, bug fixes, and other new features. compilation and ability to compile (since many operations are unavailable in SQLite, for example) -## API Changes +### API Changes - Impala Ibis client creation now uses only `ibis.impala.connect`, and `ibis.make_client` has been deprecated -## Contributors +### Contributors $ git log v0.4.0..v0.5.0 --pretty=format:%aN | sort | uniq -c | sort -rn 55 Wes McKinney 9 Uri Laserson 1 Kristopher Overholt -# 0.4 (2015-08-14) +## [0.4](https://github.com/ibis-project/ibis/compare/v0.3.0...v0.4.0) (2015-08-14) -## New Features +### New Features - Add tooling to use Impala C++ scalar UDFs within Ibis (#262, #195) - Support and testing for Kerberos-enabled secure HDFS clusters @@ -3923,7 +3922,7 @@ improvements, bug fixes, and other new features. query to cluster, for better usability. - Add conda installation recipes -## Contributors +### Contributors $ git log v0.3.0..v0.4.0 --pretty=format:%aN | sort | uniq -c | sort -rn 38 Wes McKinney @@ -3932,11 +3931,11 @@ improvements, bug fixes, and other new features. 2 Kristopher Overholt 1 Marius van Niekerk -# 0.3 (2015-07-20) +## [0.3](https://github.com/ibis-project/ibis/compare/v0.2.0...v0.3.0) (2015-07-20) First public release. See https://ibis-project.org for more. -## New Features +### New Features - Implement window / analytic function support - Enable non-equijoins (join clauses with operations other than `==`). @@ -3960,7 +3959,7 @@ First public release. See https://ibis-project.org for more. - Add an internal operation type signature API to enhance developer productivity. -## Contributors +### Contributors $ git log v0.2.0..v0.3.0 --pretty=format:%aN | sort | uniq -c | sort -rn 59 Wes McKinney @@ -3968,9 +3967,9 @@ First public release. See https://ibis-project.org for more. 
4 Isaac Hodes 2 Meghana Vuyyuru -# 0.2 (2015-06-16) +## [0.2](https://github.com/ibis-project/ibis/compare/v0.1.0...v0.2.0) (2015-06-16) -## New Features +### New Features - `insert` method on Ibis client for inserting data into existing tables. @@ -4012,23 +4011,23 @@ First public release. See https://ibis-project.org for more. - Add `add`, `sub`, and other explicit arithmetic methods to value expressions -## API Changes +### API Changes - New Ibis client and Impala connection workflow. Client now combined from an Impala connection and an optional HDFS connection -## Bug Fixes +### Bug Fixes - Numerous expression API bug fixes and rough edges fixed -## Contributors +### Contributors $ git log v0.1.0..v0.2.0 --pretty=format:%aN | sort | uniq -c | sort -rn 71 Wes McKinney 1 Juliet Hougland 1 Isaac Hodes -# 0.1 (2015-03-26) +## [0.1](https://github.com/ibis-project/ibis/tree/v0.1.0) (2015-03-26) First Ibis release.