diff --git a/.releaserc.js b/.releaserc.js index b9c5dd05502b..3ec1f0494203 100644 --- a/.releaserc.js +++ b/.releaserc.js @@ -35,8 +35,8 @@ module.exports = { [ "@semantic-release/changelog", { - changelogTitle: "Release notes\n---", - changelogFile: "docs/release_notes.md" + changelogTitle: "---\n---", + changelogFile: "docs/release_notes_generated.qmd" } ], [ @@ -80,7 +80,11 @@ module.exports = { [ "@semantic-release/git", { - assets: ["pyproject.toml", "docs/release_notes.md", "ibis/__init__.py"], + assets: [ + "pyproject.toml", + "docs/release_notes_generated.qmd", + "ibis/__init__.py" + ], message: "chore(release): ${nextRelease.version}" } ] diff --git a/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json b/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json index 01cedd17c694..7868be8583d4 100644 --- a/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json +++ b/docs/_freeze/posts/ibis-version-6.0.0-release/index/execute-results/html.json @@ -1,14 +1,15 @@ { - "hash": "11a33fda6aef0f256ced3d8488292de6", + "hash": "08808129893bcfdb11cb5903d9ae1ef1", "result": { - "markdown": "---\ntitle: Ibis v6.0.0\nauthor: \"Ibis team\"\ndate: \"2023-07-03\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.0.0 adds the Oracle backend, revamped UDF support, and many new features. This release also includes a number of refactors, bug fixes, and performance improvements. You can view the full changelog in [the release notes](../../../release_notes.md).\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the getting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6,<7'`. First, we'll setup Ibis and fetch some sample data to use.\n\n::: {#df4d0de1 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#4967a305 .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n## Breaking changes\n\n### Join duplicate column names\n\nPreviously when joining tables with duplicate column names, `_x` and `_y` suffixes would be appended by default to the left and right tables respectively. You could override this with the `suffix` argument, which is now removed in favor of `lname` and `rname` arguments. The default is changed to no suffix for the left table and `_right` for the right table.\n\n::: {#63b558d3 .cell execution_count=3}\n``` {.python .cell-code}\nt.join(t, \"island\").select(s.startswith(\"species\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ species ┃ species_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ string │ string │\n├─────────┼───────────────┤\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ … │ … │\n└─────────┴───────────────┘\n\n```\n:::\n:::\n\n\nTo replicate the previous behavior:\n\n::: {#8870c8c1 .cell execution_count=4}\n``` {.python .cell-code}\nt.join(t, \"island\", lname=\"{name}_x\", rname=\"{name}_y\").select(\n s.startswith(\"species\")\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃ species_x ┃ species_y ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ string │ string │\n├───────────┼───────────┤\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ … │ … │\n└───────────┴───────────┘\n\n```\n:::\n:::\n\n\n### `.count()` column names no longer named `count` automatically\n\nColumns created with the `.count()` aggregation are no longer automatically named `count`. This is to follow convention with other aggregations and reduce the likelihood of name collisions.\n\n::: {#07597e82 .cell execution_count=5}\n``` {.python .cell-code}\nt.group_by(\"species\").agg(ibis._.species.count())\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ species ┃ Count(species) ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ string │ int64 │\n├───────────┼────────────────┤\n│ Adelie │ 152 │\n│ Gentoo │ 124 │\n│ Chinstrap │ 68 │\n└───────────┴────────────────┘\n\n```\n:::\n:::\n\n\nTo reproduce the old behavior, you can rename the column to `count` with:\n\n\n```python\nt.group_by(\"species\").agg(count=ibis._.species.count())\n```\n\n## Backends\n\n### Oracle\n\nThe Oracle backend was added! See the [Voltron Data blog](https://voltrondata.com/resources/ibis-6-0-oracle-backend-support) for more details.\n\n```python\nibis.connect(f\"oracle://user:password@host\")\n```\n\n### DuckDB\n\nThere were various DuckDB improvements, but one notable new feature is the ability to attach to a SQLite database through DuckDB. This allows you to run OLAP queries via DuckDB significantly faster on source data from SQLite.\n\nFirst we'll create a DuckDB connection and show it has no tables:\n\n::: {#8ba6caec .cell execution_count=6}\n``` {.python .cell-code}\nduckdb_con = ibis.connect(\"duckdb://\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\n[]\n```\n:::\n:::\n\n\nThen create a SQLite database with a table:\n\n::: {#7a85bcc6 .cell execution_count=7}\n``` {.python .cell-code}\nsqlite_con = ibis.connect(\"sqlite://penguins.db\")\nsqlite_con.create_table(\"penguins\", t.to_pandas(), overwrite=True)\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ float64 │ float64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181.0 │ 3750.0 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186.0 │ 3800.0 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195.0 │ 3250.0 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nAnd attach it:\n\n::: {#e75c55eb .cell execution_count=8}\n``` {.python .cell-code}\nduckdb_con.attach_sqlite(\"./penguins.db\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\n['penguins']\n```\n:::\n:::\n\n\n#### MotherDuck support!\n\nMotherDuck launched recently and is now supported in Ibis!\n\nSimply connect with the DuckDB backend using `md:` or `motherduck:` as the database.\n\n\n```python\nibis.connect(\"duckdb://md:\")\n```\n\n### Polars\n\nThe Polars backend received many improvements from community members [@alexander-beedie](https://github.com/alexander-beedie) and [@mesejo](https://github.com/mesejo), with plenty of operations now supported.\n\nSome additions in this version include:\n\n- `any` and `all` reductions\n- `argmin` and `argmax`\n- `identical_to`\n- `corr`\n- support for `.sql()`\n\nGive it a try by setting your backend to Polars with `ibis.set_backend(\"polars\")`.\n\n## Functionality\n\n### UDFs\n\nUser-defined functions (UDFs) have been revamped with a new syntax and new backends added. To get started, import the decorator:\n\n::: {#c85c3821 .cell execution_count=9}\n``` {.python .cell-code}\nfrom ibis import udf\n```\n:::\n\n\nDefine a UDF:\n\n::: {#1cf149ea .cell execution_count=10}\n``` {.python .cell-code}\n@udf.scalar.python\ndef num_vowels(s: str, include_y: bool = False) -> int:\n return sum(map(s.lower().count, \"aeiou\" + (\"y\" * include_y)))\n```\n:::\n\n\nAnd call it:\n\n::: {#158fdd6a .cell execution_count=11}\n``` {.python .cell-code}\nnum_vowels(t[:1].species.execute()[0])\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
4
\n```\n:::\n\n:::\n:::\n\n\n::: {#aab3553e .cell execution_count=12}\n``` {.python .cell-code}\nt.group_by(num_vowels=num_vowels(t.species)).agg(\n num_vowels_island_count=t.island.count()\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ num_vowels ┃ num_vowels_island_count ┃\n┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ int64 │ int64 │\n├────────────┼─────────────────────────┤\n│ 4 │ 152 │\n│ 3 │ 124 │\n│ 2 │ 68 │\n└────────────┴─────────────────────────┘\n\n```\n:::\n:::\n\n\n::: {#9307cb71 .cell execution_count=13}\n``` {.python .cell-code}\nt.filter(num_vowels(t.species) < 4)\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Gentoo │ Biscoe │ 46.1 │ 13.2 │ 211 │ 4500 │ female │ 2007 │\n│ Gentoo │ Biscoe │ 50.0 │ 16.3 │ 230 │ 5700 │ male │ 2007 │\n│ Gentoo │ Biscoe │ 48.7 │ 14.1 │ 210 │ 4450 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### `to_torch` API\n\nA new `to_torch` output method was added. Combined with UDFs, this brings powerful ML capabilities into Ibis. See a complete example in the [Ibis + DuckDB + PyTorch blog](../torch).\n\n::: {#33e48456 .cell execution_count=14}\n``` {.python .cell-code}\nimport torch\n\ntorch.set_printoptions(threshold=10)\n```\n:::\n\n\n::: {#867cc0d3 .cell execution_count=15}\n``` {.python .cell-code}\nt.select(s.numeric()).to_torch()\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```\n{'bill_length_mm': tensor([39.1000, 39.5000, 40.3000, ..., 49.6000, 50.8000, 50.2000],\n dtype=torch.float64),\n 'bill_depth_mm': tensor([18.7000, 17.4000, 18.0000, ..., 18.2000, 19.0000, 18.7000],\n dtype=torch.float64),\n 'flipper_length_mm': tensor([181, 186, 195, ..., 193, 210, 198]),\n 'body_mass_g': tensor([3750, 3800, 3250, ..., 3775, 4100, 3775]),\n 'year': tensor([2007, 2007, 2007, ..., 2009, 2009, 2009])}\n```\n:::\n:::\n\n\n### Array zip support\n\nA new zip operation was added on array data types, allowing you to zip together multiple arrays.\n\n::: {#f0880cfc .cell execution_count=16}\n``` {.python .cell-code}\narrays = ibis.memtable(\n {\"numbers\": [[3, 2], [], None], \"strings\": [[\"a\", \"c\"], None, [\"e\"]]}\n)\narrays\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ numbers ┃ strings ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ array<int64> │ array<string> │\n├──────────────┼───────────────┤\n│ [3, 2] │ ['a', 'c'] │\n│ [] │ NULL │\n│ NULL │ ['e'] │\n└──────────────┴───────────────┘\n\n```\n:::\n:::\n\n\n::: {#cac84c5c .cell execution_count=17}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings)\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip() ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ array<struct<f1: int64, f2: string>> │\n├──────────────────────────────────────┤\n│ [{...}, {...}] │\n│ [] │\n│ [{...}] │\n└──────────────────────────────────────┘\n\n```\n:::\n:::\n\n\n::: {#4e961ab3 .cell execution_count=18}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings).unnest()\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip() ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ struct<f1: int64, f2: string> │\n├───────────────────────────────┤\n│ {'f1': 3, 'f2': 'a'} │\n│ {'f1': 2, 'f2': 'c'} │\n│ {'f1': None, 'f2': 'e'} │\n└───────────────────────────────┘\n\n```\n:::\n:::\n\n\n### Try cast support\n\nA new `try_cast()` operation was added that allows you to cast a column to a type, but return null if the cast fails.\n\n::: {#d8e748e5 .cell execution_count=19}\n``` {.python .cell-code}\nibis.literal(\"a\").try_cast(\"int\")\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
None
\n```\n:::\n\n:::\n:::\n\n\n```python\nibis.literal(0).try_cast(\"float\")\n```\n\n### `__dataframe__` support\n\nIbis now supports the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), allowing Ibis expressions to be used in any framework that supports it. Adoption of the protocol is still in its early stages, but we expect this to enable Ibis to be used in many new places going forward.\n\n::: {#418f5bdb .cell execution_count=20}\n``` {.python .cell-code}\nt.__dataframe__()\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓\n┃ species ┃ count ┃ islands ┃\n┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩\n│ string │ int64 │ float64 │\n├───────────┼───────┼─────────┤\n│ Adelie │ 152 │ 3.0 │\n│ Gentoo │ 124 │ 1.0 │\n│ Chinstrap │ 68 │ 1.0 │\n└───────────┴───────┴─────────┘\n\n```\n:::\n:::\n\n\n### Delta Lake read/write support for some backends\n\n[Delta Lake tables](https://delta-io) are supported through the [`deltalake` package](https://github.com/delta-io/delta-rs) with `read_delta()` implemented for DuckDB, Polars, and DataFusion.\n\n::: {#202f5c55 .cell execution_count=22}\n``` {.python .cell-code}\nt.to_delta(\"penguins.delta\", mode=\"overwrite\")\n```\n:::\n\n\n::: {#4e3e2b24 .cell execution_count=23}\n``` {.python .cell-code}\nt = ibis.read_delta(\"penguins.delta\")\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### Selectors\n\nSome minor selectors improvements were added including the ability to use abstract type names and lists of strings.\n\n::: {#4b1a65d9 .cell execution_count=24}\n``` {.python .cell-code}\nt.select(s.of_type(\"string\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┓\n┃ species ┃ island ┃ sex ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━┩\n│ string │ string │ string │\n├─────────┼───────────┼────────┤\n│ Adelie │ Torgersen │ male │\n│ Adelie │ Torgersen │ female │\n│ Adelie │ Torgersen │ female │\n│ … │ … │ … │\n└─────────┴───────────┴────────┘\n\n```\n:::\n:::\n\n\n::: {#7a7f62d3 .cell execution_count=25}\n``` {.python .cell-code}\nt.agg(s.across([\"species\", \"island\"], ibis._.count()))\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┓\n┃ species ┃ island ┃\n┡━━━━━━━━━╇━━━━━━━━┩\n│ int64 │ int64 │\n├─────────┼────────┤\n│ 344 │ 344 │\n└─────────┴────────┘\n\n```\n:::\n:::\n\n\n## Refactors\n\nSeveral internal refactors that shouldn't affect normal usage were made. See [the release notes](../../release_notes.md) for more details.\n\n## Wrapping up\n\nIbis v6.0.0 brings exciting new features that enable future support for ML and streaming workloads.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting started](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an issue](https://github.com/ibis-project/issues/new/choose) or [discussion](https://github.com/ibis-project/discussions/new/choose) and let us know!\n\n", + "engine": "jupyter", + "markdown": "---\ntitle: Ibis v6.0.0\nauthor: \"Ibis team\"\ndate: \"2023-07-03\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.0.0 adds the Oracle backend, revamped UDF support, and many new features. This release also includes a number of refactors, bug fixes, and performance improvements. You can view the full changelog in [the release notes](../../../release_notes.qmd).\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the getting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6,<7'`. First, we'll setup Ibis and fetch some sample data to use.\n\n::: {#4e54cc04 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#ac13f836 .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n## Breaking changes\n\n### Join duplicate column names\n\nPreviously when joining tables with duplicate column names, `_x` and `_y` suffixes would be appended by default to the left and right tables respectively. You could override this with the `suffix` argument, which is now removed in favor of `lname` and `rname` arguments. The default is changed to no suffix for the left table and `_right` for the right table.\n\n::: {#7e19ee04 .cell execution_count=3}\n``` {.python .cell-code}\nt.join(t, \"island\").select(s.startswith(\"species\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ species ┃ species_right ┃\n┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ string │ string │\n├─────────┼───────────────┤\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ … │ … │\n└─────────┴───────────────┘\n\n```\n:::\n:::\n\n\nTo replicate the previous behavior:\n\n::: {#98f400d2 .cell execution_count=4}\n``` {.python .cell-code}\nt.join(t, \"island\", lname=\"{name}_x\", rname=\"{name}_y\").select(\n s.startswith(\"species\")\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃ species_x ┃ species_y ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ string │ string │\n├───────────┼───────────┤\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ Adelie │ Adelie │\n│ … │ … │\n└───────────┴───────────┘\n\n```\n:::\n:::\n\n\n### `.count()` column names no longer named `count` automatically\n\nColumns created with the `.count()` aggregation are no longer automatically named `count`. This is to follow convention with other aggregations and reduce the likelihood of name collisions.\n\n::: {#f67e1084 .cell execution_count=5}\n``` {.python .cell-code}\nt.group_by(\"species\").agg(ibis._.species.count())\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ species ┃ Count(species) ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ string │ int64 │\n├───────────┼────────────────┤\n│ Adelie │ 152 │\n│ Chinstrap │ 68 │\n│ Gentoo │ 124 │\n└───────────┴────────────────┘\n\n```\n:::\n:::\n\n\nTo reproduce the old behavior, you can rename the column to `count` with:\n\n\n```python\nt.group_by(\"species\").agg(count=ibis._.species.count())\n```\n\n## Backends\n\n### Oracle\n\nThe Oracle backend was added! See the [Voltron Data blog](https://voltrondata.com/resources/ibis-6-0-oracle-backend-support) for more details.\n\n```python\nibis.connect(f\"oracle://user:password@host\")\n```\n\n### DuckDB\n\nThere were various DuckDB improvements, but one notable new feature is the ability to attach to a SQLite database through DuckDB. This allows you to run OLAP queries via DuckDB significantly faster on source data from SQLite.\n\nFirst we'll create a DuckDB connection and show it has no tables:\n\n::: {#b5da11e4 .cell execution_count=6}\n``` {.python .cell-code}\nduckdb_con = ibis.connect(\"duckdb://\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\n[]\n```\n:::\n:::\n\n\nThen create a SQLite database with a table:\n\n::: {#e6967133 .cell execution_count=7}\n``` {.python .cell-code}\nsqlite_con = ibis.connect(\"sqlite://penguins.db\")\nsqlite_con.create_table(\"penguins\", t.to_pandas(), overwrite=True)\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ float64 │ float64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181.0 │ 3750.0 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186.0 │ 3800.0 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195.0 │ 3250.0 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nAnd attach it:\n\n::: {#52786da4 .cell execution_count=8}\n``` {.python .cell-code}\nduckdb_con.attach_sqlite(\"./penguins.db\")\nduckdb_con.list_tables()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\n['penguins']\n```\n:::\n:::\n\n\n#### MotherDuck support!\n\nMotherDuck launched recently and is now supported in Ibis!\n\nSimply connect with the DuckDB backend using `md:` or `motherduck:` as the database.\n\n\n```python\nibis.connect(\"duckdb://md:\")\n```\n\n### Polars\n\nThe Polars backend received many improvements from community members [@alexander-beedie](https://github.com/alexander-beedie) and [@mesejo](https://github.com/mesejo), with plenty of operations now supported.\n\nSome additions in this version include:\n\n- `any` and `all` reductions\n- `argmin` and `argmax`\n- `identical_to`\n- `corr`\n- support for `.sql()`\n\nGive it a try by setting your backend to Polars with `ibis.set_backend(\"polars\")`.\n\n## Functionality\n\n### UDFs\n\nUser-defined functions (UDFs) have been revamped with a new syntax and new backends added. To get started, import the decorator:\n\n::: {#cc7dcdf3 .cell execution_count=9}\n``` {.python .cell-code}\nfrom ibis import udf\n```\n:::\n\n\nDefine a UDF:\n\n::: {#ff6c82af .cell execution_count=10}\n``` {.python .cell-code}\n@udf.scalar.python\ndef num_vowels(s: str, include_y: bool = False) -> int:\n return sum(map(s.lower().count, \"aeiou\" + (\"y\" * include_y)))\n```\n:::\n\n\nAnd call it:\n\n::: {#399c788e .cell execution_count=11}\n``` {.python .cell-code}\nnum_vowels(t[:1].species.execute()[0])\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
4
\n```\n:::\n\n:::\n:::\n\n\n::: {#419610eb .cell execution_count=12}\n``` {.python .cell-code}\nt.group_by(num_vowels=num_vowels(t.species)).agg(\n num_vowels_island_count=t.island.count()\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ num_vowels ┃ num_vowels_island_count ┃\n┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ int64 │ int64 │\n├────────────┼─────────────────────────┤\n│ 2 │ 68 │\n│ 4 │ 152 │\n│ 3 │ 124 │\n└────────────┴─────────────────────────┘\n\n```\n:::\n:::\n\n\n::: {#a5d5fcae .cell execution_count=13}\n``` {.python .cell-code}\nt.filter(num_vowels(t.species) < 4)\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Gentoo │ Biscoe │ 46.1 │ 13.2 │ 211 │ 4500 │ female │ 2007 │\n│ Gentoo │ Biscoe │ 50.0 │ 16.3 │ 230 │ 5700 │ male │ 2007 │\n│ Gentoo │ Biscoe │ 48.7 │ 14.1 │ 210 │ 4450 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### `to_torch` API\n\nA new `to_torch` output method was added. Combined with UDFs, this brings powerful ML capabilities into Ibis. See a complete example in the [Ibis + DuckDB + PyTorch blog](../torch).\n\n::: {#39eca15c .cell execution_count=14}\n``` {.python .cell-code}\nimport torch\n\ntorch.set_printoptions(threshold=10)\n```\n:::\n\n\n::: {#21955f50 .cell execution_count=15}\n``` {.python .cell-code}\nt.select(s.numeric()).to_torch()\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```\n{'bill_length_mm': tensor([39.1000, 39.5000, 40.3000, ..., 49.6000, 50.8000, 50.2000],\n dtype=torch.float64),\n 'bill_depth_mm': tensor([18.7000, 17.4000, 18.0000, ..., 18.2000, 19.0000, 18.7000],\n dtype=torch.float64),\n 'flipper_length_mm': tensor([181, 186, 195, ..., 193, 210, 198]),\n 'body_mass_g': tensor([3750, 3800, 3250, ..., 3775, 4100, 3775]),\n 'year': tensor([2007, 2007, 2007, ..., 2009, 2009, 2009])}\n```\n:::\n:::\n\n\n### Array zip support\n\nA new zip operation was added on array data types, allowing you to zip together multiple arrays.\n\n::: {#9598a79a .cell execution_count=16}\n``` {.python .cell-code}\narrays = ibis.memtable(\n {\"numbers\": [[3, 2], [], None], \"strings\": [[\"a\", \"c\"], None, [\"e\"]]}\n)\narrays\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ numbers ┃ strings ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ array<int64> │ array<string> │\n├──────────────┼───────────────┤\n│ [3, 2] │ ['a', 'c'] │\n│ [] │ NULL │\n│ NULL │ ['e'] │\n└──────────────┴───────────────┘\n\n```\n:::\n:::\n\n\n::: {#229cffc2 .cell execution_count=17}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings)\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip() ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ array<struct<f1: int64, f2: string>> │\n├──────────────────────────────────────┤\n│ [{...}, {...}] │\n│ NULL │\n│ NULL │\n└──────────────────────────────────────┘\n\n```\n:::\n:::\n\n\n::: {#16919245 .cell execution_count=18}\n``` {.python .cell-code}\narrays.numbers.zip(arrays.strings).unnest()\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ ArrayZip() ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ struct<f1: int64, f2: string> │\n├───────────────────────────────┤\n│ {'f1': 3, 'f2': 'a'} │\n│ {'f1': 2, 'f2': 'c'} │\n└───────────────────────────────┘\n\n```\n:::\n:::\n\n\n### Try cast support\n\nA new `try_cast()` operation was added that allows you to cast a column to a type, but return null if the cast fails.\n\n::: {#6c7989a4 .cell execution_count=19}\n``` {.python .cell-code}\nibis.literal(\"a\").try_cast(\"int\")\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
None
\n```\n:::\n\n:::\n:::\n\n\n```python\nibis.literal(0).try_cast(\"float\")\n```\n\n### `__dataframe__` support\n\nIbis now supports the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), allowing Ibis expressions to be used in any framework that supports it. Adoption of the protocol is still in its early stages, but we expect this to enable Ibis to be used in many new places going forward.\n\n::: {#0b8c5822 .cell execution_count=20}\n``` {.python .cell-code}\nt.__dataframe__()\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓\n┃ species ┃ count ┃ islands ┃\n┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩\n│ string │ int64 │ float64 │\n├───────────┼───────┼─────────┤\n│ Chinstrap │ 68 │ 1.0 │\n│ Adelie │ 152 │ 3.0 │\n│ Gentoo │ 124 │ 1.0 │\n└───────────┴───────┴─────────┘\n\n```\n:::\n:::\n\n\n### Delta Lake read/write support for some backends\n\n[Delta Lake tables](https://delta-io) are supported through the [`deltalake` package](https://github.com/delta-io/delta-rs) with `read_delta()` implemented for DuckDB, Polars, and DataFusion.\n\n::: {#92c9acb4 .cell execution_count=22}\n``` {.python .cell-code}\nt.to_delta(\"penguins.delta\", mode=\"overwrite\")\n```\n:::\n\n\n::: {#3fae3024 .cell execution_count=23}\n``` {.python .cell-code}\nt = ibis.read_delta(\"penguins.delta\")\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### Selectors\n\nSome minor selectors improvements were added including the ability to use abstract type names and lists of strings.\n\n::: {#8778b857 .cell execution_count=24}\n``` {.python .cell-code}\nt.select(s.of_type(\"string\"))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┓\n┃ species ┃ island ┃ sex ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━┩\n│ string │ string │ string │\n├─────────┼───────────┼────────┤\n│ Adelie │ Torgersen │ male │\n│ Adelie │ Torgersen │ female │\n│ Adelie │ Torgersen │ female │\n│ … │ … │ … │\n└─────────┴───────────┴────────┘\n\n```\n:::\n:::\n\n\n::: {#c1074422 .cell execution_count=25}\n``` {.python .cell-code}\nt.agg(s.across([\"species\", \"island\"], ibis._.count()))\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━┓\n┃ species ┃ island ┃\n┡━━━━━━━━━╇━━━━━━━━┩\n│ int64 │ int64 │\n├─────────┼────────┤\n│ 344 │ 344 │\n└─────────┴────────┘\n\n```\n:::\n:::\n\n\n## Refactors\n\nSeveral internal refactors that shouldn't affect normal usage were made. See [the release notes](../../release_notes.qmd) for more details.\n\n## Wrapping up\n\nIbis v6.0.0 brings exciting new features that enable future support for ML and streaming workloads.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting started](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an issue](https://github.com/ibis-project/issues/new/choose) or [discussion](https://github.com/ibis-project/discussions/new/choose) and let us know!\n\n", "supporting": [ - "index_files/figure-html" + "index_files" ], "filters": [], "includes": { "include-in-header": [ - "\n\n\n" + "\n\n\n" ] } } diff --git a/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json b/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json index b42ab052b335..bb236043dec8 100644 --- a/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json +++ b/docs/_freeze/posts/ibis-version-8.0.0-release/index/execute-results/html.json @@ -1,10 +1,10 @@ { - "hash": "d4946fbf96b8f1af3a7612d86f7e8ba0", + "hash": "75cf791184119f27c7ac27691e49c36e", "result": { "engine": "jupyter", - "markdown": "---\ntitle: \"Ibis 8.0: streaming and more!\"\nauthor: \"Ibis team\"\ndate: \"2024-02-12\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 8.0 marks the first release of stream processing backends in Ibis! This\nenhances [the composable data ecosystem\nvision](../../concepts/composable-ecosystem.qmd) by allowing users to implement\ndata transformation logic in a standard Python dataframe API and execute it\nagainst either batch or streaming systems.\n\nThis release includes [Apache Flink](https://flink.apache.org/), a streaming\nbackend, and [RisingWave](https://risingwave.com), a streaming database backend.\nWe've also added a new batch backend with [Exasol](https://exasol.com), bringing\nthe total number of backends Ibis supports to 20.\n\n[Most geospatial operations are now supported in the DuckDB\nbackend](#geospatial-operations-in-duckdb), making Ibis a great local option for\ngeospatial analytics.\n\n### What is stream processing?\n\nStream processing systems are designed to handle high-throughput, low-latency\ndata processing with time semantics. They are used to process data in real-time\nwith minimum latency and are often used in applications such as fraud detection,\nreal-time analytics, and IoT. Systems using stream processing are increasingly\ncommon in modern data applications.\n\nApache Flink is the most popular open-source stream processing framework, with\nnumerous cloud options. RisingWave is an open-source Postgres-compatible\nstreaming database with a cloud offering that is gaining popularity and\nsimplifies the streaming experience.\n\nIbis now supports both and going forward can add more streaming backends to\nunify the Python user experience across batch and streaming systems.\n\n### Unifying batch and streaming UX in Python\n\nWhether you're using a batch or streaming data platform -- and the lines are\ncontinually blurring between them -- you'll need a frontend to interact with as\na data engineer, analyst, or scientist. If you're using Python, that frontend is\nlikely a dataframe API.\n\nStandards benefit individual users by reducing the cognitive load of learning\nand understanding new data systems. Organizations benefit from this in the form\nof lower onboarding costs, easier collaboration between teams, and better\ninterfaces for data systems.\n\nWe saw in the recent [one billion row challenge post how even CSV reader keyword\narguments can differ greatly between APIs](../1brc/index.qmd#setup-1). This is\ncompounded by tightly coupling a dataframe API to every query engine, whether\nbatch or streaming.\n\nIbis aims to solve this dilemma by providing a standard dataframe API that can\nwork across data systems, whether batch or streaming. This is a long-term vision\nand we're excited to take the first steps toward it in Ibis 8.0 with the launch\nof **two** streaming backends (and one more batch backend).\n\nThis allows a user to leverage DuckDB or Polars or DataFusion locally, then\nscale out batch processing to Snowflake or BigQuery or ClickHouse in the cloud,\nthen switch from batch to stream processing with Apache Flink or RisingWave, all\nwithout changing their dataframe code. As Ibis [adds new\nfeatures](#functionality) and implements them across backends, users can take\nadvantage of these features without needing to learn new APIs.\n\n## Backends\n\nThree new backends were added in this release.\n\n### Apache Flink\n\nIn collaboration with Claypot AI ([recently acquired by Voltron\nData](https://voltrondata.com/resources/voltron-data-acquires-claypot-ai)),\nwe've added the [first streaming backend with Apache\nFlink](../../backends/flink.qmd). You can check out the [blog\npost](../flink-announcement/index.qmd) and\n[tutorial](../../tutorials/open-source-software/apache-flink/0_setup.qmd) to get\nstarted with this new backend.\n\n### RisingWave\n\n[RisingWave](https://risingwave.com) has contributed [second streaming backend\nwith RisingWave](../../backends/risingwave.qmd). This backend is earlier in\ndevelopment, but we're excited to have it in Ibis and it will continue to\nimprove it.\n\n### Exasol\n\n[Exasol](https://exasol.com) has contributed the [Exasol\nbackend](../../backends/exasol.qmd). This is a traditional batch backend and\nbrings another great option for fast batch analytics to Ibis.\n\n## Breaking changes\n\nYou can view the [full changelog](../../release_notes.md) for additional\nbreaking changes. There have been few that we expect to affect most users.\n\n:::{.callout-note}\nThe PM for the team was distracted playing with LLMs and didn't write a v7 blog\npost, so we're covering breaking changes and features from both below.\n:::\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the\ngetting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=8,<9'`.\nFirst, we’ll setup Ibis and fetch some sample data to use.\n\n::: {#9b47e9e4 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#d091eb9a .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### rename\n\nThe largest breaking change in Ibis 7/8 is the deprecation of `relabel` in\nfavor of `rename`, swapping the order of the arguments. This change was made to\nbe consistent with the rest of the Ibis API. We apologize for any inconvenience\nthis may cause, but we believe this change will make Ibis a better and more\nconsistent dataframe standard going forward.\n\nIn the past, you would use `relabel` like this:\n\n::: {#72d0f6b3 .cell execution_count=3}\n``` {.python .cell-code}\nt.relabel({\"species\": \"SPECIES\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nNow, you would use `rename` like this:\n\n::: {#bd87d11d .cell execution_count=4}\n``` {.python .cell-code}\nt.rename({\"SPECIES\": \"species\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nor this:\n\n::: {#81d7bb14 .cell execution_count=5}\n``` {.python .cell-code}\nt.rename(SPECIES=\"species\")\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n## Functionality\n\nA lot of new functionality has been added in Ibis 7/8.\n\n### pandas batches\n\nThe `.to_pandas_batches()` method can be used to output batches of pandas\ndataframes:\n\n::: {#926ded0a .cell execution_count=6}\n``` {.python .cell-code}\nbatches = t.to_pandas_batches(chunk_size=200)\nfor df in batches:\n print(df.shape)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n(200, 8)\n(144, 8)\n```\n:::\n:::\n\n\n### range\n\nThe `range()` function can be used to create a monotonic sequence of integers:\n\n::: {#67ec4b7e .cell execution_count=7}\n``` {.python .cell-code}\ns = ibis.range(10)\ns\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=7}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[0, 1, ... +8]\n```\n:::\n\n:::\n:::\n\n\nYou can turn it into a table:\n\n::: {#999d0db3 .cell execution_count=8}\n``` {.python .cell-code}\ns.unnest().name(\"index\").as_table()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n
┏━━━━━━━┓\n┃ index ┃\n┡━━━━━━━┩\n│ int8 │\n├───────┤\n│ 0 │\n│ 1 │\n│ 2 │\n│ … │\n└───────┘\n\n```\n:::\n:::\n\n\nThis can be useful for [creating synthetic\ndata](../1brc/index.qmd#bonus-more-billion-row-data-generation) and other use\ncases.\n\n### relocate\n\nThe `.relocate()` method can be used to move columns to the beginning of a\ntable, which is very useful for interactive data exploration with wide tables:\n\n::: {#b0225c66 .cell execution_count=9}\n``` {.python .cell-code}\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nThen:\n\n::: {#a501242b .cell execution_count=10}\n``` {.python .cell-code}\nt.relocate(\"sex\", \"year\")\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n
┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃ sex ┃ year ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃\n┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ string │ int64 │ string │ string │ float64 │ float64 │ int64 │ int64 │\n├────────┼───────┼─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┤\n│ male │ 2007 │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │\n│ female │ 2007 │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │\n│ female │ 2007 │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└────────┴───────┴─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┘\n\n```\n:::\n:::\n\n\n### sample\n\nThe `.sample()` method can be used to sample rows from a table:\n\n:::{.callout-info}\nNumber of rows returned may vary by invocation.\n:::\n\n::: {#3d554fd1 .cell execution_count=11}\n``` {.python .cell-code}\nt.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
344
\n```\n:::\n\n:::\n:::\n\n\n::: {#bcff524b .cell execution_count=12}\n``` {.python .cell-code}\nt.sample(fraction=0.1).count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n36
\n```\n:::\n\n:::\n:::\n\n\n### negative slicing\n\nMore Pythonic slicing is now supported:\n\n::: {#2feb70c9 .cell execution_count=13}\n``` {.python .cell-code}\nt[:3]\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n::: {#f06293de .cell execution_count=14}\n``` {.python .cell-code}\nt[-3:]\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Chinstrap │ Dream │ 49.6 │ 18.2 │ 193 │ 3775 │ male │ 2009 │\n│ Chinstrap │ Dream │ 50.8 │ 19.0 │ 210 │ 4100 │ male │ 2009 │\n│ Chinstrap │ Dream │ 50.2 │ 18.7 │ 198 │ 3775 │ female │ 2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n::: {#44af84d2 .cell execution_count=15}\n``` {.python .cell-code}\nt[-6:-3]\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Chinstrap │ Dream │ 45.7 │ 17.0 │ 195 │ 3650 │ female │ 2009 │\n│ Chinstrap │ Dream │ 55.8 │ 19.8 │ 207 │ 4000 │ male │ 2009 │\n│ Chinstrap │ Dream │ 43.5 │ 18.1 │ 202 │ 3400 │ female │ 2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### geospatial operations in DuckDB\n\nIbis supports over 50 geospatial operations, with many being recently added to\nDuckDB backend. While backend-specific, this is worth calling out because it\nbrings a great local option for geospatial analytics to Ibis. [Read the first\ngeospatial blog](../ibis-duckdb-geospatial/index.qmd) or [the second geospatial\nblog](../ibis-duckdb-geospatial-dev-guru/index.qmd) to learn more.\n\nA new `zones` example dataset with a geometric datatype has been added for a\nquick demonstration:\n\n::: {#2b0c05ae .cell execution_count=16}\n``` {.python .cell-code}\nz = ibis.examples.zones.fetch()\nz = z.relocate(\"geom\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ geom ┃ OBJECTID ┃ Shape_Leng ┃ Shape_Area ┃ zone ┃ LocationID ┃ borough ┃ x_cent ┃ y_cent ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ geospatial:geometry │ int32 │ float64 │ float64 │ string │ int32 │ string │ float64 │ float64 │\n├──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ <POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...> │ 1 │ 0.116357 │ 0.000782 │ Newark Airport │ 1 │ EWR │ 9.359968e+05 │ 191376.749531 │\n│ <MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...> │ 2 │ 0.433470 │ 0.004866 │ Jamaica Bay │ 2 │ Queens │ 1.031086e+06 │ 164018.754403 │\n│ <POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....> │ 3 │ 0.084341 │ 0.000314 │ Allerton/Pelham Gardens │ 3 │ Bronx │ 1.026453e+06 │ 254265.478659 │\n│ … │ … │ … │ … │ … │ … │ … │ … │ … │\n└──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n\n```\n:::\n:::\n\n\nWe can use geospatial operations on that column:\n\n::: {#01537cf4 .cell execution_count=17}\n``` {.python .cell-code}\nz = z.mutate(\n area=z.geom.area(),\n centroid=z.geom.centroid(),\n).relocate(\"area\", \"centroid\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ area ┃ centroid ┃ geom ┃ OBJECTID ┃ Shape_Leng ┃ Shape_Area ┃ zone ┃ LocationID ┃ borough ┃ x_cent ┃ y_cent ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ float64 │ point │ geospatial:geometry │ int32 │ float64 │ float64 │ string │ int32 │ string │ float64 │ float64 │\n├──────────────┼──────────────────────────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ 7.903953e+07 │ <POINT (935996.821 191376.75)> │ <POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...> │ 1 │ 0.116357 │ 0.000782 │ Newark Airport │ 1 │ EWR │ 9.359968e+05 │ 191376.749531 │\n│ 1.439095e+08 │ <POINT (1031085.719 164018.754)> │ <MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...> │ 2 │ 0.433470 │ 0.004866 │ Jamaica Bay │ 2 │ Queens │ 1.031086e+06 │ 164018.754403 │\n│ 3.168508e+07 │ <POINT (1026452.617 254265.479)> │ <POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....> │ 3 │ 0.084341 │ 0.000314 │ Allerton/Pelham Gardens │ 3 │ Bronx │ 1.026453e+06 │ 254265.478659 │\n│ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │\n└──────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n\n```\n:::\n:::\n\n\n## Wrapping up\n\nIbis 8.0 brings exciting new features and the first streaming backends into Ibis!\nWe hope you're excited as we are about breaking down barriers between batch and\nstreaming systems with a standard Python dataframe API.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting\nstarted](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an\nissue](https://github.com/ibis-project/issues/new/choose) or\n[discussion](https://github.com/ibis-project/discussions/new/choose) and let us\nknow!\n\n", + "markdown": "---\ntitle: \"Ibis 8.0: streaming and more!\"\nauthor: \"Ibis team\"\ndate: \"2024-02-12\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 8.0 marks the first release of stream processing backends in Ibis! This\nenhances [the composable data ecosystem\nvision](../../concepts/composable-ecosystem.qmd) by allowing users to implement\ndata transformation logic in a standard Python dataframe API and execute it\nagainst either batch or streaming systems.\n\nThis release includes [Apache Flink](https://flink.apache.org/), a streaming\nbackend, and [RisingWave](https://risingwave.com), a streaming database backend.\nWe've also added a new batch backend with [Exasol](https://exasol.com), bringing\nthe total number of backends Ibis supports to 20.\n\n[Most geospatial operations are now supported in the DuckDB\nbackend](#geospatial-operations-in-duckdb), making Ibis a great local option for\ngeospatial analytics.\n\n### What is stream processing?\n\nStream processing systems are designed to handle high-throughput, low-latency\ndata processing with time semantics. They are used to process data in real-time\nwith minimum latency and are often used in applications such as fraud detection,\nreal-time analytics, and IoT. Systems using stream processing are increasingly\ncommon in modern data applications.\n\nApache Flink is the most popular open-source stream processing framework, with\nnumerous cloud options. RisingWave is an open-source Postgres-compatible\nstreaming database with a cloud offering that is gaining popularity and\nsimplifies the streaming experience.\n\nIbis now supports both and going forward can add more streaming backends to\nunify the Python user experience across batch and streaming systems.\n\n### Unifying batch and streaming UX in Python\n\nWhether you're using a batch or streaming data platform -- and the lines are\ncontinually blurring between them -- you'll need a frontend to interact with as\na data engineer, analyst, or scientist. If you're using Python, that frontend is\nlikely a dataframe API.\n\nStandards benefit individual users by reducing the cognitive load of learning\nand understanding new data systems. Organizations benefit from this in the form\nof lower onboarding costs, easier collaboration between teams, and better\ninterfaces for data systems.\n\nWe saw in the recent [one billion row challenge post how even CSV reader keyword\narguments can differ greatly between APIs](../1brc/index.qmd#setup-1). This is\ncompounded by tightly coupling a dataframe API to every query engine, whether\nbatch or streaming.\n\nIbis aims to solve this dilemma by providing a standard dataframe API that can\nwork across data systems, whether batch or streaming. This is a long-term vision\nand we're excited to take the first steps toward it in Ibis 8.0 with the launch\nof **two** streaming backends (and one more batch backend).\n\nThis allows a user to leverage DuckDB or Polars or DataFusion locally, then\nscale out batch processing to Snowflake or BigQuery or ClickHouse in the cloud,\nthen switch from batch to stream processing with Apache Flink or RisingWave, all\nwithout changing their dataframe code. As Ibis [adds new\nfeatures](#functionality) and implements them across backends, users can take\nadvantage of these features without needing to learn new APIs.\n\n## Backends\n\nThree new backends were added in this release.\n\n### Apache Flink\n\nIn collaboration with Claypot AI ([recently acquired by Voltron\nData](https://voltrondata.com/resources/voltron-data-acquires-claypot-ai)),\nwe've added the [first streaming backend with Apache\nFlink](../../backends/flink.qmd). You can check out the [blog\npost](../flink-announcement/index.qmd) and\n[tutorial](../../tutorials/open-source-software/apache-flink/0_setup.qmd) to get\nstarted with this new backend.\n\n### RisingWave\n\n[RisingWave](https://risingwave.com) has contributed [second streaming backend\nwith RisingWave](../../backends/risingwave.qmd). This backend is earlier in\ndevelopment, but we're excited to have it in Ibis and it will continue to\nimprove it.\n\n### Exasol\n\n[Exasol](https://exasol.com) has contributed the [Exasol\nbackend](../../backends/exasol.qmd). This is a traditional batch backend and\nbrings another great option for fast batch analytics to Ibis.\n\n## Breaking changes\n\nYou can view the [full changelog](../../release_notes.qmd) for additional\nbreaking changes. There have been few that we expect to affect most users.\n\n:::{.callout-note}\nThe PM for the team was distracted playing with LLMs and didn't write a v7 blog\npost, so we're covering breaking changes and features from both below.\n:::\n\nIf you're new to Ibis, see [how to install](../../../install.qmd) and [the\ngetting started tutorial](../../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=8,<9'`.\nFirst, we’ll setup Ibis and fetch some sample data to use.\n\n::: {#8662595c .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.options.interactive = True\nibis.options.repr.interactive.max_rows = 3\n```\n:::\n\n\nNow, fetch the penguins dataset.\n\n::: {#8466e108 .cell execution_count=2}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### rename\n\nThe largest breaking change in Ibis 7/8 is the deprecation of `relabel` in\nfavor of `rename`, swapping the order of the arguments. This change was made to\nbe consistent with the rest of the Ibis API. We apologize for any inconvenience\nthis may cause, but we believe this change will make Ibis a better and more\nconsistent dataframe standard going forward.\n\nIn the past, you would use `relabel` like this:\n\n::: {#fcb662b6 .cell execution_count=3}\n``` {.python .cell-code}\nt.relabel({\"species\": \"SPECIES\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nNow, you would use `rename` like this:\n\n::: {#69c6ea26 .cell execution_count=4}\n``` {.python .cell-code}\nt.rename({\"SPECIES\": \"species\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nor this:\n\n::: {#89e73ac8 .cell execution_count=5}\n``` {.python .cell-code}\nt.rename(SPECIES=\"species\")\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ SPECIES ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n## Functionality\n\nA lot of new functionality has been added in Ibis 7/8.\n\n### pandas batches\n\nThe `.to_pandas_batches()` method can be used to output batches of pandas\ndataframes:\n\n::: {#033ec3b9 .cell execution_count=6}\n``` {.python .cell-code}\nbatches = t.to_pandas_batches(chunk_size=200)\nfor df in batches:\n print(df.shape)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n(200, 8)\n(144, 8)\n```\n:::\n:::\n\n\n### range\n\nThe `range()` function can be used to create a monotonic sequence of integers:\n\n::: {#0cdd59d9 .cell execution_count=7}\n``` {.python .cell-code}\ns = ibis.range(10)\ns\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=7}\n\n::: {.ansi-escaped-output}\n```{=html}\n
[0, 1, ... +8]\n```\n:::\n\n:::\n:::\n\n\nYou can turn it into a table:\n\n::: {#b8aabc3d .cell execution_count=8}\n``` {.python .cell-code}\ns.unnest().name(\"index\").as_table()\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n
┏━━━━━━━┓\n┃ index ┃\n┡━━━━━━━┩\n│ int8 │\n├───────┤\n│ 0 │\n│ 1 │\n│ 2 │\n│ … │\n└───────┘\n\n```\n:::\n:::\n\n\nThis can be useful for [creating synthetic\ndata](../1brc/index.qmd#bonus-more-billion-row-data-generation) and other use\ncases.\n\n### relocate\n\nThe `.relocate()` method can be used to move columns to the beginning of a\ntable, which is very useful for interactive data exploration with wide tables:\n\n::: {#1fd81977 .cell execution_count=9}\n``` {.python .cell-code}\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\nThen:\n\n::: {#b6d79ebc .cell execution_count=10}\n``` {.python .cell-code}\nt.relocate(\"sex\", \"year\")\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n
┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃ sex ┃ year ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃\n┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ string │ int64 │ string │ string │ float64 │ float64 │ int64 │ int64 │\n├────────┼───────┼─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┤\n│ male │ 2007 │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │\n│ female │ 2007 │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │\n│ female │ 2007 │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │\n│ … │ … │ … │ … │ … │ … │ … │ … │\n└────────┴───────┴─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┘\n\n```\n:::\n:::\n\n\n### sample\n\nThe `.sample()` method can be used to sample rows from a table:\n\n:::{.callout-info}\nNumber of rows returned may vary by invocation.\n:::\n\n::: {#26326136 .cell execution_count=11}\n``` {.python .cell-code}\nt.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=11}\n\n::: {.ansi-escaped-output}\n```{=html}\n
344
\n```\n:::\n\n:::\n:::\n\n\n::: {#8fc06700 .cell execution_count=12}\n``` {.python .cell-code}\nt.sample(fraction=0.1).count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n28
\n```\n:::\n\n:::\n:::\n\n\n### negative slicing\n\nMore Pythonic slicing is now supported:\n\n::: {#5ce21814 .cell execution_count=13}\n``` {.python .cell-code}\nt[:3]\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n::: {#e9394dd6 .cell execution_count=14}\n``` {.python .cell-code}\nt[-3:]\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Chinstrap │ Dream │ 49.6 │ 18.2 │ 193 │ 3775 │ male │ 2009 │\n│ Chinstrap │ Dream │ 50.8 │ 19.0 │ 210 │ 4100 │ male │ 2009 │\n│ Chinstrap │ Dream │ 50.2 │ 18.7 │ 198 │ 3775 │ female │ 2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n::: {#9675f198 .cell execution_count=15}\n``` {.python .cell-code}\nt[-6:-3]\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │\n├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Chinstrap │ Dream │ 45.7 │ 17.0 │ 195 │ 3650 │ female │ 2009 │\n│ Chinstrap │ Dream │ 55.8 │ 19.8 │ 207 │ 4000 │ male │ 2009 │\n│ Chinstrap │ Dream │ 43.5 │ 18.1 │ 202 │ 3400 │ female │ 2009 │\n└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘\n\n```\n:::\n:::\n\n\n### geospatial operations in DuckDB\n\nIbis supports over 50 geospatial operations, with many being recently added to\nDuckDB backend. While backend-specific, this is worth calling out because it\nbrings a great local option for geospatial analytics to Ibis. [Read the first\ngeospatial blog](../ibis-duckdb-geospatial/index.qmd) or [the second geospatial\nblog](../ibis-duckdb-geospatial-dev-guru/index.qmd) to learn more.\n\nA new `zones` example dataset with a geometric datatype has been added for a\nquick demonstration:\n\n::: {#333aa45d .cell execution_count=16}\n``` {.python .cell-code}\nz = ibis.examples.zones.fetch()\nz = z.relocate(\"geom\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ geom ┃ OBJECTID ┃ Shape_Leng ┃ Shape_Area ┃ zone ┃ LocationID ┃ borough ┃ x_cent ┃ y_cent ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ geospatial:geometry │ int32 │ float64 │ float64 │ string │ int32 │ string │ float64 │ float64 │\n├──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ <POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...> │ 1 │ 0.116357 │ 0.000782 │ Newark Airport │ 1 │ EWR │ 9.359968e+05 │ 191376.749531 │\n│ <MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...> │ 2 │ 0.433470 │ 0.004866 │ Jamaica Bay │ 2 │ Queens │ 1.031086e+06 │ 164018.754403 │\n│ <POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....> │ 3 │ 0.084341 │ 0.000314 │ Allerton/Pelham Gardens │ 3 │ Bronx │ 1.026453e+06 │ 254265.478659 │\n│ … │ … │ … │ … │ … │ … │ … │ … │ … │\n└──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n\n```\n:::\n:::\n\n\nWe can use geospatial operations on that column:\n\n::: {#9f15bd3f .cell execution_count=17}\n``` {.python .cell-code}\nz = z.mutate(\n area=z.geom.area(),\n centroid=z.geom.centroid(),\n).relocate(\"area\", \"centroid\")\nz\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃ area ┃ centroid ┃ geom ┃ OBJECTID ┃ Shape_Leng ┃ Shape_Area ┃ zone ┃ LocationID ┃ borough ┃ x_cent ┃ y_cent ┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ float64 │ point │ geospatial:geometry │ int32 │ float64 │ float64 │ string │ int32 │ string │ float64 │ float64 │\n├──────────────┼──────────────────────────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┼─────────────────────────┼────────────┼─────────┼──────────────┼───────────────┤\n│ 7.903953e+07 │ <POINT (935996.821 191376.75)> │ <POLYGON ((933100.918 192536.086, 933091.011 192572.175, 933088.585 192604.9...> │ 1 │ 0.116357 │ 0.000782 │ Newark Airport │ 1 │ EWR │ 9.359968e+05 │ 191376.749531 │\n│ 1.439095e+08 │ <POINT (1031085.719 164018.754)> │ <MULTIPOLYGON (((1033269.244 172126.008, 1033439.643 170883.946, 1033473.265...> │ 2 │ 0.433470 │ 0.004866 │ Jamaica Bay │ 2 │ Queens │ 1.031086e+06 │ 164018.754403 │\n│ 3.168508e+07 │ <POINT (1026452.617 254265.479)> │ <POLYGON ((1026308.77 256767.698, 1026495.593 256638.616, 1026567.23 256589....> │ 3 │ 0.084341 │ 0.000314 │ Allerton/Pelham Gardens │ 3 │ Bronx │ 1.026453e+06 │ 254265.478659 │\n│ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │\n└──────────────┴──────────────────────────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┴─────────────────────────┴────────────┴─────────┴──────────────┴───────────────┘\n\n```\n:::\n:::\n\n\n## Wrapping up\n\nIbis 8.0 brings exciting new features and the first streaming backends into Ibis!\nWe hope you're excited as we are about breaking down barriers between batch and\nstreaming systems with a standard Python dataframe API.\n\nAs always, try Ibis by [installing](../../install.qmd) and [getting\nstarted](../../tutorials/getting_started.qmd).\n\nIf you run into any issues or find support is lacking for your backend, [open an\nissue](https://github.com/ibis-project/issues/new/choose) or\n[discussion](https://github.com/ibis-project/discussions/new/choose) and let us\nknow!\n\n", "supporting": [ - "index_files" + "index_files/figure-html" ], "filters": [], "includes": { diff --git a/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json b/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json index 5c57f4f143a0..7e2ae3fe10ca 100644 --- a/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json +++ b/docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json @@ -1,14 +1,15 @@ { - "hash": "cd79329d01e545ae82277e61a0330c34", + "hash": "87b787b52bf610df4ec240803f3db6fa", "result": { - "markdown": "---\ntitle: Ibis v6.1.0\nauthor: \"Ibis team\"\ndate: \"2023-08-02\"\ncategories:\n - release\n - blog\n---\n\n## Overview\n\nIbis 6.1.0 is a minor release that includes new features, backend improvements, bug fixes, documentation improvements, and refactors. We are excited to see further adoption of the dataframe interchange protocol enabling visualization and other libraries to be used more easily with Ibis.\n\nYou can view the full changelog in [the release notes](../../release_notes.md).\n\nIf you're new to Ibis, see [how to install](../../install.qmd) and [the getting started tutorial](../../tutorials/getting_started.qmd).\n\nTo follow along with this blog, ensure you're on `'ibis-framework>=6.1,<7'`. First, we\\'ll setup Ibis and fetch some\nsample data to use.\n\n::: {#24a6b50e .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\nimport ibis.selectors as s\n\nibis.__version__\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\n'6.1.0'\n```\n:::\n:::\n\n\n::: {#4e2c9e69 .cell execution_count=2}\n``` {.python .cell-code}\n# interactive mode for demo purposes\nibis.options.interactive = True\n```\n:::\n\n\n::: {#866d8631 .cell execution_count=3}\n``` {.python .cell-code}\nt = ibis.examples.penguins.fetch()\nt = t.mutate(year=t[\"year\"].cast(\"str\"))\nt.limit(3)\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓\n┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩\n│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ string │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼────────┤\n│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │\n│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │\n│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴────────┘\n\n```\n:::\n:::\n\n\n## Ecosystem integrations\n\nWith the introduction of `__dataframe__` support in v6.0.0 and efficiency improvements in this release, Ibis now works with [Altair](https://altair-viz.github.io/index.html), [Plotly](https://plotly.com/python/), [plotnine](https://plotnine.readthedocs.io/en/stable/), and any other visualization library that implements the protocol. This enables passing Ibis tables directly to visualization libraries without a `.to_pandas()` or `to_pyarrow()` call for any of the 15+ backends supported, with data efficiently transferred through Apache Arrow.\n\n::: {#cc0a2a10 .cell execution_count=4}\n``` {.python .cell-code code-fold=\"true\"}\nwidth = 640 # <1>\nheight = 480 # <1>\n```\n:::\n\n\n1. Set the width and height of the plots.\n\n::: {#71cadaa6 .cell execution_count=5}\n``` {.python .cell-code}\ngrouped = ( # <1>\n t.group_by(\"species\")\n .aggregate(count=ibis._.count())\n .order_by(ibis.desc(\"count\"))\n) # <1>\ngrouped # <2>\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━┓\n┃ species ┃ count ┃\n┡━━━━━━━━━━━╇━━━━━━━┩\n│ string │ int64 │\n├───────────┼───────┤\n│ Adelie │ 152 │\n│ Gentoo │ 124 │\n│ Chinstrap │ 68 │\n└───────────┴───────┘\n\n```\n:::\n:::\n\n\n1. Setup data to plot.\n2. Display the table.\n\n::: {.panel-tabset}\n\n## Altair\n\n```{.bash}\npip install altair\n```\n\n::: {#d7bb57e9 .cell execution_count=6}\n``` {.python .cell-code}\nimport altair as alt # <1>\n\nchart = (\n alt.Chart(grouped)\n .mark_bar()\n .encode(\n x=\"species\",\n y=\"count\",\n )\n .properties(width=width, height=height)\n)\nchart\n```\n\n::: {.cell-output .cell-output-error}\n```\nExpressionError: Use .count() instead\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\nalt.Chart(...)\n```\n:::\n:::\n\n\n## Plotly\n\n```{.bash}\npip install plotly\n```\n\n::: {#bedde087 .cell execution_count=7}\n``` {.python .cell-code}\nimport plotly.express as px # <1>\n\npx.bar(\n grouped.to_pandas(),\n x=\"species\",\n y=\"count\",\n width=width,\n height=height,\n)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n