From 989ebcc2688ff37d31c14239401e575fc386d9cc Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 15:20:19 -0800 Subject: [PATCH 01/12] wip --- Cargo.lock | 54 +++++++++++++++ Cargo.toml | 1 + daft/dataframe/dataframe.py | 1 - src/daft-table/Cargo.toml | 2 + src/daft-table/src/lib.rs | 130 ++++++++++++++++++++++++++---------- 5 files changed, 153 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a013e5abc4..34f070eba2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -888,6 +888,18 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "comfy-table" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +dependencies = [ + "crossterm", + "strum", + "strum_macros", + "unicode-width", +] + [[package]] name = "common-error" version = "0.1.10" @@ -998,6 +1010,28 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.4.0", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crypto-bigint" version = "0.3.2" @@ -1326,6 +1360,7 @@ name = "daft-table" version = "0.1.10" dependencies = [ "arrow2", + "comfy-table", "common-error", "daft-core", "daft-dsl", @@ -3520,6 +3555,25 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.38", +] + [[package]] name = "subtle" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 89d213a2d8..3119955a8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,7 @@ tokio = {version = "1.32.0", features = ["net", "time", "bytes", "process", "sig tokio-stream = {version = "0.1.14", features = ["fs"]} tokio-util = "0.7.8" url = "2.4.0" +comfy-table = "7.1.0" [workspace.dependencies.arrow2] # branch = "jay/fix-parquet-timezone-parsing" diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index 6661b2a1a6..fda4fff17c 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -1078,7 +1078,6 @@ def show(self, n: int = 8) -> None: except ImportError: print(dataframe_display) return None - def __len__(self): """Returns the count of rows when dataframe is materialized. If dataframe is not materialized yet, raises a runtime error. diff --git a/src/daft-table/Cargo.toml b/src/daft-table/Cargo.toml index 30dfe1bf3b..c1760618c6 100644 --- a/src/daft-table/Cargo.toml +++ b/src/daft-table/Cargo.toml @@ -6,6 +6,8 @@ daft-dsl = {path = "../daft-dsl", default-features = false} html-escape = {workspace = true} num-traits = {workspace = true} prettytable-rs = {workspace = true} +comfy-table = {workspace = true} + pyo3 = {workspace = true, optional = true} pyo3-log = {workspace = true} rand = {workspace = true} diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index eb7feaa7f6..928079418b 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -468,32 +468,69 @@ impl Table { res } - pub fn to_prettytable(&self, max_col_width: Option) -> prettytable::Table { - let mut table = prettytable::Table::new(); - let header = self - .schema - .fields - .iter() - .map(|(name, field)| { - prettytable::Cell::new(format!("{}\n{}", name, field.dtype).as_str()) - .with_style(prettytable::Attr::Bold) - }) - .collect(); - table.add_row(header); + pub fn to_comfytable(&self, max_col_width: Option) -> comfy_table::Table { + let mut table = comfy_table::Table::new(); + + table.load_preset(comfy_table::presets::UTF8_FULL) + .apply_modifier(comfy_table::modifiers::UTF8_ROUND_CORNERS) + .set_content_arrangement(comfy_table::ContentArrangement::Dynamic); + // .set_width(100); + + const DOTS: &str = "…"; + + const TOTAL_ROWS: usize = 10; let head_rows; let tail_rows; - if self.len() > 10 { - head_rows = 5; - tail_rows = 5; + if self.len() > TOTAL_ROWS { + head_rows = TOTAL_ROWS / 2; + tail_rows = TOTAL_ROWS / 2; } else { head_rows = self.len(); tail_rows = 0; } + const MAX_COLS: usize = 8; + + let head_cols; + let tail_cols; + let total_cols; + if self.num_columns() > MAX_COLS { + head_cols = (MAX_COLS + 1) / 2; + tail_cols = MAX_COLS / 2; + total_cols = head_cols + tail_cols + 1; + } else { + head_cols = self.num_columns(); + tail_cols = 0; + total_cols = head_cols; + } + let mut header = self + .schema + .fields + .iter() + .take(head_cols) + .map(|(name, field)| { + comfy_table::Cell::new(format!("{}\n┈\n{}", name, field.dtype).as_str()) + .add_attribute(comfy_table::Attribute::Bold) + }).collect::>(); + if tail_cols > 0 { + header.push(comfy_table::Cell::new(DOTS)); + header.extend( + self + .schema + .fields + .iter() + .skip(self.num_columns() - tail_cols) + .map(|(name, field)| { + comfy_table::Cell::new(format!("{}\n┈\n{}", name, field.dtype).as_str()) + .add_attribute(comfy_table::Attribute::Bold) + }) + ) + } + table.add_row(header); for i in 0..head_rows { - let row = self + let all_cols = self .columns .iter() .map(|s| { @@ -501,40 +538,65 @@ impl Table { if let Some(max_col_width) = max_col_width { if str_val.len() > max_col_width { str_val = format!( - "{}...", - &str_val - .char_indices() - .take(max_col_width - 3) - .map(|(_, c)| c) - .collect::() + "{}{DOTS}", + &str_val[..max_col_width - 3] ); } } str_val - }) - .collect::>(); - table.add_row(row.into()); + }).collect::>(); + + if tail_cols > 0 { + let mut final_row = all_cols.iter().take(head_cols).cloned().collect::>(); + final_row.push(DOTS.into()); + final_row.extend( + all_cols + .iter() + .skip(self.num_columns() - tail_cols) + .cloned() + ); + table.add_row(final_row); + + } else { + table.add_row(all_cols); + } + + } if tail_rows != 0 { - let row: prettytable::Row = (0..self.num_columns()).map(|_| "...").collect(); - table.add_row(row); + table.add_row((0..total_cols).map(|_| DOTS).collect::>()); } for i in (self.len() - tail_rows)..(self.len()) { - let row = self + let all_cols = self .columns .iter() .map(|s| { let mut str_val = s.str_value(i).unwrap(); if let Some(max_col_width) = max_col_width { if str_val.len() > max_col_width { - str_val = format!("{}...", &str_val[..max_col_width - 3]); + str_val = format!( + "{}{DOTS}", + &str_val[..max_col_width - 3] + ); } } str_val - }) - .collect::>(); - table.add_row(row.into()); + }).collect::>(); + + if tail_cols > 0 { + let mut final_row = all_cols.iter().take(head_cols).cloned().collect::>(); + final_row.push(DOTS.into()); + final_row.extend( + all_cols + .iter() + .skip(self.num_columns() - tail_cols) + .cloned() + ); + table.add_row(final_row); + } else { + table.add_row(all_cols); + } } table @@ -544,7 +606,7 @@ impl Table { impl Display for Table { // `f` is a buffer, and this method must write the formatted string into it fn fmt(&self, f: &mut Formatter) -> Result { - let table = self.to_prettytable(Some(32)); + let table = self.to_comfytable(Some(32)); write!(f, "{table}") } } @@ -579,4 +641,4 @@ mod test { Ok(()) } -} +} \ No newline at end of file From 3b97862661ffab409ece6e2a49cf99ddd819c1f9 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 16:15:49 -0800 Subject: [PATCH 02/12] add dynamic columns numbers --- daft/dataframe/dataframe.py | 3 ++- src/daft-table/src/lib.rs | 28 +++++++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index fda4fff17c..1fa3387908 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -1074,10 +1074,11 @@ def show(self, n: int = 8) -> None: try: from IPython.display import display - display(dataframe_display) + display(dataframe_display, clear=True) except ImportError: print(dataframe_display) return None + def __len__(self): """Returns the count of rows when dataframe is materialized. If dataframe is not materialized yet, raises a runtime error. diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index 928079418b..60816d21ef 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -471,12 +471,19 @@ impl Table { pub fn to_comfytable(&self, max_col_width: Option) -> comfy_table::Table { let mut table = comfy_table::Table::new(); + let default_width_if_no_tty = 120; + table.load_preset(comfy_table::presets::UTF8_FULL) .apply_modifier(comfy_table::modifiers::UTF8_ROUND_CORNERS) .set_content_arrangement(comfy_table::ContentArrangement::Dynamic); - // .set_width(100); + if table.width().is_none() && !table.is_tty() { + table.set_width(default_width_if_no_tty); + } + let terminal_width = table.width().expect("should have already been set with default"); + const EXPECTED_COL_WIDTH: u16 = 24; + let max_cols = ((terminal_width + EXPECTED_COL_WIDTH - 1) / EXPECTED_COL_WIDTH) as usize; const DOTS: &str = "…"; const TOTAL_ROWS: usize = 10; @@ -490,14 +497,13 @@ impl Table { head_rows = self.len(); tail_rows = 0; } - const MAX_COLS: usize = 8; let head_cols; let tail_cols; let total_cols; - if self.num_columns() > MAX_COLS { - head_cols = (MAX_COLS + 1) / 2; - tail_cols = MAX_COLS / 2; + if self.num_columns() > max_cols { + head_cols = (max_cols + 1) / 2; + tail_cols = max_cols / 2; total_cols = head_cols + tail_cols + 1; } else { head_cols = self.num_columns(); @@ -510,7 +516,7 @@ impl Table { .iter() .take(head_cols) .map(|(name, field)| { - comfy_table::Cell::new(format!("{}\n┈\n{}", name, field.dtype).as_str()) + comfy_table::Cell::new(format!("{}\n---\n{}", name, field.dtype).as_str()) .add_attribute(comfy_table::Attribute::Bold) }).collect::>(); if tail_cols > 0 { @@ -522,7 +528,7 @@ impl Table { .iter() .skip(self.num_columns() - tail_cols) .map(|(name, field)| { - comfy_table::Cell::new(format!("{}\n┈\n{}", name, field.dtype).as_str()) + comfy_table::Cell::new(format!("{}\n---\n{}", name, field.dtype).as_str()) .add_attribute(comfy_table::Attribute::Bold) }) ) @@ -536,10 +542,10 @@ impl Table { .map(|s| { let mut str_val = s.str_value(i).unwrap(); if let Some(max_col_width) = max_col_width { - if str_val.len() > max_col_width { + if str_val.len() > max_col_width - DOTS.len() { str_val = format!( "{}{DOTS}", - &str_val[..max_col_width - 3] + &str_val[..max_col_width - DOTS.len()] ); } } @@ -574,10 +580,10 @@ impl Table { .map(|s| { let mut str_val = s.str_value(i).unwrap(); if let Some(max_col_width) = max_col_width { - if str_val.len() > max_col_width { + if str_val.len() > max_col_width - DOTS.len() { str_val = format!( "{}{DOTS}", - &str_val[..max_col_width - 3] + &str_val[..max_col_width - DOTS.len()] ); } } From a872eac252418fb260b62b530ef57b48a0cd0652 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 16:58:07 -0800 Subject: [PATCH 03/12] drop pretty table --- Cargo.lock | 86 +------------ Cargo.toml | 3 +- src/daft-core/Cargo.toml | 2 +- src/daft-core/src/schema.rs | 20 +-- src/daft-core/src/series/mod.rs | 44 ++----- src/daft-core/src/utils/display_table.rs | 147 ++++++++++++++++++++++ src/daft-core/src/utils/mod.rs | 1 + src/daft-table/Cargo.toml | 4 +- src/daft-table/src/lib.rs | 149 ++--------------------- 9 files changed, 184 insertions(+), 272 deletions(-) create mode 100644 src/daft-core/src/utils/display_table.rs diff --git a/Cargo.lock b/Cargo.lock index 34f070eba2..311234cbe7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1117,6 +1117,7 @@ dependencies = [ "bincode", "chrono", "chrono-tz", + "comfy-table", "common-error", "dyn-clone", "fnv", @@ -1129,7 +1130,6 @@ dependencies = [ "num-derive", "num-traits", "numpy", - "prettytable-rs", "pyo3", "pyo3-log", "rand 0.8.5", @@ -1366,7 +1366,6 @@ dependencies = [ "daft-dsl", "html-escape", "num-traits", - "prettytable-rs", "pyo3", "pyo3-log", "rand 0.8.5", @@ -1410,27 +1409,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dirs-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" -dependencies = [ - "cfg-if", - "dirs-sys-next", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - [[package]] name = "doc-comment" version = "0.3.3" @@ -1449,12 +1427,6 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - [[package]] name = "encoding_rs" version = "0.8.33" @@ -2102,17 +2074,6 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" -[[package]] -name = "is-terminal" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" -dependencies = [ - "hermit-abi", - "rustix", - "windows-sys", -] - [[package]] name = "itertools" version = "0.11.0" @@ -2847,20 +2808,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "prettytable-rs" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a" -dependencies = [ - "csv", - "encode_unicode", - "is-terminal", - "lazy_static", - "term", - "unicode-width", -] - [[package]] name = "proc-macro2" version = "1.0.69" @@ -3058,15 +3005,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.3.5" @@ -3085,17 +3023,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom 0.2.10", - "redox_syscall 0.2.16", - "thiserror", -] - [[package]] name = "regex" version = "1.10.0" @@ -3648,17 +3575,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "term" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" -dependencies = [ - "dirs-next", - "rustversion", - "winapi", -] - [[package]] name = "thiserror" version = "1.0.49" diff --git a/Cargo.toml b/Cargo.toml index 3119955a8e..e52b1ee25f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,13 +86,13 @@ async-stream = "0.3.5" bytes = "1.4.0" chrono = "0.4.26" chrono-tz = "0.8.3" +comfy-table = "7.1.0" futures = "0.3.28" html-escape = "0.2.13" indexmap = "2.0.0" itertools = "0.11" num-derive = "0.3.3" num-traits = "0.2" -prettytable-rs = "0.10" rand = "^0.8" rayon = "1.7.0" rstest = "0.18.2" @@ -102,7 +102,6 @@ tokio = {version = "1.32.0", features = ["net", "time", "bytes", "process", "sig tokio-stream = {version = "0.1.14", features = ["fs"]} tokio-util = "0.7.8" url = "2.4.0" -comfy-table = "7.1.0" [workspace.dependencies.arrow2] # branch = "jay/fix-parquet-timezone-parsing" diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index 5213292bfd..4372178cdf 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -4,6 +4,7 @@ base64 = "0.21.5" bincode = {workspace = true} chrono = {workspace = true} chrono-tz = {workspace = true} +comfy-table = {workspace = true} common-error = {path = "../common/error", default-features = false} dyn-clone = "1.0.16" fnv = "1.0.7" @@ -14,7 +15,6 @@ log = {workspace = true} ndarray = "0.15.6" num-derive = {workspace = true} num-traits = {workspace = true} -prettytable-rs = {workspace = true} pyo3 = {workspace = true, optional = true} pyo3-log = {workspace = true} rand = {workspace = true} diff --git a/src/daft-core/src/schema.rs b/src/daft-core/src/schema.rs index c69e8519e2..f67da44d64 100644 --- a/src/daft-core/src/schema.rs +++ b/src/daft-core/src/schema.rs @@ -1,4 +1,5 @@ use std::{ + borrow::Cow, collections::{hash_map::DefaultHasher, HashSet}, fmt::{Display, Formatter, Result}, hash::{Hash, Hasher}, @@ -8,7 +9,7 @@ use std::{ use indexmap::IndexMap; use serde::{Deserialize, Serialize}; -use crate::datatypes::Field; +use crate::{datatypes::Field, utils::display_table::make_comfy_table, Series}; use common_error::{DaftError, DaftResult}; @@ -167,14 +168,15 @@ impl Default for Schema { impl Display for Schema { // Produces an ASCII table. fn fmt(&self, f: &mut Formatter) -> Result { - let mut table = prettytable::Table::new(); - - let header = self - .fields - .iter() - .map(|(name, field)| format!("{}\n{}", name, field.dtype)) - .collect(); - table.add_row(header); + let table = make_comfy_table( + self.fields + .values() + .map(|f| Cow::Borrowed(f)) + .collect::>() + .as_slice(), + None, + None, + ); write!(f, "{table}") } } diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 40cfe5b5f2..55afb91ff5 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -4,6 +4,7 @@ mod ops; mod serdes; mod series_like; use std::{ + borrow::Cow, fmt::{Display, Formatter, Result}, sync::Arc, }; @@ -11,6 +12,7 @@ use std::{ use crate::{ array::ops::{from_arrow::FromArrow, full::FullNull}, datatypes::{DataType, Field}, + utils::display_table::make_comfy_table, with_match_daft_types, }; use common_error::DaftResult; @@ -77,47 +79,19 @@ impl Series { } } - pub fn to_prettytable(&self) -> prettytable::Table { - let mut table = prettytable::Table::new(); - - let header = - prettytable::Cell::new(format!("{}\n{}", self.name(), self.data_type()).as_str()) - .with_style(prettytable::Attr::Bold); - table.add_row(prettytable::Row::new(vec![header])); - - let head_rows; - let tail_rows; - - if self.len() > 10 { - head_rows = 5; - tail_rows = 5; - } else { - head_rows = self.len(); - tail_rows = 0; - } - - for i in 0..head_rows { - let row = vec![self.str_value(i).unwrap()]; - table.add_row(row.into()); - } - if tail_rows != 0 { - let row = vec!["..."]; - table.add_row(row.into()); - } - - for i in 0..tail_rows { - let row = vec![self.str_value(self.len() - tail_rows - 1 + i).unwrap()]; - table.add_row(row.into()); - } - - table + pub fn to_comfy_table(&self) -> comfy_table::Table { + make_comfy_table( + vec![Cow::Borrowed(self.field())].as_slice(), + Some([self].as_slice()), + Some(32), + ) } } impl Display for Series { // `f` is a buffer, and this method must write the formatted string into it fn fmt(&self, f: &mut Formatter) -> Result { - let table = self.to_prettytable(); + let table = self.to_comfy_table(); write!(f, "{table}") } } diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs new file mode 100644 index 0000000000..33135f2b44 --- /dev/null +++ b/src/daft-core/src/utils/display_table.rs @@ -0,0 +1,147 @@ +use crate::{datatypes::Field, Series}; + +pub fn make_comfy_table>( + fields: &[F], + columns: Option<&[&Series]>, + max_col_width: Option, +) -> comfy_table::Table { + let mut table = comfy_table::Table::new(); + + let default_width_if_no_tty = 120; + + table + .load_preset(comfy_table::presets::UTF8_FULL) + .apply_modifier(comfy_table::modifiers::UTF8_ROUND_CORNERS) + .set_content_arrangement(comfy_table::ContentArrangement::Dynamic); + if table.width().is_none() && !table.is_tty() { + table.set_width(default_width_if_no_tty); + } + let terminal_width = table + .width() + .expect("should have already been set with default"); + + const EXPECTED_COL_WIDTH: u16 = 24; + + let max_cols = ((terminal_width + EXPECTED_COL_WIDTH - 1) / EXPECTED_COL_WIDTH) as usize; + const DOTS: &str = "…"; + let num_columns = fields.len(); + + let head_cols; + let tail_cols; + let total_cols; + if num_columns > max_cols { + head_cols = (max_cols + 1) / 2; + tail_cols = max_cols / 2; + total_cols = head_cols + tail_cols + 1; + } else { + head_cols = num_columns; + tail_cols = 0; + total_cols = head_cols; + } + let mut header = fields + .iter() + .take(head_cols) + .map(|field| { + comfy_table::Cell::new( + format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(), + ) + .add_attribute(comfy_table::Attribute::Bold) + }) + .collect::>(); + if tail_cols > 0 { + header.push(comfy_table::Cell::new(DOTS)); + header.extend(fields.iter().skip(num_columns - tail_cols).map(|field| { + comfy_table::Cell::new( + format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(), + ) + .add_attribute(comfy_table::Attribute::Bold) + })) + } + table.add_row(header); + + if let Some(columns) = columns && columns.len() > 0 { + let len = columns.first().unwrap().len(); + const TOTAL_ROWS: usize = 10; + let head_rows; + let tail_rows; + + if len > TOTAL_ROWS { + head_rows = TOTAL_ROWS / 2; + tail_rows = TOTAL_ROWS / 2; + } else { + head_rows = len; + tail_rows = 0; + } + + + + for i in 0..head_rows { + let all_cols = columns + .iter() + .map(|s| { + let mut str_val = s.str_value(i).unwrap(); + if let Some(max_col_width) = max_col_width { + if str_val.len() > max_col_width - DOTS.len() { + str_val = format!( + "{}{DOTS}", + &str_val[..max_col_width - DOTS.len()] + ); + } + } + str_val + }).collect::>(); + + if tail_cols > 0 { + let mut final_row = all_cols.iter().take(head_cols).cloned().collect::>(); + final_row.push(DOTS.into()); + final_row.extend( + all_cols + .iter() + .skip(num_columns - tail_cols) + .cloned() + ); + table.add_row(final_row); + + } else { + table.add_row(all_cols); + } + + + } + if tail_rows != 0 { + table.add_row((0..total_cols).map(|_| DOTS).collect::>()); + } + + for i in (len - tail_rows)..(len) { + let all_cols = columns + .iter() + .map(|s| { + let mut str_val = s.str_value(i).unwrap(); + if let Some(max_col_width) = max_col_width { + if str_val.len() > max_col_width - DOTS.len() { + str_val = format!( + "{}{DOTS}", + &str_val[..max_col_width - DOTS.len()] + ); + } + } + str_val + }).collect::>(); + + if tail_cols > 0 { + let mut final_row = all_cols.iter().take(head_cols).cloned().collect::>(); + final_row.push(DOTS.into()); + final_row.extend( + all_cols + .iter() + .skip(num_columns - tail_cols) + .cloned() + ); + table.add_row(final_row); + } else { + table.add_row(all_cols); + } + } + } + table +} diff --git a/src/daft-core/src/utils/mod.rs b/src/daft-core/src/utils/mod.rs index d4c644994a..70038c8317 100644 --- a/src/daft-core/src/utils/mod.rs +++ b/src/daft-core/src/utils/mod.rs @@ -1,4 +1,5 @@ pub mod arrow; +pub mod display_table; pub mod hashable_float_wrapper; pub mod supertype; diff --git a/src/daft-table/Cargo.toml b/src/daft-table/Cargo.toml index c1760618c6..a3ca4034f4 100644 --- a/src/daft-table/Cargo.toml +++ b/src/daft-table/Cargo.toml @@ -1,13 +1,11 @@ [dependencies] arrow2 = {workspace = true} +comfy-table = {workspace = true} common-error = {path = "../common/error", default-features = false} daft-core = {path = "../daft-core", default-features = false} daft-dsl = {path = "../daft-dsl", default-features = false} html-escape = {workspace = true} num-traits = {workspace = true} -prettytable-rs = {workspace = true} -comfy-table = {workspace = true} - pyo3 = {workspace = true, optional = true} pyo3-log = {workspace = true} rand = {workspace = true} diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index 60816d21ef..fba8f6d9bd 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -1,9 +1,11 @@ #![feature(hash_raw_entry)] +use std::borrow::Cow; use std::collections::HashSet; use std::fmt::{Display, Formatter, Result}; use daft_core::array::ops::full::FullNull; +use daft_core::utils::display_table::make_comfy_table; use num_traits::ToPrimitive; use daft_core::array::ops::GroupIndices; @@ -469,143 +471,16 @@ impl Table { } pub fn to_comfytable(&self, max_col_width: Option) -> comfy_table::Table { - let mut table = comfy_table::Table::new(); - - let default_width_if_no_tty = 120; - - table.load_preset(comfy_table::presets::UTF8_FULL) - .apply_modifier(comfy_table::modifiers::UTF8_ROUND_CORNERS) - .set_content_arrangement(comfy_table::ContentArrangement::Dynamic); - if table.width().is_none() && !table.is_tty() { - table.set_width(default_width_if_no_tty); - } - let terminal_width = table.width().expect("should have already been set with default"); - - const EXPECTED_COL_WIDTH: u16 = 24; - - let max_cols = ((terminal_width + EXPECTED_COL_WIDTH - 1) / EXPECTED_COL_WIDTH) as usize; - const DOTS: &str = "…"; - - const TOTAL_ROWS: usize = 10; - let head_rows; - let tail_rows; - - if self.len() > TOTAL_ROWS { - head_rows = TOTAL_ROWS / 2; - tail_rows = TOTAL_ROWS / 2; - } else { - head_rows = self.len(); - tail_rows = 0; - } - - let head_cols; - let tail_cols; - let total_cols; - if self.num_columns() > max_cols { - head_cols = (max_cols + 1) / 2; - tail_cols = max_cols / 2; - total_cols = head_cols + tail_cols + 1; - } else { - head_cols = self.num_columns(); - tail_cols = 0; - total_cols = head_cols; - } - let mut header = self - .schema - .fields - .iter() - .take(head_cols) - .map(|(name, field)| { - comfy_table::Cell::new(format!("{}\n---\n{}", name, field.dtype).as_str()) - .add_attribute(comfy_table::Attribute::Bold) - }).collect::>(); - if tail_cols > 0 { - header.push(comfy_table::Cell::new(DOTS)); - header.extend( - self - .schema + make_comfy_table( + self.schema .fields - .iter() - .skip(self.num_columns() - tail_cols) - .map(|(name, field)| { - comfy_table::Cell::new(format!("{}\n---\n{}", name, field.dtype).as_str()) - .add_attribute(comfy_table::Attribute::Bold) - }) - ) - } - table.add_row(header); - - for i in 0..head_rows { - let all_cols = self - .columns - .iter() - .map(|s| { - let mut str_val = s.str_value(i).unwrap(); - if let Some(max_col_width) = max_col_width { - if str_val.len() > max_col_width - DOTS.len() { - str_val = format!( - "{}{DOTS}", - &str_val[..max_col_width - DOTS.len()] - ); - } - } - str_val - }).collect::>(); - - if tail_cols > 0 { - let mut final_row = all_cols.iter().take(head_cols).cloned().collect::>(); - final_row.push(DOTS.into()); - final_row.extend( - all_cols - .iter() - .skip(self.num_columns() - tail_cols) - .cloned() - ); - table.add_row(final_row); - - } else { - table.add_row(all_cols); - } - - - } - if tail_rows != 0 { - table.add_row((0..total_cols).map(|_| DOTS).collect::>()); - } - - for i in (self.len() - tail_rows)..(self.len()) { - let all_cols = self - .columns - .iter() - .map(|s| { - let mut str_val = s.str_value(i).unwrap(); - if let Some(max_col_width) = max_col_width { - if str_val.len() > max_col_width - DOTS.len() { - str_val = format!( - "{}{DOTS}", - &str_val[..max_col_width - DOTS.len()] - ); - } - } - str_val - }).collect::>(); - - if tail_cols > 0 { - let mut final_row = all_cols.iter().take(head_cols).cloned().collect::>(); - final_row.push(DOTS.into()); - final_row.extend( - all_cols - .iter() - .skip(self.num_columns() - tail_cols) - .cloned() - ); - table.add_row(final_row); - } else { - table.add_row(all_cols); - } - } - - table + .values() + .map(|f| Cow::Borrowed(f)) + .collect::>() + .as_slice(), + Some(&self.columns.iter().collect::>().as_slice()), + max_col_width, + ) } } @@ -647,4 +522,4 @@ mod test { Ok(()) } -} \ No newline at end of file +} From 1fdb542449108fa78a4d7c13218a0120a1108300 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 16:59:06 -0800 Subject: [PATCH 04/12] clippy fixes --- src/daft-core/src/schema.rs | 4 ++-- src/daft-core/src/utils/display_table.rs | 2 +- src/daft-csv/src/read.rs | 4 ++-- src/daft-table/src/lib.rs | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/daft-core/src/schema.rs b/src/daft-core/src/schema.rs index f67da44d64..96a0be5fe2 100644 --- a/src/daft-core/src/schema.rs +++ b/src/daft-core/src/schema.rs @@ -9,7 +9,7 @@ use std::{ use indexmap::IndexMap; use serde::{Deserialize, Serialize}; -use crate::{datatypes::Field, utils::display_table::make_comfy_table, Series}; +use crate::{datatypes::Field, utils::display_table::make_comfy_table}; use common_error::{DaftError, DaftResult}; @@ -171,7 +171,7 @@ impl Display for Schema { let table = make_comfy_table( self.fields .values() - .map(|f| Cow::Borrowed(f)) + .map(Cow::Borrowed) .collect::>() .as_slice(), None, diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index 33135f2b44..0a7fa611db 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -59,7 +59,7 @@ pub fn make_comfy_table>( } table.add_row(header); - if let Some(columns) = columns && columns.len() > 0 { + if let Some(columns) = columns && !columns.is_empty() { let len = columns.first().unwrap().len(); const TOTAL_ROWS: usize = 10; let head_rows; diff --git a/src/daft-csv/src/read.rs b/src/daft-csv/src/read.rs index a51e56ef60..460e01f377 100644 --- a/src/daft-csv/src/read.rs +++ b/src/daft-csv/src/read.rs @@ -1543,7 +1543,7 @@ mod tests { let io_client = Arc::new(IOClient::new(io_config.into())?); - let column_names = vec!["a", "b"]; + let column_names = ["a", "b"]; let table = read_csv( file, Some( @@ -1579,7 +1579,7 @@ mod tests { let io_client = Arc::new(IOClient::new(io_config.into())?); - let column_names = vec!["a", "b"]; + let column_names = ["a", "b"]; let table = read_csv( file, Some( diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index fba8f6d9bd..c772f7295c 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -475,10 +475,10 @@ impl Table { self.schema .fields .values() - .map(|f| Cow::Borrowed(f)) + .map(Cow::Borrowed) .collect::>() .as_slice(), - Some(&self.columns.iter().collect::>().as_slice()), + Some(self.columns.iter().collect::>().as_slice()), max_col_width, ) } From fa1102d9f6354c0b0691e9eebb1fca7271240fbb Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 17:00:36 -0800 Subject: [PATCH 05/12] name fix --- src/daft-table/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index c772f7295c..01fbef7823 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -470,7 +470,7 @@ impl Table { res } - pub fn to_comfytable(&self, max_col_width: Option) -> comfy_table::Table { + pub fn to_comfy_table(&self, max_col_width: Option) -> comfy_table::Table { make_comfy_table( self.schema .fields @@ -487,7 +487,7 @@ impl Table { impl Display for Table { // `f` is a buffer, and this method must write the formatted string into it fn fmt(&self, f: &mut Formatter) -> Result { - let table = self.to_comfytable(Some(32)); + let table = self.to_comfy_table(Some(32)); write!(f, "{table}") } } From aa8783c095a46e0c1247aafb5fdf529a5f32ce9a Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 17:02:33 -0800 Subject: [PATCH 06/12] bump up series width --- src/daft-core/src/series/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 55afb91ff5..2017848f3e 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -83,7 +83,7 @@ impl Series { make_comfy_table( vec![Cow::Borrowed(self.field())].as_slice(), Some([self].as_slice()), - Some(32), + Some(80), ) } } From 06eb75b5f0027678dbc68783ef7a469309c08f26 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 17:45:11 -0800 Subject: [PATCH 07/12] handle logic differently in schema only print --- src/daft-core/src/utils/display_table.rs | 31 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index 0a7fa611db..8dce552bfd 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -7,22 +7,37 @@ pub fn make_comfy_table>( ) -> comfy_table::Table { let mut table = comfy_table::Table::new(); - let default_width_if_no_tty = 120; + let default_width_if_no_tty = 120usize; table .load_preset(comfy_table::presets::UTF8_FULL) .apply_modifier(comfy_table::modifiers::UTF8_ROUND_CORNERS) .set_content_arrangement(comfy_table::ContentArrangement::Dynamic); if table.width().is_none() && !table.is_tty() { - table.set_width(default_width_if_no_tty); + table.set_width(default_width_if_no_tty as u16); } let terminal_width = table .width() - .expect("should have already been set with default"); + .expect("should have already been set with default") as usize; - const EXPECTED_COL_WIDTH: u16 = 24; - - let max_cols = ((terminal_width + EXPECTED_COL_WIDTH - 1) / EXPECTED_COL_WIDTH) as usize; + let expected_col_width: usize = if columns.is_some() || fields.is_empty() { + 24usize + } else { + let mut all_lens = fields + .iter() + .map(|f| f.as_ref().name.len() + 3) + .collect::>(); + all_lens.sort(); + + // get 90 percentile + let index = ((9 * all_lens.len()) / 10).min(all_lens.len() - 1); + let expected_len = all_lens + .get(index) + .expect("we clamped so this shouldnt happen"); + *expected_len + }; + + let max_cols = ((terminal_width + expected_col_width - 1) / expected_col_width); const DOTS: &str = "…"; let num_columns = fields.len(); @@ -57,9 +72,9 @@ pub fn make_comfy_table>( .add_attribute(comfy_table::Attribute::Bold) })) } - table.add_row(header); if let Some(columns) = columns && !columns.is_empty() { + table.set_header(header); let len = columns.first().unwrap().len(); const TOTAL_ROWS: usize = 10; let head_rows; @@ -142,6 +157,8 @@ pub fn make_comfy_table>( table.add_row(all_cols); } } + } else { + table.add_row(header); } table } From 5323205b4e7d17cdfe8cc6d75bb09904f8500257 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 22 Nov 2023 17:46:01 -0800 Subject: [PATCH 08/12] handle logic differently in schema only print --- src/daft-core/src/utils/display_table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index 8dce552bfd..e82c4217c5 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -37,7 +37,7 @@ pub fn make_comfy_table>( *expected_len }; - let max_cols = ((terminal_width + expected_col_width - 1) / expected_col_width); + let max_cols = (terminal_width + expected_col_width - 1) / expected_col_width; const DOTS: &str = "…"; let num_columns = fields.len(); From 79eec63173fcadefb365eacaf402168596dbf75f Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Fri, 24 Nov 2023 16:32:31 -0800 Subject: [PATCH 09/12] remove dynamic schema sizing --- src/daft-core/src/utils/display_table.rs | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index e82c4217c5..cb63fc1bbe 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -20,22 +20,7 @@ pub fn make_comfy_table>( .width() .expect("should have already been set with default") as usize; - let expected_col_width: usize = if columns.is_some() || fields.is_empty() { - 24usize - } else { - let mut all_lens = fields - .iter() - .map(|f| f.as_ref().name.len() + 3) - .collect::>(); - all_lens.sort(); - - // get 90 percentile - let index = ((9 * all_lens.len()) / 10).min(all_lens.len() - 1); - let expected_len = all_lens - .get(index) - .expect("we clamped so this shouldnt happen"); - *expected_len - }; + let expected_col_width = 24usize; let max_cols = (terminal_width + expected_col_width - 1) / expected_col_width; const DOTS: &str = "…"; From a57832d41cd34bd831a62e63345654d88a4de3b8 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Fri, 24 Nov 2023 17:16:27 -0800 Subject: [PATCH 10/12] fix repr tests --- src/daft-core/src/schema.rs | 2 +- src/daft-core/src/series/mod.rs | 2 +- src/daft-table/src/lib.rs | 2 +- tests/dataframe/test_repr.py | 50 ++++++++++++++++++--------------- tests/test_schema.py | 15 ++++++---- 5 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/daft-core/src/schema.rs b/src/daft-core/src/schema.rs index 96a0be5fe2..bd26f6d4ac 100644 --- a/src/daft-core/src/schema.rs +++ b/src/daft-core/src/schema.rs @@ -177,7 +177,7 @@ impl Display for Schema { None, None, ); - write!(f, "{table}") + writeln!(f, "{table}") } } diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 2017848f3e..06010998a3 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -92,7 +92,7 @@ impl Display for Series { // `f` is a buffer, and this method must write the formatted string into it fn fmt(&self, f: &mut Formatter) -> Result { let table = self.to_comfy_table(); - write!(f, "{table}") + writeln!(f, "{table}") } } diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index 01fbef7823..c296d7cc57 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -488,7 +488,7 @@ impl Display for Table { // `f` is a buffer, and this method must write the formatted string into it fn fmt(&self, f: &mut Formatter) -> Result { let table = self.to_comfy_table(Some(32)); - write!(f, "{table}") + writeln!(f, "{table}") } } diff --git a/tests/dataframe/test_repr.py b/tests/dataframe/test_repr.py index 5ddd9590f9..6f80b32089 100644 --- a/tests/dataframe/test_repr.py +++ b/tests/dataframe/test_repr.py @@ -8,18 +8,21 @@ import daft -ROW_DIVIDER_REGEX = re.compile(r"\+-+\+") +ROW_DIVIDER_REGEX = re.compile(r"╭─+┬*─*╮|├╌+┼*╌+┤") SHOWING_N_ROWS_REGEX = re.compile(r".*\(Showing first (\d+) of (\d+) rows\).*") UNMATERIALIZED_REGEX = re.compile(r".*\(No data to display: Dataframe not materialized\).*") MATERIALIZED_NO_ROWS_REGEX = re.compile(r".*\(No data to display: Materialized dataframe has no rows\).*") TD_STYLE = 'style="text-align:left; max-width:192px; max-height:64px; overflow:auto"' +ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") def parse_str_table( table: str, expected_user_msg_regex: re.Pattern = SHOWING_N_ROWS_REGEX ) -> dict[str, tuple[str, list[str]]]: + table = ANSI_ESCAPE.sub("", table) + def _split_table_row(row: str) -> list[str]: - return [cell.strip() for cell in row.split("|")[1:-1]] + return [cell.strip() for cell in re.split("┆|│", row)[1:-1]] lines = table.split("\n") assert len(lines) > 4 @@ -27,15 +30,15 @@ def _split_table_row(row: str) -> list[str]: assert expected_user_msg_regex.match(lines[-1]) column_names = _split_table_row(lines[1]) - column_types = _split_table_row(lines[2]) + column_types = _split_table_row(lines[3]) data = [] - for line in lines[4:-2]: + for line in lines[5:-3]: if ROW_DIVIDER_REGEX.match(line): continue data.append(_split_table_row(line)) - - return {column_names[i]: (column_types[i], [row[i] for row in data]) for i in range(len(column_names))} + val = {column_names[i]: (column_types[i], [row[i] for row in data]) for i in range(len(column_names))} + return val def parse_html_table( @@ -200,23 +203,24 @@ def test_repr_html_custom_hooks(): df.collect() assert ( - df.__repr__().replace("\r", "") - == """+-------------------+-------------+----------------------------------+ -| objects | np | pil | -| Python | Python | Python | -+-------------------+-------------+----------------------------------+ -| myobj-custom-repr | [[1. 1. 1.] | Date: Fri, 24 Nov 2023 17:18:33 -0800 Subject: [PATCH 11/12] fix newline --- daft/dataframe/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index 1fa3387908..444ceb6081 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -1078,7 +1078,7 @@ def show(self, n: int = 8) -> None: except ImportError: print(dataframe_display) return None - + def __len__(self): """Returns the count of rows when dataframe is materialized. If dataframe is not materialized yet, raises a runtime error. From ecfcb048d26af030781231250f3900cdda1d98a4 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Fri, 24 Nov 2023 17:37:33 -0800 Subject: [PATCH 12/12] fix tensor repr test --- src/daft-core/src/utils/display_table.rs | 2 +- tests/dataframe/test_repr.py | 2 +- tests/series/test_tensor.py | 31 ++++++++++++------------ tests/test_schema.py | 4 +-- tests/utils.py | 4 +++ 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index cb63fc1bbe..7982e4aac9 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -20,7 +20,7 @@ pub fn make_comfy_table>( .width() .expect("should have already been set with default") as usize; - let expected_col_width = 24usize; + let expected_col_width = 18usize; let max_cols = (terminal_width + expected_col_width - 1) / expected_col_width; const DOTS: &str = "…"; diff --git a/tests/dataframe/test_repr.py b/tests/dataframe/test_repr.py index 6f80b32089..26849f20a0 100644 --- a/tests/dataframe/test_repr.py +++ b/tests/dataframe/test_repr.py @@ -7,13 +7,13 @@ from PIL import Image import daft +from tests.utils import ANSI_ESCAPE ROW_DIVIDER_REGEX = re.compile(r"╭─+┬*─*╮|├╌+┼*╌+┤") SHOWING_N_ROWS_REGEX = re.compile(r".*\(Showing first (\d+) of (\d+) rows\).*") UNMATERIALIZED_REGEX = re.compile(r".*\(No data to display: Dataframe not materialized\).*") MATERIALIZED_NO_ROWS_REGEX = re.compile(r".*\(No data to display: Materialized dataframe has no rows\).*") TD_STYLE = 'style="text-align:left; max-width:192px; max-height:64px; overflow:auto"' -ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") def parse_str_table( diff --git a/tests/series/test_tensor.py b/tests/series/test_tensor.py index 6e9295fdff..fbea2ba64a 100644 --- a/tests/series/test_tensor.py +++ b/tests/series/test_tensor.py @@ -10,6 +10,7 @@ from daft.series import Series from daft.utils import pyarrow_supports_fixed_shape_tensor from tests.series import ARROW_FLOAT_TYPES, ARROW_INT_TYPES +from tests.utils import ANSI_ESCAPE ARROW_VERSION = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) @@ -122,20 +123,20 @@ def test_tensor_repr(): arr = np.arange(np.prod((2, 2)), dtype=np.int64).reshape((2, 2)) arrs = [arr, arr, None] s = Series.from_pylist(arrs, pyobj="allow") + + out_repr = ANSI_ESCAPE.sub("", repr(s)) assert ( - repr(s).replace("\r", "") - == """ -+-----------------------+ -| list_series | -| Tensor(Int64) | -+-----------------------+ -| | -+-----------------------+ -| | -+-----------------------+ -| None | -+-----------------------+ -"""[ - 1: - ] + out_repr.replace("\r", "") + == """╭───────────────────────╮ +│ list_series │ +│ --- │ +│ Tensor(Int64) │ +╞═══════════════════════╡ +│ │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ None │ +╰───────────────────────╯ +""" ) diff --git a/tests/test_schema.py b/tests/test_schema.py index ebb8fbd7a2..234df3637c 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,7 +1,6 @@ from __future__ import annotations import copy -import re import pyarrow as pa import pytest @@ -20,7 +19,8 @@ TABLE = Table.from_pydict({k: data for k, (data, _) in DATA.items()}) EXPECTED_TYPES = {k: t for k, (_, t) in DATA.items()} -ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + +from tests.utils import ANSI_ESCAPE def test_schema_len(): diff --git a/tests/utils.py b/tests/utils.py index 673322e205..052e917a80 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,8 +1,12 @@ from __future__ import annotations +import re + import pyarrow as pa import pyarrow.compute as pac +ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + def sort_arrow_table(tbl: pa.Table, sort_by: str): """In arrow versions < 7, pa.Table does not support sorting yet so we add a helper method here"""