Skip to content

Commit

Permalink
feat: New Config options for numeric formatting: digit grouping and…
Browse files Browse the repository at this point in the history
… thousands/decimal separator (pola-rs#12099)

Co-authored-by: Stijn de Gooijer <[email protected]>
  • Loading branch information
alexander-beedie and stinodego authored Nov 7, 2023
1 parent 0363f10 commit 89fadaf
Show file tree
Hide file tree
Showing 15 changed files with 628 additions and 125 deletions.
9 changes: 9 additions & 0 deletions crates/polars-core/src/datatypes/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,15 @@ impl DataType {
self.is_float() || self.is_integer()
}

/// Check if this [`DataType`] is a Decimal type (of any scale/precision).
pub fn is_decimal(&self) -> bool {
match self {
#[cfg(feature = "dtype-decimal")]
DataType::Decimal(_, _) => true,
_ => false,
}
}

/// Check if this [`DataType`] is a basic floating point type (excludes Decimal).
pub fn is_float(&self) -> bool {
matches!(self, DataType::Float32 | DataType::Float64)
Expand Down
175 changes: 134 additions & 41 deletions crates/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[cfg(any(feature = "fmt", feature = "fmt_no_tty"))]
use std::borrow::Cow;
use std::fmt::{Debug, Display, Formatter, Write};
use std::sync::atomic::{AtomicU8, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
use std::sync::RwLock;
use std::{fmt, str};

Expand Down Expand Up @@ -33,35 +33,62 @@ pub enum FloatFmt {
Mixed,
Full,
}
static FLOAT_FMT: AtomicU8 = AtomicU8::new(FloatFmt::Mixed as u8);
static FLOAT_PRECISION: RwLock<Option<usize>> = RwLock::new(None);
static FLOAT_FMT: AtomicU8 = AtomicU8::new(FloatFmt::Mixed as u8);

static TRIM_DECIMAL_ZEROS: AtomicBool = AtomicBool::new(false);
static THOUSANDS_SEPARATOR: AtomicU8 = AtomicU8::new(b'\0');
static DECIMAL_SEPARATOR: AtomicU8 = AtomicU8::new(b'.');

// Numeric formatting getters
pub fn get_float_fmt() -> FloatFmt {
match FLOAT_FMT.load(Ordering::Relaxed) {
0 => FloatFmt::Mixed,
1 => FloatFmt::Full,
_ => panic!(),
}
}

pub fn get_float_precision() -> Option<usize> {
*FLOAT_PRECISION.read().unwrap()
}
pub fn get_decimal_separator() -> char {
DECIMAL_SEPARATOR.load(Ordering::Relaxed) as char
}
pub fn get_thousands_separator() -> String {
let sep = THOUSANDS_SEPARATOR.load(Ordering::Relaxed) as char;
if sep == '\0' {
"".to_string()
} else {
sep.to_string()
}
}
pub fn get_trim_decimal_zeros() -> bool {
TRIM_DECIMAL_ZEROS.load(Ordering::Relaxed)
}

// Numeric formatting setters
pub fn set_float_fmt(fmt: FloatFmt) {
FLOAT_FMT.store(fmt as u8, Ordering::Relaxed)
}

pub fn set_float_precision(precision: Option<usize>) {
*FLOAT_PRECISION.write().unwrap() = precision;
}
pub fn set_decimal_separator(dec: Option<char>) {
DECIMAL_SEPARATOR.store(dec.unwrap_or('.') as u8, Ordering::Relaxed)
}
pub fn set_thousands_separator(sep: Option<char>) {
THOUSANDS_SEPARATOR.store(sep.unwrap_or('\0') as u8, Ordering::Relaxed)
}
pub fn set_trim_decimal_zeros(trim: Option<bool>) {
TRIM_DECIMAL_ZEROS.store(trim.unwrap_or(false), Ordering::Relaxed)
}

macro_rules! format_array {
($f:ident, $a:expr, $dtype:expr, $name:expr, $array_type:expr) => {{
write!(
$f,
"shape: ({},)\n{}: '{}' [{}]\n[\n",
fmt_uint(&$a.len()),
fmt_int_string_custom(&$a.len().to_string(), 3, "_"),
$array_type,
$name,
$dtype
Expand Down Expand Up @@ -138,21 +165,18 @@ fn format_object_array(
match object.dtype() {
DataType::Object(inner_type) => {
let limit = std::cmp::min(LIMIT, object.len());

write!(
f,
"shape: ({},)\n{}: '{}' [o][{}]\n[\n",
fmt_uint(&object.len()),
fmt_int_string_custom(&object.len().to_string(), 3, "_"),
array_type,
name,
inner_type
)?;

for i in 0..limit {
let v = object.str_value(i);
writeln!(f, "\t{}", v.unwrap())?;
}

write!(f, "]")
},
_ => unreachable!(),
Expand Down Expand Up @@ -442,22 +466,13 @@ fn env_is_true(varname: &str) -> bool {
std::env::var(varname).as_deref().unwrap_or("0") == "1"
}

fn fmt_uint(num: &usize) -> String {
// Return a string with thousands separated by _
// e.g. 1_000_000
num.to_string()
.as_bytes()
.rchunks(3)
.rev()
.map(str::from_utf8)
.collect::<Result<Vec<&str>, _>>()
.unwrap()
.join("_") // separator
}

fn fmt_df_shape((shape0, shape1): &(usize, usize)) -> String {
// e.g. (1_000_000, 4_000)
format!("({}, {})", fmt_uint(shape0), fmt_uint(shape1))
format!(
"({}, {})",
fmt_int_string_custom(&shape0.to_string(), 3, "_"),
fmt_int_string_custom(&shape1.to_string(), 3, "_")
)
}

impl Display for DataFrame {
Expand Down Expand Up @@ -675,7 +690,7 @@ impl Display for DataFrame {
for (column_index, column) in table.column_iter_mut().enumerate() {
let dtype = fields[column_index].data_type();
let mut preset = str_preset.as_str();
if dtype.is_numeric() {
if dtype.is_numeric() || dtype.is_decimal() {
preset = num_preset.as_str();
}
match preset {
Expand All @@ -699,7 +714,6 @@ impl Display for DataFrame {
}
}
}

#[cfg(not(any(feature = "fmt", feature = "fmt_no_tty")))]
{
write!(
Expand All @@ -712,12 +726,76 @@ impl Display for DataFrame {
}
}

fn fmt_int_string_custom(num: &str, group_size: u8, group_separator: &str) -> String {
if group_size == 0 || num.len() <= 1 {
num.to_string()
} else {
let mut out = String::new();
let sign_offset = if num.starts_with('-') || num.starts_with('+') {
out.push(num.chars().next().unwrap());
1
} else {
0
};
let int_body = num[sign_offset..]
.as_bytes()
.rchunks(group_size as usize)
.rev()
.map(str::from_utf8)
.collect::<Result<Vec<&str>, _>>()
.unwrap()
.join(group_separator);
out.push_str(&int_body);
out
}
}

fn fmt_int_string(num: &str) -> String {
fmt_int_string_custom(num, 3, &get_thousands_separator())
}

fn fmt_float_string_custom(
num: &str,
group_size: u8,
group_separator: &str,
decimal: char,
) -> String {
// Quick exit if no formatting would be applied
if num.len() <= 1 || (group_size == 0 && decimal == '.') {
num.to_string()
} else {
// Take existing numeric string and apply digit grouping & separator/decimal chars
// e.g. "1000000" → "1_000_000", "-123456.798" → "-123,456.789", etc
let (idx, has_fractional) = match num.find('.') {
Some(i) => (i, true),
None => (num.len(), false),
};
let mut out = String::new();
let integer_part = &num[..idx];

out.push_str(&fmt_int_string_custom(
integer_part,
group_size,
group_separator,
));
if has_fractional {
out.push(decimal);
out.push_str(&num[idx + 1..]);
};
out
}
}

fn fmt_float_string(num: &str) -> String {
fmt_float_string_custom(num, 3, &get_thousands_separator(), get_decimal_separator())
}

fn fmt_integer<T: Num + NumCast + Display>(
f: &mut Formatter<'_>,
width: usize,
v: T,
) -> fmt::Result {
write!(f, "{v:>width$}")
write!(f, "{:>width$}", fmt_int_string(&v.to_string()))
}

const SCIENTIFIC_BOUND: f64 = 999999.0;
Expand All @@ -731,20 +809,27 @@ fn fmt_float<T: Num + NumCast>(f: &mut Formatter<'_>, width: usize, v: T) -> fmt
if format!("{v:.precision$}", precision = precision).len() > 19 {
return write!(f, "{v:>width$.precision$e}", precision = precision);
}
return write!(f, "{v:>width$.precision$}", precision = precision);
let s = format!("{v:>width$.precision$}", precision = precision);
return write!(f, "{}", fmt_float_string(s.as_str()));
}

if matches!(get_float_fmt(), FloatFmt::Full) {
return write!(f, "{v:>width$}");
let s = format!("{v:>width$}");
return write!(f, "{}", fmt_float_string(s.as_str()));
}

// show integers as 0.0, 1.0 ... 101.0
if v.fract() == 0.0 && v.abs() < SCIENTIFIC_BOUND {
write!(f, "{v:>width$.1}")
let s = format!("{v:>width$.1}");
write!(f, "{}", fmt_float_string(s.as_str()))
} else if format!("{v}").len() > 9 {
// large and small floats in scientific notation
if !(0.000001..=SCIENTIFIC_BOUND).contains(&v.abs()) | (v.abs() > SCIENTIFIC_BOUND) {
write!(f, "{v:>width$.4e}")
// large and small floats in scientific notation.
// (note: scientific notation does not play well with digit grouping)
if (!(0.000001..=SCIENTIFIC_BOUND).contains(&v.abs()) | (v.abs() > SCIENTIFIC_BOUND))
&& get_thousands_separator().is_empty()
{
let s = format!("{v:>width$.4e}");
write!(f, "{}", fmt_float_string(s.as_str()))
} else {
// this makes sure we don't write 12.00000 in case of a long flt that is 12.0000000001
// instead we write 12.0
Expand All @@ -758,22 +843,27 @@ fn fmt_float<T: Num + NumCast>(f: &mut Formatter<'_>, width: usize, v: T) -> fmt
s = &s[..len];
len -= 1;
}
if s.ends_with('.') {
write!(f, "{s}0")
let s = if s.ends_with('.') {
format!("{s}0")
} else {
write!(f, "{s}")
}
s.to_string()
};
write!(f, "{}", fmt_float_string(s.as_str()))
} else {
// 12.0934509341243124
// written as
// 12.09345
write!(f, "{v:>width$.6}")
let s = format!("{v:>width$.6}");
write!(f, "{}", fmt_float_string(s.as_str()))
}
}
} else if v.fract() == 0.0 {
write!(f, "{v:>width$e}")
} else {
write!(f, "{v:>width$}")
let s = if v.fract() == 0.0 {
format!("{v:>width$e}")
} else {
format!("{v:>width$}")
};
write!(f, "{}", fmt_float_string(s.as_str()))
}
}

Expand Down Expand Up @@ -1019,6 +1109,8 @@ mod decimal {
use std::fmt::Formatter;
use std::{fmt, ptr, str};

use crate::fmt::{fmt_float_string, get_trim_decimal_zeros};

const BUF_LEN: usize = 48;

#[derive(Clone, Copy)]
Expand Down Expand Up @@ -1124,7 +1216,8 @@ mod decimal {

#[inline]
pub fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result {
f.write_str(format_decimal(v, scale, !f.alternate()).as_str())
let trim_zeros = get_trim_decimal_zeros();
f.write_str(fmt_float_string(format_decimal(v, scale, trim_zeros).as_str()).as_str())
}
}

Expand Down
1 change: 0 additions & 1 deletion crates/polars-core/src/utils/supertype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pub fn get_supertype(l: &DataType, r: &DataType) -> Option<DataType> {
if l == r {
return Some(l.clone());
}

match (l, r) {
#[cfg(feature = "dtype-i8")]
(Int8, Boolean) => Some(Int8),
Expand Down
6 changes: 6 additions & 0 deletions py-polars/docs/source/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ Config options

Config.activate_decimals
Config.set_ascii_tables
Config.set_auto_structify
Config.set_decimal_separator
Config.set_float_precision
Config.set_fmt_float
Config.set_fmt_str_lengths
Config.set_fmt_table_cell_list_len
Config.set_streaming_chunk_size
Config.set_tbl_cell_alignment
Config.set_tbl_cell_numeric_alignment
Config.set_tbl_cols
Config.set_tbl_column_data_type_inline
Config.set_tbl_dataframe_shape_below
Expand All @@ -26,6 +30,8 @@ Config options
Config.set_tbl_hide_dtype_separator
Config.set_tbl_rows
Config.set_tbl_width_chars
Config.set_thousands_separator
Config.set_trim_decimal_zeros
Config.set_verbose

Config load, save, and current state
Expand Down
Loading

0 comments on commit 89fadaf

Please sign in to comment.