Skip to content

feature #49580: support new-style float_format string in to_csv #61650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
19 changes: 19 additions & 0 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,25 @@ def time_frame(self, kind):
self.df.to_csv(self.fname)


class ToCSVFloatFormatVariants(BaseIO):
    """Time ``to_csv`` on an all-float frame for each ``float_format`` flavour."""

    fname = "__test__.csv"

    def setup(self):
        # Seeded generator: every run writes the same 1000 x 1000 values.
        rng = np.random.default_rng(seed=42)
        values = rng.random((1000, 1000))
        self.df = DataFrame(values)

    def time_old_style_percent_format(self):
        # printf-style template
        template = "%.6f"
        self.df.to_csv(self.fname, float_format=template)

    def time_new_style_brace_format(self):
        # str.format-style template
        template = "{:.6f}"
        self.df.to_csv(self.fname, float_format=template)

    def time_new_style_thousands_format(self):
        # str.format-style template with a thousands separator
        template = "{:,.2f}"
        self.df.to_csv(self.fname, float_format=template)

    def time_callable_format(self):
        # user-supplied callable instead of a template string
        def six_decimals(x):
            return f"{x:.6f}"

        self.df.to_csv(self.fname, float_format=six_decimals)


class ToCSVMultiIndexUnusedLevels(BaseIO):
fname = "__test__.csv"

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Other enhancements
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`, :issue:`61650`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`, :issue:`61650`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)

Probably only needs reference to the issue. Was there a specific reason to reference this PR?


.. ---------------------------------------------------------------------------
.. _whatsnew_300.notable_bug_fixes:
Expand Down
25 changes: 24 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def __init__(
self.na_rep = na_rep
self.formatters = self._initialize_formatters(formatters)
self.justify = self._initialize_justify(justify)
self.float_format = float_format
self.float_format = self._validate_float_format(float_format)
self.sparsify = self._initialize_sparsify(sparsify)
self.show_index_names = index_names
self.decimal = decimal
Expand Down Expand Up @@ -850,6 +850,29 @@ def _get_column_name_list(self) -> list[Hashable]:
names.append("" if columns.name is None else columns.name)
return names

def _validate_float_format(
self, fmt: FloatFormatType | None
) -> FloatFormatType | None:
"""
Validates and processes the float_format argument.
Converts new-style format strings to callables.
"""
if fmt is None or callable(fmt):
return fmt

if isinstance(fmt, str):
if "%" in fmt:
# Keeps old-style format strings as they are (C code handles them)
return fmt
else:
try:
_ = fmt.format(1.0) # Test with an arbitrary float
return fmt.format
except (ValueError, KeyError, IndexError) as e:
raise ValueError(f"Invalid new-style format string {fmt!r}") from e

raise ValueError("float_format must be a string or callable")


class DataFrameRenderer:
"""Class for creating dataframe output in multiple formats.
Expand Down
141 changes: 141 additions & 0 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,3 +741,144 @@ def test_to_csv_iterative_compression_buffer(compression):
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed


def test_new_style_float_format_basic():
    """A plain ``{:.2f}`` template writes floats with two decimals."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"


def test_new_style_float_format_thousands():
    """Values formatted with a ``,`` separator come out quoted in the CSV."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:,.2f}", lineterminator="\n")
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'


def test_new_style_scientific_format():
    """The ``e`` presentation type is honoured for new-style templates."""
    frame = DataFrame({"A": [0.000123, 0.000456]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.23e-04\n1,4.56e-04\n"


def test_new_style_with_nan():
    """Missing values are written as ``na_rep``, not run through the template."""
    frame = DataFrame({"A": [1.23, np.nan, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}", na_rep="NA", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23\n1,NA\n2,4.56\n"


def test_new_style_with_mixed_types():
    """A string column next to a float column is written unchanged."""
    frame = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,x\n1,4.56,y\n"


def test_new_style_with_mixed_types_in_column():
    """A column mixing floats and text is expected to warn and keep the text."""
    frame = DataFrame({"A": [1.23, "text", 4.56]})
    warning_msg = "could not convert string to float"
    with tm.assert_produces_warning(UserWarning, match=warning_msg):
        csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")

    assert csv_text == ",A\n0,1.23\n1,text\n2,4.56\n"


def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    msg = "Invalid new-style format string '{:.2f"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format="{:.2f")


def test_invalid_new_style_format_specifier():
    """An unknown presentation type is rejected with ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    msg = "Invalid new-style format string '{:.2z}'"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format="{:.2z}")


def test_old_style_format_compatibility():
    """Old-style ``%`` templates still format floats."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="%.2f", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"


def test_callable_float_format_compatibility():
    """A callable ``float_format`` is applied to each float."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})

    def with_thousands(x):
        return f"{x:,.2f}"

    csv_text = frame.to_csv(float_format=with_thousands, lineterminator="\n")
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'


def test_no_float_format():
    """``float_format=None`` leaves the float values as they are."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format=None, lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,4.56\n"


def test_large_numbers():
    """Very large magnitudes: the second value is expected to be written as ``inf``."""
    frame = DataFrame({"A": [1e308, 2e308]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.00e+308\n1,inf\n"


def test_zero_and_negative():
    """The ``+`` sign flag applies to zero and to negative values."""
    frame = DataFrame({"A": [0.0, -1.23456]})
    csv_text = frame.to_csv(float_format="{:+.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,+0.00\n1,-1.23\n"


def test_unicode_format():
    """Non-ASCII literal text in the template is carried into the output."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}€", encoding="utf-8", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23€\n1,4.56€\n"


def test_empty_dataframe():
    """With no rows, only the header line is written."""
    frame = DataFrame({"A": []})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n"


def test_multi_column_float():
    """The template is applied to every float column."""
    frame = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"


def test_invalid_float_format_type():
    """A ``float_format`` that is neither a string nor a callable raises."""
    frame = DataFrame({"A": [1.23]})
    msg = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format=123)


def test_new_style_with_inf():
    """Infinities are written as ``inf``/``-inf`` rather than as ``na_rep``."""
    frame = DataFrame({"A": [1.23, np.inf, -np.inf]})
    csv_text = frame.to_csv(
        float_format="{:.2f}", na_rep="NA", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23\n1,inf\n2,-inf\n"


def test_new_style_with_precision_edge():
    """A precision longer than the literal pads the output with zeros."""
    frame = DataFrame({"A": [1.23456789]})
    csv_text = frame.to_csv(float_format="{:.10f}", lineterminator="\n")
    assert csv_text == ",A\n0,1.2345678900\n"


def test_new_style_with_template():
    """Literal text around the replacement field is kept in the output."""
    frame = DataFrame({"A": [1234.56789]})
    csv_text = frame.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
    assert csv_text == ',A\n0,"Value: 1,234.57"\n'
Loading