class ToCSVFloatFormatVariants(BaseIO):
    """
    Benchmark ``DataFrame.to_csv`` for each accepted kind of ``float_format``.

    Every ``time_*`` method writes the same frame to ``fname`` and differs
    only in the ``float_format`` argument it passes.
    """

    fname = "__test__.csv"

    def setup(self):
        # Seeded generator so each run writes the same 1000 x 1000 float frame.
        rng = np.random.default_rng(seed=42)
        values = rng.random((1000, 1000))
        self.df = DataFrame(values)

    def time_old_style_percent_format(self):
        # printf-style template string.
        self.df.to_csv(self.fname, float_format="%.6f")

    def time_new_style_brace_format(self):
        # ``str.format``-style template string.
        self.df.to_csv(self.fname, float_format="{:.6f}")

    def time_new_style_thousands_format(self):
        # ``str.format``-style template using the ``,`` grouping option.
        self.df.to_csv(self.fname, float_format="{:,.2f}")

    def time_callable_format(self):
        # User-supplied callable applied to each value.
        self.df.to_csv(self.fname, float_format=lambda x: f"{x:.6f}")
(:issue:`49580`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 097e508d4889a..7e0900f64b6bf 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -454,7 +454,7 @@ def __init__( self.na_rep = na_rep self.formatters = self._initialize_formatters(formatters) self.justify = self._initialize_justify(justify) - self.float_format = float_format + self.float_format = self._validate_float_format(float_format) self.sparsify = self._initialize_sparsify(sparsify) self.show_index_names = index_names self.decimal = decimal @@ -849,6 +849,29 @@ def _get_column_name_list(self) -> list[Hashable]: names.append("" if columns.name is None else columns.name) return names + def _validate_float_format( + self, fmt: FloatFormatType | None + ) -> FloatFormatType | None: + """ + Validates and processes the float_format argument. + Converts new-style format strings to callables. 
+ """ + if fmt is None or callable(fmt): + return fmt + + if isinstance(fmt, str): + if "%" in fmt: + # Keeps old-style format strings as they are (C code handles them) + return fmt + else: + try: + _ = fmt.format(1.0) # Test with an arbitrary float + return fmt.format + except (ValueError, KeyError, IndexError) as e: + raise ValueError(f"Invalid new-style format string {fmt!r}") from e + + raise ValueError("float_format must be a string or callable") + class DataFrameRenderer: """Class for creating dataframe output in multiple formats. diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 6d762fdeb8d79..dd2d85c4755af 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -741,3 +741,140 @@ def test_to_csv_iterative_compression_buffer(compression): pd.read_csv(buffer, compression=compression, index_col=0), df ) assert not buffer.closed + + +def test_new_style_float_format_basic(): + df = DataFrame({"A": [1234.56789, 9876.54321]}) + result = df.to_csv(float_format="{:.2f}", lineterminator="\n") + expected = ",A\n0,1234.57\n1,9876.54\n" + assert result == expected + + +def test_new_style_float_format_thousands(): + df = DataFrame({"A": [1234.56789, 9876.54321]}) + result = df.to_csv(float_format="{:,.2f}", lineterminator="\n") + expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n' + assert result == expected + + +def test_new_style_scientific_format(): + df = DataFrame({"A": [0.000123, 0.000456]}) + result = df.to_csv(float_format="{:.2e}", lineterminator="\n") + expected = ",A\n0,1.23e-04\n1,4.56e-04\n" + assert result == expected + + +def test_new_style_with_nan(): + df = DataFrame({"A": [1.23, np.nan, 4.56]}) + result = df.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n") + expected = ",A\n0,1.23\n1,NA\n2,4.56\n" + assert result == expected + + +def test_new_style_with_mixed_types(): + df = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]}) + result = 
def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with a ValueError."""
    import re

    df = DataFrame({"A": [1.23]})
    # ``match`` is a regular expression; escape the expected text so the
    # brace and dot of the format string are compared literally instead of
    # being interpreted as regex syntax.
    msg = re.escape("Invalid new-style format string '{:.2f")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2f")
def test_empty_dataframe():
    """A frame whose only column is empty writes just the header row."""
    empty = DataFrame({"A": []})
    csv_text = empty.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n"