diff --git a/Cargo.lock b/Cargo.lock index 2c98c5ce20ad..a088005a0f19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1818,7 +1818,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -1890,7 +1890,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion", @@ -1914,7 +1914,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -1938,7 +1938,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -1959,7 +1959,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1991,7 +1991,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2019,7 +2019,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "48.0.0" +version = "49.0.0" dependencies = [ "futures", "log", @@ -2028,7 +2028,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-compression", @@ -2063,7 +2063,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "48.0.0" +version = "49.0.0" dependencies = [ "apache-avro", "arrow", @@ -2088,7 +2088,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2111,7 +2111,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2134,7 +2134,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" 
-version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2166,11 +2166,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "48.0.0" +version = "49.0.0" [[package]] name = "datafusion-examples" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2201,7 +2201,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "chrono", @@ -2220,7 +2220,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2243,7 +2243,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2254,7 +2254,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "48.0.0" +version = "49.0.0" dependencies = [ "abi_stable", "arrow", @@ -2275,7 +2275,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2304,7 +2304,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2325,7 +2325,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2338,7 +2338,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2360,7 +2360,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2374,7 +2374,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2390,7 
+2390,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2398,7 +2398,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "48.0.0" +version = "49.0.0" dependencies = [ "datafusion-expr", "quote", @@ -2407,7 +2407,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2434,7 +2434,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2459,7 +2459,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2471,7 +2471,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2492,7 +2492,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2528,7 +2528,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "chrono", @@ -2550,7 +2550,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2563,7 +2563,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-schema", @@ -2582,7 +2582,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2604,7 +2604,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", 
"criterion", @@ -2620,7 +2620,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2644,7 +2644,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2677,7 +2677,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "48.0.0" +version = "49.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2697,7 +2697,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "48.0.0" +version = "49.0.0" dependencies = [ "chrono", "console_error_panic_hook", diff --git a/Cargo.toml b/Cargo.toml index 7f5c79ae3a57..11cd3c637a97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.85.1" # Define DataFusion version -version = "48.0.0" +version = "49.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -110,41 +110,41 @@ chrono = { version = "0.4.41", default-features = false } criterion = "0.5.1" ctor = "0.4.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "48.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "48.0.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "48.0.0" } -datafusion-common = { path = "datafusion/common", version = "48.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "48.0.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "48.0.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "48.0.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = 
"48.0.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "48.0.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "48.0.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "48.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "48.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "48.0.0" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "48.0.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "48.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "48.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "48.0.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "48.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "48.0.0" } -datafusion-functions-table = { path = "datafusion/functions-table", version = "48.0.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "48.0.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "48.0.0" } -datafusion-macros = { path = "datafusion/macros", version = "48.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "48.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "48.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "48.0.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "48.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "48.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "48.0.0" } -datafusion-proto-common = { 
path = "datafusion/proto-common", version = "48.0.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "48.0.0" } -datafusion-session = { path = "datafusion/session", version = "48.0.0" } -datafusion-spark = { path = "datafusion/spark", version = "48.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "48.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "48.0.0" } +datafusion = { path = "datafusion/core", version = "49.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "49.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "49.0.0" } +datafusion-common = { path = "datafusion/common", version = "49.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "49.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "49.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "49.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "49.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "49.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "49.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "49.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "49.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "49.0.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "49.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "49.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "49.0.0" } 
+datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "49.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "49.0.0" } +datafusion-functions-table = { path = "datafusion/functions-table", version = "49.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "49.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "49.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "49.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "49.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "49.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "49.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "49.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "49.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "49.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "49.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "49.0.0" } +datafusion-session = { path = "datafusion/session", version = "49.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "49.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "49.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "49.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/dev/changelog/49.0.0.md b/dev/changelog/49.0.0.md new file mode 100644 index 000000000000..c30bfaf3ea65 --- /dev/null +++ b/dev/changelog/49.0.0.md @@ -0,0 +1,373 @@ + + +# Apache DataFusion 49.0.0 Changelog + +This release consists of 251 commits from 71 contributors. 
See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. + +**Breaking changes:** + +- feat: add metadata to literal expressions [#16170](https://github.com/apache/datafusion/pull/16170) (timsaucer) +- [MAJOR] Equivalence System Overhaul [#16217](https://github.com/apache/datafusion/pull/16217) (ozankabak) +- remove unused methods in SortExec [#16457](https://github.com/apache/datafusion/pull/16457) (adriangb) +- Move Pruning Logic to a Dedicated datafusion-pruning Crate for Improved Modularity [#16549](https://github.com/apache/datafusion/pull/16549) (kosiew) +- Fix type of ExecutionOptions::time_zone [#16569](https://github.com/apache/datafusion/pull/16569) (findepi) +- Convert Option> to Vec [#16615](https://github.com/apache/datafusion/pull/16615) (ViggoC) +- Refactor error handling to use boxed errors for DataFusionError variants [#16672](https://github.com/apache/datafusion/pull/16672) (kosiew) +- Reuse Rows allocation in RowCursorStream [#16647](https://github.com/apache/datafusion/pull/16647) (Dandandan) +- refactor: shrink `SchemaError` [#16653](https://github.com/apache/datafusion/pull/16653) (crepererum) +- Remove unused AggregateUDF struct [#16683](https://github.com/apache/datafusion/pull/16683) (ViggoC) + +**Performance related:** + +- Add late pruning of Parquet files based on file level statistics [#16014](https://github.com/apache/datafusion/pull/16014) (adriangb) +- Add fast paths for try_process_unnest [#16389](https://github.com/apache/datafusion/pull/16389) (simonvandel) +- Set the default value of `datafusion.execution.collect_statistics` to `true` [#16447](https://github.com/apache/datafusion/pull/16447) (AdamGS) +- Perf: Optimize CursorValues compare performance for StringViewArray (1.4X faster for sort-tpch Q11) [#16509](https://github.com/apache/datafusion/pull/16509) (zhuqi-lucas) +- 
Simplify predicates in `PushDownFilter` optimizer rule [#16362](https://github.com/apache/datafusion/pull/16362) (xudong963) +- optimize `ScalarValue::to_array_of_size` for structural types [#16706](https://github.com/apache/datafusion/pull/16706) (ding-young) +- Refactor filter pushdown APIs to enable joins to pass through filters [#16732](https://github.com/apache/datafusion/pull/16732) (adriangb) +- perf: Optimize hash joins with an empty build side [#16716](https://github.com/apache/datafusion/pull/16716) (nuno-faria) +- Per file filter evaluation [#15057](https://github.com/apache/datafusion/pull/15057) (adriangb) + +**Implemented enhancements:** + +- feat: Support defining custom MetricValues in PhysicalPlans [#16195](https://github.com/apache/datafusion/pull/16195) (sfluor) +- feat: Allow cancelling of grouping operations which are CPU bound [#16196](https://github.com/apache/datafusion/pull/16196) (zhuqi-lucas) +- feat: support FixedSizeList for array_has [#16333](https://github.com/apache/datafusion/pull/16333) (chenkovsky) +- feat: Support tpch and tpch10 benchmark for csv format [#16373](https://github.com/apache/datafusion/pull/16373) (zhuqi-lucas) +- feat: Support RightMark join for NestedLoop and Hash join [#16083](https://github.com/apache/datafusion/pull/16083) (jonathanc-n) +- feat: mapping sql Char/Text/String default to Utf8View [#16290](https://github.com/apache/datafusion/pull/16290) (zhuqi-lucas) +- feat: support fixed size list for array reverse [#16423](https://github.com/apache/datafusion/pull/16423) (chenkovsky) +- feat: add SchemaProvider::table_type(table_name: &str) [#16401](https://github.com/apache/datafusion/pull/16401) (epgif) +- feat: derive `Debug` and `Clone` for `ScalarFunctionArgs` [#16471](https://github.com/apache/datafusion/pull/16471) (crepererum) +- feat: support `map_entries` builtin function [#16557](https://github.com/apache/datafusion/pull/16557) (comphead) +- feat: add `array_min` scalar function and associated tests 
[#16574](https://github.com/apache/datafusion/pull/16574) (dharanad) +- feat: Finalize support for `RightMark` join + `Mark` join swap [#16488](https://github.com/apache/datafusion/pull/16488) (jonathanc-n) +- feat: Parquet modular encryption [#16351](https://github.com/apache/datafusion/pull/16351) (corwinjoy) +- feat: Support `u32` indices for `HashJoinExec` [#16434](https://github.com/apache/datafusion/pull/16434) (jonathanc-n) +- feat: expose intersect distinct/except distinct in dataframe api [#16578](https://github.com/apache/datafusion/pull/16578) (chenkovsky) +- feat: Add a configuration to make parquet encryption optional [#16649](https://github.com/apache/datafusion/pull/16649) (corwinjoy) + +**Fixed bugs:** + +- fix: preserve null_equals_null flag in eliminate_cross_join rule [#16356](https://github.com/apache/datafusion/pull/16356) (waynexia) +- fix: Fix SparkSha2 to be compliant with Spark response and add support for Int32 [#16350](https://github.com/apache/datafusion/pull/16350) (rishvin) +- fix: Fixed error handling for `generate_series/range` [#16391](https://github.com/apache/datafusion/pull/16391) (jonathanc-n) +- fix: Enable WASM compilation by making sqlparser's recursive-protection optional [#16418](https://github.com/apache/datafusion/pull/16418) (jonmmease) +- fix: create file for empty stream [#16342](https://github.com/apache/datafusion/pull/16342) (chenkovsky) +- fix: document and fix macro hygiene for `config_field!` [#16473](https://github.com/apache/datafusion/pull/16473) (crepererum) +- fix: make `with_new_state` a trait method for `ExecutionPlan` [#16469](https://github.com/apache/datafusion/pull/16469) (geoffreyclaude) +- fix: column indices in FFI partition evaluator [#16480](https://github.com/apache/datafusion/pull/16480) (timsaucer) +- fix: support within_group [#16538](https://github.com/apache/datafusion/pull/16538) (chenkovsky) +- fix: disallow specify both order_by and within_group 
[#16606](https://github.com/apache/datafusion/pull/16606) (watchingthewheelsgo) +- fix: format within_group error message [#16613](https://github.com/apache/datafusion/pull/16613) (watchingthewheelsgo) +- fix: reserved keywords in qualified column names [#16584](https://github.com/apache/datafusion/pull/16584) (crepererum) +- fix: support scalar function nested in get_field in Unparser [#16610](https://github.com/apache/datafusion/pull/16610) (chenkovsky) +- fix: sqllogictest runner label condition mismatch [#16633](https://github.com/apache/datafusion/pull/16633) (lliangyu-lin) +- fix: port arrow inline fast key fix to datafusion [#16698](https://github.com/apache/datafusion/pull/16698) (zhuqi-lucas) +- fix: try to lower plain reserved functions to columns as well [#16669](https://github.com/apache/datafusion/pull/16669) (crepererum) +- fix: Fix CI failing due to #16686 [#16718](https://github.com/apache/datafusion/pull/16718) (jonathanc-n) +- fix: return NULL if any of the param to make_date is NULL [#16759](https://github.com/apache/datafusion/pull/16759) (feniljain) +- fix: add `order_requirement` & `dist_requirement` to `OutputRequirementExec` display [#16726](https://github.com/apache/datafusion/pull/16726) (Loaki07) +- fix: support nullable columns in pre-sorted data sources [#16783](https://github.com/apache/datafusion/pull/16783) (crepererum) + +**Documentation updates:** + +- Minor: Add upgrade guide for `Expr::WindowFunction` [#16313](https://github.com/apache/datafusion/pull/16313) (alamb) +- Fix `array_position` on empty list [#16292](https://github.com/apache/datafusion/pull/16292) (Blizzara) +- Fix: mark "Spilling (to disk) Joins" as supported in features [#16343](https://github.com/apache/datafusion/pull/16343) (kosiew) +- Fix cp_solver doc formatting [#16352](https://github.com/apache/datafusion/pull/16352) (xudong963) +- docs: Expand `MemoryPool` docs with related structs [#16289](https://github.com/apache/datafusion/pull/16289) (2010YOUY01) +- 
Support datafusion-cli access to public S3 buckets that do not require authentication [#16300](https://github.com/apache/datafusion/pull/16300) (alamb) +- Document Table Constraint Enforcement Behavior in Custom Table Providers Guide [#16340](https://github.com/apache/datafusion/pull/16340) (kosiew) +- doc: Add SQL examples for SEMI + ANTI Joins [#16316](https://github.com/apache/datafusion/pull/16316) (jonathanc-n) +- [datafusion-spark] Example of using Spark compatible function library [#16384](https://github.com/apache/datafusion/pull/16384) (alamb) +- Add note in upgrade guide about changes to `Expr::Scalar` in 48.0.0 [#16360](https://github.com/apache/datafusion/pull/16360) (alamb) +- Update PMC management instructions to follow new ASF process [#16417](https://github.com/apache/datafusion/pull/16417) (alamb) +- Add design process section to the docs [#16397](https://github.com/apache/datafusion/pull/16397) (alamb) +- Unify Metadata Handing: use `FieldMetadata` in `Expr::Alias` and `ExprSchemable` [#16320](https://github.com/apache/datafusion/pull/16320) (alamb) +- TopK dynamic filter pushdown attempt 2 [#15770](https://github.com/apache/datafusion/pull/15770) (adriangb) +- Update Roadmap documentation [#16399](https://github.com/apache/datafusion/pull/16399) (alamb) +- doc: Add comments to clarify algorithm for `MarkJoin`s [#16436](https://github.com/apache/datafusion/pull/16436) (jonathanc-n) +- Add compression option to SpillManager [#16268](https://github.com/apache/datafusion/pull/16268) (ding-young) +- Redirect user defined function webpage [#16475](https://github.com/apache/datafusion/pull/16475) (alamb) +- Use Tokio's task budget consistently, better APIs to support task cancellation [#16398](https://github.com/apache/datafusion/pull/16398) (pepijnve) +- doc: upgrade guide for new compression option for spill files [#16472](https://github.com/apache/datafusion/pull/16472) (2010YOUY01) +- Introduce Async User Defined Functions 
[#14837](https://github.com/apache/datafusion/pull/14837) (goldmedal) +- Minor: Add more links to cooperative / scheduling docs [#16484](https://github.com/apache/datafusion/pull/16484) (alamb) +- doc: Document DESCRIBE comman in ddl.md [#16524](https://github.com/apache/datafusion/pull/16524) (krikera) +- Add more doc for physical filter pushdown [#16504](https://github.com/apache/datafusion/pull/16504) (xudong963) +- chore: fix CI failures on `ddl.md` [#16526](https://github.com/apache/datafusion/pull/16526) (comphead) +- Add some comments about adding new dependencies in datafusion-sql [#16543](https://github.com/apache/datafusion/pull/16543) (alamb) +- Add note for planning release in Upgrade Guides [#16534](https://github.com/apache/datafusion/pull/16534) (xudong963) +- Consolidate configuration sections in docs [#16544](https://github.com/apache/datafusion/pull/16544) (alamb) +- Minor: add clearer link to the main website from intro paragraph. [#16556](https://github.com/apache/datafusion/pull/16556) (alamb) +- Simplify AsyncScalarUdfImpl so it extends ScalarUdfImpl [#16523](https://github.com/apache/datafusion/pull/16523) (alamb) +- docs: Minor grammatical fixes for the scalar UDF docs [#16618](https://github.com/apache/datafusion/pull/16618) (ianthetechie) +- Implementation for regex_instr [#15928](https://github.com/apache/datafusion/pull/15928) (nirnayroy) +- Update Upgrade Guide for 48.0.1 [#16699](https://github.com/apache/datafusion/pull/16699) (alamb) +- ensure MemTable has at least one partition [#16754](https://github.com/apache/datafusion/pull/16754) (waynexia) +- Restore custom SchemaAdapter functionality for Parquet [#16791](https://github.com/apache/datafusion/pull/16791) (adriangb) +- Update `upgrading.md` for new unified config for sql string mapping to utf8view [#16809](https://github.com/apache/datafusion/pull/16809) (zhuqi-lucas) +- docs: Remove reference to forthcoming example (#16817) 
[#16818](https://github.com/apache/datafusion/pull/16818) (m09526) + +**Other:** + +- chore(deps): bump sqllogictest from 0.28.2 to 0.28.3 [#16286](https://github.com/apache/datafusion/pull/16286) (dependabot[bot]) +- chore(deps-dev): bump webpack-dev-server from 4.15.1 to 5.2.1 in /datafusion/wasmtest/datafusion-wasm-app [#16253](https://github.com/apache/datafusion/pull/16253) (dependabot[bot]) +- Improve DataFusion subcrate readme files [#16263](https://github.com/apache/datafusion/pull/16263) (alamb) +- Fix intermittent SQL logic test failure in limit.slt by adding ORDER BY clause [#16257](https://github.com/apache/datafusion/pull/16257) (kosiew) +- Extend benchmark comparison script with more detailed statistics [#16262](https://github.com/apache/datafusion/pull/16262) (pepijnve) +- chore(deps): bump flate2 from 1.1.1 to 1.1.2 [#16338](https://github.com/apache/datafusion/pull/16338) (dependabot[bot]) +- chore(deps): bump petgraph from 0.8.1 to 0.8.2 [#16337](https://github.com/apache/datafusion/pull/16337) (dependabot[bot]) +- chore(deps): bump substrait from 0.56.0 to 0.57.0 [#16143](https://github.com/apache/datafusion/pull/16143) (dependabot[bot]) +- Add test for ordering of predicate pushdown into parquet [#16169](https://github.com/apache/datafusion/pull/16169) (adriangb) +- Fix distinct count for DictionaryArray to correctly account for nulls in values array [#16258](https://github.com/apache/datafusion/pull/16258) (kosiew) +- Fix inconsistent schema projection in ListingTable even when schema is specified [#16305](https://github.com/apache/datafusion/pull/16305) (kosiew) +- tpch: move reading of SQL queries out of timed span. 
[#16357](https://github.com/apache/datafusion/pull/16357) (pepijnve) +- chore(deps): bump clap from 4.5.39 to 4.5.40 [#16354](https://github.com/apache/datafusion/pull/16354) (dependabot[bot]) +- chore(deps): bump syn from 2.0.101 to 2.0.102 [#16355](https://github.com/apache/datafusion/pull/16355) (dependabot[bot]) +- Encapsulate metadata for literals on to a `FieldMetadata` structure [#16317](https://github.com/apache/datafusion/pull/16317) (alamb) +- Add support `UInt64` and other integer data types for `to_hex` [#16335](https://github.com/apache/datafusion/pull/16335) (tlm365) +- Document `copy_array_data` function with example [#16361](https://github.com/apache/datafusion/pull/16361) (alamb) +- Fix array_agg memory over use [#16346](https://github.com/apache/datafusion/pull/16346) (gabotechs) +- Update publish command [#16377](https://github.com/apache/datafusion/pull/16377) (xudong963) +- Add more context to error message for datafusion-cli config failure [#16379](https://github.com/apache/datafusion/pull/16379) (alamb) +- Fix: datafusion-sqllogictest 48.0.0 can't be published [#16376](https://github.com/apache/datafusion/pull/16376) (xudong963) +- bug: remove busy-wait while sort is ongoing [#16322](https://github.com/apache/datafusion/pull/16322) (pepijnve) +- chore: refactor Substrait consumer's "rename_field" and implement the rest of types [#16345](https://github.com/apache/datafusion/pull/16345) (Blizzara) +- chore(deps): bump object_store from 0.12.1 to 0.12.2 [#16368](https://github.com/apache/datafusion/pull/16368) (dependabot[bot]) +- Disable `datafusion-cli` tests for hash_collision tests, fix extended CI [#16382](https://github.com/apache/datafusion/pull/16382) (alamb) +- Fix array_concat with NULL arrays [#16348](https://github.com/apache/datafusion/pull/16348) (alexanderbianchi) +- Minor: add testing case for add YieldStreamExec and polish docs [#16369](https://github.com/apache/datafusion/pull/16369) (zhuqi-lucas) +- chore(deps): bump 
aws-config from 1.6.3 to 1.8.0 [#16394](https://github.com/apache/datafusion/pull/16394) (dependabot[bot]) +- fix typo in test file name [#16403](https://github.com/apache/datafusion/pull/16403) (adriangb) +- Add topk_tpch benchmark [#16410](https://github.com/apache/datafusion/pull/16410) (Dandandan) +- Reduce some cloning [#16404](https://github.com/apache/datafusion/pull/16404) (simonvandel) +- chore(deps): bump syn from 2.0.102 to 2.0.103 [#16393](https://github.com/apache/datafusion/pull/16393) (dependabot[bot]) +- Simplify expressions passed to table functions [#16388](https://github.com/apache/datafusion/pull/16388) (simonvandel) +- Minor: Clean-up `bench.sh` usage message [#16416](https://github.com/apache/datafusion/pull/16416) (2010YOUY01) +- chore(deps): bump rust_decimal from 1.37.1 to 1.37.2 [#16422](https://github.com/apache/datafusion/pull/16422) (dependabot[bot]) +- Migrate core test to insta, part1 [#16324](https://github.com/apache/datafusion/pull/16324) (Chen-Yuan-Lai) +- chore(deps): bump mimalloc from 0.1.46 to 0.1.47 [#16426](https://github.com/apache/datafusion/pull/16426) (dependabot[bot]) +- chore(deps): bump libc from 0.2.172 to 0.2.173 [#16421](https://github.com/apache/datafusion/pull/16421) (dependabot[bot]) +- Use dedicated NullEquality enum instead of null_equals_null boolean [#16419](https://github.com/apache/datafusion/pull/16419) (tobixdev) +- chore: generate basic spark function tests [#16409](https://github.com/apache/datafusion/pull/16409) (shehabgamin) +- Fix CI Failure: replace false with NullEqualsNothing [#16437](https://github.com/apache/datafusion/pull/16437) (ding-young) +- chore(deps): bump bzip2 from 0.5.2 to 0.6.0 [#16441](https://github.com/apache/datafusion/pull/16441) (dependabot[bot]) +- chore(deps): bump libc from 0.2.173 to 0.2.174 [#16440](https://github.com/apache/datafusion/pull/16440) (dependabot[bot]) +- Remove redundant license-header-check CI job [#16451](https://github.com/apache/datafusion/pull/16451) 
(alamb) +- Remove unused feature in `physical-plan` and fix compilation error in benchmark [#16449](https://github.com/apache/datafusion/pull/16449) (AdamGS) +- Temporarily fix bug in dynamic top-k optimization [#16465](https://github.com/apache/datafusion/pull/16465) (AdamGS) +- Ignore `sort_query_fuzzer_runner` [#16462](https://github.com/apache/datafusion/pull/16462) (blaginin) +- Revert "Ignore `sort_query_fuzzer_runner` (#16462)" [#16470](https://github.com/apache/datafusion/pull/16470) (2010YOUY01) +- Reapply "Ignore `sort_query_fuzzer_runner` (#16462)" (#16470) [#16485](https://github.com/apache/datafusion/pull/16485) (alamb) +- Fix constant window for evaluate stateful [#16430](https://github.com/apache/datafusion/pull/16430) (suibianwanwank) +- Use UDTF name in logical plan table scan [#16468](https://github.com/apache/datafusion/pull/16468) (Jeadie) +- refactor reassign_predicate_columns to accept an &Schema instead of &Arc [#16499](https://github.com/apache/datafusion/pull/16499) (adriangb) +- re-enable `sort_query_fuzzer_runner` [#16491](https://github.com/apache/datafusion/pull/16491) (adriangb) +- Example for using a separate threadpool for CPU bound work (try 3) [#16331](https://github.com/apache/datafusion/pull/16331) (alamb) +- chore(deps): bump syn from 2.0.103 to 2.0.104 [#16507](https://github.com/apache/datafusion/pull/16507) (dependabot[bot]) +- use 'lit' as the field name for literal values [#16498](https://github.com/apache/datafusion/pull/16498) (adriangb) +- [datafusion-spark] Implement `factorical` function [#16125](https://github.com/apache/datafusion/pull/16125) (tlm365) +- Add DESC alias for DESCRIBE command. 
[#16514](https://github.com/apache/datafusion/pull/16514) (lucqui) +- Split clickbench query set into one file per query [#16476](https://github.com/apache/datafusion/pull/16476) (pepijnve) +- Support query filter on all benchmarks [#16477](https://github.com/apache/datafusion/pull/16477) (pepijnve) +- `TableProvider` to skip files in the folder which non relevant to selected reader [#16487](https://github.com/apache/datafusion/pull/16487) (comphead) +- Reuse `BaselineMetrics` in `UnnestMetrics` [#16497](https://github.com/apache/datafusion/pull/16497) (hendrikmakait) +- Fix array_has to return false for empty arrays instead of null [#16529](https://github.com/apache/datafusion/pull/16529) (kosiew) +- Minor: Add documentation to `AggregateWindowExpr::get_result_column` [#16479](https://github.com/apache/datafusion/pull/16479) (alamb) +- Fix WindowFrame::new with order_by [#16537](https://github.com/apache/datafusion/pull/16537) (findepi) +- chore(deps): bump object_store from 0.12.1 to 0.12.2 [#16548](https://github.com/apache/datafusion/pull/16548) (dependabot[bot]) +- chore(deps): bump mimalloc from 0.1.46 to 0.1.47 [#16547](https://github.com/apache/datafusion/pull/16547) (dependabot[bot]) +- Add support for Arrow Duration type in Substrait [#16503](https://github.com/apache/datafusion/pull/16503) (jkosh44) +- Allow unparser to override the alias name for the specific dialect [#16540](https://github.com/apache/datafusion/pull/16540) (goldmedal) +- Avoid clones when calling find_window_exprs [#16551](https://github.com/apache/datafusion/pull/16551) (findepi) +- Update `spilled_bytes` metric to reflect actual disk usage [#16535](https://github.com/apache/datafusion/pull/16535) (ding-young) +- adapt filter expressions to file schema during parquet scan [#16461](https://github.com/apache/datafusion/pull/16461) (adriangb) +- datafusion-cli: Use correct S3 region if it is not specified [#16502](https://github.com/apache/datafusion/pull/16502) (liamzwbao) +- Add nested 
struct casting support and integrate into SchemaAdapter [#16371](https://github.com/apache/datafusion/pull/16371) (kosiew) +- Improve err message grammar [#16566](https://github.com/apache/datafusion/pull/16566) (findepi) +- refactor: move PruningPredicate into its own module [#16587](https://github.com/apache/datafusion/pull/16587) (adriangb) +- chore(deps): bump indexmap from 2.9.0 to 2.10.0 [#16582](https://github.com/apache/datafusion/pull/16582) (dependabot[bot]) +- Skip re-pruning based on partition values and file level stats if there are no dynamic filters [#16424](https://github.com/apache/datafusion/pull/16424) (adriangb) +- Support timestamp and date arguments for `range` and `generate_series` table functions [#16552](https://github.com/apache/datafusion/pull/16552) (simonvandel) +- Fix normalization of columns in JOIN ... USING. [#16560](https://github.com/apache/datafusion/pull/16560) (brunal) +- Revert Finalize support for `RightMark` join + `Mark` join [#16597](https://github.com/apache/datafusion/pull/16597) (comphead) +- move min_batch/max_batch to functions-aggregate-common [#16593](https://github.com/apache/datafusion/pull/16593) (adriangb) +- Allow usage of table functions in relations [#16571](https://github.com/apache/datafusion/pull/16571) (osipovartem) +- Update to arrow/parquet 55.2.0 [#16575](https://github.com/apache/datafusion/pull/16575) (alamb) +- Improve field naming in first_value, last_value implementation [#16631](https://github.com/apache/datafusion/pull/16631) (findepi) +- Fix spurious failure in convert_batches test helper [#16627](https://github.com/apache/datafusion/pull/16627) (findepi) +- Aggregate UDF cleanup [#16628](https://github.com/apache/datafusion/pull/16628) (findepi) +- Avoid treating incomparable scalars as equal [#16624](https://github.com/apache/datafusion/pull/16624) (findepi) +- restore topk pre-filtering of batches and make sort query fuzzer less sensitive to expected non determinism 
[#16501](https://github.com/apache/datafusion/pull/16501) (alamb) +- Add support for Arrow Time types in Substrait [#16558](https://github.com/apache/datafusion/pull/16558) (jkosh44) +- chore(deps): bump substrait from 0.57.0 to 0.58.0 [#16640](https://github.com/apache/datafusion/pull/16640) (dependabot[bot]) +- Support explain tree format debug for benchmark debug [#16604](https://github.com/apache/datafusion/pull/16604) (zhuqi-lucas) +- Add microbenchmark for spilling with compression [#16512](https://github.com/apache/datafusion/pull/16512) (ding-young) +- Fix parquet filter_pushdown: respect parquet filter pushdown config in scan [#16646](https://github.com/apache/datafusion/pull/16646) (adriangb) +- chore(deps): bump aws-config from 1.8.0 to 1.8.1 [#16651](https://github.com/apache/datafusion/pull/16651) (dependabot[bot]) +- Migrate core test to insta, part 2 [#16617](https://github.com/apache/datafusion/pull/16617) (Chen-Yuan-Lai) +- Update all spark SLT files [#16637](https://github.com/apache/datafusion/pull/16637) (findepi) +- Add PhysicalExpr optimizer and cast unwrapping [#16530](https://github.com/apache/datafusion/pull/16530) (adriangb) +- benchmark: Support sort_tpch10 for benchmark [#16671](https://github.com/apache/datafusion/pull/16671) (zhuqi-lucas) +- chore(deps): bump tokio from 1.45.1 to 1.46.0 [#16666](https://github.com/apache/datafusion/pull/16666) (dependabot[bot]) +- Fix TopK Sort incorrectly pushed down past Join with anti join [#16641](https://github.com/apache/datafusion/pull/16641) (zhuqi-lucas) +- Improve error message when ScalarValue fails to cast array [#16670](https://github.com/apache/datafusion/pull/16670) (findepi) +- Add an example of embedding indexes inside a parquet file [#16395](https://github.com/apache/datafusion/pull/16395) (zhuqi-lucas) +- `datafusion-cli`: Refactor statement execution logic [#16634](https://github.com/apache/datafusion/pull/16634) (liamzwbao) +- Add SchemaAdapterFactory Support for ListingTable with 
Schema Evolution and Mapping [#16583](https://github.com/apache/datafusion/pull/16583) (kosiew) +- Perf: fast CursorValues compare for StringViewArray using `inline_key_…` [#16630](https://github.com/apache/datafusion/pull/16630) (zhuqi-lucas) +- Update to Rust 1.88 [#16663](https://github.com/apache/datafusion/pull/16663) (melroy12) +- Refactor StreamJoinMetrics to reuse BaselineMetrics [#16674](https://github.com/apache/datafusion/pull/16674) (Standing-Man) +- chore: refactor `BuildProbeJoinMetrics` to use `BaselineMetrics` [#16500](https://github.com/apache/datafusion/pull/16500) (Samyak2) +- Use compression type in CSV file suffixes [#16609](https://github.com/apache/datafusion/pull/16609) (theirix) +- Clarify the generality of the embedded parquet index [#16692](https://github.com/apache/datafusion/pull/16692) (alamb) +- Refactor SortMergeJoinMetrics to reuse BaselineMetrics [#16675](https://github.com/apache/datafusion/pull/16675) (Standing-Man) +- Add support for Arrow Dictionary type in Substrait [#16608](https://github.com/apache/datafusion/pull/16608) (jkosh44) +- Fix duplicate field name error in Join::try_new_with_project_input during physical planning [#16454](https://github.com/apache/datafusion/pull/16454) (LiaCastaneda) +- chore(deps): bump tokio from 1.46.0 to 1.46.1 [#16700](https://github.com/apache/datafusion/pull/16700) (dependabot[bot]) +- Add reproducer for tpch Q16 deserialization bug [#16662](https://github.com/apache/datafusion/pull/16662) (NGA-TRAN) +- Minor: Update release instructions [#16701](https://github.com/apache/datafusion/pull/16701) (alamb) +- refactor filter pushdown APIs [#16642](https://github.com/apache/datafusion/pull/16642) (adriangb) +- Add comments to ClickBench queries about setting binary_as_string [#16605](https://github.com/apache/datafusion/pull/16605) (alamb) +- minor: improve display output for FFI execution plans [#16713](https://github.com/apache/datafusion/pull/16713) (timsaucer) +- Revert "fix: create file for 
empty stream" [#16682](https://github.com/apache/datafusion/pull/16682) (brunal) +- Add the missing equivalence info for filter pushdown [#16686](https://github.com/apache/datafusion/pull/16686) (liamzwbao) +- Fix sqllogictests test running compatibility (ignore `--test-threads`) [#16694](https://github.com/apache/datafusion/pull/16694) (mjgarton) +- Fix: Make `CopyTo` logical plan output schema consistent with physical schema [#16705](https://github.com/apache/datafusion/pull/16705) (bert-beyondloops) +- chore(devcontainer): use debian's `protobuf-compiler` package [#16687](https://github.com/apache/datafusion/pull/16687) (fvj) +- Add link to upgrade guide in changelog script [#16680](https://github.com/apache/datafusion/pull/16680) (alamb) +- Improve display format of BoundedWindowAggExec [#16645](https://github.com/apache/datafusion/pull/16645) (geetanshjuneja) +- Bump the MSRV due to transitive dependencies [#16728](https://github.com/apache/datafusion/pull/16728) (rtyler) +- Fix: optimize projections for unnest logical plan. 
[#16632](https://github.com/apache/datafusion/pull/16632) (bert-beyondloops) +- Use the `test-threads` option in sqllogictests [#16722](https://github.com/apache/datafusion/pull/16722) (mjgarton) +- chore(deps): bump clap from 4.5.40 to 4.5.41 [#16735](https://github.com/apache/datafusion/pull/16735) (dependabot[bot]) +- chore: make more clarity for internal errors [#16741](https://github.com/apache/datafusion/pull/16741) (comphead) +- Remove parquet_filter and parquet `sort` benchmarks [#16730](https://github.com/apache/datafusion/pull/16730) (alamb) +- Perform type coercion for corr aggregate function [#15776](https://github.com/apache/datafusion/pull/15776) (kumarlokesh) +- Improve dictionary null handling in hashing and expand aggregate test coverage for nulls [#16466](https://github.com/apache/datafusion/pull/16466) (kosiew) +- Improve CI cache [#16709](https://github.com/apache/datafusion/pull/16709) (blaginin) +- Fix in list round trip in df proto [#16744](https://github.com/apache/datafusion/pull/16744) (XiangpengHao) +- chore: Make `GroupValues` and APIs on `PhysicalGroupBy` aggregation APIs public [#16733](https://github.com/apache/datafusion/pull/16733) (haohuaijin) +- Extend binary coercion rules to support Decimal arithmetic operations with integer (signed and unsigned) types [#16668](https://github.com/apache/datafusion/pull/16668) (jatin510) +- Support Type Coercion for NULL in Binary Arithmetic Expressions [#16761](https://github.com/apache/datafusion/pull/16761) (kosiew) +- chore(deps): bump chrono-tz from 0.10.3 to 0.10.4 [#16769](https://github.com/apache/datafusion/pull/16769) (dependabot[bot]) +- limit intermediate batch size in nested_loop_join [#16443](https://github.com/apache/datafusion/pull/16443) (UBarney) +- Add serialization/deserialization and round-trip tests for all tpc-h queries [#16742](https://github.com/apache/datafusion/pull/16742) (NGA-TRAN) +- Auto start testcontainers for `datafusion-cli` 
[#16644](https://github.com/apache/datafusion/pull/16644) (blaginin) +- Refactor BinaryTypeCoercer to Handle Null Coercion Early and Avoid Redundant Checks [#16768](https://github.com/apache/datafusion/pull/16768) (kosiew) +- Remove fixed version from MSRV check [#16786](https://github.com/apache/datafusion/pull/16786) (findepi) +- Add `clickbench_pushdown` benchmark [#16731](https://github.com/apache/datafusion/pull/16731) (alamb) +- add filter to handle backtrace [#16752](https://github.com/apache/datafusion/pull/16752) (geetanshjuneja) +- Support min/max aggregates for FixedSizeBinary type [#16765](https://github.com/apache/datafusion/pull/16765) (theirix) +- fix tests in page_pruning when filter pushdown is enabled by default [#16794](https://github.com/apache/datafusion/pull/16794) (XiangpengHao) +- Automatically split large single RecordBatches in `MemorySource` into smaller batches [#16734](https://github.com/apache/datafusion/pull/16734) (kosiew) +- CI: Fix slow join test [#16796](https://github.com/apache/datafusion/pull/16796) (2010YOUY01) +- Benchmark for char expression [#16743](https://github.com/apache/datafusion/pull/16743) (ajita-asthana) +- Add example of custom file schema casting rules [#16803](https://github.com/apache/datafusion/pull/16803) (adriangb) +- Fix discrepancy in Float64 to timestamp(9) casts for constants [#16639](https://github.com/apache/datafusion/pull/16639) (findepi) +- Fix: Preserve sorting for the COPY TO plan [#16785](https://github.com/apache/datafusion/pull/16785) (bert-beyondloops) +- chore(deps): bump object_store from 0.12.2 to 0.12.3 [#16807](https://github.com/apache/datafusion/pull/16807) (dependabot[bot]) +- Implement equals for stateful functions [#16781](https://github.com/apache/datafusion/pull/16781) (findepi) +- benchmark: Add parquet h2o support [#16804](https://github.com/apache/datafusion/pull/16804) (zhuqi-lucas) + +## Credits + +Thank you to everyone who contributed to this release. 
Here is a breakdown of commits (PRs merged) per contributor. + +``` + 32 Andrew Lamb + 26 dependabot[bot] + 19 Adrian Garcia Badaracco + 14 kosiew + 13 Piotr Findeisen + 13 Qi Zhu + 7 Jonathan Chen + 6 Chen Chongchen + 6 Marco Neumann + 6 Pepijn Van Eeckhoudt + 6 xudong.w + 5 Oleks V + 5 Yongting You + 5 ding-young + 4 Simon Vandel Sillesen + 3 Adam Gutglick + 3 Bert Vermeiren + 3 Dmitrii Blaginin + 3 Joseph Koshakow + 3 Liam Bao + 3 Tim Saucer + 2 Alan Tang + 2 Arttu + 2 Bruno + 2 Corwin Joy + 2 Daniël Heres + 2 Geetansh Juneja + 2 Ian Lai + 2 Jax Liu + 2 Martin Garton + 2 Nga Tran + 2 Ruihang Xia + 2 Tai Le Manh + 2 ViggoC + 2 Xiangpeng Hao + 2 haiywu + 2 theirix + 1 Ajeeta Asthana + 1 Artem Osipov + 1 Dharan Aditya + 1 Gabriel + 1 Geoffrey Claude + 1 Hendrik Makait + 1 Huaijin + 1 Ian Wagner + 1 Jack Eadie + 1 Jagdish Parihar + 1 Jon Mease + 1 Julius von Froreich + 1 K + 1 Leon Lin + 1 Loakesh Indiran + 1 Lokesh + 1 Lucas Earl + 1 Lía Adriana + 1 Mehmet Ozan Kabak + 1 Melroy dsilva + 1 Nirnay Roy + 1 Nuno Faria + 1 R. Tyler Croy + 1 Rishab Joshi + 1 Sami Tabet + 1 Samyak Sarnayak + 1 Shehab Amin + 1 Tobias Schwarzinger + 1 UBarney + 1 alexanderbianchi + 1 epgif + 1 feniljain + 1 m09526 + 1 suibianwanwan +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index c618aa18c231..9ac1c59caa80 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -70,7 +70,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting max_statistics_size is deprecated, currently it is not being used | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 48.0.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 49.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | NULL | (writing) Sets statictics truncate length. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page |