
Commit 98070a5

fix: arrow schema not constructed with metadata in unrelated test
1 parent: e24406b

File tree

3 files changed: +33 -7 lines

crates/iceberg/src/arrow/value.rs

Lines changed: 3 additions & 0 deletions
@@ -436,6 +436,7 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
                 "The schema partner is not a struct type",
             ));
         }
+
         Ok(schema_partner)
     }
 
@@ -453,6 +454,7 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
                 "The struct partner is not a struct array",
             )
         })?;
+
         let field_pos = struct_array
             .fields()
             .iter()
@@ -467,6 +469,7 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
                 format!("Field id {} not found in struct array", field.id),
             )
         })?;
+
         Ok(struct_array.column(field_pos))
     }
 

crates/iceberg/src/writer/base_writer/data_file_writer.rs

Lines changed: 18 additions & 4 deletions
@@ -98,11 +98,13 @@ impl<B: FileWriterBuilder> CurrentFileStatus for DataFileWriter<B> {
 
 #[cfg(test)]
 mod test {
+    use std::collections::HashMap;
     use std::sync::Arc;
 
     use arrow_array::{Int32Array, StringArray};
     use arrow_schema::{DataType, Field};
     use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
+    use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
     use parquet::file::properties::WriterProperties;
     use tempfile::TempDir;
 
@@ -145,8 +147,14 @@ mod test {
         let mut data_file_writer = DataFileWriterBuilder::new(pw, None).build().await.unwrap();
 
         let arrow_schema = arrow_schema::Schema::new(vec![
-            Field::new("foo", DataType::Int32, false),
-            Field::new("bar", DataType::Utf8, false),
+            Field::new("foo", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                3.to_string(),
+            )])),
+            Field::new("bar", DataType::Utf8, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                4.to_string(),
+            )])),
         ]);
         let batch = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![
             Arc::new(Int32Array::from(vec![1, 2, 3])),
@@ -216,8 +224,14 @@ mod test {
             .await?;
 
         let arrow_schema = arrow_schema::Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, false),
+            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                5.to_string(),
+            )])),
+            Field::new("name", DataType::Utf8, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                6.to_string(),
+            )])),
         ]);
         let batch = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![
             Arc::new(Int32Array::from(vec![1, 2, 3])),
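
For context: the pattern these test changes apply is attaching a PARQUET_FIELD_ID_META_KEY metadata entry to each Arrow field so that consumers can map Arrow columns to Iceberg field IDs. The following is a minimal standalone sketch of that pattern, not part of this commit; the field_with_id helper, the main function, and the concrete ids are illustrative assumptions.

use std::collections::HashMap;

use arrow_schema::{DataType, Field, Schema};
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;

// Illustrative helper (not an iceberg-rust API): build an Arrow field that
// carries an Iceberg field id in its metadata, mirroring what the updated
// tests above do inline.
fn field_with_id(name: &str, data_type: DataType, nullable: bool, id: i32) -> Field {
    Field::new(name, data_type, nullable).with_metadata(HashMap::from([(
        PARQUET_FIELD_ID_META_KEY.to_string(),
        id.to_string(),
    )]))
}

fn main() {
    // Field ids 3 and 4 match the values used in the data_file_writer test above.
    let schema = Schema::new(vec![
        field_with_id("foo", DataType::Int32, false, 3),
        field_with_id("bar", DataType::Utf8, false, 4),
    ]);
    // Each field now exposes its field id through its metadata map.
    for field in schema.fields().iter() {
        println!("{}: {:?}", field.name(), field.metadata().get(PARQUET_FIELD_ID_META_KEY));
    }
}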

crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs

Lines changed: 12 additions & 3 deletions
@@ -226,14 +226,23 @@ mod tests {
     use datafusion::prelude::{Expr, SessionContext};
     use iceberg::expr::{Predicate, Reference};
     use iceberg::spec::Datum;
+    use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
 
     use super::convert_filters_to_predicate;
 
     fn create_test_schema() -> DFSchema {
         let arrow_schema = Schema::new(vec![
-            Field::new("foo", DataType::Int32, true),
-            Field::new("bar", DataType::Utf8, true),
-            Field::new("ts", DataType::Timestamp(TimeUnit::Second, None), true),
+            Field::new("foo", DataType::Int32, true).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "1".to_string(),
+            )])),
+            Field::new("bar", DataType::Utf8, true).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "2".to_string(),
+            )])),
+            Field::new("ts", DataType::Timestamp(TimeUnit::Second, None), true).with_metadata(
+                HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())]),
+            ),
         ]);
         DFSchema::try_from_qualified_schema("my_table", &arrow_schema).unwrap()
     }
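
The DataFusion test schema receives the same field-id metadata. A schema built without it cannot be resolved by consumers that look fields up by id, as the accessor in value.rs does ("Field id {} not found in struct array"). Below is a minimal sketch of such a by-id lookup using the same arrow-schema and parquet crates; find_column_by_field_id is a hypothetical helper for illustration, not an iceberg-rust API.

use std::collections::HashMap;

use arrow_schema::{DataType, Field, Schema};
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;

// Hypothetical helper: find the position of the column whose metadata carries
// the given field id, mirroring the by-id lookups performed in value.rs.
fn find_column_by_field_id(schema: &Schema, field_id: i32) -> Option<usize> {
    schema.fields().iter().position(|f| {
        f.metadata()
            .get(PARQUET_FIELD_ID_META_KEY)
            .and_then(|v| v.parse::<i32>().ok())
            == Some(field_id)
    })
}

fn main() {
    // Field ids 1 and 2 match the values used in create_test_schema above.
    let schema = Schema::new(vec![
        Field::new("foo", DataType::Int32, true).with_metadata(HashMap::from([(
            PARQUET_FIELD_ID_META_KEY.to_string(),
            "1".to_string(),
        )])),
        Field::new("bar", DataType::Utf8, true).with_metadata(HashMap::from([(
            PARQUET_FIELD_ID_META_KEY.to_string(),
            "2".to_string(),
        )])),
    ]);
    // Without the metadata (the situation this commit fixes in the tests),
    // the lookup below would return None.
    assert_eq!(find_column_by_field_id(&schema, 2), Some(1));
}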
