@@ -21,6 +21,8 @@ use std::sync::Arc;
21
21
22
22
use arrow_array:: { ArrayRef , BooleanArray , Int32Array , RecordBatch , StringArray } ;
23
23
use futures:: TryStreamExt ;
24
+ use iceberg:: spec:: DataFile ;
25
+ use iceberg:: table:: Table ;
24
26
use iceberg:: transaction:: Transaction ;
25
27
use iceberg:: writer:: base_writer:: data_file_writer:: DataFileWriterBuilder ;
26
28
use iceberg:: writer:: file_writer:: location_generator:: {
@@ -458,3 +460,111 @@ async fn test_sequence_number_in_manifest_entry() {
458
460
}
459
461
}
460
462
}
463
+
464
+ #[ tokio:: test]
465
+ async fn test_partition_spec_id_in_manifest ( ) {
466
+ let fixture = get_shared_containers ( ) ;
467
+ let rest_catalog = RestCatalog :: new ( fixture. catalog_config . clone ( ) ) ;
468
+ let ns = random_ns ( ) . await ;
469
+ let schema = test_schema ( ) ;
470
+
471
+ let table_creation = TableCreation :: builder ( )
472
+ . name ( "t1" . to_string ( ) )
473
+ . schema ( schema. clone ( ) )
474
+ . build ( ) ;
475
+
476
+ let mut table = rest_catalog
477
+ . create_table ( ns. name ( ) , table_creation)
478
+ . await
479
+ . unwrap ( ) ;
480
+
481
+ // Create the writer and write the data
482
+ let schema: Arc < arrow_schema:: Schema > = Arc :: new (
483
+ table
484
+ . metadata ( )
485
+ . current_schema ( )
486
+ . as_ref ( )
487
+ . try_into ( )
488
+ . unwrap ( ) ,
489
+ ) ;
490
+ let location_generator = DefaultLocationGenerator :: new ( table. metadata ( ) . clone ( ) ) . unwrap ( ) ;
491
+ let file_name_generator = DefaultFileNameGenerator :: new (
492
+ "test" . to_string ( ) ,
493
+ None ,
494
+ iceberg:: spec:: DataFileFormat :: Parquet ,
495
+ ) ;
496
+
497
+ // commit result
498
+ let mut data_files_vec = Vec :: default ( ) ;
499
+
500
+ async fn build_data_file_f (
501
+ schema : Arc < arrow_schema:: Schema > ,
502
+ table : & Table ,
503
+ location_generator : DefaultLocationGenerator ,
504
+ file_name_generator : DefaultFileNameGenerator ,
505
+ ) -> DataFile {
506
+ let parquet_writer_builder = ParquetWriterBuilder :: new (
507
+ WriterProperties :: default ( ) ,
508
+ table. metadata ( ) . current_schema ( ) . clone ( ) ,
509
+ table. file_io ( ) . clone ( ) ,
510
+ location_generator,
511
+ file_name_generator,
512
+ ) ;
513
+ let data_file_writer_builder = DataFileWriterBuilder :: new ( parquet_writer_builder, None , 0 ) ;
514
+ let mut data_file_writer = data_file_writer_builder. build ( ) . await . unwrap ( ) ;
515
+ let col1 = StringArray :: from ( vec ! [ Some ( "foo" ) , Some ( "bar" ) , None , Some ( "baz" ) ] ) ;
516
+ let col2 = Int32Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , Some ( 3 ) , Some ( 4 ) ] ) ;
517
+ let col3 = BooleanArray :: from ( vec ! [ Some ( true ) , Some ( false ) , None , Some ( false ) ] ) ;
518
+ let batch = RecordBatch :: try_new ( schema. clone ( ) , vec ! [
519
+ Arc :: new( col1) as ArrayRef ,
520
+ Arc :: new( col2) as ArrayRef ,
521
+ Arc :: new( col3) as ArrayRef ,
522
+ ] )
523
+ . unwrap ( ) ;
524
+ data_file_writer. write ( batch. clone ( ) ) . await . unwrap ( ) ;
525
+ data_file_writer. close ( ) . await . unwrap ( ) [ 0 ] . clone ( )
526
+ }
527
+
528
+ for _ in 0 ..10 {
529
+ let data_file = build_data_file_f (
530
+ schema. clone ( ) ,
531
+ & table,
532
+ location_generator. clone ( ) ,
533
+ file_name_generator. clone ( ) ,
534
+ )
535
+ . await ;
536
+ data_files_vec. push ( data_file. clone ( ) ) ;
537
+ let tx = Transaction :: new ( & table) ;
538
+ let mut append_action = tx. fast_append ( None , None , vec ! [ ] ) . unwrap ( ) ;
539
+ append_action. add_data_files ( vec ! [ data_file] ) . unwrap ( ) ;
540
+ let tx = append_action. apply ( ) . await . unwrap ( ) ;
541
+ table = tx. commit ( & rest_catalog) . await . unwrap ( ) ;
542
+ }
543
+
544
+ let last_data_files = data_files_vec. last ( ) . unwrap ( ) ;
545
+ let partition_spec_id = last_data_files. partition_spec_id ( ) ;
546
+
547
+ // remove the data files by RewriteAction
548
+ for data_file in & data_files_vec {
549
+ let tx = Transaction :: new ( & table) ;
550
+ let mut rewrite_action = tx. rewrite_files ( None , vec ! [ ] ) . unwrap ( ) ;
551
+ rewrite_action = rewrite_action
552
+ . delete_files ( vec ! [ data_file. clone( ) ] )
553
+ . unwrap ( ) ;
554
+ let tx = rewrite_action. apply ( ) . await . unwrap ( ) ;
555
+ table = tx. commit ( & rest_catalog) . await . unwrap ( ) ;
556
+ }
557
+
558
+ // TODO: test update partition spec
559
+ // Verify that the partition spec ID is correctly set
560
+
561
+ let last_snapshot = table. metadata ( ) . current_snapshot ( ) . unwrap ( ) ;
562
+ let manifest_list = last_snapshot
563
+ . load_manifest_list ( table. file_io ( ) , table. metadata ( ) )
564
+ . await
565
+ . unwrap ( ) ;
566
+ assert_eq ! ( manifest_list. entries( ) . len( ) , 1 ) ;
567
+ for manifest_file in manifest_list. entries ( ) {
568
+ assert_eq ! ( manifest_file. partition_spec_id, partition_spec_id) ;
569
+ }
570
+ }