@@ -19,6 +19,8 @@ use std::sync::Arc;
19
19
20
20
use futures:: StreamExt ;
21
21
use futures:: stream:: BoxStream ;
22
+ use tracing:: Instrument ;
23
+ use tracing:: field:: Empty ;
22
24
23
25
use crate :: delete_file_index:: DeleteFileIndex ;
24
26
use crate :: expr:: { Bind , BoundPredicate , Predicate } ;
@@ -31,6 +33,7 @@ use crate::spec::{
31
33
ManifestContentType , ManifestEntryRef , ManifestFile , ManifestList , SchemaRef , SnapshotRef ,
32
34
TableMetadataRef ,
33
35
} ;
36
+ use crate :: traced_stream:: TracedStream ;
34
37
use crate :: { Error , ErrorKind , Result } ;
35
38
36
39
/// Wraps a [`ManifestFile`] alongside the objects that are needed
@@ -53,14 +56,25 @@ pub(crate) struct ManifestEntryContext {
53
56
pub bound_predicates : Option < Arc < BoundPredicates > > ,
54
57
pub partition_spec_id : i32 ,
55
58
pub snapshot_schema : SchemaRef ,
59
+ pub ( crate ) span : tracing:: Span ,
56
60
}
57
61
58
62
impl ManifestFileContext {
59
63
/// Consumes this [`ManifestFileContext`], fetching its Manifest from FileIO and then
60
64
/// streaming its constituent [`ManifestEntries`]
61
65
pub ( crate ) async fn fetch_manifest_and_stream_entries (
62
66
self ,
67
+ parent_span : tracing:: Span ,
63
68
) -> Result < BoxStream < ' static , Result < ManifestEntryContext > > > {
69
+ let manifest_span = tracing:: debug_span!(
70
+ parent: & parent_span,
71
+ "iceberg.scan.plan.process_manifest" ,
72
+ iceberg. scan. plan. manifest. file_path = self . manifest_file. manifest_path,
73
+ iceberg. scan. plan. manifest. entries_count = Empty ,
74
+ ) ;
75
+
76
+ let span = manifest_span. clone ( ) ;
77
+
64
78
let ManifestFileContext {
65
79
object_cache,
66
80
manifest_file,
@@ -71,21 +85,46 @@ impl ManifestFileContext {
71
85
..
72
86
} = self ;
73
87
74
- let manifest = object_cache. get_manifest ( & manifest_file) . await ?;
88
+ let ( manifest, manifest_file) = async move {
89
+ let manifest = object_cache. get_manifest ( & manifest_file) . await ;
90
+ ( manifest, manifest_file)
91
+ }
92
+ . instrument ( manifest_span. clone ( ) )
93
+ . await ;
94
+ let manifest = manifest?;
95
+
96
+ span. record (
97
+ "iceberg.scan.plan.manifest.entries_count" ,
98
+ manifest. entries ( ) . len ( ) ,
99
+ ) ;
75
100
76
- Ok ( async_stream:: stream! {
101
+ let stream = async_stream:: stream! {
77
102
for manifest_entry in manifest. entries( ) {
103
+ let manifest_entry_span = tracing:: debug_span!(
104
+ parent: span. clone( ) ,
105
+ "iceberg.scan.plan.process_data_file" ,
106
+ iceberg. scam. plan. data_file. file_path = manifest_entry. file_path( ) ,
107
+ "iceberg.scan.plan_data_file.type" = Empty ,
108
+ iceberg. scan. plan. data_file. skipped = Empty ,
109
+ iceberg. scan. plan. data_file. skipped_reason = Empty ,
110
+ ) ;
111
+
78
112
yield Ok ( ManifestEntryContext {
79
113
manifest_entry: manifest_entry. clone( ) ,
80
114
expression_evaluator_cache: expression_evaluator_cache. clone( ) ,
81
115
field_ids: field_ids. clone( ) ,
82
116
partition_spec_id: manifest_file. partition_spec_id,
83
117
bound_predicates: bound_predicates. clone( ) ,
84
118
snapshot_schema: snapshot_schema. clone( ) ,
119
+ span: manifest_entry_span,
85
120
} ) ;
86
121
}
87
122
}
88
- . boxed ( ) )
123
+ . boxed ( ) ;
124
+
125
+ Ok ( Box :: pin ( TracedStream :: new ( stream, vec ! [
126
+ manifest_span. clone( ) ,
127
+ ] ) ) )
89
128
}
90
129
91
130
pub ( crate ) fn is_delete ( & self ) -> bool {
@@ -144,7 +183,11 @@ pub(crate) struct PlanContext {
144
183
}
145
184
146
185
impl PlanContext {
147
- #[ tracing:: instrument( skip_all) ]
186
+ #[ tracing:: instrument(
187
+ skip_all,
188
+ level = "debug" ,
189
+ fields( iceberg. scan. plan. manifest_list. file_path = ?self . snapshot. manifest_list( ) ) ,
190
+ ) ]
148
191
pub ( crate ) async fn get_manifest_list ( & self ) -> Result < Arc < ManifestList > > {
149
192
self . object_cache
150
193
. as_ref ( )
@@ -175,14 +218,19 @@ impl PlanContext {
175
218
176
219
#[ tracing:: instrument(
177
220
skip_all,
221
+ level = "debug" ,
222
+ name = "iceberg.scan.plan.process_manifest_list" ,
178
223
fields(
179
- manifest_list. len = manifest_list. entries( ) . len( ) ,
224
+ iceberg . scan . plan . manifest_list. entries_count = manifest_list. entries( ) . len( ) ,
180
225
)
181
226
) ]
182
- pub ( crate ) fn build_manifest_file_context_iter (
227
+ pub ( crate ) fn build_manifest_file_contexts (
183
228
& self ,
184
229
manifest_list : Arc < ManifestList > ,
185
- ) -> impl Iterator < Item = Result < ManifestFileContext > > {
230
+ ) -> (
231
+ Vec < Result < ManifestFileContext > > ,
232
+ Vec < Result < ManifestFileContext > > ,
233
+ ) {
186
234
let has_predicate = self . predicate . is_some ( ) ;
187
235
188
236
( 0 ..manifest_list. entries ( ) . len ( ) )
@@ -198,24 +246,28 @@ impl PlanContext {
198
246
. get ( manifest_file. partition_spec_id , predicate. clone ( ) )
199
247
. eval ( & manifest_file) ?
200
248
{
201
- tracing:: trace!( file_path = manifest_file. manifest_path, "iceberg.scan.manifest_file.skipped" ) ;
202
- metrics:: counter!( "iceberg.scan.manifest_file.skipped" , "reason" => "partition" ) . increment ( 1 ) ;
249
+ tracing:: debug!(
250
+ iceberg. scan. plan. manifest. file_path = manifest_file. manifest_path,
251
+ iceberg. scan. plan. manifest. skip_reason = "partition" ,
252
+ "iceberg.scan.plan.manifest_file.skipped"
253
+ ) ;
254
+ metrics:: counter!( "iceberg.scan.plan.manifest_file.skipped" , "reason" => "partition" ) . increment ( 1 ) ;
203
255
return Ok ( None ) ; // Skip this file.
204
256
}
205
257
Some ( predicate)
206
258
} else {
207
259
None
208
260
} ;
209
261
210
- tracing:: trace!( file_path = manifest_file. manifest_path, "iceberg.scan.manifest_file.included" ) ;
211
- metrics:: counter!( "iceberg.scan.manifest_file.included" ) . increment ( 1 ) ;
262
+ metrics:: counter!( "iceberg.scan.plan.manifest_file.included" ) . increment ( 1 ) ;
212
263
213
264
let context = self
214
265
. create_manifest_file_context ( manifest_file, partition_bound_predicate) ?;
215
266
Ok ( Some ( context) )
216
267
} ) ( )
217
268
. transpose ( )
218
269
} )
270
+ . partition ( |ctx| ctx. as_ref ( ) . map_or ( true , |ctx| ctx. is_delete ( ) ) )
219
271
}
220
272
221
273
fn create_manifest_file_context (
0 commit comments