iceberg/expr/visitors/
manifest_evaluator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use fnv::FnvHashSet;
19use serde_bytes::ByteBuf;
20
21use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit};
22use crate::expr::{BoundPredicate, BoundReference};
23use crate::spec::{Datum, FieldSummary, ManifestFile, PrimitiveLiteral, Type};
24use crate::{Error, ErrorKind, Result};
25
26/// Builder for [`ManifestEvaluator`] with optional NOT rewriting capability.
27#[derive(Debug)]
28pub(crate) struct ManifestEvaluatorBuilder {
29    partition_filter: BoundPredicate,
30    rewrite_not: bool,
31}
32
33impl ManifestEvaluatorBuilder {
34    /// Creates a new `ManifestEvaluatorBuilder` with the given partition filter.
35    pub(crate) fn new(partition_filter: BoundPredicate) -> Self {
36        Self {
37            partition_filter,
38            rewrite_not: false,
39        }
40    }
41
42    /// Enables NOT rewriting optimization for the partition filter.
43    /// When enabled, the builder will apply NOT elimination to simplify the predicate
44    /// before creating the evaluator.
45    #[allow(unused)]
46    pub(crate) fn with_rewrite_not(mut self, rewrite_not: bool) -> Self {
47        self.rewrite_not = rewrite_not;
48        self
49    }
50
51    /// Builds the `ManifestEvaluator` with the configured options.
52    pub(crate) fn build(self) -> ManifestEvaluator {
53        let partition_filter = if self.rewrite_not {
54            self.partition_filter.rewrite_not()
55        } else {
56            self.partition_filter
57        };
58
59        ManifestEvaluator { partition_filter }
60    }
61}
62
63/// Evaluates a [`ManifestFile`] to see if the partition summaries
64/// match a provided [`BoundPredicate`].
65///
66/// Used by [`TableScan`] to prune the list of [`ManifestFile`]s
67/// in which data might be found that matches the TableScan's filter.
68#[derive(Debug)]
69pub(crate) struct ManifestEvaluator {
70    partition_filter: BoundPredicate,
71}
72
73impl ManifestEvaluator {
74    /// Creates a new `ManifestEvaluatorBuilder` for building a `ManifestEvaluator`.
75    pub(crate) fn builder(partition_filter: BoundPredicate) -> ManifestEvaluatorBuilder {
76        ManifestEvaluatorBuilder::new(partition_filter)
77    }
78
79    /// Evaluate this `ManifestEvaluator`'s filter predicate against the
80    /// provided [`ManifestFile`]'s partitions. Used by [`TableScan`] to
81    /// see if this `ManifestFile` could possibly contain data that matches
82    /// the scan's filter.
83    pub(crate) fn eval(&self, manifest_file: &ManifestFile) -> Result<bool> {
84        match &manifest_file.partitions {
85            Some(p) if !p.is_empty() => {
86                let mut evaluator = ManifestFilterVisitor::new(p);
87                visit(&mut evaluator, &self.partition_filter)
88            }
89            _ => Ok(true),
90        }
91    }
92}
93
94struct ManifestFilterVisitor<'a> {
95    partitions: &'a Vec<FieldSummary>,
96}
97
98impl<'a> ManifestFilterVisitor<'a> {
99    fn new(partitions: &'a Vec<FieldSummary>) -> Self {
100        ManifestFilterVisitor { partitions }
101    }
102}
103
/// Visitor result meaning the manifest cannot be ruled out by the predicate.
const ROWS_MIGHT_MATCH: Result<bool> = Ok(true);
/// Visitor result meaning no row in the manifest can satisfy the predicate.
const ROWS_CANNOT_MATCH: Result<bool> = Ok(false);
/// Largest literal set an `IN` predicate is compared against the bounds with;
/// larger sets are answered with `ROWS_MIGHT_MATCH` without comparing.
const IN_PREDICATE_LIMIT: usize = 200;
107
108impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
109    type T = bool;
110
    /// An always-true predicate can never rule a manifest out.
    fn always_true(&mut self) -> crate::Result<bool> {
        ROWS_MIGHT_MATCH
    }
114
    /// An always-false predicate rules every manifest out.
    fn always_false(&mut self) -> crate::Result<bool> {
        ROWS_CANNOT_MATCH
    }
118
    /// Combines two child results: the manifest might match only if both
    /// sides might match.
    fn and(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
        Ok(lhs && rhs)
    }
122
    /// Combines two child results: the manifest might match if either side
    /// might match.
    fn or(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
        Ok(lhs || rhs)
    }
126
127    fn not(&mut self, _: bool) -> crate::Result<bool> {
128        Err(Error::new(
129            ErrorKind::Unexpected,
130            "not operator is not supported in partition filter",
131        ))
132    }
133
134    fn is_null(
135        &mut self,
136        reference: &BoundReference,
137        _predicate: &BoundPredicate,
138    ) -> crate::Result<bool> {
139        Ok(self.field_summary_for_reference(reference).contains_null)
140    }
141
142    fn not_null(
143        &mut self,
144        reference: &BoundReference,
145        _predicate: &BoundPredicate,
146    ) -> crate::Result<bool> {
147        let field = self.field_summary_for_reference(reference);
148
149        // contains_null encodes whether at least one partition value is null,
150        // lowerBound is null if all partition values are null
151        if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) {
152            ROWS_CANNOT_MATCH
153        } else {
154            ROWS_MIGHT_MATCH
155        }
156    }
157
158    fn is_nan(
159        &mut self,
160        reference: &BoundReference,
161        _predicate: &BoundPredicate,
162    ) -> crate::Result<bool> {
163        let field = self.field_summary_for_reference(reference);
164        if let Some(contains_nan) = field.contains_nan
165            && !contains_nan
166        {
167            return ROWS_CANNOT_MATCH;
168        }
169
170        if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) {
171            return ROWS_CANNOT_MATCH;
172        }
173
174        ROWS_MIGHT_MATCH
175    }
176
177    fn not_nan(
178        &mut self,
179        reference: &BoundReference,
180        _predicate: &BoundPredicate,
181    ) -> crate::Result<bool> {
182        let field = self.field_summary_for_reference(reference);
183        if let Some(contains_nan) = field.contains_nan {
184            // check if all values are nan
185            if contains_nan && !field.contains_null && field.lower_bound.is_none() {
186                return ROWS_CANNOT_MATCH;
187            }
188        }
189        ROWS_MIGHT_MATCH
190    }
191
192    fn less_than(
193        &mut self,
194        reference: &BoundReference,
195        datum: &Datum,
196        _predicate: &BoundPredicate,
197    ) -> crate::Result<bool> {
198        let field = self.field_summary_for_reference(reference);
199
200        match &field.lower_bound {
201            Some(bound_bytes) => {
202                let bound = ManifestFilterVisitor::bytes_to_datum(
203                    bound_bytes,
204                    *reference.field().field_type.clone(),
205                );
206                if datum <= &bound {
207                    ROWS_CANNOT_MATCH
208                } else {
209                    ROWS_MIGHT_MATCH
210                }
211            }
212            None => ROWS_CANNOT_MATCH,
213        }
214    }
215
216    fn less_than_or_eq(
217        &mut self,
218        reference: &BoundReference,
219        datum: &Datum,
220        _predicate: &BoundPredicate,
221    ) -> crate::Result<bool> {
222        let field = self.field_summary_for_reference(reference);
223        match &field.lower_bound {
224            Some(bound_bytes) => {
225                let bound = ManifestFilterVisitor::bytes_to_datum(
226                    bound_bytes,
227                    *reference.field().field_type.clone(),
228                );
229                if datum < &bound {
230                    ROWS_CANNOT_MATCH
231                } else {
232                    ROWS_MIGHT_MATCH
233                }
234            }
235            None => ROWS_CANNOT_MATCH,
236        }
237    }
238
239    fn greater_than(
240        &mut self,
241        reference: &BoundReference,
242        datum: &Datum,
243        _predicate: &BoundPredicate,
244    ) -> crate::Result<bool> {
245        let field = self.field_summary_for_reference(reference);
246        match &field.upper_bound {
247            Some(bound_bytes) => {
248                let bound = ManifestFilterVisitor::bytes_to_datum(
249                    bound_bytes,
250                    *reference.field().field_type.clone(),
251                );
252                if datum >= &bound {
253                    ROWS_CANNOT_MATCH
254                } else {
255                    ROWS_MIGHT_MATCH
256                }
257            }
258            None => ROWS_CANNOT_MATCH,
259        }
260    }
261
262    fn greater_than_or_eq(
263        &mut self,
264        reference: &BoundReference,
265        datum: &Datum,
266        _predicate: &BoundPredicate,
267    ) -> crate::Result<bool> {
268        let field = self.field_summary_for_reference(reference);
269        match &field.upper_bound {
270            Some(bound_bytes) => {
271                let bound = ManifestFilterVisitor::bytes_to_datum(
272                    bound_bytes,
273                    *reference.field().field_type.clone(),
274                );
275                if datum > &bound {
276                    ROWS_CANNOT_MATCH
277                } else {
278                    ROWS_MIGHT_MATCH
279                }
280            }
281            None => ROWS_CANNOT_MATCH,
282        }
283    }
284
285    fn eq(
286        &mut self,
287        reference: &BoundReference,
288        datum: &Datum,
289        _predicate: &BoundPredicate,
290    ) -> crate::Result<bool> {
291        let field = self.field_summary_for_reference(reference);
292
293        if field.lower_bound.is_none() || field.upper_bound.is_none() {
294            return ROWS_CANNOT_MATCH;
295        }
296
297        if let Some(lower_bound_bytes) = &field.lower_bound {
298            let lower_bound = ManifestFilterVisitor::bytes_to_datum(
299                lower_bound_bytes,
300                *reference.field().field_type.clone(),
301            );
302            if &lower_bound > datum {
303                return ROWS_CANNOT_MATCH;
304            }
305        }
306
307        if let Some(upper_bound_bytes) = &field.upper_bound {
308            let upper_bound = ManifestFilterVisitor::bytes_to_datum(
309                upper_bound_bytes,
310                *reference.field().field_type.clone(),
311            );
312            if &upper_bound < datum {
313                return ROWS_CANNOT_MATCH;
314            }
315        }
316
317        ROWS_MIGHT_MATCH
318    }
319
    /// `!=` is never used for pruning: always returns `ROWS_MIGHT_MATCH`.
    fn not_eq(
        &mut self,
        _reference: &BoundReference,
        _datum: &Datum,
        _predicate: &BoundPredicate,
    ) -> crate::Result<bool> {
        // because the bounds are not necessarily a min or max value, this cannot be answered using
        // them. notEq(col, X) with (X, Y) doesn't guarantee that X is a value in col.
        ROWS_MIGHT_MATCH
    }
330
331    fn starts_with(
332        &mut self,
333        reference: &BoundReference,
334        datum: &Datum,
335        _predicate: &BoundPredicate,
336    ) -> crate::Result<bool> {
337        let field = self.field_summary_for_reference(reference);
338
339        if field.lower_bound.is_none() || field.upper_bound.is_none() {
340            return ROWS_CANNOT_MATCH;
341        }
342
343        let prefix = ManifestFilterVisitor::datum_as_str(
344            datum,
345            "Cannot perform starts_with on non-string value",
346        )?;
347        let prefix_len = prefix.len();
348
349        if let Some(lower_bound) = &field.lower_bound {
350            let min_len = lower_bound.len().min(prefix_len);
351            if prefix.as_bytes().lt(&lower_bound[..min_len]) {
352                return ROWS_CANNOT_MATCH;
353            }
354        }
355
356        if let Some(upper_bound) = &field.upper_bound {
357            let min_len = upper_bound.len().min(prefix_len);
358            if prefix.as_bytes().gt(&upper_bound[..min_len]) {
359                return ROWS_CANNOT_MATCH;
360            }
361        }
362
363        ROWS_MIGHT_MATCH
364    }
365
366    fn not_starts_with(
367        &mut self,
368        reference: &BoundReference,
369        datum: &Datum,
370        _predicate: &BoundPredicate,
371    ) -> crate::Result<bool> {
372        let field = self.field_summary_for_reference(reference);
373
374        if field.contains_null || field.lower_bound.is_none() || field.upper_bound.is_none() {
375            return ROWS_MIGHT_MATCH;
376        }
377
378        let prefix = ManifestFilterVisitor::datum_as_str(
379            datum,
380            "Cannot perform not_starts_with on non-string value",
381        )?;
382        let prefix_len = prefix.len();
383
384        // not_starts_with will match unless all values must start with the prefix. This happens when
385        // the lower and upper bounds both start with the prefix.
386        if let Some(lower_bound) = &field.lower_bound {
387            // if lower is shorter than the prefix then lower doesn't start with the prefix
388            if prefix_len > lower_bound.len() {
389                return ROWS_MIGHT_MATCH;
390            }
391
392            if prefix.as_bytes().eq(&lower_bound[..prefix_len])
393                && let Some(upper_bound) = &field.upper_bound
394            {
395                // if upper is shorter than the prefix then upper can't start with the prefix
396                if prefix_len > upper_bound.len() {
397                    return ROWS_MIGHT_MATCH;
398                }
399
400                if prefix.as_bytes().eq(&upper_bound[..prefix_len]) {
401                    return ROWS_CANNOT_MATCH;
402                }
403            }
404        }
405
406        ROWS_MIGHT_MATCH
407    }
408
409    fn r#in(
410        &mut self,
411        reference: &BoundReference,
412        literals: &FnvHashSet<Datum>,
413        _predicate: &BoundPredicate,
414    ) -> crate::Result<bool> {
415        let field = self.field_summary_for_reference(reference);
416        if field.lower_bound.is_none() {
417            return ROWS_CANNOT_MATCH;
418        }
419
420        if literals.len() > IN_PREDICATE_LIMIT {
421            return ROWS_MIGHT_MATCH;
422        }
423
424        if let Some(lower_bound) = &field.lower_bound {
425            let lower_bound = ManifestFilterVisitor::bytes_to_datum(
426                lower_bound,
427                *reference.field().clone().field_type,
428            );
429            if literals.iter().all(|datum| &lower_bound > datum) {
430                return ROWS_CANNOT_MATCH;
431            }
432        }
433
434        if let Some(upper_bound) = &field.upper_bound {
435            let upper_bound = ManifestFilterVisitor::bytes_to_datum(
436                upper_bound,
437                *reference.field().clone().field_type,
438            );
439            if literals.iter().all(|datum| &upper_bound < datum) {
440                return ROWS_CANNOT_MATCH;
441            }
442        }
443
444        ROWS_MIGHT_MATCH
445    }
446
    /// `NOT IN` is never used for pruning: always returns `ROWS_MIGHT_MATCH`.
    fn not_in(
        &mut self,
        _reference: &BoundReference,
        _literals: &FnvHashSet<Datum>,
        _predicate: &BoundPredicate,
    ) -> crate::Result<bool> {
        // because the bounds are not necessarily a min or max value, this cannot be answered using
        // them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a value in col.
        ROWS_MIGHT_MATCH
    }
457}
458
459impl ManifestFilterVisitor<'_> {
460    fn field_summary_for_reference(&self, reference: &BoundReference) -> &FieldSummary {
461        let pos = reference.accessor().position();
462        &self.partitions[pos]
463    }
464
465    fn are_all_null(field: &FieldSummary, r#type: &Type) -> bool {
466        // contains_null encodes whether at least one partition value is null,
467        // lowerBound is null if all partition values are null
468        let mut all_null: bool = field.contains_null && field.lower_bound.is_none();
469
470        if all_null && r#type.is_floating_type() {
471            // floating point types may include NaN values, which we check separately.
472            // In case bounds don't include NaN value, contains_nan needs to be checked against.
473            all_null = match field.contains_nan {
474                Some(val) => !val,
475                None => false,
476            }
477        }
478
479        all_null
480    }
481
482    fn datum_as_str<'a>(bound: &'a Datum, err_msg: &str) -> crate::Result<&'a String> {
483        let PrimitiveLiteral::String(bound) = bound.literal() else {
484            return Err(Error::new(ErrorKind::Unexpected, err_msg));
485        };
486        Ok(bound)
487    }
488
489    fn bytes_to_datum(bytes: &ByteBuf, t: Type) -> Datum {
490        let p = t.as_primitive_type().unwrap();
491        Datum::try_from_bytes(bytes, p.clone()).unwrap()
492    }
493}
494
495#[cfg(test)]
496mod test {
497    use std::ops::Not;
498    use std::sync::Arc;
499
500    use fnv::FnvHashSet;
501
502    use crate::Result;
503    use crate::expr::visitors::manifest_evaluator::ManifestEvaluator;
504    use crate::expr::{
505        BinaryExpression, Bind, Predicate, PredicateOperator, Reference, SetExpression,
506        UnaryExpression,
507    };
508    use crate::spec::{
509        Datum, FieldSummary, ManifestContentType, ManifestFile, NestedField, PrimitiveType, Schema,
510        SchemaRef, Type,
511    };
512
    // Lower and upper bound of the `id` partition summary built in `create_partitions`.
    const INT_MIN_VALUE: i32 = 30;
    const INT_MAX_VALUE: i32 = 79;

    // Lower and upper bound used for the string partition summaries built in `create_partitions`.
    const STRING_MIN_VALUE: &str = "a";
    const STRING_MAX_VALUE: &str = "z";
518
519    fn create_schema() -> Result<SchemaRef> {
520        let schema = Schema::builder()
521            .with_fields(vec![
522                Arc::new(NestedField::required(
523                    1,
524                    "id",
525                    Type::Primitive(PrimitiveType::Int),
526                )),
527                Arc::new(NestedField::optional(
528                    2,
529                    "all_nulls_missing_nan",
530                    Type::Primitive(PrimitiveType::String),
531                )),
532                Arc::new(NestedField::optional(
533                    3,
534                    "some_nulls",
535                    Type::Primitive(PrimitiveType::String),
536                )),
537                Arc::new(NestedField::optional(
538                    4,
539                    "no_nulls",
540                    Type::Primitive(PrimitiveType::String),
541                )),
542                Arc::new(NestedField::optional(
543                    5,
544                    "float",
545                    Type::Primitive(PrimitiveType::Float),
546                )),
547                Arc::new(NestedField::optional(
548                    6,
549                    "all_nulls_double",
550                    Type::Primitive(PrimitiveType::Double),
551                )),
552                Arc::new(NestedField::optional(
553                    7,
554                    "all_nulls_no_nans",
555                    Type::Primitive(PrimitiveType::Float),
556                )),
557                Arc::new(NestedField::optional(
558                    8,
559                    "all_nans",
560                    Type::Primitive(PrimitiveType::Double),
561                )),
562                Arc::new(NestedField::optional(
563                    9,
564                    "both_nan_and_null",
565                    Type::Primitive(PrimitiveType::Float),
566                )),
567                Arc::new(NestedField::optional(
568                    10,
569                    "no_nan_or_null",
570                    Type::Primitive(PrimitiveType::Double),
571                )),
572                Arc::new(NestedField::optional(
573                    11,
574                    "all_nulls_missing_nan_float",
575                    Type::Primitive(PrimitiveType::Float),
576                )),
577                Arc::new(NestedField::optional(
578                    12,
579                    "all_same_value_or_null",
580                    Type::Primitive(PrimitiveType::String),
581                )),
582                Arc::new(NestedField::optional(
583                    13,
584                    "no_nulls_same_value_a",
585                    Type::Primitive(PrimitiveType::String),
586                )),
587            ])
588            .build()?;
589
590        Ok(Arc::new(schema))
591    }
592
593    fn create_partitions() -> Vec<FieldSummary> {
594        vec![
595            // id
596            FieldSummary {
597                contains_null: false,
598                contains_nan: None,
599                lower_bound: Some(Datum::int(INT_MIN_VALUE).to_bytes().unwrap()),
600                upper_bound: Some(Datum::int(INT_MAX_VALUE).to_bytes().unwrap()),
601            },
602            // all_nulls_missing_nan
603            FieldSummary {
604                contains_null: true,
605                contains_nan: None,
606                lower_bound: None,
607                upper_bound: None,
608            },
609            // some_nulls
610            FieldSummary {
611                contains_null: true,
612                contains_nan: None,
613                lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
614                upper_bound: Some(Datum::string(STRING_MAX_VALUE).to_bytes().unwrap()),
615            },
616            // no_nulls
617            FieldSummary {
618                contains_null: false,
619                contains_nan: None,
620                lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
621                upper_bound: Some(Datum::string(STRING_MAX_VALUE).to_bytes().unwrap()),
622            },
623            // float
624            FieldSummary {
625                contains_null: true,
626                contains_nan: None,
627                lower_bound: Some(Datum::float(0.0).to_bytes().unwrap()),
628                upper_bound: Some(Datum::float(20.0).to_bytes().unwrap()),
629            },
630            // all_nulls_double
631            FieldSummary {
632                contains_null: true,
633                contains_nan: None,
634                lower_bound: None,
635                upper_bound: None,
636            },
637            // all_nulls_no_nans
638            FieldSummary {
639                contains_null: true,
640                contains_nan: Some(false),
641                lower_bound: None,
642                upper_bound: None,
643            },
644            // all_nans
645            FieldSummary {
646                contains_null: false,
647                contains_nan: Some(true),
648                lower_bound: None,
649                upper_bound: None,
650            },
651            // both_nan_and_null
652            FieldSummary {
653                contains_null: true,
654                contains_nan: Some(true),
655                lower_bound: None,
656                upper_bound: None,
657            },
658            // no_nan_or_null
659            FieldSummary {
660                contains_null: false,
661                contains_nan: Some(false),
662                lower_bound: Some(Datum::float(0.0).to_bytes().unwrap()),
663                upper_bound: Some(Datum::float(20.0).to_bytes().unwrap()),
664            },
665            // all_nulls_missing_nan_float
666            FieldSummary {
667                contains_null: true,
668                contains_nan: None,
669                lower_bound: None,
670                upper_bound: None,
671            },
672            // all_same_value_or_null
673            FieldSummary {
674                contains_null: true,
675                contains_nan: None,
676                lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
677                upper_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
678            },
679            // no_nulls_same_value_a
680            FieldSummary {
681                contains_null: false,
682                contains_nan: None,
683                lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
684                upper_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
685            },
686        ]
687    }
688
    /// Builds a data-content `ManifestFile` fixture carrying the given
    /// partition summaries. All counts, `key_metadata` and `first_row_id`
    /// are left as `None`; the remaining fields are fixed placeholder values.
    fn create_manifest_file(partitions: Vec<FieldSummary>) -> ManifestFile {
        ManifestFile {
            manifest_path: "/test/path".to_string(),
            manifest_length: 0,
            partition_spec_id: 1,
            content: ManifestContentType::Data,
            sequence_number: 0,
            min_sequence_number: 0,
            added_snapshot_id: 0,
            added_files_count: None,
            existing_files_count: None,
            deleted_files_count: None,
            added_rows_count: None,
            existing_rows_count: None,
            deleted_rows_count: None,
            // The only part of the fixture the evaluator under test reads.
            partitions: Some(partitions),
            key_metadata: None,
            first_row_id: None,
        }
    }
709
710    #[test]
711    fn test_always_true() -> Result<()> {
712        let case_sensitive = false;
713        let schema = create_schema()?;
714        let partitions = create_partitions();
715        let manifest_file = create_manifest_file(partitions);
716
717        let filter = Predicate::AlwaysTrue.bind(schema.clone(), case_sensitive)?;
718
719        assert!(
720            ManifestEvaluator::builder(filter)
721                .build()
722                .eval(&manifest_file)?
723        );
724
725        Ok(())
726    }
727
728    #[test]
729    fn test_always_false() -> Result<()> {
730        let case_sensitive = false;
731        let schema = create_schema()?;
732        let partitions = create_partitions();
733        let manifest_file = create_manifest_file(partitions);
734
735        let filter = Predicate::AlwaysFalse.bind(schema.clone(), case_sensitive)?;
736
737        assert!(
738            !ManifestEvaluator::builder(filter)
739                .build()
740                .eval(&manifest_file)?
741        );
742
743        Ok(())
744    }
745
746    #[test]
747    fn test_all_nulls() -> Result<()> {
748        let case_sensitive = true;
749        let schema = create_schema()?;
750        let partitions = create_partitions();
751        let manifest_file = create_manifest_file(partitions);
752
753        // all_nulls_missing_nan
754        let all_nulls_missing_nan_filter = Predicate::Unary(UnaryExpression::new(
755            PredicateOperator::NotNull,
756            Reference::new("all_nulls_missing_nan"),
757        ))
758        .bind(schema.clone(), case_sensitive)?;
759        assert!(
760            !ManifestEvaluator::builder(all_nulls_missing_nan_filter)
761                .build()
762                .eval(&manifest_file)?,
763            "Should skip: all nulls column with non-floating type contains all null"
764        );
765
766        // all_nulls_missing_nan_float
767        let all_nulls_missing_nan_float_filter = Predicate::Unary(UnaryExpression::new(
768            PredicateOperator::NotNull,
769            Reference::new("all_nulls_missing_nan_float"),
770        ))
771        .bind(schema.clone(), case_sensitive)?;
772        assert!(
773            ManifestEvaluator::builder(all_nulls_missing_nan_float_filter)
774                .build()
775                .eval(&manifest_file)?,
776            "Should read: no NaN information may indicate presence of NaN value"
777        );
778
779        // some_nulls
780        let some_nulls_filter = Predicate::Unary(UnaryExpression::new(
781            PredicateOperator::NotNull,
782            Reference::new("some_nulls"),
783        ))
784        .bind(schema.clone(), case_sensitive)?;
785        assert!(
786            ManifestEvaluator::builder(some_nulls_filter)
787                .build()
788                .eval(&manifest_file)?,
789            "Should read: column with some nulls contains a non-null value"
790        );
791
792        // no_nulls
793        let no_nulls_filter = Predicate::Unary(UnaryExpression::new(
794            PredicateOperator::NotNull,
795            Reference::new("no_nulls"),
796        ))
797        .bind(schema.clone(), case_sensitive)?;
798
799        assert!(
800            ManifestEvaluator::builder(no_nulls_filter)
801                .build()
802                .eval(&manifest_file)?,
803            "Should read: non-null column contains a non-null value"
804        );
805
806        Ok(())
807    }
808
809    #[test]
810    fn test_no_nulls() -> Result<()> {
811        let case_sensitive = true;
812        let schema = create_schema()?;
813        let partitions = create_partitions();
814        let manifest_file = create_manifest_file(partitions);
815
816        // all_nulls_missing_nan
817        let all_nulls_missing_nan_filter = Predicate::Unary(UnaryExpression::new(
818            PredicateOperator::IsNull,
819            Reference::new("all_nulls_missing_nan"),
820        ))
821        .bind(schema.clone(), case_sensitive)?;
822        assert!(
823            ManifestEvaluator::builder(all_nulls_missing_nan_filter)
824                .build()
825                .eval(&manifest_file)?,
826            "Should read: at least one null value in all null column"
827        );
828
829        // some_nulls
830        let some_nulls_filter = Predicate::Unary(UnaryExpression::new(
831            PredicateOperator::IsNull,
832            Reference::new("some_nulls"),
833        ))
834        .bind(schema.clone(), case_sensitive)?;
835        assert!(
836            ManifestEvaluator::builder(some_nulls_filter)
837                .build()
838                .eval(&manifest_file)?,
839            "Should read: column with some nulls contains a null value"
840        );
841
842        // no_nulls
843        let no_nulls_filter = Predicate::Unary(UnaryExpression::new(
844            PredicateOperator::IsNull,
845            Reference::new("no_nulls"),
846        ))
847        .bind(schema.clone(), case_sensitive)?;
848
849        assert!(
850            !ManifestEvaluator::builder(no_nulls_filter)
851                .build()
852                .eval(&manifest_file)?,
853            "Should skip: non-null column contains no null values"
854        );
855
856        // both_nan_and_null
857        let both_nan_and_null_filter = Predicate::Unary(UnaryExpression::new(
858            PredicateOperator::IsNull,
859            Reference::new("both_nan_and_null"),
860        ))
861        .bind(schema.clone(), case_sensitive)?;
862        assert!(
863            ManifestEvaluator::builder(both_nan_and_null_filter)
864                .build()
865                .eval(&manifest_file)?,
866            "Should read: both_nan_and_null column contains no null values"
867        );
868
869        Ok(())
870    }
871
872    #[test]
873    fn test_is_nan() -> Result<()> {
874        let case_sensitive = true;
875        let schema = create_schema()?;
876        let partitions = create_partitions();
877        let manifest_file = create_manifest_file(partitions);
878
879        // float
880        let float_filter = Predicate::Unary(UnaryExpression::new(
881            PredicateOperator::IsNan,
882            Reference::new("float"),
883        ))
884        .bind(schema.clone(), case_sensitive)?;
885        assert!(
886            ManifestEvaluator::builder(float_filter)
887                .build()
888                .eval(&manifest_file)?,
889            "Should read: no information on if there are nan value in float column"
890        );
891
892        // all_nulls_double
893        let all_nulls_double_filter = Predicate::Unary(UnaryExpression::new(
894            PredicateOperator::IsNan,
895            Reference::new("all_nulls_double"),
896        ))
897        .bind(schema.clone(), case_sensitive)?;
898        assert!(
899            ManifestEvaluator::builder(all_nulls_double_filter)
900                .build()
901                .eval(&manifest_file)?,
902            "Should read: no NaN information may indicate presence of NaN value"
903        );
904
905        // all_nulls_missing_nan_float
906        let all_nulls_missing_nan_float_filter = Predicate::Unary(UnaryExpression::new(
907            PredicateOperator::IsNan,
908            Reference::new("all_nulls_missing_nan_float"),
909        ))
910        .bind(schema.clone(), case_sensitive)?;
911        assert!(
912            ManifestEvaluator::builder(all_nulls_missing_nan_float_filter)
913                .build()
914                .eval(&manifest_file)?,
915            "Should read: no NaN information may indicate presence of NaN value"
916        );
917
918        // all_nulls_no_nans
919        let all_nulls_no_nans_filter = Predicate::Unary(UnaryExpression::new(
920            PredicateOperator::IsNan,
921            Reference::new("all_nulls_no_nans"),
922        ))
923        .bind(schema.clone(), case_sensitive)?;
924        assert!(
925            !ManifestEvaluator::builder(all_nulls_no_nans_filter)
926                .build()
927                .eval(&manifest_file)?,
928            "Should skip: no nan column doesn't contain nan value"
929        );
930
931        // all_nans
932        let all_nans_filter = Predicate::Unary(UnaryExpression::new(
933            PredicateOperator::IsNan,
934            Reference::new("all_nans"),
935        ))
936        .bind(schema.clone(), case_sensitive)?;
937        assert!(
938            ManifestEvaluator::builder(all_nans_filter)
939                .build()
940                .eval(&manifest_file)?,
941            "Should read: all_nans column contains nan value"
942        );
943
944        // both_nan_and_null
945        let both_nan_and_null_filter = Predicate::Unary(UnaryExpression::new(
946            PredicateOperator::IsNan,
947            Reference::new("both_nan_and_null"),
948        ))
949        .bind(schema.clone(), case_sensitive)?;
950        assert!(
951            ManifestEvaluator::builder(both_nan_and_null_filter)
952                .build()
953                .eval(&manifest_file)?,
954            "Should read: both_nan_and_null column contains nan value"
955        );
956
957        // no_nan_or_null
958        let no_nan_or_null_filter = Predicate::Unary(UnaryExpression::new(
959            PredicateOperator::IsNan,
960            Reference::new("no_nan_or_null"),
961        ))
962        .bind(schema.clone(), case_sensitive)?;
963        assert!(
964            !ManifestEvaluator::builder(no_nan_or_null_filter)
965                .build()
966                .eval(&manifest_file)?,
967            "Should skip: no_nan_or_null column doesn't contain nan value"
968        );
969
970        Ok(())
971    }
972
973    #[test]
974    fn test_not_nan() -> Result<()> {
975        let case_sensitive = true;
976        let schema = create_schema()?;
977        let partitions = create_partitions();
978        let manifest_file = create_manifest_file(partitions);
979
980        // float
981        let float_filter = Predicate::Unary(UnaryExpression::new(
982            PredicateOperator::NotNan,
983            Reference::new("float"),
984        ))
985        .bind(schema.clone(), case_sensitive)?;
986        assert!(
987            ManifestEvaluator::builder(float_filter)
988                .build()
989                .eval(&manifest_file)?,
990            "Should read: no information on if there are nan value in float column"
991        );
992
993        // all_nulls_double
994        let all_nulls_double_filter = Predicate::Unary(UnaryExpression::new(
995            PredicateOperator::NotNan,
996            Reference::new("all_nulls_double"),
997        ))
998        .bind(schema.clone(), case_sensitive)?;
999        assert!(
1000            ManifestEvaluator::builder(all_nulls_double_filter)
1001                .build()
1002                .eval(&manifest_file)?,
1003            "Should read: all null column contains non nan value"
1004        );
1005
1006        // all_nulls_no_nans
1007        let all_nulls_no_nans_filter = Predicate::Unary(UnaryExpression::new(
1008            PredicateOperator::NotNan,
1009            Reference::new("all_nulls_no_nans"),
1010        ))
1011        .bind(schema.clone(), case_sensitive)?;
1012        assert!(
1013            ManifestEvaluator::builder(all_nulls_no_nans_filter)
1014                .build()
1015                .eval(&manifest_file)?,
1016            "Should read: no_nans column contains non nan value"
1017        );
1018
1019        // all_nans
1020        let all_nans_filter = Predicate::Unary(UnaryExpression::new(
1021            PredicateOperator::NotNan,
1022            Reference::new("all_nans"),
1023        ))
1024        .bind(schema.clone(), case_sensitive)?;
1025        assert!(
1026            !ManifestEvaluator::builder(all_nans_filter)
1027                .build()
1028                .eval(&manifest_file)?,
1029            "Should skip: all nans column doesn't contain non nan value"
1030        );
1031
1032        // both_nan_and_null
1033        let both_nan_and_null_filter = Predicate::Unary(UnaryExpression::new(
1034            PredicateOperator::NotNan,
1035            Reference::new("both_nan_and_null"),
1036        ))
1037        .bind(schema.clone(), case_sensitive)?;
1038        assert!(
1039            ManifestEvaluator::builder(both_nan_and_null_filter)
1040                .build()
1041                .eval(&manifest_file)?,
1042            "Should read: both_nan_and_null nans column contains non nan value"
1043        );
1044
1045        // no_nan_or_null
1046        let no_nan_or_null_filter = Predicate::Unary(UnaryExpression::new(
1047            PredicateOperator::NotNan,
1048            Reference::new("no_nan_or_null"),
1049        ))
1050        .bind(schema.clone(), case_sensitive)?;
1051        assert!(
1052            ManifestEvaluator::builder(no_nan_or_null_filter)
1053                .build()
1054                .eval(&manifest_file)?,
1055            "Should read: no_nan_or_null column contains non nan value"
1056        );
1057
1058        Ok(())
1059    }
1060
1061    #[test]
1062    fn test_and() -> Result<()> {
1063        let case_sensitive = true;
1064        let schema = create_schema()?;
1065        let partitions = create_partitions();
1066        let manifest_file = create_manifest_file(partitions);
1067
1068        let filter = Predicate::Binary(BinaryExpression::new(
1069            PredicateOperator::LessThan,
1070            Reference::new("id"),
1071            Datum::int(INT_MIN_VALUE - 25),
1072        ))
1073        .and(Predicate::Binary(BinaryExpression::new(
1074            PredicateOperator::GreaterThanOrEq,
1075            Reference::new("id"),
1076            Datum::int(INT_MIN_VALUE - 30),
1077        )))
1078        .bind(schema.clone(), case_sensitive)?;
1079        assert!(
1080            !ManifestEvaluator::builder(filter)
1081                .build()
1082                .eval(&manifest_file)?,
1083            "Should read: no information on if there are nan value in float column"
1084        );
1085
1086        Ok(())
1087    }
1088
1089    #[test]
1090    fn test_or() -> Result<()> {
1091        let case_sensitive = true;
1092        let schema = create_schema()?;
1093        let partitions = create_partitions();
1094        let manifest_file = create_manifest_file(partitions);
1095
1096        let filter = Predicate::Binary(BinaryExpression::new(
1097            PredicateOperator::LessThan,
1098            Reference::new("id"),
1099            Datum::int(INT_MIN_VALUE - 25),
1100        ))
1101        .or(Predicate::Binary(BinaryExpression::new(
1102            PredicateOperator::GreaterThanOrEq,
1103            Reference::new("id"),
1104            Datum::int(INT_MAX_VALUE + 1),
1105        )))
1106        .bind(schema.clone(), case_sensitive)?;
1107        assert!(
1108            !ManifestEvaluator::builder(filter)
1109                .build()
1110                .eval(&manifest_file)?,
1111            "Should skip: or(false, false)"
1112        );
1113
1114        Ok(())
1115    }
1116
1117    #[test]
1118    fn test_not() -> Result<()> {
1119        let case_sensitive = true;
1120        let schema = create_schema()?;
1121        let partitions = create_partitions();
1122        let manifest_file = create_manifest_file(partitions);
1123
1124        let filter = Predicate::Binary(BinaryExpression::new(
1125            PredicateOperator::LessThan,
1126            Reference::new("id"),
1127            Datum::int(INT_MIN_VALUE - 25),
1128        ))
1129        .not()
1130        .bind(schema.clone(), case_sensitive)?;
1131        assert!(
1132            ManifestEvaluator::builder(filter)
1133                .build()
1134                .eval(&manifest_file)
1135                .is_err(),
1136        );
1137        let filter = Predicate::Binary(BinaryExpression::new(
1138            PredicateOperator::LessThan,
1139            Reference::new("id"),
1140            Datum::int(INT_MIN_VALUE - 25),
1141        ))
1142        .not()
1143        .rewrite_not()
1144        .bind(schema.clone(), case_sensitive)?;
1145        assert!(
1146            ManifestEvaluator::builder(filter)
1147                .build()
1148                .eval(&manifest_file)?,
1149            "Should read: not(false)"
1150        );
1151
1152        let filter = Predicate::Binary(BinaryExpression::new(
1153            PredicateOperator::GreaterThan,
1154            Reference::new("id"),
1155            Datum::int(INT_MIN_VALUE - 25),
1156        ))
1157        .not()
1158        .bind(schema.clone(), case_sensitive)?;
1159        assert!(
1160            ManifestEvaluator::builder(filter)
1161                .build()
1162                .eval(&manifest_file)
1163                .is_err()
1164        );
1165
1166        let filter = Predicate::Binary(BinaryExpression::new(
1167            PredicateOperator::GreaterThan,
1168            Reference::new("id"),
1169            Datum::int(INT_MIN_VALUE - 25),
1170        ))
1171        .not()
1172        .rewrite_not()
1173        .bind(schema.clone(), case_sensitive)?;
1174        assert!(
1175            !ManifestEvaluator::builder(filter)
1176                .build()
1177                .eval(&manifest_file)?,
1178            "Should skip: not(true)"
1179        );
1180
1181        Ok(())
1182    }
1183
1184    #[test]
1185    fn test_less_than() -> Result<()> {
1186        let case_sensitive = true;
1187        let schema = create_schema()?;
1188        let partitions = create_partitions();
1189        let manifest_file = create_manifest_file(partitions);
1190
1191        let filter = Predicate::Binary(BinaryExpression::new(
1192            PredicateOperator::LessThan,
1193            Reference::new("id"),
1194            Datum::int(INT_MIN_VALUE - 25),
1195        ))
1196        .bind(schema.clone(), case_sensitive)?;
1197        assert!(
1198            !ManifestEvaluator::builder(filter)
1199                .build()
1200                .eval(&manifest_file)?,
1201            "Should not read: id range below lower bound (5 < 30)"
1202        );
1203
1204        Ok(())
1205    }
1206
1207    #[test]
1208    fn test_less_than_or_eq() -> Result<()> {
1209        let case_sensitive = true;
1210        let schema = create_schema()?;
1211        let partitions = create_partitions();
1212        let manifest_file = create_manifest_file(partitions);
1213
1214        let filter = Predicate::Binary(BinaryExpression::new(
1215            PredicateOperator::LessThanOrEq,
1216            Reference::new("id"),
1217            Datum::int(INT_MIN_VALUE - 25),
1218        ))
1219        .bind(schema.clone(), case_sensitive)?;
1220        assert!(
1221            !ManifestEvaluator::builder(filter)
1222                .build()
1223                .eval(&manifest_file)?,
1224            "Should not read: id range below lower bound (5 < 30)"
1225        );
1226
1227        Ok(())
1228    }
1229
1230    #[test]
1231    fn test_greater_than() -> Result<()> {
1232        let case_sensitive = true;
1233        let schema = create_schema()?;
1234        let partitions = create_partitions();
1235        let manifest_file = create_manifest_file(partitions);
1236
1237        let filter = Predicate::Binary(BinaryExpression::new(
1238            PredicateOperator::GreaterThan,
1239            Reference::new("id"),
1240            Datum::int(INT_MAX_VALUE + 6),
1241        ))
1242        .bind(schema.clone(), case_sensitive)?;
1243        assert!(
1244            !ManifestEvaluator::builder(filter)
1245                .build()
1246                .eval(&manifest_file)?,
1247            "Should not read: id range above upper bound (85 < 79)"
1248        );
1249
1250        Ok(())
1251    }
1252
1253    #[test]
1254    fn test_greater_than_or_eq() -> Result<()> {
1255        let case_sensitive = true;
1256        let schema = create_schema()?;
1257        let partitions = create_partitions();
1258        let manifest_file = create_manifest_file(partitions);
1259
1260        let filter = Predicate::Binary(BinaryExpression::new(
1261            PredicateOperator::GreaterThanOrEq,
1262            Reference::new("id"),
1263            Datum::int(INT_MAX_VALUE + 6),
1264        ))
1265        .bind(schema.clone(), case_sensitive)?;
1266        assert!(
1267            !ManifestEvaluator::builder(filter)
1268                .build()
1269                .eval(&manifest_file)?,
1270            "Should not read: id range above upper bound (85 < 79)"
1271        );
1272
1273        let filter = Predicate::Binary(BinaryExpression::new(
1274            PredicateOperator::GreaterThanOrEq,
1275            Reference::new("id"),
1276            Datum::int(INT_MAX_VALUE),
1277        ))
1278        .bind(schema.clone(), case_sensitive)?;
1279        assert!(
1280            ManifestEvaluator::builder(filter)
1281                .build()
1282                .eval(&manifest_file)?,
1283            "Should read: one possible id"
1284        );
1285
1286        Ok(())
1287    }
1288
1289    #[test]
1290    fn test_eq() -> Result<()> {
1291        let case_sensitive = true;
1292        let schema = create_schema()?;
1293        let partitions = create_partitions();
1294        let manifest_file = create_manifest_file(partitions);
1295
1296        let filter = Predicate::Binary(BinaryExpression::new(
1297            PredicateOperator::Eq,
1298            Reference::new("id"),
1299            Datum::int(INT_MIN_VALUE - 25),
1300        ))
1301        .bind(schema.clone(), case_sensitive)?;
1302        assert!(
1303            !ManifestEvaluator::builder(filter)
1304                .build()
1305                .eval(&manifest_file)?,
1306            "Should not read: id below lower bound"
1307        );
1308
1309        let filter = Predicate::Binary(BinaryExpression::new(
1310            PredicateOperator::Eq,
1311            Reference::new("id"),
1312            Datum::int(INT_MIN_VALUE),
1313        ))
1314        .bind(schema.clone(), case_sensitive)?;
1315        assert!(
1316            ManifestEvaluator::builder(filter)
1317                .build()
1318                .eval(&manifest_file)?,
1319            "Should read: id equal to lower bound"
1320        );
1321
1322        Ok(())
1323    }
1324
1325    #[test]
1326    fn test_not_eq() -> Result<()> {
1327        let case_sensitive = true;
1328        let schema = create_schema()?;
1329        let partitions = create_partitions();
1330        let manifest_file = create_manifest_file(partitions);
1331
1332        let filter = Predicate::Binary(BinaryExpression::new(
1333            PredicateOperator::NotEq,
1334            Reference::new("id"),
1335            Datum::int(INT_MIN_VALUE - 25),
1336        ))
1337        .bind(schema.clone(), case_sensitive)?;
1338        assert!(
1339            ManifestEvaluator::builder(filter)
1340                .build()
1341                .eval(&manifest_file)?,
1342            "Should read: id below lower bound"
1343        );
1344
1345        Ok(())
1346    }
1347
1348    #[test]
1349    fn test_in() -> Result<()> {
1350        let case_sensitive = true;
1351        let schema = create_schema()?;
1352        let partitions = create_partitions();
1353        let manifest_file = create_manifest_file(partitions);
1354
1355        let filter = Predicate::Set(SetExpression::new(
1356            PredicateOperator::In,
1357            Reference::new("id"),
1358            FnvHashSet::from_iter(vec![
1359                Datum::int(INT_MIN_VALUE - 25),
1360                Datum::int(INT_MIN_VALUE - 24),
1361            ]),
1362        ))
1363        .bind(schema.clone(), case_sensitive)?;
1364        assert!(
1365            !ManifestEvaluator::builder(filter)
1366                .build()
1367                .eval(&manifest_file)?,
1368            "Should not read: id below lower bound (5 < 30, 6 < 30)"
1369        );
1370
1371        let filter = Predicate::Set(SetExpression::new(
1372            PredicateOperator::In,
1373            Reference::new("id"),
1374            FnvHashSet::from_iter(vec![
1375                Datum::int(INT_MIN_VALUE - 1),
1376                Datum::int(INT_MIN_VALUE),
1377            ]),
1378        ))
1379        .bind(schema.clone(), case_sensitive)?;
1380        assert!(
1381            ManifestEvaluator::builder(filter)
1382                .build()
1383                .eval(&manifest_file)?,
1384            "Should read: id equal to lower bound (30 == 30)"
1385        );
1386
1387        Ok(())
1388    }
1389
1390    #[test]
1391    fn test_not_in() -> Result<()> {
1392        let case_sensitive = true;
1393        let schema = create_schema()?;
1394        let partitions = create_partitions();
1395        let manifest_file = create_manifest_file(partitions);
1396
1397        let filter = Predicate::Set(SetExpression::new(
1398            PredicateOperator::NotIn,
1399            Reference::new("id"),
1400            FnvHashSet::from_iter(vec![
1401                Datum::int(INT_MIN_VALUE - 25),
1402                Datum::int(INT_MIN_VALUE - 24),
1403            ]),
1404        ))
1405        .bind(schema.clone(), case_sensitive)?;
1406        assert!(
1407            ManifestEvaluator::builder(filter)
1408                .build()
1409                .eval(&manifest_file)?,
1410            "Should read: id below lower bound (5 < 30, 6 < 30)"
1411        );
1412
1413        Ok(())
1414    }
1415
1416    #[test]
1417    fn test_starts_with() -> Result<()> {
1418        let case_sensitive = false;
1419        let schema = create_schema()?;
1420        let partitions = create_partitions();
1421        let manifest_file = create_manifest_file(partitions);
1422
1423        let filter = Predicate::Binary(BinaryExpression::new(
1424            PredicateOperator::StartsWith,
1425            Reference::new("some_nulls"),
1426            Datum::string("a"),
1427        ))
1428        .bind(schema.clone(), case_sensitive)?;
1429        assert!(
1430            ManifestEvaluator::builder(filter)
1431                .build()
1432                .eval(&manifest_file)?,
1433            "Should read: range matches"
1434        );
1435
1436        let filter = Predicate::Binary(BinaryExpression::new(
1437            PredicateOperator::StartsWith,
1438            Reference::new("some_nulls"),
1439            Datum::string("zzzz"),
1440        ))
1441        .bind(schema.clone(), case_sensitive)?;
1442        assert!(
1443            !ManifestEvaluator::builder(filter)
1444                .build()
1445                .eval(&manifest_file)?,
1446            "Should skip: range doesn't match"
1447        );
1448
1449        Ok(())
1450    }
1451
1452    #[test]
1453    fn test_not_starts_with() -> Result<()> {
1454        let case_sensitive = false;
1455        let schema = create_schema()?;
1456        let partitions = create_partitions();
1457        let manifest_file = create_manifest_file(partitions);
1458
1459        let filter = Predicate::Binary(BinaryExpression::new(
1460            PredicateOperator::NotStartsWith,
1461            Reference::new("some_nulls"),
1462            Datum::string("a"),
1463        ))
1464        .bind(schema.clone(), case_sensitive)?;
1465        assert!(
1466            ManifestEvaluator::builder(filter)
1467                .build()
1468                .eval(&manifest_file)?,
1469            "Should read: range matches"
1470        );
1471
1472        let filter = Predicate::Binary(BinaryExpression::new(
1473            PredicateOperator::NotStartsWith,
1474            Reference::new("no_nulls_same_value_a"),
1475            Datum::string("a"),
1476        ))
1477        .bind(schema.clone(), case_sensitive)?;
1478        assert!(
1479            !ManifestEvaluator::builder(filter)
1480                .build()
1481                .eval(&manifest_file)?,
1482            "Should not read: all values start with the prefix"
1483        );
1484
1485        Ok(())
1486    }
1487
1488    #[test]
1489    fn test_manifest_evaluator_builder_with_rewrite() -> Result<()> {
1490        let case_sensitive = true;
1491        let schema = create_schema()?;
1492        let partitions = create_partitions();
1493        let manifest_file = create_manifest_file(partitions);
1494
1495        // Create a predicate with NOT that should be rewritten
1496        // NOT(id < 25) should become (id >= 25)
1497        let filter = Predicate::Binary(BinaryExpression::new(
1498            PredicateOperator::LessThan,
1499            Reference::new("id"),
1500            Datum::int(25), // This is less than our range [30, 79]
1501        ))
1502        .not()
1503        .bind(schema.clone(), case_sensitive)?;
1504
1505        // Test without rewrite - should fail because NOT is not supported
1506        let evaluator = ManifestEvaluator::builder(filter.clone())
1507            .with_rewrite_not(false)
1508            .build();
1509        assert!(
1510            evaluator.eval(&manifest_file).is_err(),
1511            "Should error: NOT is not supported without rewrite"
1512        );
1513
1514        // Test with rewrite enabled - should succeed
1515        let evaluator = ManifestEvaluator::builder(filter)
1516            .with_rewrite_not(true)
1517            .build();
1518        let result = evaluator.eval(&manifest_file)?;
1519        assert!(
1520            result,
1521            "Should read: NOT(id < 25) becomes (id >= 25), which matches our range [30, 79]"
1522        );
1523
1524        // Test default behavior (no rewrite) with a simple predicate
1525        let simple_filter = Predicate::Binary(BinaryExpression::new(
1526            PredicateOperator::GreaterThan,
1527            Reference::new("id"),
1528            Datum::int(20),
1529        ))
1530        .bind(schema, case_sensitive)?;
1531
1532        let evaluator = ManifestEvaluator::builder(simple_filter).build();
1533        assert!(
1534            evaluator.eval(&manifest_file)?,
1535            "Should read: simple predicate without NOT works by default"
1536        );
1537
1538        Ok(())
1539    }
1540}