iceberg/expr/visitors/
strict_metrics_evaluator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use fnv::FnvHashSet;
19
20use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit};
21use crate::expr::{BoundPredicate, BoundReference};
22use crate::spec::{DataFile, Datum};
23use crate::{Error, ErrorKind, Result};
24
25#[allow(dead_code)]
26const ROWS_MUST_MATCH: Result<bool> = Ok(true);
27#[allow(dead_code)]
28const ROWS_MIGHT_NOT_MATCH: Result<bool> = Ok(false);
29
30#[allow(dead_code)]
31/// Evaluates an `Expression` on a `DataFile` to test whether all rows in the file match.
32///  
33/// This evaluation is strict: it returns true if all rows in a file must match the expression.
34/// For example, if a file's ts column has min X and max Y, this evaluator will return true for ts
35/// &lt; Y+1 but not for ts &lt; Y-1.
36///
37/// Files are passed to `eval(DataFile)`, which returns true if all rows in the file
38/// must contain matching rows and false if the file may contain rows that do not match.
39pub(crate) struct StrictMetricsEvaluator<'a> {
40    data_file: &'a DataFile,
41}
42
43impl<'a> StrictMetricsEvaluator<'a> {
44    #[allow(dead_code)]
45    fn new(data_file: &'a DataFile) -> Self {
46        StrictMetricsEvaluator { data_file }
47    }
48
49    /// Evaluate this `StrictMetricsEvaluator`'s filter predicate against the
50    /// provided [`DataFile`]'s metrics. Used by [`TableScan`] to
51    /// see if this `DataFile` contains data that could match
52    /// the scan's filter.
53    #[allow(dead_code)]
54    pub(crate) fn eval(filter: &'a BoundPredicate, data_file: &'a DataFile) -> crate::Result<bool> {
55        if data_file.record_count == 0 {
56            return ROWS_MUST_MATCH;
57        }
58
59        let mut evaluator = Self::new(data_file);
60        visit(&mut evaluator, filter)
61    }
62
63    fn nan_count(&self, field_id: i32) -> Option<&u64> {
64        self.data_file.nan_value_counts.get(&field_id)
65    }
66
67    fn null_count(&self, field_id: i32) -> Option<&u64> {
68        self.data_file.null_value_counts.get(&field_id)
69    }
70
71    fn value_count(&self, field_id: i32) -> Option<&u64> {
72        self.data_file.value_counts.get(&field_id)
73    }
74
75    fn lower_bound(&self, field_id: i32) -> Option<&Datum> {
76        self.data_file.lower_bounds.get(&field_id)
77    }
78
79    fn upper_bound(&self, field_id: i32) -> Option<&Datum> {
80        self.data_file.upper_bounds.get(&field_id)
81    }
82
83    fn contains_nans_only(&self, field_id: i32) -> bool {
84        let nan_count = self.nan_count(field_id);
85        let value_count = self.value_count(field_id);
86
87        nan_count.is_some() && nan_count == value_count
88    }
89
90    fn contains_nulls_only(&self, field_id: i32) -> bool {
91        let null_count = self.null_count(field_id);
92        let value_count = self.value_count(field_id);
93
94        null_count.is_some() && null_count == value_count
95    }
96
97    fn may_contain_null(&self, field_id: i32) -> bool {
98        if let Some(&null_count) = self.null_count(field_id) {
99            null_count > 0
100        } else {
101            true
102        }
103    }
104
105    fn may_contain_nan(&self, field_id: i32) -> bool {
106        if let Some(&nan_count) = self.nan_count(field_id) {
107            nan_count > 0
108        } else {
109            true
110        }
111    }
112
113    fn visit_inequality(
114        &mut self,
115        reference: &BoundReference,
116        datum: &Datum,
117        cmp_fn: fn(&Datum, &Datum) -> bool,
118        use_lower_bound: bool,
119    ) -> crate::Result<bool> {
120        let field_id = reference.field().id;
121
122        if self.may_contain_null(field_id) || self.may_contain_nan(field_id) {
123            return ROWS_MIGHT_NOT_MATCH;
124        }
125
126        let bound = if use_lower_bound {
127            self.lower_bound(field_id)
128        } else {
129            self.upper_bound(field_id)
130        };
131
132        if let Some(bound) = bound
133            && cmp_fn(bound, datum)
134        {
135            return ROWS_MUST_MATCH;
136        }
137
138        ROWS_MIGHT_NOT_MATCH
139    }
140}
141
142impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> {
143    type T = bool;
144
145    fn always_true(&mut self) -> crate::Result<bool> {
146        ROWS_MUST_MATCH
147    }
148
149    fn always_false(&mut self) -> crate::Result<bool> {
150        ROWS_MIGHT_NOT_MATCH
151    }
152
153    fn and(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
154        Ok(lhs && rhs)
155    }
156
157    fn or(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
158        Ok(lhs || rhs)
159    }
160
161    fn not(&mut self, _inner: bool) -> crate::Result<bool> {
162        Err(Error::new(
163            ErrorKind::DataInvalid,
164            "NOT should be rewritten",
165        ))
166    }
167
168    fn is_null(
169        &mut self,
170        reference: &BoundReference,
171        _predicate: &BoundPredicate,
172    ) -> crate::Result<bool> {
173        let field_id = reference.field().id;
174
175        if self.contains_nulls_only(field_id) {
176            return ROWS_MUST_MATCH;
177        }
178
179        ROWS_MIGHT_NOT_MATCH
180    }
181
182    fn not_null(
183        &mut self,
184        reference: &BoundReference,
185        _predicate: &BoundPredicate,
186    ) -> crate::Result<bool> {
187        let field_id = reference.field().id;
188
189        if let Some(&count) = self.null_count(field_id) {
190            if count == 0 {
191                return ROWS_MUST_MATCH;
192            } else {
193                return ROWS_MIGHT_NOT_MATCH;
194            }
195        }
196        ROWS_MIGHT_NOT_MATCH
197    }
198
199    fn is_nan(
200        &mut self,
201        reference: &BoundReference,
202        _predicate: &BoundPredicate,
203    ) -> crate::Result<bool> {
204        let field_id = reference.field().id;
205
206        let contains_only = self.contains_nans_only(field_id);
207
208        if contains_only {
209            return ROWS_MUST_MATCH;
210        }
211
212        ROWS_MIGHT_NOT_MATCH
213    }
214
215    fn not_nan(
216        &mut self,
217        reference: &BoundReference,
218        _predicate: &BoundPredicate,
219    ) -> crate::Result<bool> {
220        let field_id = reference.field().id;
221
222        if let Some(&nan_count) = self.nan_count(field_id)
223            && nan_count == 0
224        {
225            return ROWS_MUST_MATCH;
226        }
227
228        if self.contains_nulls_only(field_id) {
229            return ROWS_MUST_MATCH;
230        }
231
232        ROWS_MIGHT_NOT_MATCH
233    }
234
235    fn less_than(
236        &mut self,
237        reference: &BoundReference,
238        datum: &Datum,
239        _predicate: &BoundPredicate,
240    ) -> crate::Result<bool> {
241        self.visit_inequality(reference, datum, PartialOrd::lt, false)
242    }
243
244    fn less_than_or_eq(
245        &mut self,
246        reference: &BoundReference,
247        datum: &Datum,
248        _predicate: &BoundPredicate,
249    ) -> crate::Result<bool> {
250        self.visit_inequality(reference, datum, PartialOrd::le, false)
251    }
252
253    fn greater_than(
254        &mut self,
255        reference: &BoundReference,
256        datum: &Datum,
257        _predicate: &BoundPredicate,
258    ) -> crate::Result<bool> {
259        let field_id = reference.field().id;
260
261        if let Some(lower) = self.lower_bound(field_id)
262            && lower.is_nan()
263        {
264            return ROWS_MIGHT_NOT_MATCH;
265        }
266
267        self.visit_inequality(reference, datum, PartialOrd::gt, true)
268    }
269
270    fn greater_than_or_eq(
271        &mut self,
272        reference: &BoundReference,
273        datum: &Datum,
274        _predicate: &BoundPredicate,
275    ) -> crate::Result<bool> {
276        self.visit_inequality(reference, datum, PartialOrd::ge, true)
277    }
278
279    fn eq(
280        &mut self,
281        reference: &BoundReference,
282        datum: &Datum,
283        _predicate: &BoundPredicate,
284    ) -> crate::Result<bool> {
285        let field_id = reference.field().id;
286
287        if self.may_contain_null(field_id) || self.may_contain_nan(field_id) {
288            return ROWS_MIGHT_NOT_MATCH;
289        }
290
291        if let (Some(lower), Some(upper)) = (self.lower_bound(field_id), self.upper_bound(field_id))
292        {
293            // For an equality predicate to hold strictly, we must have:
294            //     lower == literal.value == upper.
295            if lower.literal() == datum.literal() && upper.literal() == datum.literal() {
296                return ROWS_MUST_MATCH;
297            } else {
298                return ROWS_MIGHT_NOT_MATCH;
299            }
300        }
301
302        ROWS_MIGHT_NOT_MATCH
303    }
304
305    fn not_eq(
306        &mut self,
307        reference: &BoundReference,
308        datum: &Datum,
309        _predicate: &BoundPredicate,
310    ) -> crate::Result<bool> {
311        let field_id = reference.field().id;
312
313        if self.contains_nulls_only(field_id) || self.contains_nans_only(field_id) {
314            return ROWS_MUST_MATCH;
315        }
316
317        if let Some(lower) = self.lower_bound(field_id) {
318            if lower.is_nan() {
319                return ROWS_MIGHT_NOT_MATCH;
320            }
321            if lower.literal() > datum.literal() {
322                return ROWS_MUST_MATCH;
323            }
324        }
325
326        if let Some(upper) = self.upper_bound(field_id) {
327            if upper.is_nan() {
328                return ROWS_MIGHT_NOT_MATCH;
329            }
330            if upper.literal() < datum.literal() {
331                return ROWS_MUST_MATCH;
332            }
333        }
334
335        ROWS_MIGHT_NOT_MATCH
336    }
337
338    fn starts_with(
339        &mut self,
340        _reference: &BoundReference,
341        _datum: &Datum,
342        _predicate: &BoundPredicate,
343    ) -> crate::Result<bool> {
344        ROWS_MIGHT_NOT_MATCH
345    }
346
347    fn not_starts_with(
348        &mut self,
349        _reference: &BoundReference,
350        _datum: &Datum,
351        _predicate: &BoundPredicate,
352    ) -> crate::Result<bool> {
353        ROWS_MIGHT_NOT_MATCH
354    }
355
356    fn r#in(
357        &mut self,
358        reference: &BoundReference,
359        literals: &FnvHashSet<Datum>,
360        _predicate: &BoundPredicate,
361    ) -> crate::Result<bool> {
362        let field_id = reference.field().id;
363
364        if self.may_contain_null(field_id) || self.may_contain_nan(field_id) {
365            return ROWS_MIGHT_NOT_MATCH;
366        }
367
368        if let (Some(lower), Some(upper)) = (self.lower_bound(field_id), self.upper_bound(field_id))
369        {
370            if !literals.contains(lower) || !literals.contains(upper) || lower != upper {
371                return ROWS_MIGHT_NOT_MATCH;
372            }
373
374            return ROWS_MUST_MATCH;
375        }
376
377        ROWS_MIGHT_NOT_MATCH
378    }
379
380    fn not_in(
381        &mut self,
382        reference: &BoundReference,
383        literals: &FnvHashSet<Datum>,
384        _predicate: &BoundPredicate,
385    ) -> crate::Result<bool> {
386        let field_id = reference.field().id;
387
388        if self.contains_nulls_only(field_id) || self.contains_nans_only(field_id) {
389            return ROWS_MUST_MATCH;
390        }
391
392        let mut filtered_literals = literals.clone();
393
394        if let Some(lower) = self.lower_bound(field_id) {
395            if lower.is_nan() {
396                return ROWS_MIGHT_NOT_MATCH;
397            }
398
399            filtered_literals.retain(|val| lower <= val);
400            if filtered_literals.is_empty() {
401                return ROWS_MUST_MATCH;
402            }
403        }
404
405        if let Some(upper) = self.upper_bound(field_id) {
406            filtered_literals.retain(|val| *val <= *upper);
407            if filtered_literals.is_empty() {
408                return ROWS_MUST_MATCH;
409            }
410        }
411
412        ROWS_MIGHT_NOT_MATCH
413    }
414}
415
416#[cfg(test)]
417mod test {
418    use std::collections::HashMap;
419    use std::ops::Not;
420    use std::sync::Arc;
421
422    use fnv::FnvHashSet;
423
424    use crate::expr::PredicateOperator::{
425        Eq, GreaterThan, GreaterThanOrEq, In, IsNan, IsNull, LessThan, LessThanOrEq, NotEq, NotIn,
426        NotNan, NotNull, NotStartsWith, StartsWith,
427    };
428    use crate::expr::visitors::strict_metrics_evaluator::StrictMetricsEvaluator;
429    use crate::expr::{
430        BinaryExpression, Bind, BoundPredicate, Predicate, Reference, SetExpression,
431        UnaryExpression,
432    };
433    use crate::spec::{
434        DataContentType, DataFile, DataFileFormat, Datum, NestedField, PrimitiveType, Schema,
435        Struct, Type,
436    };
437
438    const INT_MIN_VALUE: i32 = 30;
439    const INT_MAX_VALUE: i32 = 79;
440
441    // Helper: Create a test schema.
442    fn create_test_schema() -> Arc<Schema> {
443        let table_schema = Schema::builder()
444            .with_fields(vec![
445                // field id=1: "id" (Int)
446                Arc::new(NestedField::required(
447                    1,
448                    "id",
449                    Type::Primitive(PrimitiveType::Int),
450                )),
451                // field id=2: "no_stats" (Int)
452                Arc::new(NestedField::optional(
453                    2,
454                    "no_stats",
455                    Type::Primitive(PrimitiveType::Int),
456                )),
457                // field id=3: "required" (String)
458                Arc::new(NestedField::required(
459                    3,
460                    "required",
461                    Type::Primitive(PrimitiveType::String),
462                )),
463                // field id=4: "all_nulls" (String)
464                Arc::new(NestedField::optional(
465                    4,
466                    "all_nulls",
467                    Type::Primitive(PrimitiveType::String),
468                )),
469                // field id=5: "some_nulls" (String)
470                Arc::new(NestedField::optional(
471                    5,
472                    "some_nulls",
473                    Type::Primitive(PrimitiveType::String),
474                )),
475                // field id=6: "no_nulls" (String)
476                Arc::new(NestedField::optional(
477                    6,
478                    "no_nulls",
479                    Type::Primitive(PrimitiveType::String),
480                )),
481                // field id=7: "all_nans" (Double)
482                Arc::new(NestedField::optional(
483                    7,
484                    "all_nans",
485                    Type::Primitive(PrimitiveType::Double),
486                )),
487                // field id=8: "some_nans" (Float)
488                Arc::new(NestedField::optional(
489                    8,
490                    "some_nans",
491                    Type::Primitive(PrimitiveType::Float),
492                )),
493                // field id=9: "no_nans" (Float)
494                Arc::new(NestedField::optional(
495                    9,
496                    "no_nans",
497                    Type::Primitive(PrimitiveType::Float),
498                )),
499                // field id=10: "all_nulls_double" (Double)
500                Arc::new(NestedField::optional(
501                    10,
502                    "all_nulls_double",
503                    Type::Primitive(PrimitiveType::Double),
504                )),
505                // field id=11: "all_nans_v1_stats" (Float)
506                Arc::new(NestedField::optional(
507                    11,
508                    "all_nans_v1_stats",
509                    Type::Primitive(PrimitiveType::Float),
510                )),
511                // field id=12: "nan_and_null_only" (Double)
512                Arc::new(NestedField::optional(
513                    12,
514                    "nan_and_null_only",
515                    Type::Primitive(PrimitiveType::Double),
516                )),
517                // field id=13: "no_nan_stats" (Double)
518                Arc::new(NestedField::optional(
519                    13,
520                    "no_nan_stats",
521                    Type::Primitive(PrimitiveType::Double),
522                )),
523                // field id=14: "some_empty" (String)
524                Arc::new(NestedField::optional(
525                    14,
526                    "some_empty",
527                    Type::Primitive(PrimitiveType::String),
528                )),
529            ])
530            .build()
531            .unwrap();
532
533        Arc::new(table_schema)
534    }
535
536    fn get_test_file_1() -> DataFile {
537        DataFile {
538            content: DataContentType::Data,
539            file_path: "/test/path".to_string(),
540            file_format: DataFileFormat::Parquet,
541            partition: Struct::empty(),
542            record_count: 50,
543            file_size_in_bytes: 10,
544            value_counts: HashMap::from([
545                (1, 50),
546                (4, 50),
547                (5, 50),
548                (6, 50),
549                (7, 50),
550                (8, 50),
551                (9, 50),
552                (10, 50),
553                (11, 50),
554                (12, 50),
555                (14, 50),
556            ]),
557            null_value_counts: HashMap::from([
558                (1, 0),
559                (4, 50),
560                (5, 10),
561                (6, 0),
562                (10, 50),
563                (11, 0),
564                (12, 1),
565            ]),
566            nan_value_counts: HashMap::from([(1, 0), (7, 50), (8, 10), (9, 0), (11, 50)]),
567            lower_bounds: HashMap::from([
568                (1, Datum::int(INT_MIN_VALUE)), // id lower bound = 30
569                (11, Datum::float(f32::NAN)),
570                (12, Datum::double(f64::NAN)),
571                (14, Datum::string("")),
572            ]),
573            upper_bounds: HashMap::from([
574                (1, Datum::int(INT_MAX_VALUE)), // id upper bound = 79
575                (11, Datum::float(f32::NAN)),
576                (12, Datum::double(f64::NAN)),
577                (14, Datum::string("房东整租霍营小区二层两居室")),
578            ]),
579            column_sizes: Default::default(),
580            key_metadata: None,
581            split_offsets: None,
582            equality_ids: None,
583            sort_order_id: None,
584            partition_spec_id: 0,
585            first_row_id: None,
586            referenced_data_file: None,
587            content_offset: None,
588            content_size_in_bytes: None,
589        }
590    }
591
592    fn create_zero_records_data_file() -> DataFile {
593        DataFile {
594            content: DataContentType::Data,
595            file_path: "/test/zero".to_string(),
596            file_format: DataFileFormat::Parquet,
597            partition: Struct::empty(),
598            record_count: 0,
599            file_size_in_bytes: 10,
600            column_sizes: HashMap::new(),
601            value_counts: Default::default(),
602            null_value_counts: Default::default(),
603            nan_value_counts: Default::default(),
604            lower_bounds: Default::default(),
605            upper_bounds: Default::default(),
606            key_metadata: None,
607            split_offsets: None,
608            equality_ids: None,
609            sort_order_id: None,
610            partition_spec_id: 0,
611            first_row_id: None,
612            referenced_data_file: None,
613            content_offset: None,
614            content_size_in_bytes: None,
615        }
616    }
617
618    fn get_test_file_eq() -> DataFile {
619        DataFile {
620            content: DataContentType::Data,
621            file_path: "/test/path_eq".to_string(),
622            file_format: DataFileFormat::Parquet,
623            partition: Struct::empty(),
624            record_count: 10,
625            file_size_in_bytes: 10,
626            value_counts: HashMap::from([(1, 10)]),
627            null_value_counts: HashMap::from([(1, 0)]),
628            nan_value_counts: HashMap::from([(1, 0)]),
629            lower_bounds: HashMap::from([(1, Datum::int(42))]),
630            upper_bounds: HashMap::from([(1, Datum::int(42))]),
631            column_sizes: Default::default(),
632            key_metadata: None,
633            split_offsets: None,
634            equality_ids: None,
635            sort_order_id: None,
636            partition_spec_id: 0,
637            first_row_id: None,
638            referenced_data_file: None,
639            content_offset: None,
640            content_size_in_bytes: None,
641        }
642    }
643
644    // For string test files we reuse the ones from inclusive tests.
645    fn get_test_file_2() -> DataFile {
646        DataFile {
647            content: DataContentType::Data,
648            file_path: "file_2.avro".to_string(),
649            file_format: DataFileFormat::Parquet,
650            partition: Struct::empty(),
651            record_count: 50,
652            file_size_in_bytes: 10,
653            value_counts: HashMap::from([(3, 20)]),
654            null_value_counts: HashMap::from([(3, 2)]),
655            nan_value_counts: HashMap::default(),
656            lower_bounds: HashMap::from([(3, Datum::string("aa"))]),
657            upper_bounds: HashMap::from([(3, Datum::string("dC"))]),
658            column_sizes: Default::default(),
659            key_metadata: None,
660            split_offsets: None,
661            equality_ids: None,
662            sort_order_id: None,
663            partition_spec_id: 0,
664            first_row_id: None,
665            referenced_data_file: None,
666            content_offset: None,
667            content_size_in_bytes: None,
668        }
669    }
670
671    // Helper functions to bind predicates with the test schema and then evaluate using StrictMetricsEvaluator.
672    fn not_null(reference: &str) -> BoundPredicate {
673        let schema = create_test_schema();
674        let filter = Predicate::Unary(UnaryExpression::new(NotNull, Reference::new(reference)));
675        filter.bind(schema.clone(), true).unwrap()
676    }
677
678    fn is_null(reference: &str) -> BoundPredicate {
679        let schema = create_test_schema();
680        let filter = Predicate::Unary(UnaryExpression::new(IsNull, Reference::new(reference)));
681        filter.bind(schema.clone(), true).unwrap()
682    }
683
684    fn not_nan(reference: &str) -> BoundPredicate {
685        let schema = create_test_schema();
686        let filter = Predicate::Unary(UnaryExpression::new(NotNan, Reference::new(reference)));
687        filter.bind(schema.clone(), true).unwrap()
688    }
689
690    fn is_nan(reference: &str) -> BoundPredicate {
691        let schema = create_test_schema();
692        let filter = Predicate::Unary(UnaryExpression::new(IsNan, Reference::new(reference)));
693        filter.bind(schema.clone(), true).unwrap()
694    }
695
696    fn less_than(reference: &str, literal: &str) -> BoundPredicate {
697        let schema = create_test_schema();
698        let filter = Predicate::Binary(BinaryExpression::new(
699            LessThan,
700            Reference::new(reference),
701            Datum::string(literal),
702        ));
703        filter.bind(schema.clone(), true).unwrap()
704    }
705
706    fn less_than_or_equal(reference: &str, literal: &str) -> BoundPredicate {
707        let schema = create_test_schema();
708        let filter = Predicate::Binary(BinaryExpression::new(
709            LessThanOrEq,
710            Reference::new(reference),
711            Datum::string(literal),
712        ));
713        filter.bind(schema.clone(), true).unwrap()
714    }
715
716    fn greater_than(reference: &str, literal: &str) -> BoundPredicate {
717        let schema = create_test_schema();
718        let filter = Predicate::Binary(BinaryExpression::new(
719            GreaterThan,
720            Reference::new(reference),
721            Datum::string(literal),
722        ));
723        filter.bind(schema.clone(), true).unwrap()
724    }
725
726    fn greater_than_or_equal(reference: &str, literal: &str) -> BoundPredicate {
727        let schema = create_test_schema();
728        let filter = Predicate::Binary(BinaryExpression::new(
729            GreaterThanOrEq,
730            Reference::new(reference),
731            Datum::string(literal),
732        ));
733        filter.bind(schema.clone(), true).unwrap()
734    }
735
736    fn equal(reference: &str, literal: &str) -> BoundPredicate {
737        let schema = create_test_schema();
738        let filter = Predicate::Binary(BinaryExpression::new(
739            Eq,
740            Reference::new(reference),
741            Datum::string(literal),
742        ));
743        filter.bind(schema.clone(), true).unwrap()
744    }
745
746    fn less_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
747        let schema = create_test_schema();
748        let filter = Predicate::Binary(BinaryExpression::new(
749            LessThan,
750            Reference::new(reference),
751            Datum::int(int_literal),
752        ));
753        filter.bind(schema.clone(), true).unwrap()
754    }
755
756    fn not_less_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
757        let schema = create_test_schema();
758        let filter = Predicate::Binary(BinaryExpression::new(
759            LessThan,
760            Reference::new(reference),
761            Datum::int(int_literal),
762        ))
763        .not();
764        filter.bind(schema.clone(), true).unwrap()
765    }
766
767    fn less_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
768        let schema = create_test_schema();
769        let filter = Predicate::Binary(BinaryExpression::new(
770            LessThanOrEq,
771            Reference::new(reference),
772            Datum::int(int_literal),
773        ));
774        filter.bind(schema.clone(), true).unwrap()
775    }
776
777    fn greater_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
778        let schema = create_test_schema();
779        let filter = Predicate::Binary(BinaryExpression::new(
780            GreaterThan,
781            Reference::new(reference),
782            Datum::int(int_literal),
783        ));
784        filter.bind(schema.clone(), true).unwrap()
785    }
786
787    fn greater_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
788        let schema = create_test_schema();
789        let filter = Predicate::Binary(BinaryExpression::new(
790            GreaterThanOrEq,
791            Reference::new(reference),
792            Datum::int(int_literal),
793        ));
794        filter.bind(schema.clone(), true).unwrap()
795    }
796
797    fn equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
798        let schema = create_test_schema();
799        let filter = Predicate::Binary(BinaryExpression::new(
800            Eq,
801            Reference::new(reference),
802            Datum::int(int_literal),
803        ));
804        filter.bind(schema.clone(), true).unwrap()
805    }
806
807    fn equal_int_not(reference: &str, int_literal: i32) -> BoundPredicate {
808        let schema = create_test_schema();
809        let filter = Predicate::Binary(BinaryExpression::new(
810            Eq,
811            Reference::new(reference),
812            Datum::int(int_literal),
813        ))
814        .not();
815        filter.bind(schema.clone(), true).unwrap()
816    }
817
818    fn not_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
819        let schema = create_test_schema();
820        let filter = Predicate::Binary(BinaryExpression::new(
821            NotEq,
822            Reference::new(reference),
823            Datum::int(int_literal),
824        ));
825        filter.bind(schema.clone(), true).unwrap()
826    }
827
828    fn starts_with(reference: &str, literal: &str) -> BoundPredicate {
829        let schema = create_test_schema();
830        let filter = Predicate::Binary(BinaryExpression::new(
831            StartsWith,
832            Reference::new(reference),
833            Datum::string(literal),
834        ));
835        filter.bind(schema.clone(), true).unwrap()
836    }
837
838    fn not_starts_with(reference: &str, literal: &str) -> BoundPredicate {
839        let schema = create_test_schema();
840        let filter = Predicate::Binary(BinaryExpression::new(
841            NotStartsWith,
842            Reference::new(reference),
843            Datum::string(literal),
844        ));
845        filter.bind(schema.clone(), true).unwrap()
846    }
847
848    fn in_int(reference: &str, int_literals: &[i32]) -> BoundPredicate {
849        let schema = create_test_schema();
850        let filter = Predicate::Set(SetExpression::new(
851            In,
852            Reference::new(reference),
853            FnvHashSet::from_iter(int_literals.iter().copied().map(Datum::int)),
854        ));
855        filter.bind(schema.clone(), true).unwrap()
856    }
857
858    fn not_in_int(reference: &str, int_literals: &[i32]) -> BoundPredicate {
859        let schema = create_test_schema();
860        let filter = Predicate::Set(SetExpression::new(
861            NotIn,
862            Reference::new(reference),
863            FnvHashSet::from_iter(int_literals.iter().copied().map(Datum::int)),
864        ));
865        filter.bind(schema.clone(), true).unwrap()
866    }
867
868    fn not_in_str(reference: &str, str_literals: &[&str]) -> BoundPredicate {
869        let schema = create_test_schema();
870        let filter = Predicate::Set(SetExpression::new(
871            NotIn,
872            Reference::new(reference),
873            FnvHashSet::from_iter(str_literals.iter().map(Datum::string)),
874        ));
875        filter.bind(schema.clone(), true).unwrap()
876    }
877
878    #[test]
879    fn test_data_file_no_partitions() {
880        let schema = create_test_schema();
881        let partition_filter = Predicate::AlwaysTrue.bind(schema.clone(), false).unwrap();
882
883        let data_file = get_test_file_1();
884
885        let result = StrictMetricsEvaluator::eval(&partition_filter, &data_file).unwrap();
886        assert!(result, "Should read: AlwaysTrue predicate");
887    }
888
889    #[test]
890    fn test_all_nulls() {
891        let file = get_test_file_1();
892
893        // "all_nulls" (field 4) is all null.
894        let result = StrictMetricsEvaluator::eval(&not_null("all_nulls"), &file).unwrap();
895        assert!(!result, "Should skip: notNull on all-null column");
896
897        let result = StrictMetricsEvaluator::eval(&less_than("all_nulls", "a"), &file).unwrap();
898        assert!(!result, "Should skip: lessThan on all-null column");
899
900        let result =
901            StrictMetricsEvaluator::eval(&less_than_or_equal("all_nulls", "a"), &file).unwrap();
902        assert!(!result, "Should skip: lessThanOrEqual on all-null column");
903
904        let result = StrictMetricsEvaluator::eval(&greater_than("all_nulls", "a"), &file).unwrap();
905        assert!(!result, "Should skip: greaterThan on all-null column");
906
907        let result =
908            StrictMetricsEvaluator::eval(&greater_than_or_equal("all_nulls", "a"), &file).unwrap();
909        assert!(
910            !result,
911            "Should skip: greaterThanOrEqual on all-null column"
912        );
913
914        let result = StrictMetricsEvaluator::eval(&equal("all_nulls", "a"), &file).unwrap();
915        assert!(!result, "Should skip: equal on all-null column");
916
917        let result = StrictMetricsEvaluator::eval(&starts_with("all_nulls", "a"), &file).unwrap();
918        assert!(!result, "Strict eval: startsWith always returns false");
919
920        let result =
921            StrictMetricsEvaluator::eval(&not_starts_with("all_nulls", "a"), &file).unwrap();
922        assert!(!result, "Strict eval: notStartsWith always returns false");
923
924        // "some_nulls" (field 5) has some nulls.
925        let result = StrictMetricsEvaluator::eval(&not_null("some_nulls"), &file).unwrap();
926        assert!(!result, "Should skip: notNull on column with some nulls");
927
928        // "no_nulls" (field 6) has no nulls.
929        let result = StrictMetricsEvaluator::eval(&not_null("no_nulls"), &file).unwrap();
930        assert!(result, "Should read: notNull on column with no nulls");
931    }
932
933    #[test]
934    fn test_no_nulls() {
935        let file = get_test_file_1();
936
937        // "all_nulls" is all null so isNull returns MUST_MATCH.
938        let result = StrictMetricsEvaluator::eval(&is_null("all_nulls"), &file).unwrap();
939        assert!(result, "Should read: isNull on all-null column");
940
941        // "some_nulls" is not all null.
942        let result = StrictMetricsEvaluator::eval(&is_null("some_nulls"), &file).unwrap();
943        assert!(
944            !result,
945            "Should skip: isNull on column with some non-null values"
946        );
947
948        // "no_nulls" has no nulls.
949        let result = StrictMetricsEvaluator::eval(&is_null("no_nulls"), &file).unwrap();
950        assert!(!result, "Should skip: isNull on column with no nulls");
951    }
952
953    #[test]
954    fn test_is_nan() {
955        let file = get_test_file_1();
956
957        // "all_nans" (field 7) is all NaN.
958        let result = StrictMetricsEvaluator::eval(&is_nan("all_nans"), &file).unwrap();
959        assert!(result, "Should read: isNan on all-NaN column");
960
961        // "some_nans" (field 8) has some NaN.
962        let result = StrictMetricsEvaluator::eval(&is_nan("some_nans"), &file).unwrap();
963        assert!(!result, "Should skip: isNan on column with some NaNs");
964
965        // "no_nans" (field 9) has no NaN.
966        let result = StrictMetricsEvaluator::eval(&is_nan("no_nans"), &file).unwrap();
967        assert!(!result, "Should skip: isNan on column with no NaNs");
968
969        // "all_nulls_double" (field 10) is all null.
970        let result = StrictMetricsEvaluator::eval(&is_nan("all_nulls_double"), &file).unwrap();
971        assert!(!result, "Should skip: isNan on all-null double column");
972
973        // "no_nan_stats" (field 13) missing stats → cannot guarantee, so false.
974        let result = StrictMetricsEvaluator::eval(&is_nan("no_nan_stats"), &file).unwrap();
975        assert!(!result, "Should skip: isNan when stats are missing");
976
977        // "all_nans_v1_stats" (field 11) is all NaN.
978        let result = StrictMetricsEvaluator::eval(&is_nan("all_nans_v1_stats"), &file).unwrap();
979        assert!(result, "Should read: isNan on all-NaN (v1 stats) column");
980
981        // "nan_and_null_only" (field 12) → mixed, so false.
982        let result = StrictMetricsEvaluator::eval(&is_nan("nan_and_null_only"), &file).unwrap();
983        assert!(!result, "Should skip: isNan on nan-and-null-only column");
984    }
985
986    #[test]
987    fn test_not_nan() {
988        let file = get_test_file_1();
989
990        // "all_nans" → notNan returns MIGHT_NOT_MATCH.
991        let result = StrictMetricsEvaluator::eval(&not_nan("all_nans"), &file).unwrap();
992        assert!(
993            !result,
994            "Should read: notNan on all-NaN column (strict: must match)"
995        );
996
997        // "some_nans" → returns false.
998        let result = StrictMetricsEvaluator::eval(&not_nan("some_nans"), &file).unwrap();
999        assert!(!result, "Should skip: notNan on column with some NaNs");
1000
1001        // "no_nans" → notNan returns MUST_MATCH.
1002        let result = StrictMetricsEvaluator::eval(&not_nan("no_nans"), &file).unwrap();
1003        assert!(result, "Should read: notNan on column with no NaNs");
1004
1005        // "all_nulls_double" → returns MUST_MATCH due to all nulls.
1006        let result = StrictMetricsEvaluator::eval(&not_nan("all_nulls_double"), &file).unwrap();
1007        assert!(result, "Should read: notNan on all-null double column");
1008
1009        // "no_nan_stats" → missing stats so returns false.
1010        let result = StrictMetricsEvaluator::eval(&not_nan("no_nan_stats"), &file).unwrap();
1011        assert!(!result, "Should skip: notNan when stats are missing");
1012
1013        // "all_nans_v1_stats" → returns false.
1014        let result = StrictMetricsEvaluator::eval(&not_nan("all_nans_v1_stats"), &file).unwrap();
1015        assert!(!result, "Should read: notNan on all-NaN (v1 stats) column");
1016
1017        // "nan_and_null_only" → returns false.
1018        let result = StrictMetricsEvaluator::eval(&not_nan("nan_and_null_only"), &file).unwrap();
1019        assert!(!result, "Should skip: notNan on nan-and-null-only column");
1020    }
1021
1022    #[test]
1023    #[should_panic]
1024    fn test_missing_column() {
1025        let _ = StrictMetricsEvaluator::eval(&less_than("missing", "a"), &get_test_file_1());
1026    }
1027
1028    #[test]
1029    fn test_zero_record_file() {
1030        let file = create_zero_records_data_file();
1031
1032        let expressions = [
1033            less_than_int("no_stats", 5),
1034            less_than_or_equal_int("no_stats", 30),
1035            equal_int("no_stats", 70),
1036            greater_than_int("no_stats", 78),
1037            greater_than_or_equal_int("no_stats", 90),
1038            not_equal_int("no_stats", 101),
1039            is_null("no_stats"),
1040            not_null("no_stats"),
1041        ];
1042
1043        for expr in expressions {
1044            let result = StrictMetricsEvaluator::eval(&expr, &file).unwrap();
1045            // For zero-record files, strict eval returns MUST_MATCH.
1046            assert!(
1047                result,
1048                "Strict eval: Should read zero-record file for expression {expr:?}"
1049            );
1050        }
1051    }
1052
1053    #[test]
1054    fn test_not() {
1055        let file = get_test_file_1();
1056
1057        let result =
1058            StrictMetricsEvaluator::eval(&not_less_than_int("id", INT_MIN_VALUE - 25), &file);
1059        assert!(result.is_err());
1060    }
1061
1062    #[test]
1063    fn test_and() {
1064        let schema = create_test_schema();
1065
1066        // (id < (INT_MIN_VALUE - 25)) AND (id >= (INT_MAX_VALUE + 1))
1067        let filter = Predicate::Binary(BinaryExpression::new(
1068            LessThan,
1069            Reference::new("id"),
1070            Datum::int(INT_MIN_VALUE - 25),
1071        ))
1072        .and(Predicate::Binary(BinaryExpression::new(
1073            GreaterThanOrEq,
1074            Reference::new("id"),
1075            Datum::int(INT_MAX_VALUE + 1),
1076        )));
1077        let bound = filter.bind(schema.clone(), true).unwrap();
1078        let result = StrictMetricsEvaluator::eval(&bound, &get_test_file_1()).unwrap();
1079        assert!(!result, "Strict eval: and(false, false) should be false");
1080
1081        // (id > (INT_MIN_VALUE - 1)) AND (id <= (INT_MAX_VALUE + 1))
1082        let filter = Predicate::Binary(BinaryExpression::new(
1083            GreaterThan,
1084            Reference::new("id"),
1085            Datum::int(INT_MIN_VALUE - 1),
1086        ))
1087        .and(Predicate::Binary(BinaryExpression::new(
1088            LessThanOrEq,
1089            Reference::new("id"),
1090            Datum::int(INT_MAX_VALUE + 1),
1091        )));
1092        let bound = filter.bind(schema.clone(), true).unwrap();
1093        let result = StrictMetricsEvaluator::eval(&bound, &get_test_file_1()).unwrap();
1094        assert!(result, "Strict eval: and(true, true) should be true");
1095    }
1096
1097    #[test]
1098    fn test_or() {
1099        let schema = create_test_schema();
1100
1101        let filter = Predicate::Binary(BinaryExpression::new(
1102            LessThan,
1103            Reference::new("id"),
1104            Datum::int(INT_MIN_VALUE - 25),
1105        ))
1106        .or(Predicate::Binary(BinaryExpression::new(
1107            GreaterThanOrEq,
1108            Reference::new("id"),
1109            Datum::int(INT_MIN_VALUE - 30),
1110        )));
1111        let bound = filter.bind(schema.clone(), true).unwrap();
1112        let result = StrictMetricsEvaluator::eval(&bound, &get_test_file_1()).unwrap();
1113        assert!(result, "Strict eval: or(false, true) should be true");
1114
1115        let filter = Predicate::Binary(BinaryExpression::new(
1116            LessThan,
1117            Reference::new("id"),
1118            Datum::int(INT_MIN_VALUE - 25),
1119        ))
1120        .or(Predicate::Binary(BinaryExpression::new(
1121            GreaterThanOrEq,
1122            Reference::new("id"),
1123            Datum::int(INT_MAX_VALUE + 1),
1124        )));
1125        let bound = filter.bind(schema.clone(), true).unwrap();
1126        let result = StrictMetricsEvaluator::eval(&bound, &get_test_file_1()).unwrap();
1127        assert!(!result, "Strict eval: or(false, false) should be false");
1128    }
1129
1130    #[test]
1131    fn test_integer_lt() {
1132        let file = get_test_file_1();
1133
1134        let result =
1135            StrictMetricsEvaluator::eval(&less_than_int("id", INT_MIN_VALUE - 25), &file).unwrap();
1136        assert!(
1137            !result,
1138            "Strict eval: id < {} should be false",
1139            INT_MIN_VALUE - 25
1140        );
1141
1142        let result =
1143            StrictMetricsEvaluator::eval(&less_than_int("id", INT_MIN_VALUE), &file).unwrap();
1144        assert!(!result, "Strict eval: id < {INT_MIN_VALUE} should be false");
1145
1146        let result =
1147            StrictMetricsEvaluator::eval(&less_than_int("id", INT_MIN_VALUE + 1), &file).unwrap();
1148        assert!(
1149            !result,
1150            "Strict eval: id < {} should be false",
1151            INT_MIN_VALUE + 1
1152        );
1153
1154        let result =
1155            StrictMetricsEvaluator::eval(&less_than_int("id", INT_MAX_VALUE), &file).unwrap();
1156        assert!(!result, "Strict eval: id < {INT_MAX_VALUE} should be false");
1157
1158        let result =
1159            StrictMetricsEvaluator::eval(&less_than_int("id", INT_MAX_VALUE + 1), &file).unwrap();
1160        assert!(
1161            result,
1162            "Strict eval: id < {} should be true",
1163            INT_MAX_VALUE + 1
1164        );
1165    }
1166
1167    #[test]
1168    fn test_integer_lt_eq() {
1169        let file = get_test_file_1();
1170
1171        let result =
1172            StrictMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MIN_VALUE - 25), &file)
1173                .unwrap();
1174        assert!(
1175            !result,
1176            "Strict eval: id <= {} should be false",
1177            INT_MIN_VALUE - 25
1178        );
1179
1180        let result =
1181            StrictMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MIN_VALUE - 1), &file)
1182                .unwrap();
1183        assert!(
1184            !result,
1185            "Strict eval: id <= {} should be false",
1186            INT_MIN_VALUE - 1
1187        );
1188
1189        let result =
1190            StrictMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MIN_VALUE), &file)
1191                .unwrap();
1192        assert!(
1193            !result,
1194            "Strict eval: id <= {INT_MIN_VALUE} should be false"
1195        );
1196
1197        let result =
1198            StrictMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MAX_VALUE), &file)
1199                .unwrap();
1200        assert!(result, "Strict eval: id <= {INT_MAX_VALUE} should be true");
1201
1202        let result =
1203            StrictMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MAX_VALUE + 1), &file)
1204                .unwrap();
1205        assert!(
1206            result,
1207            "Strict eval: id <= {} should be true",
1208            INT_MAX_VALUE + 1
1209        );
1210    }
1211
1212    #[test]
1213    fn test_integer_gt() {
1214        let file = get_test_file_1();
1215
1216        let result =
1217            StrictMetricsEvaluator::eval(&greater_than_int("id", INT_MAX_VALUE + 6), &file)
1218                .unwrap();
1219        assert!(
1220            !result,
1221            "Strict eval: id > {} should be false",
1222            INT_MAX_VALUE + 6
1223        );
1224
1225        let result =
1226            StrictMetricsEvaluator::eval(&greater_than_int("id", INT_MAX_VALUE), &file).unwrap();
1227        assert!(!result, "Strict eval: id > {INT_MAX_VALUE} should be false");
1228
1229        let result =
1230            StrictMetricsEvaluator::eval(&greater_than_int("id", INT_MIN_VALUE), &file).unwrap();
1231        assert!(!result, "Strict eval: id > {INT_MIN_VALUE} should be false");
1232
1233        let result =
1234            StrictMetricsEvaluator::eval(&greater_than_int("id", INT_MIN_VALUE - 1), &file)
1235                .unwrap();
1236        assert!(
1237            result,
1238            "Strict eval: id > {} should be true",
1239            INT_MIN_VALUE - 1
1240        );
1241
1242        let result =
1243            StrictMetricsEvaluator::eval(&greater_than_int("id", INT_MIN_VALUE - 4), &file)
1244                .unwrap();
1245        assert!(
1246            result,
1247            "Strict eval: id > {} should be true",
1248            INT_MIN_VALUE - 4
1249        );
1250    }
1251
1252    #[test]
1253    fn test_integer_gt_eq() {
1254        let file = get_test_file_1();
1255
1256        let result = StrictMetricsEvaluator::eval(
1257            &greater_than_or_equal_int("id", INT_MAX_VALUE + 6),
1258            &file,
1259        )
1260        .unwrap();
1261        assert!(
1262            !result,
1263            "Strict eval: id >= {} should be false",
1264            INT_MAX_VALUE + 6
1265        );
1266
1267        let result = StrictMetricsEvaluator::eval(
1268            &greater_than_or_equal_int("id", INT_MAX_VALUE + 1),
1269            &file,
1270        )
1271        .unwrap();
1272        assert!(
1273            !result,
1274            "Strict eval: id >= {} should be false",
1275            INT_MAX_VALUE + 1
1276        );
1277
1278        let result =
1279            StrictMetricsEvaluator::eval(&greater_than_or_equal_int("id", INT_MAX_VALUE), &file)
1280                .unwrap();
1281        assert!(
1282            !result,
1283            "Strict eval: id >= {INT_MAX_VALUE} should be false"
1284        );
1285
1286        let result =
1287            StrictMetricsEvaluator::eval(&greater_than_or_equal_int("id", INT_MIN_VALUE), &file)
1288                .unwrap();
1289        assert!(result, "Strict eval: id >= {INT_MIN_VALUE} should be true");
1290
1291        let result = StrictMetricsEvaluator::eval(
1292            &greater_than_or_equal_int("id", INT_MIN_VALUE - 1),
1293            &file,
1294        )
1295        .unwrap();
1296        assert!(
1297            result,
1298            "Strict eval: id >= {} should be true",
1299            INT_MIN_VALUE - 1
1300        );
1301    }
1302
1303    #[test]
1304    fn test_integer_eq() {
1305        let file = get_test_file_1();
1306
1307        let result = StrictMetricsEvaluator::eval(&equal_int("id", INT_MIN_VALUE), &file).unwrap();
1308        assert!(
1309            !result,
1310            "Strict eval: equal should be false if bounds are not identical"
1311        );
1312
1313        let eq_file = get_test_file_eq();
1314        let result = StrictMetricsEvaluator::eval(&equal_int("id", 42), &eq_file).unwrap();
1315        assert!(
1316            result,
1317            "Strict eval: equal should be true when lower == upper == literal"
1318        );
1319
1320        let result = StrictMetricsEvaluator::eval(&equal_int("id", 41), &eq_file).unwrap();
1321        assert!(
1322            !result,
1323            "Strict eval: equal should be false for non-matching literal"
1324        );
1325    }
1326
1327    #[test]
1328    fn test_integer_not_eq() {
1329        let file = get_test_file_1();
1330
1331        let result =
1332            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE - 25), &file).unwrap();
1333        assert!(
1334            result,
1335            "Strict eval: notEqual should be true when lower bound > literal"
1336        );
1337
1338        let result =
1339            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE - 1), &file).unwrap();
1340        assert!(
1341            result,
1342            "Strict eval: notEqual should be true when lower bound > literal"
1343        );
1344
1345        let result =
1346            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE), &file).unwrap();
1347        assert!(
1348            !result,
1349            "Strict eval: notEqual should be false when literal equals lower bound (but upper is different)"
1350        );
1351
1352        let result =
1353            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE - 4), &file).unwrap();
1354        assert!(
1355            !result,
1356            "Strict eval: notEqual should be false when literal is between bounds"
1357        );
1358
1359        let result =
1360            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE), &file).unwrap();
1361        assert!(
1362            !result,
1363            "Strict eval: notEqual should be false when literal equals upper bound"
1364        );
1365
1366        let result =
1367            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE + 1), &file).unwrap();
1368        assert!(
1369            result,
1370            "Strict eval: notEqual should be true when upper bound < literal"
1371        );
1372
1373        let result =
1374            StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE + 6), &file).unwrap();
1375        assert!(
1376            result,
1377            "Strict eval: notEqual should be true when literal is well above upper bound"
1378        );
1379
1380        let eq_file = get_test_file_eq();
1381        let result = StrictMetricsEvaluator::eval(&not_equal_int("id", 42), &eq_file).unwrap();
1382        assert!(
1383            !result,
1384            "Strict eval: notEqual should be false when literal equals the only value"
1385        );
1386
1387        let result = StrictMetricsEvaluator::eval(&not_equal_int("id", 41), &eq_file).unwrap();
1388        assert!(
1389            result,
1390            "Strict eval: notEqual should be true when literal does not equal the only value"
1391        );
1392    }
1393
1394    #[test]
1395    #[should_panic]
1396    fn test_case_sensitive_integer_not_eq_rewritten() {
1397        let _ = StrictMetricsEvaluator::eval(&equal_int_not("ID", 5), &get_test_file_1()).unwrap();
1398    }
1399
1400    #[test]
1401    fn test_string_starts_with() {
1402        let file1 = get_test_file_1();
1403        let file2 = get_test_file_2();
1404
1405        let result = StrictMetricsEvaluator::eval(&starts_with("required", "a"), &file1).unwrap();
1406        assert!(
1407            !result,
1408            "strict eval: startsWith always false (no metrics support)"
1409        );
1410        let result = StrictMetricsEvaluator::eval(&starts_with("required", "a"), &file2).unwrap();
1411        assert!(!result, "strict eval: startsWith always false");
1412    }
1413
1414    #[test]
1415    fn test_string_not_starts_with() {
1416        let file1 = get_test_file_1();
1417        let file2 = get_test_file_2();
1418
1419        let result =
1420            StrictMetricsEvaluator::eval(&not_starts_with("required", "a"), &file1).unwrap();
1421        assert!(!result, "Strict eval: notStartsWith always false");
1422        let result =
1423            StrictMetricsEvaluator::eval(&not_starts_with("required", "a"), &file2).unwrap();
1424        assert!(!result, "Strict eval: notStartsWith always false");
1425    }
1426
1427    #[test]
1428    fn test_integer_in() {
1429        let file = get_test_file_1();
1430
1431        let result = StrictMetricsEvaluator::eval(
1432            &in_int("id", &[INT_MIN_VALUE - 25, INT_MIN_VALUE - 24]),
1433            &file,
1434        )
1435        .unwrap();
1436        assert!(
1437            !result,
1438            "Strict eval: inInt on file1 returns false because bounds are not equal"
1439        );
1440
1441        let result =
1442            StrictMetricsEvaluator::eval(&in_int("id", &[INT_MIN_VALUE - 1, INT_MIN_VALUE]), &file)
1443                .unwrap();
1444        assert!(
1445            !result,
1446            "Strict eval: inInt on file1 returns false when only one bound is in set"
1447        );
1448
1449        // For file with equality stats.
1450        let eq_file = get_test_file_eq();
1451        let result = StrictMetricsEvaluator::eval(&in_int("id", &[42]), &eq_file).unwrap();
1452        assert!(
1453            result,
1454            "Strict eval: inInt should be true when both bounds equal literal in set"
1455        );
1456
1457        let result =
1458            StrictMetricsEvaluator::eval(&in_int("id", &[INT_MAX_VALUE, INT_MAX_VALUE + 1]), &file)
1459                .unwrap();
1460        assert!(
1461            !result,
1462            "Strict eval: inInt on file1 returns false due to unequal bounds"
1463        );
1464    }
1465
1466    #[test]
1467    fn test_integer_not_in() {
1468        let file = get_test_file_1();
1469
1470        let result = StrictMetricsEvaluator::eval(
1471            &not_in_int("id", &[INT_MIN_VALUE - 25, INT_MIN_VALUE - 24]),
1472            &file,
1473        )
1474        .unwrap();
1475        assert!(
1476            result,
1477            "Strict eval: notInInt should be true when all provided literals are outside bounds"
1478        );
1479
1480        let result = StrictMetricsEvaluator::eval(
1481            &not_in_int("id", &[INT_MIN_VALUE - 2, INT_MIN_VALUE - 1]),
1482            &file,
1483        )
1484        .unwrap();
1485        assert!(
1486            result,
1487            "Strict eval: notInInt should be true when literals are below lower bound"
1488        );
1489
1490        let result = StrictMetricsEvaluator::eval(
1491            &not_in_int("id", &[INT_MIN_VALUE - 1, INT_MIN_VALUE]),
1492            &file,
1493        )
1494        .unwrap();
1495        assert!(
1496            !result,
1497            "Strict eval: notInInt should be false when at least one literal falls within bounds"
1498        );
1499
1500        let result = StrictMetricsEvaluator::eval(
1501            &not_in_int("id", &[INT_MAX_VALUE - 4, INT_MAX_VALUE - 3]),
1502            &file,
1503        )
1504        .unwrap();
1505        assert!(
1506            !result,
1507            "Strict eval: notInInt should be false when some literals are within bounds"
1508        );
1509
1510        let result = StrictMetricsEvaluator::eval(
1511            &not_in_int("id", &[INT_MAX_VALUE, INT_MAX_VALUE + 1]),
1512            &file,
1513        )
1514        .unwrap();
1515        assert!(
1516            !result,
1517            "Strict eval: not_in_int should be false when one literal is within bounds"
1518        );
1519
1520        let result = StrictMetricsEvaluator::eval(
1521            &not_in_int("id", &[INT_MAX_VALUE + 1, INT_MAX_VALUE + 2]),
1522            &file,
1523        )
1524        .unwrap();
1525        assert!(
1526            result,
1527            "Strict eval: notInInt should be true when all literals are above upper bound"
1528        );
1529
1530        let result = StrictMetricsEvaluator::eval(
1531            &not_in_int("id", &[INT_MAX_VALUE + 6, INT_MAX_VALUE + 7]),
1532            &file,
1533        )
1534        .unwrap();
1535        assert!(
1536            result,
1537            "Strict eval: notInInt should be true when literals are well above upper bound"
1538        );
1539
1540        let result =
1541            StrictMetricsEvaluator::eval(&not_in_str("all_nulls", &["abc", "def"]), &file).unwrap();
1542        assert!(
1543            result,
1544            "Strict eval: notInStr on all-null column should be true"
1545        );
1546
1547        let result =
1548            StrictMetricsEvaluator::eval(&not_in_str("some_nulls", &["abc", "def"]), &file)
1549                .unwrap();
1550        assert!(
1551            !result,
1552            "Strict eval: notInStr on column start with nan should be false"
1553        );
1554    }
1555}