1use fnv::FnvHashSet;
19
20use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit};
21use crate::expr::{BoundPredicate, BoundReference};
22use crate::spec::{DataFile, Datum, PrimitiveLiteral};
23use crate::{Error, ErrorKind};
24
25const IN_PREDICATE_LIMIT: usize = 200;
26const ROWS_MIGHT_MATCH: crate::Result<bool> = Ok(true);
27const ROWS_CANNOT_MATCH: crate::Result<bool> = Ok(false);
28
29pub(crate) struct InclusiveMetricsEvaluator<'a> {
30 data_file: &'a DataFile,
31}
32
33impl<'a> InclusiveMetricsEvaluator<'a> {
34 fn new(data_file: &'a DataFile) -> Self {
35 InclusiveMetricsEvaluator { data_file }
36 }
37
38 pub(crate) fn eval(
43 filter: &'a BoundPredicate,
44 data_file: &'a DataFile,
45 include_empty_files: bool,
46 ) -> crate::Result<bool> {
47 if !include_empty_files && data_file.record_count == 0 {
48 return ROWS_CANNOT_MATCH;
49 }
50
51 let mut evaluator = Self::new(data_file);
52 visit(&mut evaluator, filter)
53 }
54
55 fn nan_count(&self, field_id: i32) -> Option<&u64> {
56 self.data_file.nan_value_counts.get(&field_id)
57 }
58
59 fn null_count(&self, field_id: i32) -> Option<&u64> {
60 self.data_file.null_value_counts.get(&field_id)
61 }
62
63 fn value_count(&self, field_id: i32) -> Option<&u64> {
64 self.data_file.value_counts.get(&field_id)
65 }
66
67 fn lower_bound(&self, field_id: i32) -> Option<&Datum> {
68 self.data_file.lower_bounds.get(&field_id)
69 }
70
71 fn upper_bound(&self, field_id: i32) -> Option<&Datum> {
72 self.data_file.upper_bounds.get(&field_id)
73 }
74
75 fn contains_nans_only(&self, field_id: i32) -> bool {
76 let nan_count = self.nan_count(field_id);
77 let value_count = self.value_count(field_id);
78
79 nan_count.is_some() && nan_count == value_count
80 }
81
82 fn contains_nulls_only(&self, field_id: i32) -> bool {
83 let null_count = self.null_count(field_id);
84 let value_count = self.value_count(field_id);
85
86 null_count.is_some() && null_count == value_count
87 }
88
89 fn may_contain_null(&self, field_id: i32) -> bool {
90 if let Some(&null_count) = self.null_count(field_id) {
91 null_count > 0
92 } else {
93 true
94 }
95 }
96
97 fn visit_inequality(
98 &mut self,
99 reference: &BoundReference,
100 datum: &Datum,
101 cmp_fn: fn(&Datum, &Datum) -> bool,
102 use_lower_bound: bool,
103 ) -> crate::Result<bool> {
104 let field_id = reference.field().id;
105
106 if self.contains_nulls_only(field_id) || self.contains_nans_only(field_id) {
107 return ROWS_CANNOT_MATCH;
108 }
109
110 if datum.is_nan() {
111 return ROWS_MIGHT_MATCH;
114 }
115
116 let bound = if use_lower_bound {
117 self.lower_bound(field_id)
118 } else {
119 self.upper_bound(field_id)
120 };
121
122 if let Some(bound) = bound {
123 if cmp_fn(bound, datum) {
124 return ROWS_MIGHT_MATCH;
125 }
126
127 return ROWS_CANNOT_MATCH;
128 }
129
130 ROWS_MIGHT_MATCH
131 }
132}
133
134impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
135 type T = bool;
136
137 fn always_true(&mut self) -> crate::Result<bool> {
138 ROWS_MIGHT_MATCH
139 }
140
141 fn always_false(&mut self) -> crate::Result<bool> {
142 ROWS_CANNOT_MATCH
143 }
144
145 fn and(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
146 Ok(lhs && rhs)
147 }
148
149 fn or(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
150 Ok(lhs || rhs)
151 }
152
153 fn not(&mut self, inner: bool) -> crate::Result<bool> {
154 Ok(!inner)
155 }
156
157 fn is_null(
158 &mut self,
159 reference: &BoundReference,
160 _predicate: &BoundPredicate,
161 ) -> crate::Result<bool> {
162 let field_id = reference.field().id;
163
164 match self.null_count(field_id) {
165 Some(&0) => ROWS_CANNOT_MATCH,
166 Some(_) => ROWS_MIGHT_MATCH,
167 None => ROWS_MIGHT_MATCH,
168 }
169 }
170
171 fn not_null(
172 &mut self,
173 reference: &BoundReference,
174 _predicate: &BoundPredicate,
175 ) -> crate::Result<bool> {
176 let field_id = reference.field().id;
177
178 if self.contains_nulls_only(field_id) {
179 return ROWS_CANNOT_MATCH;
180 }
181
182 ROWS_MIGHT_MATCH
183 }
184
185 fn is_nan(
186 &mut self,
187 reference: &BoundReference,
188 _predicate: &BoundPredicate,
189 ) -> crate::Result<bool> {
190 let field_id = reference.field().id;
191
192 match self.nan_count(field_id) {
193 Some(&0) => ROWS_CANNOT_MATCH,
194 _ if self.contains_nulls_only(field_id) => ROWS_CANNOT_MATCH,
195 _ => ROWS_MIGHT_MATCH,
196 }
197 }
198
199 fn not_nan(
200 &mut self,
201 reference: &BoundReference,
202 _predicate: &BoundPredicate,
203 ) -> crate::Result<bool> {
204 let field_id = reference.field().id;
205
206 if self.contains_nans_only(field_id) {
207 return ROWS_CANNOT_MATCH;
208 }
209
210 ROWS_MIGHT_MATCH
211 }
212
213 fn less_than(
214 &mut self,
215 reference: &BoundReference,
216 datum: &Datum,
217 _predicate: &BoundPredicate,
218 ) -> crate::Result<bool> {
219 self.visit_inequality(reference, datum, PartialOrd::lt, true)
220 }
221
222 fn less_than_or_eq(
223 &mut self,
224 reference: &BoundReference,
225 datum: &Datum,
226 _predicate: &BoundPredicate,
227 ) -> crate::Result<bool> {
228 self.visit_inequality(reference, datum, PartialOrd::le, true)
229 }
230
231 fn greater_than(
232 &mut self,
233 reference: &BoundReference,
234 datum: &Datum,
235 _predicate: &BoundPredicate,
236 ) -> crate::Result<bool> {
237 self.visit_inequality(reference, datum, PartialOrd::gt, false)
238 }
239
240 fn greater_than_or_eq(
241 &mut self,
242 reference: &BoundReference,
243 datum: &Datum,
244 _predicate: &BoundPredicate,
245 ) -> crate::Result<bool> {
246 self.visit_inequality(reference, datum, PartialOrd::ge, false)
247 }
248
249 fn eq(
250 &mut self,
251 reference: &BoundReference,
252 datum: &Datum,
253 _predicate: &BoundPredicate,
254 ) -> crate::Result<bool> {
255 let field_id = reference.field().id;
256
257 if self.contains_nulls_only(field_id) || self.contains_nans_only(field_id) {
258 return ROWS_CANNOT_MATCH;
259 }
260
261 if let Some(lower_bound) = self.lower_bound(field_id) {
262 if lower_bound.is_nan() {
263 return ROWS_MIGHT_MATCH;
266 } else if lower_bound.gt(datum) {
267 return ROWS_CANNOT_MATCH;
268 }
269 }
270
271 if let Some(upper_bound) = self.upper_bound(field_id) {
272 if upper_bound.is_nan() {
273 return ROWS_MIGHT_MATCH;
276 } else if upper_bound.lt(datum) {
277 return ROWS_CANNOT_MATCH;
278 }
279 }
280
281 ROWS_MIGHT_MATCH
282 }
283
284 fn not_eq(
285 &mut self,
286 _reference: &BoundReference,
287 _datum: &Datum,
288 _predicate: &BoundPredicate,
289 ) -> crate::Result<bool> {
290 ROWS_MIGHT_MATCH
294 }
295
296 fn starts_with(
297 &mut self,
298 reference: &BoundReference,
299 datum: &Datum,
300 _predicate: &BoundPredicate,
301 ) -> crate::Result<bool> {
302 let field_id = reference.field().id;
303
304 if self.contains_nulls_only(field_id) {
305 return ROWS_CANNOT_MATCH;
306 }
307
308 let PrimitiveLiteral::String(datum) = datum.literal() else {
309 return Err(Error::new(
310 ErrorKind::Unexpected,
311 "Cannot use StartsWith operator on non-string values",
312 ));
313 };
314
315 if let Some(lower_bound) = self.lower_bound(field_id) {
316 let PrimitiveLiteral::String(lower_bound) = lower_bound.literal() else {
317 return Err(Error::new(
318 ErrorKind::Unexpected,
319 "Cannot use StartsWith operator on non-string lower_bound value",
320 ));
321 };
322
323 let prefix_length = lower_bound.chars().count().min(datum.chars().count());
324
325 let truncated_lower_bound = lower_bound.chars().take(prefix_length).collect::<String>();
328 if datum < &truncated_lower_bound {
329 return ROWS_CANNOT_MATCH;
330 }
331 }
332
333 if let Some(upper_bound) = self.upper_bound(field_id) {
334 let PrimitiveLiteral::String(upper_bound) = upper_bound.literal() else {
335 return Err(Error::new(
336 ErrorKind::Unexpected,
337 "Cannot use StartsWith operator on non-string upper_bound value",
338 ));
339 };
340
341 let prefix_length = upper_bound.chars().count().min(datum.chars().count());
342
343 let truncated_upper_bound = upper_bound.chars().take(prefix_length).collect::<String>();
346 if datum > &truncated_upper_bound {
347 return ROWS_CANNOT_MATCH;
348 }
349 }
350
351 ROWS_MIGHT_MATCH
352 }
353
354 fn not_starts_with(
355 &mut self,
356 reference: &BoundReference,
357 datum: &Datum,
358 _predicate: &BoundPredicate,
359 ) -> crate::Result<bool> {
360 let field_id = reference.field().id;
361
362 if self.may_contain_null(field_id) {
363 return ROWS_MIGHT_MATCH;
364 }
365
366 let PrimitiveLiteral::String(prefix) = datum.literal() else {
370 return Err(Error::new(
371 ErrorKind::Unexpected,
372 "Cannot use StartsWith operator on non-string values",
373 ));
374 };
375
376 let Some(lower_bound) = self.lower_bound(field_id) else {
377 return ROWS_MIGHT_MATCH;
378 };
379
380 let PrimitiveLiteral::String(lower_bound_str) = lower_bound.literal() else {
381 return Err(Error::new(
382 ErrorKind::Unexpected,
383 "Cannot use NotStartsWith operator on non-string lower_bound value",
384 ));
385 };
386
387 if lower_bound_str < prefix {
388 return ROWS_MIGHT_MATCH;
390 }
391
392 let prefix_len = prefix.chars().count();
393
394 if lower_bound_str.chars().take(prefix_len).collect::<String>() == *prefix {
395 let Some(upper_bound) = self.upper_bound(field_id) else {
398 return ROWS_MIGHT_MATCH;
399 };
400
401 let PrimitiveLiteral::String(upper_bound) = upper_bound.literal() else {
402 return Err(Error::new(
403 ErrorKind::Unexpected,
404 "Cannot use NotStartsWith operator on non-string upper_bound value",
405 ));
406 };
407
408 if upper_bound.chars().count() < prefix_len {
410 return ROWS_MIGHT_MATCH;
411 }
412
413 if upper_bound.chars().take(prefix_len).collect::<String>() == *prefix {
414 return ROWS_CANNOT_MATCH;
417 }
418 }
419
420 ROWS_MIGHT_MATCH
421 }
422
423 fn r#in(
424 &mut self,
425 reference: &BoundReference,
426 literals: &FnvHashSet<Datum>,
427 _predicate: &BoundPredicate,
428 ) -> crate::Result<bool> {
429 let field_id = reference.field().id;
430
431 if self.contains_nulls_only(field_id) || self.contains_nans_only(field_id) {
432 return ROWS_CANNOT_MATCH;
433 }
434
435 if literals.len() > IN_PREDICATE_LIMIT {
436 return ROWS_MIGHT_MATCH;
438 }
439
440 if let Some(lower_bound) = self.lower_bound(field_id) {
441 if lower_bound.is_nan() {
442 return ROWS_MIGHT_MATCH;
444 }
445
446 if !literals.iter().any(|datum| datum.ge(lower_bound)) {
447 return ROWS_CANNOT_MATCH;
449 }
450 }
451
452 if let Some(upper_bound) = self.upper_bound(field_id) {
453 if upper_bound.is_nan() {
454 return ROWS_MIGHT_MATCH;
456 }
457
458 if !literals.iter().any(|datum| datum.le(upper_bound)) {
459 return ROWS_CANNOT_MATCH;
461 }
462 }
463
464 ROWS_MIGHT_MATCH
465 }
466
467 fn not_in(
468 &mut self,
469 _reference: &BoundReference,
470 _literals: &FnvHashSet<Datum>,
471 _predicate: &BoundPredicate,
472 ) -> crate::Result<bool> {
473 ROWS_MIGHT_MATCH
477 }
478}
479
480#[cfg(test)]
481mod test {
482 use std::collections::HashMap;
483 use std::ops::Not;
484 use std::sync::Arc;
485
486 use fnv::FnvHashSet;
487
488 use crate::expr::PredicateOperator::{
489 Eq, GreaterThan, GreaterThanOrEq, In, IsNan, IsNull, LessThan, LessThanOrEq, NotEq, NotIn,
490 NotNan, NotNull, NotStartsWith, StartsWith,
491 };
492 use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator;
493 use crate::expr::{
494 BinaryExpression, Bind, BoundPredicate, Predicate, Reference, SetExpression,
495 UnaryExpression,
496 };
497 use crate::spec::{
498 DataContentType, DataFile, DataFileFormat, Datum, NestedField, PartitionSpec,
499 PartitionSpecRef, PrimitiveType, Schema, SchemaRef, Struct, Transform, Type,
500 UnboundPartitionField,
501 };
502
/// Upper end of the integer range used by the comparison tests.
const INT_MAX_VALUE: i32 = 79;
/// Lower end of the integer range used by the comparison tests.
const INT_MIN_VALUE: i32 = 30;
505
/// An always-true filter keeps the test data file.
#[test]
fn test_data_file_no_partitions() {
    let (_partition_spec_ref, schema_ref) = create_test_partition_spec();

    let partition_filter = Predicate::AlwaysTrue.bind(schema_ref, false).unwrap();
    let data_file = create_test_data_file();

    // The third argument of `eval` is `include_empty_files`.
    let include_empty_files = false;
    let result =
        InclusiveMetricsEvaluator::eval(&partition_filter, &data_file, include_empty_files)
            .unwrap();

    assert!(result);
}
523
/// Predicates over all-null, some-null and no-null columns of test file 1.
#[test]
fn test_all_nulls() {
    let cases = [
        (
            not_null("all_nulls"),
            false,
            "Should skip: no non-null value in all null column",
        ),
        (
            less_than("all_nulls", "a"),
            false,
            "Should skip: LessThan on an all null column",
        ),
        (
            less_than_or_equal("all_nulls", "a"),
            false,
            "Should skip: LessThanOrEqual on an all null column",
        ),
        (
            greater_than("all_nulls", "a"),
            false,
            "Should skip: GreaterThan on an all null column",
        ),
        (
            greater_than_or_equal("all_nulls", "a"),
            false,
            "Should skip: GreaterThanOrEqual on an all null column",
        ),
        (
            equal("all_nulls", "a"),
            false,
            "Should skip: Equal on an all null column",
        ),
        (
            starts_with("all_nulls", "a"),
            false,
            "Should skip: StartsWith on an all null column",
        ),
        (
            not_starts_with("all_nulls", "a"),
            true,
            "Should read: NotStartsWith on an all null column",
        ),
        (
            not_null("some_nulls"),
            true,
            "Should read: col with some nulls could contain a non-null value",
        ),
        (
            not_null("no_nulls"),
            true,
            "Should read: col with no nulls contains a non-null value",
        ),
    ];

    for (predicate, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&predicate, &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
603
/// `is_null` over all-null, some-null and no-null columns of test file 1.
#[test]
fn test_no_nulls() {
    let cases = [
        (
            "all_nulls",
            true,
            "Should read: col with all nulls contains a null value",
        ),
        (
            "some_nulls",
            true,
            "Should read: col with some nulls could contain a null value",
        ),
        (
            "no_nulls",
            false,
            "Should skip: col with no nulls can't contain a null value",
        ),
    ];

    for (column, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&is_null(column), &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
630
/// `is_nan` over the NaN-related columns of test file 1.
#[test]
fn test_is_nan() {
    let cases = [
        (
            "all_nans",
            true,
            "Should read: col with all nans must contains a nan value",
        ),
        (
            "some_nans",
            true,
            "Should read: col with some nans could contains a nan value",
        ),
        (
            "no_nans",
            false,
            "Should skip: col with no nans can't contains a nan value",
        ),
        (
            "all_nulls_double",
            false,
            "Should skip: col with no nans can't contains a nan value",
        ),
        (
            "no_nan_stats",
            true,
            "Should read: no guarantee col is nan-free without nan stats",
        ),
        (
            "all_nans_v1_stats",
            true,
            "Should read: col with all nans must contains a nan value",
        ),
        (
            "nan_and_null_only",
            true,
            "Should read: col with nans and nulls must contain a nan value",
        ),
    ];

    for (column, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&is_nan(column), &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
687
/// `not_nan` over the NaN-related columns of test file 1.
#[test]
fn test_not_nan() {
    let cases = [
        (
            "all_nans",
            false,
            "Should skip: col with all nans cannot contain a non-nan value",
        ),
        (
            "some_nans",
            true,
            "Should read: col with some nans could contains a nan value",
        ),
        (
            "no_nans",
            true,
            "Should read: col with no nans might contains a non-nan value",
        ),
        (
            "all_nulls_double",
            true,
            "Should read: col with no nans can't contains a nan value",
        ),
        (
            "no_nan_stats",
            true,
            "Should read: no guarantee col is nan-free without nan stats",
        ),
        (
            "all_nans_v1_stats",
            true,
            "Should read: col with all nans must contains a nan value",
        ),
        (
            "nan_and_null_only",
            true,
            "Should read: col with nans and nulls may contain a non-nan value",
        ),
    ];

    for (column, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&not_nan(column), &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
751
/// `not_null` keeps and `is_null` skips test file 1 for the `required` column.
#[test]
fn test_required_column() {
    let cases = [
        (
            not_null("required"),
            true,
            "Should read: required columns are always non-null",
        ),
        (
            is_null("required"),
            false,
            "Should skip: required columns are always non-null",
        ),
    ];

    for (predicate, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&predicate, &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
764
/// Building or evaluating a predicate on the `missing` column must panic.
#[test]
#[should_panic]
fn test_missing_column() {
    // The predicate is built first, matching the original argument order.
    let predicate = less_than("missing", "a");
    let data_file = get_test_file_1();

    let _result = InclusiveMetricsEvaluator::eval(&predicate, &data_file, true);
}
771
/// Every predicate on the `no_stats` column keeps the plain test data file.
#[test]
fn test_missing_stats() {
    let file_without_stats = create_test_data_file();

    let predicates = vec![
        less_than_int("no_stats", 5),
        less_than_or_equal_int("no_stats", 30),
        equal_int("no_stats", 70),
        greater_than_int("no_stats", 78),
        greater_than_or_equal_int("no_stats", 90),
        not_equal_int("no_stats", 101),
        is_null("no_stats"),
        not_null("no_stats"),
    ];

    for predicate in &predicates {
        let should_read =
            InclusiveMetricsEvaluator::eval(predicate, &file_without_stats, true).unwrap();

        assert!(
            should_read,
            "Should read if stats are missing for {:?}",
            predicate
        );
    }
}
801
/// A zero-record file is skipped unless empty files are explicitly included.
#[test]
fn test_zero_record_file() {
    // Presumably `record_count` is 0 in this fixture; confirm against the helper.
    let zero_records_datafile = create_zero_records_data_file();

    let expressions = [
        less_than_int("no_stats", 5),
        less_than_or_equal_int("no_stats", 30),
        equal_int("no_stats", 70),
        greater_than_int("no_stats", 78),
        greater_than_or_equal_int("no_stats", 90),
        not_equal_int("no_stats", 101),
        is_null("no_stats"),
        not_null("no_stats"),
    ];

    for expression in expressions {
        // `eval` only short-circuits on an empty file when
        // `include_empty_files` is false, so that is the mode to assert on.
        let skipped =
            InclusiveMetricsEvaluator::eval(&expression, &zero_records_datafile, false).unwrap();
        assert!(
            !skipped,
            "Should skip if data file has zero records (expression: {:?})",
            &expression
        );

        // Previously asserted behaviour: with empty files included the
        // predicate is evaluated normally and the file is kept.
        let included =
            InclusiveMetricsEvaluator::eval(&expression, &zero_records_datafile, true).unwrap();
        assert!(
            included,
            "Should read when empty files are included (expression: {:?})",
            &expression
        );
    }
}
830
/// Negated comparisons on `id` invert the inner evaluation result.
#[test]
fn test_not() {
    let cases = [
        (
            not_less_than_int("id", INT_MIN_VALUE - 25),
            true,
            "Should read: not(false)",
        ),
        (
            not_greater_than_int("id", INT_MIN_VALUE - 25),
            false,
            "Should skip: not(true)",
        ),
    ];

    for (predicate, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&predicate, &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
852
/// Conjunctions of two `id` comparisons against test file 1.
#[test]
fn test_and() {
    // (left operator, left literal, right operator, right literal, expected, message)
    let cases = [
        (
            LessThan,
            INT_MIN_VALUE - 25,
            GreaterThanOrEq,
            INT_MIN_VALUE - 30,
            false,
            "Should skip: and(false, true)",
        ),
        (
            LessThan,
            INT_MIN_VALUE - 25,
            GreaterThanOrEq,
            INT_MAX_VALUE + 1,
            false,
            "Should skip: and(false, false)",
        ),
        (
            GreaterThan,
            INT_MIN_VALUE - 25,
            LessThanOrEq,
            INT_MIN_VALUE,
            true,
            "Should read: and(true, true)",
        ),
    ];

    for (lhs_op, lhs_value, rhs_op, rhs_value, should_read, message) in cases {
        let schema = create_test_schema();

        let lhs = Predicate::Binary(BinaryExpression::new(
            lhs_op,
            Reference::new("id"),
            Datum::int(lhs_value),
        ));
        let rhs = Predicate::Binary(BinaryExpression::new(
            rhs_op,
            Reference::new("id"),
            Datum::int(rhs_value),
        ));

        let bound_pred = lhs.and(rhs).bind(schema, true).unwrap();

        let result =
            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
909
/// Disjunctions of two `id` comparisons against test file 1.
#[test]
fn test_or() {
    // (left operator, left literal, right operator, right literal, expected, message)
    let cases = [
        (
            LessThan,
            INT_MIN_VALUE - 25,
            GreaterThanOrEq,
            INT_MIN_VALUE - 30,
            true,
            "Should read: or(false, true)",
        ),
        (
            LessThan,
            INT_MIN_VALUE - 25,
            GreaterThanOrEq,
            INT_MAX_VALUE + 1,
            false,
            "Should skip: or(false, false)",
        ),
    ];

    for (lhs_op, lhs_value, rhs_op, rhs_value, should_read, message) in cases {
        let schema = create_test_schema();

        let lhs = Predicate::Binary(BinaryExpression::new(
            lhs_op,
            Reference::new("id"),
            Datum::int(lhs_value),
        ));
        let rhs = Predicate::Binary(BinaryExpression::new(
            rhs_op,
            Reference::new("id"),
            Datum::int(rhs_value),
        ));

        let bound_pred = lhs.or(rhs).bind(schema, true).unwrap();

        let result =
            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
        assert!(result == should_read, "{}", message);
    }
}
948
/// `id < literal` against test file 1.
#[test]
fn test_integer_lt() {
    let cases = [
        (
            INT_MIN_VALUE - 25,
            false,
            "Should skip: id range below lower bound (5 < 30)",
        ),
        (
            INT_MIN_VALUE,
            false,
            "Should skip: id range below lower bound (30 is not < 30)",
        ),
        (INT_MIN_VALUE + 1, true, "Should read: one possible id"),
        (INT_MAX_VALUE, true, "Should read: many possible ids"),
    ];

    for (literal, should_read, message) in cases {
        let result = InclusiveMetricsEvaluator::eval(
            &less_than_int("id", literal),
            &get_test_file_1(),
            true,
        )
        .unwrap();
        assert!(result == should_read, "{}", message);
    }
}
986
/// `id <= literal` against test file 1.
#[test]
fn test_integer_lt_eq() {
    let cases = [
        (
            INT_MIN_VALUE - 25,
            false,
            "Should skip: id range below lower bound (5 < 30)",
        ),
        (
            INT_MIN_VALUE - 1,
            false,
            "Should skip: id range below lower bound (29 < 30)",
        ),
        (INT_MIN_VALUE, true, "Should read: one possible id"),
        (INT_MAX_VALUE, true, "Should read: many possible ids"),
    ];

    for (literal, should_read, message) in cases {
        let result = InclusiveMetricsEvaluator::eval(
            &less_than_or_equal_int("id", literal),
            &get_test_file_1(),
            true,
        )
        .unwrap();
        assert!(result == should_read, "{}", message);
    }
}
1021
/// `id > literal` against test file 1.
#[test]
fn test_integer_gt() {
    let cases = [
        (
            INT_MAX_VALUE + 6,
            false,
            "Should skip: id range above upper bound (85 > 79)",
        ),
        (
            INT_MAX_VALUE,
            false,
            "Should skip: id range above upper bound (79 is not > 79)",
        ),
        (INT_MAX_VALUE - 1, true, "Should read: one possible id"),
        (INT_MAX_VALUE - 4, true, "Should read: many possible ids"),
    ];

    for (literal, should_read, message) in cases {
        let result = InclusiveMetricsEvaluator::eval(
            &greater_than_int("id", literal),
            &get_test_file_1(),
            true,
        )
        .unwrap();
        assert!(result == should_read, "{}", message);
    }
}
1059
/// `id >= literal` against test file 1.
#[test]
fn test_integer_gt_eq() {
    let cases = [
        (
            INT_MAX_VALUE + 6,
            false,
            "Should skip: id range above upper bound (85 > 79)",
        ),
        (
            INT_MAX_VALUE + 1,
            false,
            "Should skip: id range above upper bound (80 > 79)",
        ),
        (INT_MAX_VALUE, true, "Should read: one possible id"),
        (INT_MAX_VALUE - 4, true, "Should read: many possible ids"),
    ];

    for (literal, should_read, message) in cases {
        let result = InclusiveMetricsEvaluator::eval(
            &greater_than_or_equal_int("id", literal),
            &get_test_file_1(),
            true,
        )
        .unwrap();
        assert!(result == should_read, "{}", message);
    }
}
1094
/// `id == literal` against test file 1.
#[test]
fn test_integer_eq() {
    let cases = [
        (INT_MIN_VALUE - 25, false, "Should skip: id below lower bound"),
        (INT_MIN_VALUE - 1, false, "Should skip: id below lower bound"),
        (INT_MIN_VALUE, true, "Should read: id equal to lower bound"),
        (
            INT_MAX_VALUE - 4,
            true,
            "Should read: id between lower and upper bounds",
        ),
        (INT_MAX_VALUE, true, "Should read: id equal to upper bound"),
        (INT_MAX_VALUE + 1, false, "Should skip: id above upper bound"),
        (INT_MAX_VALUE + 6, false, "Should skip: id above upper bound"),
    ];

    for (literal, should_read, message) in cases {
        let result =
            InclusiveMetricsEvaluator::eval(&equal_int("id", literal), &get_test_file_1(), true)
                .unwrap();
        assert!(result == should_read, "{}", message);
    }
}
1153
/// `id != literal` keeps test file 1 for every literal tried.
#[test]
fn test_integer_not_eq() {
    let cases = [
        (INT_MIN_VALUE - 25, "Should read: id below lower bound"),
        (INT_MIN_VALUE - 1, "Should read: id below lower bound"),
        (INT_MIN_VALUE, "Should read: id equal to lower bound"),
        (
            INT_MAX_VALUE - 4,
            "Should read: id between lower and upper bounds",
        ),
        (INT_MAX_VALUE, "Should read: id equal to upper bound"),
        (INT_MAX_VALUE + 1, "Should read: id above upper bound"),
        (INT_MAX_VALUE + 6, "Should read: id above upper bound"),
    ];

    for (literal, message) in cases {
        let result = InclusiveMetricsEvaluator::eval(
            &not_equal_int("id", literal),
            &get_test_file_1(),
            true,
        )
        .unwrap();
        assert!(result, "{}", message);
    }
}
1212
/// Building or evaluating the predicate on the upper-case `ID` column must panic.
#[test]
#[should_panic]
fn test_case_sensitive_integer_not_eq_rewritten() {
    // The predicate is built first, matching the original argument order.
    let predicate = equal_int_not("ID", 5);
    let data_file = get_test_file_1();

    let _result = InclusiveMetricsEvaluator::eval(&predicate, &data_file, true).unwrap();
}
1220
/// `starts_with` over string columns of test files 1 to 4.
#[test]
fn test_string_starts_with() {
    // Builds the numbered fixture on demand, one fresh file per case.
    let test_file = |number: u8| match number {
        1 => get_test_file_1(),
        2 => get_test_file_2(),
        3 => get_test_file_3(),
        _ => get_test_file_4(),
    };

    let above_max = "イロハニボ";

    // (column, prefix, fixture number, expected result, message)
    let cases = [
        ("required", "a", 1, true, "Should read: no stats"),
        ("required", "a", 2, true, "Should read: range matches"),
        ("required", "aa", 2, true, "Should read: range matches"),
        ("required", "aaa", 2, true, "Should read: range matches"),
        ("required", "1s", 3, true, "Should read: range matches"),
        ("required", "1str1x", 3, true, "Should read: range matches"),
        ("required", "ff", 4, true, "Should read: range matches"),
        ("required", "aB", 2, false, "Should skip: range does not match"),
        ("required", "dWX", 2, false, "Should skip: range does not match"),
        ("required", "5", 3, false, "Should skip: range does not match"),
        ("required", "3str3x", 3, false, "Should skip: range does not match"),
        ("some_empty", "房东整租霍", 1, true, "Should read: range does matches"),
        ("all_nulls", "", 1, false, "Should skip: range does not match"),
        ("required", above_max, 4, false, "Should skip: range does not match"),
    ];

    for (column, prefix, file_number, should_read, message) in cases {
        let result = InclusiveMetricsEvaluator::eval(
            &starts_with(column, prefix),
            &test_file(file_number),
            true,
        )
        .unwrap();
        assert!(result == should_read, "{}", message);
    }
}
1346
/// Checks that `NOT STARTS WITH` never prunes any of the fixture files for the
/// prefixes exercised here.
#[test]
fn test_string_not_starts_with() {
    // Evaluates `required NOT STARTS WITH prefix` against `data_file` and
    // asserts that the file is reported as possibly matching.
    let assert_reads = |prefix: &str, data_file: DataFile, message: &str| {
        let predicate = not_starts_with("required", prefix);
        let may_match = InclusiveMetricsEvaluator::eval(&predicate, &data_file, true).unwrap();
        assert!(may_match, "{}", message);
    };

    // File 1 records no bounds for the `required` column.
    assert_reads("a", get_test_file_1(), "Should read: no stats");

    // File 2 bounds: "aa" ..= "dC".
    assert_reads("a", get_test_file_2(), "Should read: range matches");
    assert_reads("aa", get_test_file_2(), "Should read: range matches");
    assert_reads("aaa", get_test_file_2(), "Should read: range matches");

    // File 3 bounds: "1str1" ..= "3str3".
    assert_reads("1s", get_test_file_3(), "Should read: range matches");
    assert_reads("1str1x", get_test_file_3(), "Should read: range matches");

    // File 4 bounds: "abc" ..= "イロハニホヘト".
    assert_reads("ff", get_test_file_4(), "Should read: range matches");

    // Prefixes outside the recorded bounds must still be read.
    assert_reads("aB", get_test_file_2(), "Should read: range matches");
    assert_reads("dWX", get_test_file_2(), "Should read: range matches");
    assert_reads("5", get_test_file_3(), "Should read: range matches");
    assert_reads("3str3x", get_test_file_3(), "Should read: range matches");

    // This prefix is identical to file 4's upper bound.
    let above_max = "イロハニホヘト";
    assert_reads(above_max, get_test_file_4(), "Should read: range matches");
}
1446
/// Checks `IN` evaluation against file 1: integer sets relative to the `id`
/// bounds, string sets on the null-count columns, and an oversized literal set.
#[test]
fn test_integer_in() {
    // Every case runs against the same fixture; the evaluator only borrows it.
    let data_file = get_test_file_1();
    let may_match = |predicate: BoundPredicate| {
        InclusiveMetricsEvaluator::eval(&predicate, &data_file, true).unwrap()
    };

    assert!(
        !may_match(in_int("id", &[INT_MIN_VALUE - 25, INT_MIN_VALUE - 24])),
        "Should skip: id below lower bound (5 < 30, 6 < 30)"
    );
    assert!(
        !may_match(in_int("id", &[INT_MIN_VALUE - 2, INT_MIN_VALUE - 1])),
        "Should skip: id below lower bound (28 < 30, 29 < 30)"
    );
    assert!(
        may_match(in_int("id", &[INT_MIN_VALUE - 1, INT_MIN_VALUE])),
        "Should read: id equal to lower bound (30 == 30)"
    );
    assert!(
        may_match(in_int("id", &[INT_MAX_VALUE - 4, INT_MAX_VALUE - 3])),
        "Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)"
    );
    assert!(
        may_match(in_int("id", &[INT_MAX_VALUE, INT_MAX_VALUE + 1])),
        "Should read: id equal to upper bound (79 == 79)"
    );
    assert!(
        !may_match(in_int("id", &[INT_MAX_VALUE + 1, INT_MAX_VALUE + 2])),
        "Should skip: id above upper bound (80 > 79, 81 > 79)"
    );
    assert!(
        !may_match(in_int("id", &[INT_MAX_VALUE + 6, INT_MAX_VALUE + 7])),
        "Should skip: id above upper bound (85 > 79, 86 > 79)"
    );

    // String columns of file 1 that differ only in their null counts.
    assert!(
        !may_match(in_str("all_nulls", &["abc", "def"])),
        "Should skip: in on all nulls column"
    );
    assert!(
        may_match(in_str("some_nulls", &["abc", "def"])),
        "Should read: in on some nulls column"
    );
    assert!(
        may_match(in_str("no_nulls", &["abc", "def"])),
        "Should read: in on no nulls column"
    );

    // 401 literals, all of them non-positive.
    let ids: Vec<i32> = (-400..=0).collect();
    assert!(
        may_match(in_int("id", &ids)),
        "Should read: number of items in In expression greater than threshold"
    );
}
1553
/// Checks that `NOT IN` never prunes file 1, for integer sets positioned
/// anywhere relative to the `id` bounds and for string sets on the
/// null-count columns.
#[test]
fn test_integer_not_in() {
    // Every case runs against the same fixture; the evaluator only borrows it.
    let data_file = get_test_file_1();
    let may_match = |predicate: BoundPredicate| {
        InclusiveMetricsEvaluator::eval(&predicate, &data_file, true).unwrap()
    };

    assert!(
        may_match(not_in_int("id", &[INT_MIN_VALUE - 25, INT_MIN_VALUE - 24])),
        "Should read: id below lower bound (5 < 30, 6 < 30)"
    );
    assert!(
        may_match(not_in_int("id", &[INT_MIN_VALUE - 2, INT_MIN_VALUE - 1])),
        "Should read: id below lower bound (28 < 30, 29 < 30)"
    );
    assert!(
        may_match(not_in_int("id", &[INT_MIN_VALUE - 1, INT_MIN_VALUE])),
        "Should read: id equal to lower bound (30 == 30)"
    );
    assert!(
        may_match(not_in_int("id", &[INT_MAX_VALUE - 4, INT_MAX_VALUE - 3])),
        "Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)"
    );
    assert!(
        may_match(not_in_int("id", &[INT_MAX_VALUE, INT_MAX_VALUE + 1])),
        "Should read: id equal to upper bound (79 == 79)"
    );
    assert!(
        may_match(not_in_int("id", &[INT_MAX_VALUE + 1, INT_MAX_VALUE + 2])),
        "Should read: id above upper bound (80 > 79, 81 > 79)"
    );
    assert!(
        may_match(not_in_int("id", &[INT_MAX_VALUE + 6, INT_MAX_VALUE + 7])),
        "Should read: id above upper bound (85 > 79, 86 > 79)"
    );

    // String columns of file 1 that differ only in their null counts.
    assert!(
        may_match(not_in_str("all_nulls", &["abc", "def"])),
        "Should read: NotIn on all nulls column"
    );
    assert!(
        may_match(not_in_str("some_nulls", &["abc", "def"])),
        "Should read: NotIn on some nulls column"
    );
    assert!(
        may_match(not_in_str("no_nulls", &["abc", "def"])),
        "Should read: NotIn on no nulls column"
    );
}
1648
1649 fn create_test_partition_spec() -> (PartitionSpecRef, SchemaRef) {
1650 let table_schema = Schema::builder()
1651 .with_fields(vec![Arc::new(NestedField::optional(
1652 1,
1653 "a",
1654 Type::Primitive(PrimitiveType::Float),
1655 ))])
1656 .build()
1657 .unwrap();
1658 let table_schema_ref = Arc::new(table_schema);
1659
1660 let partition_spec = PartitionSpec::builder(table_schema_ref.clone())
1661 .with_spec_id(1)
1662 .add_unbound_fields(vec![
1663 UnboundPartitionField::builder()
1664 .source_id(1)
1665 .name("a".to_string())
1666 .field_id(1)
1667 .transform(Transform::Identity)
1668 .build(),
1669 ])
1670 .unwrap()
1671 .build()
1672 .unwrap();
1673 (Arc::new(partition_spec), table_schema_ref)
1674 }
1675
1676 fn not_null(reference: &str) -> BoundPredicate {
1677 let schema = create_test_schema();
1678 let filter = Predicate::Unary(UnaryExpression::new(NotNull, Reference::new(reference)));
1679 filter.bind(schema.clone(), true).unwrap()
1680 }
1681
1682 fn is_null(reference: &str) -> BoundPredicate {
1683 let schema = create_test_schema();
1684 let filter = Predicate::Unary(UnaryExpression::new(IsNull, Reference::new(reference)));
1685 filter.bind(schema.clone(), true).unwrap()
1686 }
1687
1688 fn not_nan(reference: &str) -> BoundPredicate {
1689 let schema = create_test_schema();
1690 let filter = Predicate::Unary(UnaryExpression::new(NotNan, Reference::new(reference)));
1691 filter.bind(schema.clone(), true).unwrap()
1692 }
1693
1694 fn is_nan(reference: &str) -> BoundPredicate {
1695 let schema = create_test_schema();
1696 let filter = Predicate::Unary(UnaryExpression::new(IsNan, Reference::new(reference)));
1697 filter.bind(schema.clone(), true).unwrap()
1698 }
1699
1700 fn less_than(reference: &str, str_literal: &str) -> BoundPredicate {
1701 let schema = create_test_schema();
1702 let filter = Predicate::Binary(BinaryExpression::new(
1703 LessThan,
1704 Reference::new(reference),
1705 Datum::string(str_literal),
1706 ));
1707 filter.bind(schema.clone(), true).unwrap()
1708 }
1709
1710 fn less_than_or_equal(reference: &str, str_literal: &str) -> BoundPredicate {
1711 let schema = create_test_schema();
1712 let filter = Predicate::Binary(BinaryExpression::new(
1713 LessThanOrEq,
1714 Reference::new(reference),
1715 Datum::string(str_literal),
1716 ));
1717 filter.bind(schema.clone(), true).unwrap()
1718 }
1719
1720 fn greater_than(reference: &str, str_literal: &str) -> BoundPredicate {
1721 let schema = create_test_schema();
1722 let filter = Predicate::Binary(BinaryExpression::new(
1723 GreaterThan,
1724 Reference::new(reference),
1725 Datum::string(str_literal),
1726 ));
1727 filter.bind(schema.clone(), true).unwrap()
1728 }
1729
1730 fn greater_than_or_equal(reference: &str, str_literal: &str) -> BoundPredicate {
1731 let schema = create_test_schema();
1732 let filter = Predicate::Binary(BinaryExpression::new(
1733 GreaterThanOrEq,
1734 Reference::new(reference),
1735 Datum::string(str_literal),
1736 ));
1737 filter.bind(schema.clone(), true).unwrap()
1738 }
1739
1740 fn equal(reference: &str, str_literal: &str) -> BoundPredicate {
1741 let schema = create_test_schema();
1742 let filter = Predicate::Binary(BinaryExpression::new(
1743 Eq,
1744 Reference::new(reference),
1745 Datum::string(str_literal),
1746 ));
1747 filter.bind(schema.clone(), true).unwrap()
1748 }
1749
1750 fn less_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
1751 let schema = create_test_schema();
1752 let filter = Predicate::Binary(BinaryExpression::new(
1753 LessThan,
1754 Reference::new(reference),
1755 Datum::int(int_literal),
1756 ));
1757 filter.bind(schema.clone(), true).unwrap()
1758 }
1759
1760 fn not_less_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
1761 let schema = create_test_schema();
1762 let filter = Predicate::Binary(BinaryExpression::new(
1763 LessThan,
1764 Reference::new(reference),
1765 Datum::int(int_literal),
1766 ))
1767 .not();
1768 filter.bind(schema.clone(), true).unwrap()
1769 }
1770
1771 fn less_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
1772 let schema = create_test_schema();
1773 let filter = Predicate::Binary(BinaryExpression::new(
1774 LessThanOrEq,
1775 Reference::new(reference),
1776 Datum::int(int_literal),
1777 ));
1778 filter.bind(schema.clone(), true).unwrap()
1779 }
1780
1781 fn greater_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
1782 let schema = create_test_schema();
1783 let filter = Predicate::Binary(BinaryExpression::new(
1784 GreaterThan,
1785 Reference::new(reference),
1786 Datum::int(int_literal),
1787 ));
1788 filter.bind(schema.clone(), true).unwrap()
1789 }
1790
1791 fn not_greater_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
1792 let schema = create_test_schema();
1793 let filter = Predicate::Binary(BinaryExpression::new(
1794 GreaterThan,
1795 Reference::new(reference),
1796 Datum::int(int_literal),
1797 ))
1798 .not();
1799 filter.bind(schema.clone(), true).unwrap()
1800 }
1801
1802 fn greater_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
1803 let schema = create_test_schema();
1804 let filter = Predicate::Binary(BinaryExpression::new(
1805 GreaterThanOrEq,
1806 Reference::new(reference),
1807 Datum::int(int_literal),
1808 ));
1809 filter.bind(schema.clone(), true).unwrap()
1810 }
1811
1812 fn equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
1813 let schema = create_test_schema();
1814 let filter = Predicate::Binary(BinaryExpression::new(
1815 Eq,
1816 Reference::new(reference),
1817 Datum::int(int_literal),
1818 ));
1819 filter.bind(schema.clone(), true).unwrap()
1820 }
1821
1822 fn equal_int_not(reference: &str, int_literal: i32) -> BoundPredicate {
1823 let schema = create_test_schema();
1824 let filter = Predicate::Binary(BinaryExpression::new(
1825 Eq,
1826 Reference::new(reference),
1827 Datum::int(int_literal),
1828 ))
1829 .not();
1830 filter.bind(schema.clone(), true).unwrap()
1831 }
1832
1833 fn not_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
1834 let schema = create_test_schema();
1835 let filter = Predicate::Binary(BinaryExpression::new(
1836 NotEq,
1837 Reference::new(reference),
1838 Datum::int(int_literal),
1839 ));
1840 filter.bind(schema.clone(), true).unwrap()
1841 }
1842
1843 fn starts_with(reference: &str, str_literal: &str) -> BoundPredicate {
1844 let schema = create_test_schema();
1845 let filter = Predicate::Binary(BinaryExpression::new(
1846 StartsWith,
1847 Reference::new(reference),
1848 Datum::string(str_literal),
1849 ));
1850 filter.bind(schema.clone(), true).unwrap()
1851 }
1852
1853 fn not_starts_with(reference: &str, str_literal: &str) -> BoundPredicate {
1854 let schema = create_test_schema();
1855 let filter = Predicate::Binary(BinaryExpression::new(
1856 NotStartsWith,
1857 Reference::new(reference),
1858 Datum::string(str_literal),
1859 ));
1860 filter.bind(schema.clone(), true).unwrap()
1861 }
1862
1863 fn in_int(reference: &str, int_literals: &[i32]) -> BoundPredicate {
1864 let schema = create_test_schema();
1865 let filter = Predicate::Set(SetExpression::new(
1866 In,
1867 Reference::new(reference),
1868 FnvHashSet::from_iter(int_literals.iter().map(|&lit| Datum::int(lit))),
1869 ));
1870 filter.bind(schema.clone(), true).unwrap()
1871 }
1872
1873 fn in_str(reference: &str, str_literals: &[&str]) -> BoundPredicate {
1874 let schema = create_test_schema();
1875 let filter = Predicate::Set(SetExpression::new(
1876 In,
1877 Reference::new(reference),
1878 FnvHashSet::from_iter(str_literals.iter().map(Datum::string)),
1879 ));
1880 filter.bind(schema.clone(), true).unwrap()
1881 }
1882
1883 fn not_in_int(reference: &str, int_literals: &[i32]) -> BoundPredicate {
1884 let schema = create_test_schema();
1885 let filter = Predicate::Set(SetExpression::new(
1886 NotIn,
1887 Reference::new(reference),
1888 FnvHashSet::from_iter(int_literals.iter().map(|&lit| Datum::int(lit))),
1889 ));
1890 filter.bind(schema.clone(), true).unwrap()
1891 }
1892
1893 fn not_in_str(reference: &str, str_literals: &[&str]) -> BoundPredicate {
1894 let schema = create_test_schema();
1895 let filter = Predicate::Set(SetExpression::new(
1896 NotIn,
1897 Reference::new(reference),
1898 FnvHashSet::from_iter(str_literals.iter().map(Datum::string)),
1899 ));
1900 filter.bind(schema.clone(), true).unwrap()
1901 }
1902
1903 fn create_test_schema() -> Arc<Schema> {
1904 let table_schema = Schema::builder()
1905 .with_fields(vec![
1906 Arc::new(NestedField::required(
1907 1,
1908 "id",
1909 Type::Primitive(PrimitiveType::Int),
1910 )),
1911 Arc::new(NestedField::optional(
1912 2,
1913 "no_stats",
1914 Type::Primitive(PrimitiveType::Int),
1915 )),
1916 Arc::new(NestedField::required(
1917 3,
1918 "required",
1919 Type::Primitive(PrimitiveType::String),
1920 )),
1921 Arc::new(NestedField::optional(
1922 4,
1923 "all_nulls",
1924 Type::Primitive(PrimitiveType::String),
1925 )),
1926 Arc::new(NestedField::optional(
1927 5,
1928 "some_nulls",
1929 Type::Primitive(PrimitiveType::String),
1930 )),
1931 Arc::new(NestedField::optional(
1932 6,
1933 "no_nulls",
1934 Type::Primitive(PrimitiveType::String),
1935 )),
1936 Arc::new(NestedField::optional(
1937 7,
1938 "all_nans",
1939 Type::Primitive(PrimitiveType::Double),
1940 )),
1941 Arc::new(NestedField::optional(
1942 8,
1943 "some_nans",
1944 Type::Primitive(PrimitiveType::Float),
1945 )),
1946 Arc::new(NestedField::optional(
1947 9,
1948 "no_nans",
1949 Type::Primitive(PrimitiveType::Float),
1950 )),
1951 Arc::new(NestedField::optional(
1952 10,
1953 "all_nulls_double",
1954 Type::Primitive(PrimitiveType::Double),
1955 )),
1956 Arc::new(NestedField::optional(
1957 11,
1958 "all_nans_v1_stats",
1959 Type::Primitive(PrimitiveType::Float),
1960 )),
1961 Arc::new(NestedField::optional(
1962 12,
1963 "nan_and_null_only",
1964 Type::Primitive(PrimitiveType::Double),
1965 )),
1966 Arc::new(NestedField::optional(
1967 13,
1968 "no_nan_stats",
1969 Type::Primitive(PrimitiveType::Double),
1970 )),
1971 Arc::new(NestedField::optional(
1972 14,
1973 "some_empty",
1974 Type::Primitive(PrimitiveType::String),
1975 )),
1976 ])
1977 .build()
1978 .unwrap();
1979
1980 Arc::new(table_schema)
1981 }
1982
1983 fn create_test_data_file() -> DataFile {
1984 DataFile {
1985 content: DataContentType::Data,
1986 file_path: "/test/path".to_string(),
1987 file_format: DataFileFormat::Parquet,
1988 partition: Struct::empty(),
1989 record_count: 10,
1990 file_size_in_bytes: 10,
1991 column_sizes: Default::default(),
1992 value_counts: Default::default(),
1993 null_value_counts: Default::default(),
1994 nan_value_counts: Default::default(),
1995 lower_bounds: Default::default(),
1996 upper_bounds: Default::default(),
1997 key_metadata: None,
1998 split_offsets: None,
1999 equality_ids: None,
2000 sort_order_id: None,
2001 partition_spec_id: 0,
2002 first_row_id: None,
2003 referenced_data_file: None,
2004 content_offset: None,
2005 content_size_in_bytes: None,
2006 }
2007 }
2008
2009 fn create_zero_records_data_file() -> DataFile {
2010 DataFile {
2011 content: DataContentType::Data,
2012 file_path: "/test/path".to_string(),
2013 file_format: DataFileFormat::Parquet,
2014 partition: Struct::empty(),
2015 record_count: 0,
2016 file_size_in_bytes: 10,
2017 column_sizes: Default::default(),
2018 value_counts: Default::default(),
2019 null_value_counts: Default::default(),
2020 nan_value_counts: Default::default(),
2021 lower_bounds: Default::default(),
2022 upper_bounds: Default::default(),
2023 key_metadata: None,
2024 split_offsets: None,
2025 equality_ids: None,
2026 sort_order_id: None,
2027 partition_spec_id: 0,
2028 first_row_id: None,
2029 referenced_data_file: None,
2030 content_offset: None,
2031 content_size_in_bytes: None,
2032 }
2033 }
2034
2035 fn get_test_file_1() -> DataFile {
2036 DataFile {
2037 content: DataContentType::Data,
2038 file_path: "/test/path".to_string(),
2039 file_format: DataFileFormat::Parquet,
2040 partition: Struct::empty(),
2041 record_count: 50,
2042 file_size_in_bytes: 10,
2043
2044 value_counts: HashMap::from([
2045 (4, 50),
2046 (5, 50),
2047 (6, 50),
2048 (7, 50),
2049 (8, 50),
2050 (9, 50),
2051 (10, 50),
2052 (11, 50),
2053 (12, 50),
2054 (13, 50),
2055 (14, 50),
2056 ]),
2057
2058 null_value_counts: HashMap::from([
2059 (4, 50),
2060 (5, 10),
2061 (6, 0),
2062 (10, 50),
2063 (11, 0),
2064 (12, 1),
2065 (14, 0),
2066 ]),
2067
2068 nan_value_counts: HashMap::from([(7, 50), (8, 10), (9, 0)]),
2069
2070 lower_bounds: HashMap::from([
2071 (1, Datum::int(INT_MIN_VALUE)),
2072 (11, Datum::float(f32::NAN)),
2073 (12, Datum::double(f64::NAN)),
2074 (14, Datum::string("")),
2075 ]),
2076
2077 upper_bounds: HashMap::from([
2078 (1, Datum::int(INT_MAX_VALUE)),
2079 (11, Datum::float(f32::NAN)),
2080 (12, Datum::double(f64::NAN)),
2081 (14, Datum::string("房东整租霍营小区二层两居室")),
2082 ]),
2083
2084 column_sizes: Default::default(),
2085 key_metadata: None,
2086 split_offsets: None,
2087 equality_ids: None,
2088 sort_order_id: None,
2089 partition_spec_id: 0,
2090 first_row_id: None,
2091 referenced_data_file: None,
2092 content_offset: None,
2093 content_size_in_bytes: None,
2094 }
2095 }
2096 fn get_test_file_2() -> DataFile {
2097 DataFile {
2098 content: DataContentType::Data,
2099 file_path: "file_2.avro".to_string(),
2100 file_format: DataFileFormat::Parquet,
2101 partition: Struct::empty(),
2102 record_count: 50,
2103 file_size_in_bytes: 10,
2104
2105 value_counts: HashMap::from([(3, 20)]),
2106
2107 null_value_counts: HashMap::from([(3, 2)]),
2108
2109 nan_value_counts: HashMap::default(),
2110
2111 lower_bounds: HashMap::from([(3, Datum::string("aa"))]),
2112
2113 upper_bounds: HashMap::from([(3, Datum::string("dC"))]),
2114
2115 column_sizes: Default::default(),
2116 key_metadata: None,
2117 split_offsets: None,
2118 equality_ids: None,
2119 sort_order_id: None,
2120 partition_spec_id: 0,
2121 first_row_id: None,
2122 referenced_data_file: None,
2123 content_offset: None,
2124 content_size_in_bytes: None,
2125 }
2126 }
2127
2128 fn get_test_file_3() -> DataFile {
2129 DataFile {
2130 content: DataContentType::Data,
2131 file_path: "file_3.avro".to_string(),
2132 file_format: DataFileFormat::Parquet,
2133 partition: Struct::empty(),
2134 record_count: 50,
2135 file_size_in_bytes: 10,
2136
2137 value_counts: HashMap::from([(3, 20)]),
2138
2139 null_value_counts: HashMap::from([(3, 2)]),
2140
2141 nan_value_counts: HashMap::default(),
2142
2143 lower_bounds: HashMap::from([(3, Datum::string("1str1"))]),
2144
2145 upper_bounds: HashMap::from([(3, Datum::string("3str3"))]),
2146
2147 column_sizes: Default::default(),
2148 key_metadata: None,
2149 split_offsets: None,
2150 equality_ids: None,
2151 sort_order_id: None,
2152 partition_spec_id: 0,
2153 first_row_id: None,
2154 referenced_data_file: None,
2155 content_offset: None,
2156 content_size_in_bytes: None,
2157 }
2158 }
2159
2160 fn get_test_file_4() -> DataFile {
2161 DataFile {
2162 content: DataContentType::Data,
2163 file_path: "file_4.avro".to_string(),
2164 file_format: DataFileFormat::Parquet,
2165 partition: Struct::empty(),
2166 record_count: 50,
2167 file_size_in_bytes: 10,
2168
2169 value_counts: HashMap::from([(3, 20)]),
2170
2171 null_value_counts: HashMap::from([(3, 2)]),
2172
2173 nan_value_counts: HashMap::default(),
2174
2175 lower_bounds: HashMap::from([(3, Datum::string("abc"))]),
2176
2177 upper_bounds: HashMap::from([(3, Datum::string("イロハニホヘト"))]),
2178
2179 column_sizes: Default::default(),
2180 key_metadata: None,
2181 split_offsets: None,
2182 equality_ids: None,
2183 sort_order_id: None,
2184 partition_spec_id: 0,
2185 first_row_id: None,
2186 referenced_data_file: None,
2187 content_offset: None,
2188 content_size_in_bytes: None,
2189 }
2190 }
2191}