1use fnv::FnvHashSet;
19use serde_bytes::ByteBuf;
20
21use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit};
22use crate::expr::{BoundPredicate, BoundReference};
23use crate::spec::{Datum, FieldSummary, ManifestFile, PrimitiveLiteral, Type};
24use crate::{Error, ErrorKind, Result};
25
26#[derive(Debug)]
28pub(crate) struct ManifestEvaluatorBuilder {
29 partition_filter: BoundPredicate,
30 rewrite_not: bool,
31}
32
33impl ManifestEvaluatorBuilder {
34 pub(crate) fn new(partition_filter: BoundPredicate) -> Self {
36 Self {
37 partition_filter,
38 rewrite_not: false,
39 }
40 }
41
42 #[allow(unused)]
46 pub(crate) fn with_rewrite_not(mut self, rewrite_not: bool) -> Self {
47 self.rewrite_not = rewrite_not;
48 self
49 }
50
51 pub(crate) fn build(self) -> ManifestEvaluator {
53 let partition_filter = if self.rewrite_not {
54 self.partition_filter.rewrite_not()
55 } else {
56 self.partition_filter
57 };
58
59 ManifestEvaluator { partition_filter }
60 }
61}
62
63#[derive(Debug)]
69pub(crate) struct ManifestEvaluator {
70 partition_filter: BoundPredicate,
71}
72
73impl ManifestEvaluator {
74 pub(crate) fn builder(partition_filter: BoundPredicate) -> ManifestEvaluatorBuilder {
76 ManifestEvaluatorBuilder::new(partition_filter)
77 }
78
79 pub(crate) fn eval(&self, manifest_file: &ManifestFile) -> Result<bool> {
84 match &manifest_file.partitions {
85 Some(p) if !p.is_empty() => {
86 let mut evaluator = ManifestFilterVisitor::new(p);
87 visit(&mut evaluator, &self.partition_filter)
88 }
89 _ => Ok(true),
90 }
91 }
92}
93
94struct ManifestFilterVisitor<'a> {
95 partitions: &'a Vec<FieldSummary>,
96}
97
98impl<'a> ManifestFilterVisitor<'a> {
99 fn new(partitions: &'a Vec<FieldSummary>) -> Self {
100 ManifestFilterVisitor { partitions }
101 }
102}
103
104const ROWS_MIGHT_MATCH: Result<bool> = Ok(true);
105const ROWS_CANNOT_MATCH: Result<bool> = Ok(false);
106const IN_PREDICATE_LIMIT: usize = 200;
107
108impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
109 type T = bool;
110
111 fn always_true(&mut self) -> crate::Result<bool> {
112 ROWS_MIGHT_MATCH
113 }
114
115 fn always_false(&mut self) -> crate::Result<bool> {
116 ROWS_CANNOT_MATCH
117 }
118
119 fn and(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
120 Ok(lhs && rhs)
121 }
122
123 fn or(&mut self, lhs: bool, rhs: bool) -> crate::Result<bool> {
124 Ok(lhs || rhs)
125 }
126
127 fn not(&mut self, _: bool) -> crate::Result<bool> {
128 Err(Error::new(
129 ErrorKind::Unexpected,
130 "not operator is not supported in partition filter",
131 ))
132 }
133
134 fn is_null(
135 &mut self,
136 reference: &BoundReference,
137 _predicate: &BoundPredicate,
138 ) -> crate::Result<bool> {
139 Ok(self.field_summary_for_reference(reference).contains_null)
140 }
141
142 fn not_null(
143 &mut self,
144 reference: &BoundReference,
145 _predicate: &BoundPredicate,
146 ) -> crate::Result<bool> {
147 let field = self.field_summary_for_reference(reference);
148
149 if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) {
152 ROWS_CANNOT_MATCH
153 } else {
154 ROWS_MIGHT_MATCH
155 }
156 }
157
158 fn is_nan(
159 &mut self,
160 reference: &BoundReference,
161 _predicate: &BoundPredicate,
162 ) -> crate::Result<bool> {
163 let field = self.field_summary_for_reference(reference);
164 if let Some(contains_nan) = field.contains_nan
165 && !contains_nan
166 {
167 return ROWS_CANNOT_MATCH;
168 }
169
170 if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) {
171 return ROWS_CANNOT_MATCH;
172 }
173
174 ROWS_MIGHT_MATCH
175 }
176
177 fn not_nan(
178 &mut self,
179 reference: &BoundReference,
180 _predicate: &BoundPredicate,
181 ) -> crate::Result<bool> {
182 let field = self.field_summary_for_reference(reference);
183 if let Some(contains_nan) = field.contains_nan {
184 if contains_nan && !field.contains_null && field.lower_bound.is_none() {
186 return ROWS_CANNOT_MATCH;
187 }
188 }
189 ROWS_MIGHT_MATCH
190 }
191
192 fn less_than(
193 &mut self,
194 reference: &BoundReference,
195 datum: &Datum,
196 _predicate: &BoundPredicate,
197 ) -> crate::Result<bool> {
198 let field = self.field_summary_for_reference(reference);
199
200 match &field.lower_bound {
201 Some(bound_bytes) => {
202 let bound = ManifestFilterVisitor::bytes_to_datum(
203 bound_bytes,
204 *reference.field().field_type.clone(),
205 );
206 if datum <= &bound {
207 ROWS_CANNOT_MATCH
208 } else {
209 ROWS_MIGHT_MATCH
210 }
211 }
212 None => ROWS_CANNOT_MATCH,
213 }
214 }
215
216 fn less_than_or_eq(
217 &mut self,
218 reference: &BoundReference,
219 datum: &Datum,
220 _predicate: &BoundPredicate,
221 ) -> crate::Result<bool> {
222 let field = self.field_summary_for_reference(reference);
223 match &field.lower_bound {
224 Some(bound_bytes) => {
225 let bound = ManifestFilterVisitor::bytes_to_datum(
226 bound_bytes,
227 *reference.field().field_type.clone(),
228 );
229 if datum < &bound {
230 ROWS_CANNOT_MATCH
231 } else {
232 ROWS_MIGHT_MATCH
233 }
234 }
235 None => ROWS_CANNOT_MATCH,
236 }
237 }
238
239 fn greater_than(
240 &mut self,
241 reference: &BoundReference,
242 datum: &Datum,
243 _predicate: &BoundPredicate,
244 ) -> crate::Result<bool> {
245 let field = self.field_summary_for_reference(reference);
246 match &field.upper_bound {
247 Some(bound_bytes) => {
248 let bound = ManifestFilterVisitor::bytes_to_datum(
249 bound_bytes,
250 *reference.field().field_type.clone(),
251 );
252 if datum >= &bound {
253 ROWS_CANNOT_MATCH
254 } else {
255 ROWS_MIGHT_MATCH
256 }
257 }
258 None => ROWS_CANNOT_MATCH,
259 }
260 }
261
262 fn greater_than_or_eq(
263 &mut self,
264 reference: &BoundReference,
265 datum: &Datum,
266 _predicate: &BoundPredicate,
267 ) -> crate::Result<bool> {
268 let field = self.field_summary_for_reference(reference);
269 match &field.upper_bound {
270 Some(bound_bytes) => {
271 let bound = ManifestFilterVisitor::bytes_to_datum(
272 bound_bytes,
273 *reference.field().field_type.clone(),
274 );
275 if datum > &bound {
276 ROWS_CANNOT_MATCH
277 } else {
278 ROWS_MIGHT_MATCH
279 }
280 }
281 None => ROWS_CANNOT_MATCH,
282 }
283 }
284
285 fn eq(
286 &mut self,
287 reference: &BoundReference,
288 datum: &Datum,
289 _predicate: &BoundPredicate,
290 ) -> crate::Result<bool> {
291 let field = self.field_summary_for_reference(reference);
292
293 if field.lower_bound.is_none() || field.upper_bound.is_none() {
294 return ROWS_CANNOT_MATCH;
295 }
296
297 if let Some(lower_bound_bytes) = &field.lower_bound {
298 let lower_bound = ManifestFilterVisitor::bytes_to_datum(
299 lower_bound_bytes,
300 *reference.field().field_type.clone(),
301 );
302 if &lower_bound > datum {
303 return ROWS_CANNOT_MATCH;
304 }
305 }
306
307 if let Some(upper_bound_bytes) = &field.upper_bound {
308 let upper_bound = ManifestFilterVisitor::bytes_to_datum(
309 upper_bound_bytes,
310 *reference.field().field_type.clone(),
311 );
312 if &upper_bound < datum {
313 return ROWS_CANNOT_MATCH;
314 }
315 }
316
317 ROWS_MIGHT_MATCH
318 }
319
320 fn not_eq(
321 &mut self,
322 _reference: &BoundReference,
323 _datum: &Datum,
324 _predicate: &BoundPredicate,
325 ) -> crate::Result<bool> {
326 ROWS_MIGHT_MATCH
329 }
330
331 fn starts_with(
332 &mut self,
333 reference: &BoundReference,
334 datum: &Datum,
335 _predicate: &BoundPredicate,
336 ) -> crate::Result<bool> {
337 let field = self.field_summary_for_reference(reference);
338
339 if field.lower_bound.is_none() || field.upper_bound.is_none() {
340 return ROWS_CANNOT_MATCH;
341 }
342
343 let prefix = ManifestFilterVisitor::datum_as_str(
344 datum,
345 "Cannot perform starts_with on non-string value",
346 )?;
347 let prefix_len = prefix.len();
348
349 if let Some(lower_bound) = &field.lower_bound {
350 let min_len = lower_bound.len().min(prefix_len);
351 if prefix.as_bytes().lt(&lower_bound[..min_len]) {
352 return ROWS_CANNOT_MATCH;
353 }
354 }
355
356 if let Some(upper_bound) = &field.upper_bound {
357 let min_len = upper_bound.len().min(prefix_len);
358 if prefix.as_bytes().gt(&upper_bound[..min_len]) {
359 return ROWS_CANNOT_MATCH;
360 }
361 }
362
363 ROWS_MIGHT_MATCH
364 }
365
366 fn not_starts_with(
367 &mut self,
368 reference: &BoundReference,
369 datum: &Datum,
370 _predicate: &BoundPredicate,
371 ) -> crate::Result<bool> {
372 let field = self.field_summary_for_reference(reference);
373
374 if field.contains_null || field.lower_bound.is_none() || field.upper_bound.is_none() {
375 return ROWS_MIGHT_MATCH;
376 }
377
378 let prefix = ManifestFilterVisitor::datum_as_str(
379 datum,
380 "Cannot perform not_starts_with on non-string value",
381 )?;
382 let prefix_len = prefix.len();
383
384 if let Some(lower_bound) = &field.lower_bound {
387 if prefix_len > lower_bound.len() {
389 return ROWS_MIGHT_MATCH;
390 }
391
392 if prefix.as_bytes().eq(&lower_bound[..prefix_len])
393 && let Some(upper_bound) = &field.upper_bound
394 {
395 if prefix_len > upper_bound.len() {
397 return ROWS_MIGHT_MATCH;
398 }
399
400 if prefix.as_bytes().eq(&upper_bound[..prefix_len]) {
401 return ROWS_CANNOT_MATCH;
402 }
403 }
404 }
405
406 ROWS_MIGHT_MATCH
407 }
408
409 fn r#in(
410 &mut self,
411 reference: &BoundReference,
412 literals: &FnvHashSet<Datum>,
413 _predicate: &BoundPredicate,
414 ) -> crate::Result<bool> {
415 let field = self.field_summary_for_reference(reference);
416 if field.lower_bound.is_none() {
417 return ROWS_CANNOT_MATCH;
418 }
419
420 if literals.len() > IN_PREDICATE_LIMIT {
421 return ROWS_MIGHT_MATCH;
422 }
423
424 if let Some(lower_bound) = &field.lower_bound {
425 let lower_bound = ManifestFilterVisitor::bytes_to_datum(
426 lower_bound,
427 *reference.field().clone().field_type,
428 );
429 if literals.iter().all(|datum| &lower_bound > datum) {
430 return ROWS_CANNOT_MATCH;
431 }
432 }
433
434 if let Some(upper_bound) = &field.upper_bound {
435 let upper_bound = ManifestFilterVisitor::bytes_to_datum(
436 upper_bound,
437 *reference.field().clone().field_type,
438 );
439 if literals.iter().all(|datum| &upper_bound < datum) {
440 return ROWS_CANNOT_MATCH;
441 }
442 }
443
444 ROWS_MIGHT_MATCH
445 }
446
447 fn not_in(
448 &mut self,
449 _reference: &BoundReference,
450 _literals: &FnvHashSet<Datum>,
451 _predicate: &BoundPredicate,
452 ) -> crate::Result<bool> {
453 ROWS_MIGHT_MATCH
456 }
457}
458
459impl ManifestFilterVisitor<'_> {
460 fn field_summary_for_reference(&self, reference: &BoundReference) -> &FieldSummary {
461 let pos = reference.accessor().position();
462 &self.partitions[pos]
463 }
464
465 fn are_all_null(field: &FieldSummary, r#type: &Type) -> bool {
466 let mut all_null: bool = field.contains_null && field.lower_bound.is_none();
469
470 if all_null && r#type.is_floating_type() {
471 all_null = match field.contains_nan {
474 Some(val) => !val,
475 None => false,
476 }
477 }
478
479 all_null
480 }
481
482 fn datum_as_str<'a>(bound: &'a Datum, err_msg: &str) -> crate::Result<&'a String> {
483 let PrimitiveLiteral::String(bound) = bound.literal() else {
484 return Err(Error::new(ErrorKind::Unexpected, err_msg));
485 };
486 Ok(bound)
487 }
488
489 fn bytes_to_datum(bytes: &ByteBuf, t: Type) -> Datum {
490 let p = t.as_primitive_type().unwrap();
491 Datum::try_from_bytes(bytes, p.clone()).unwrap()
492 }
493}
494
495#[cfg(test)]
496mod test {
497 use std::ops::Not;
498 use std::sync::Arc;
499
500 use fnv::FnvHashSet;
501
502 use crate::Result;
503 use crate::expr::visitors::manifest_evaluator::ManifestEvaluator;
504 use crate::expr::{
505 BinaryExpression, Bind, Predicate, PredicateOperator, Reference, SetExpression,
506 UnaryExpression,
507 };
508 use crate::spec::{
509 Datum, FieldSummary, ManifestContentType, ManifestFile, NestedField, PrimitiveType, Schema,
510 SchemaRef, Type,
511 };
512
/// Lower bound recorded for the `id` partition field in the test summaries.
const INT_MIN_VALUE: i32 = 30;
/// Upper bound recorded for the `id` partition field in the test summaries.
const INT_MAX_VALUE: i32 = 79;

/// Smallest string used as a bound in the test summaries.
const STRING_MIN_VALUE: &str = "a";
/// Largest string used as a bound in the test summaries.
const STRING_MAX_VALUE: &str = "z";
518
519 fn create_schema() -> Result<SchemaRef> {
520 let schema = Schema::builder()
521 .with_fields(vec![
522 Arc::new(NestedField::required(
523 1,
524 "id",
525 Type::Primitive(PrimitiveType::Int),
526 )),
527 Arc::new(NestedField::optional(
528 2,
529 "all_nulls_missing_nan",
530 Type::Primitive(PrimitiveType::String),
531 )),
532 Arc::new(NestedField::optional(
533 3,
534 "some_nulls",
535 Type::Primitive(PrimitiveType::String),
536 )),
537 Arc::new(NestedField::optional(
538 4,
539 "no_nulls",
540 Type::Primitive(PrimitiveType::String),
541 )),
542 Arc::new(NestedField::optional(
543 5,
544 "float",
545 Type::Primitive(PrimitiveType::Float),
546 )),
547 Arc::new(NestedField::optional(
548 6,
549 "all_nulls_double",
550 Type::Primitive(PrimitiveType::Double),
551 )),
552 Arc::new(NestedField::optional(
553 7,
554 "all_nulls_no_nans",
555 Type::Primitive(PrimitiveType::Float),
556 )),
557 Arc::new(NestedField::optional(
558 8,
559 "all_nans",
560 Type::Primitive(PrimitiveType::Double),
561 )),
562 Arc::new(NestedField::optional(
563 9,
564 "both_nan_and_null",
565 Type::Primitive(PrimitiveType::Float),
566 )),
567 Arc::new(NestedField::optional(
568 10,
569 "no_nan_or_null",
570 Type::Primitive(PrimitiveType::Double),
571 )),
572 Arc::new(NestedField::optional(
573 11,
574 "all_nulls_missing_nan_float",
575 Type::Primitive(PrimitiveType::Float),
576 )),
577 Arc::new(NestedField::optional(
578 12,
579 "all_same_value_or_null",
580 Type::Primitive(PrimitiveType::String),
581 )),
582 Arc::new(NestedField::optional(
583 13,
584 "no_nulls_same_value_a",
585 Type::Primitive(PrimitiveType::String),
586 )),
587 ])
588 .build()?;
589
590 Ok(Arc::new(schema))
591 }
592
593 fn create_partitions() -> Vec<FieldSummary> {
594 vec![
595 FieldSummary {
597 contains_null: false,
598 contains_nan: None,
599 lower_bound: Some(Datum::int(INT_MIN_VALUE).to_bytes().unwrap()),
600 upper_bound: Some(Datum::int(INT_MAX_VALUE).to_bytes().unwrap()),
601 },
602 FieldSummary {
604 contains_null: true,
605 contains_nan: None,
606 lower_bound: None,
607 upper_bound: None,
608 },
609 FieldSummary {
611 contains_null: true,
612 contains_nan: None,
613 lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
614 upper_bound: Some(Datum::string(STRING_MAX_VALUE).to_bytes().unwrap()),
615 },
616 FieldSummary {
618 contains_null: false,
619 contains_nan: None,
620 lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
621 upper_bound: Some(Datum::string(STRING_MAX_VALUE).to_bytes().unwrap()),
622 },
623 FieldSummary {
625 contains_null: true,
626 contains_nan: None,
627 lower_bound: Some(Datum::float(0.0).to_bytes().unwrap()),
628 upper_bound: Some(Datum::float(20.0).to_bytes().unwrap()),
629 },
630 FieldSummary {
632 contains_null: true,
633 contains_nan: None,
634 lower_bound: None,
635 upper_bound: None,
636 },
637 FieldSummary {
639 contains_null: true,
640 contains_nan: Some(false),
641 lower_bound: None,
642 upper_bound: None,
643 },
644 FieldSummary {
646 contains_null: false,
647 contains_nan: Some(true),
648 lower_bound: None,
649 upper_bound: None,
650 },
651 FieldSummary {
653 contains_null: true,
654 contains_nan: Some(true),
655 lower_bound: None,
656 upper_bound: None,
657 },
658 FieldSummary {
660 contains_null: false,
661 contains_nan: Some(false),
662 lower_bound: Some(Datum::float(0.0).to_bytes().unwrap()),
663 upper_bound: Some(Datum::float(20.0).to_bytes().unwrap()),
664 },
665 FieldSummary {
667 contains_null: true,
668 contains_nan: None,
669 lower_bound: None,
670 upper_bound: None,
671 },
672 FieldSummary {
674 contains_null: true,
675 contains_nan: None,
676 lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
677 upper_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
678 },
679 FieldSummary {
681 contains_null: false,
682 contains_nan: None,
683 lower_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
684 upper_bound: Some(Datum::string(STRING_MIN_VALUE).to_bytes().unwrap()),
685 },
686 ]
687 }
688
689 fn create_manifest_file(partitions: Vec<FieldSummary>) -> ManifestFile {
690 ManifestFile {
691 manifest_path: "/test/path".to_string(),
692 manifest_length: 0,
693 partition_spec_id: 1,
694 content: ManifestContentType::Data,
695 sequence_number: 0,
696 min_sequence_number: 0,
697 added_snapshot_id: 0,
698 added_files_count: None,
699 existing_files_count: None,
700 deleted_files_count: None,
701 added_rows_count: None,
702 existing_rows_count: None,
703 deleted_rows_count: None,
704 partitions: Some(partitions),
705 key_metadata: None,
706 first_row_id: None,
707 }
708 }
709
/// An always-true filter must report that the manifest might match.
#[test]
fn test_always_true() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let bound = Predicate::AlwaysTrue.bind(schema, false)?;
    let might_match = ManifestEvaluator::builder(bound)
        .build()
        .eval(&manifest_file)?;

    assert!(might_match);

    Ok(())
}
727
/// An always-false filter must report that the manifest cannot match.
#[test]
fn test_always_false() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let bound = Predicate::AlwaysFalse.bind(schema, false)?;
    let might_match = ManifestEvaluator::builder(bound)
        .build()
        .eval(&manifest_file)?;

    assert!(!might_match);

    Ok(())
}
745
/// Checks `NOT NULL` pruning across the null-related test columns.
#[test]
fn test_all_nulls() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    // Binds `<column> IS NOT NULL` (case sensitive) and evaluates it.
    let not_null_might_match = |column: &str| -> Result<bool> {
        let bound = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::NotNull,
            Reference::new(column),
        ))
        .bind(schema.clone(), true)?;

        ManifestEvaluator::builder(bound)
            .build()
            .eval(&manifest_file)
    };

    assert!(
        !not_null_might_match("all_nulls_missing_nan")?,
        "Should skip: all nulls column with non-floating type contains all null"
    );

    assert!(
        not_null_might_match("all_nulls_missing_nan_float")?,
        "Should read: no NaN information may indicate presence of NaN value"
    );

    assert!(
        not_null_might_match("some_nulls")?,
        "Should read: column with some nulls contains a non-null value"
    );

    assert!(
        not_null_might_match("no_nulls")?,
        "Should read: non-null column contains a non-null value"
    );

    Ok(())
}
808
/// Checks `IS NULL` pruning across the null-related test columns.
#[test]
fn test_no_nulls() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    // Binds `<column> IS NULL` (case sensitive) and evaluates it.
    let is_null_might_match = |column: &str| -> Result<bool> {
        let bound = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::IsNull,
            Reference::new(column),
        ))
        .bind(schema.clone(), true)?;

        ManifestEvaluator::builder(bound)
            .build()
            .eval(&manifest_file)
    };

    assert!(
        is_null_might_match("all_nulls_missing_nan")?,
        "Should read: at least one null value in all null column"
    );

    assert!(
        is_null_might_match("some_nulls")?,
        "Should read: column with some nulls contains a null value"
    );

    assert!(
        !is_null_might_match("no_nulls")?,
        "Should skip: non-null column contains no null values"
    );

    // The summary for this column has `contains_null: true`, so the manifest is
    // read because nulls are present (the previous message claimed the opposite).
    assert!(
        is_null_might_match("both_nan_and_null")?,
        "Should read: both_nan_and_null column contains null values"
    );

    Ok(())
}
871
/// Checks `IS NAN` pruning across the floating-point test columns.
#[test]
fn test_is_nan() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    // Binds `<column> IS NAN` (case sensitive) and evaluates it.
    let is_nan_might_match = |column: &str| -> Result<bool> {
        let bound = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::IsNan,
            Reference::new(column),
        ))
        .bind(schema.clone(), true)?;

        ManifestEvaluator::builder(bound)
            .build()
            .eval(&manifest_file)
    };

    // (column, expected verdict, failure message), checked in this order.
    let cases = [
        (
            "float",
            true,
            "Should read: no information on if there are nan value in float column",
        ),
        (
            "all_nulls_double",
            true,
            "Should read: no NaN information may indicate presence of NaN value",
        ),
        (
            "all_nulls_missing_nan_float",
            true,
            "Should read: no NaN information may indicate presence of NaN value",
        ),
        (
            "all_nulls_no_nans",
            false,
            "Should skip: no nan column doesn't contain nan value",
        ),
        (
            "all_nans",
            true,
            "Should read: all_nans column contains nan value",
        ),
        (
            "both_nan_and_null",
            true,
            "Should read: both_nan_and_null column contains nan value",
        ),
        (
            "no_nan_or_null",
            false,
            "Should skip: no_nan_or_null column doesn't contain nan value",
        ),
    ];

    for (column, expected, message) in cases {
        assert!(is_nan_might_match(column)? == expected, "{message}");
    }

    Ok(())
}
972
/// Checks `NOT NAN` pruning across the floating-point test columns.
#[test]
fn test_not_nan() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    // Binds `<column> NOT NAN` (case sensitive) and evaluates it.
    let not_nan_might_match = |column: &str| -> Result<bool> {
        let bound = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::NotNan,
            Reference::new(column),
        ))
        .bind(schema.clone(), true)?;

        ManifestEvaluator::builder(bound)
            .build()
            .eval(&manifest_file)
    };

    // (column, expected verdict, failure message), checked in this order.
    let cases = [
        (
            "float",
            true,
            "Should read: no information on if there are nan value in float column",
        ),
        (
            "all_nulls_double",
            true,
            "Should read: all null column contains non nan value",
        ),
        (
            "all_nulls_no_nans",
            true,
            "Should read: no_nans column contains non nan value",
        ),
        (
            "all_nans",
            false,
            "Should skip: all nans column doesn't contain non nan value",
        ),
        (
            "both_nan_and_null",
            true,
            "Should read: both_nan_and_null nans column contains non nan value",
        ),
        (
            "no_nan_or_null",
            true,
            "Should read: no_nan_or_null column contains non nan value",
        ),
    ];

    for (column, expected, message) in cases {
        assert!(not_nan_might_match(column)? == expected, "{message}");
    }

    Ok(())
}
1060
/// `id < 5 AND id >= 0` must be pruned: the left side cannot match the
/// `[30, 79]` bounds even though the right side might.
#[test]
fn test_and() -> Result<()> {
    let case_sensitive = true;
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let below_lower_bound = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::LessThan,
        Reference::new("id"),
        Datum::int(INT_MIN_VALUE - 25),
    ));
    let at_least_zero = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::GreaterThanOrEq,
        Reference::new("id"),
        Datum::int(INT_MIN_VALUE - 30),
    ));

    let filter = below_lower_bound
        .and(at_least_zero)
        .bind(schema.clone(), case_sensitive)?;

    // The previous failure message was copied from the NaN tests and said
    // "Should read"; this assertion expects a skip.
    assert!(
        !ManifestEvaluator::builder(filter)
            .build()
            .eval(&manifest_file)?,
        "Should skip: and(false, true)"
    );

    Ok(())
}
1088
/// `id < 5 OR id >= 80` must be pruned: neither side can match the `[30, 79]`
/// bounds.
#[test]
fn test_or() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let below_lower_bound = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::LessThan,
        Reference::new("id"),
        Datum::int(INT_MIN_VALUE - 25),
    ));
    let above_upper_bound = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::GreaterThanOrEq,
        Reference::new("id"),
        Datum::int(INT_MAX_VALUE + 1),
    ));

    let bound = below_lower_bound
        .or(above_upper_bound)
        .bind(schema.clone(), true)?;
    let might_match = ManifestEvaluator::builder(bound)
        .build()
        .eval(&manifest_file)?;

    assert!(!might_match, "Should skip: or(false, false)");

    Ok(())
}
1116
/// A bound NOT node makes evaluation fail; rewriting the NOT away before binding
/// makes it evaluate normally.
#[test]
fn test_not() -> Result<()> {
    let case_sensitive = true;
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    // Builds the unbound predicate `NOT (id <op> 5)`.
    let negated = |op: PredicateOperator| {
        Predicate::Binary(BinaryExpression::new(
            op,
            Reference::new("id"),
            Datum::int(INT_MIN_VALUE - 25),
        ))
        .not()
    };

    // Evaluates an already bound filter against the shared manifest.
    let evaluate = |filter| {
        ManifestEvaluator::builder(filter)
            .build()
            .eval(&manifest_file)
    };

    let filter = negated(PredicateOperator::LessThan).bind(schema.clone(), case_sensitive)?;
    assert!(evaluate(filter).is_err(),);

    let filter = negated(PredicateOperator::LessThan)
        .rewrite_not()
        .bind(schema.clone(), case_sensitive)?;
    assert!(evaluate(filter)?, "Should read: not(false)");

    let filter = negated(PredicateOperator::GreaterThan).bind(schema.clone(), case_sensitive)?;
    assert!(evaluate(filter).is_err());

    let filter = negated(PredicateOperator::GreaterThan)
        .rewrite_not()
        .bind(schema.clone(), case_sensitive)?;
    assert!(!evaluate(filter)?, "Should skip: not(true)");

    Ok(())
}
1183
/// `id < 5` must be pruned against the `[30, 79]` bounds.
#[test]
fn test_less_than() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let predicate = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::LessThan,
        Reference::new("id"),
        Datum::int(INT_MIN_VALUE - 25),
    ));
    let bound = predicate.bind(schema.clone(), true)?;
    let might_match = ManifestEvaluator::builder(bound)
        .build()
        .eval(&manifest_file)?;

    assert!(
        !might_match,
        "Should not read: id range below lower bound (5 < 30)"
    );

    Ok(())
}
1206
/// `id <= 5` must be pruned against the `[30, 79]` bounds.
#[test]
fn test_less_than_or_eq() -> Result<()> {
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let predicate = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::LessThanOrEq,
        Reference::new("id"),
        Datum::int(INT_MIN_VALUE - 25),
    ));
    let bound = predicate.bind(schema.clone(), true)?;
    let might_match = ManifestEvaluator::builder(bound)
        .build()
        .eval(&manifest_file)?;

    assert!(
        !might_match,
        "Should not read: id range below lower bound (5 < 30)"
    );

    Ok(())
}
1229
/// `id > 85` must be pruned against the `[30, 79]` bounds.
#[test]
fn test_greater_than() -> Result<()> {
    let case_sensitive = true;
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    let filter = Predicate::Binary(BinaryExpression::new(
        PredicateOperator::GreaterThan,
        Reference::new("id"),
        Datum::int(INT_MAX_VALUE + 6),
    ))
    .bind(schema.clone(), case_sensitive)?;

    // The literal (85) lies above the upper bound (79); the previous message
    // stated the comparison the wrong way round.
    assert!(
        !ManifestEvaluator::builder(filter)
            .build()
            .eval(&manifest_file)?,
        "Should not read: id range above upper bound (85 > 79)"
    );

    Ok(())
}
1252
/// `id >= 85` must be pruned against the `[30, 79]` bounds, while `id >= 79`
/// must still be read.
#[test]
fn test_greater_than_or_eq() -> Result<()> {
    let case_sensitive = true;
    let schema = create_schema()?;
    let manifest_file = create_manifest_file(create_partitions());

    // Binds `id >= value` and evaluates it against the shared manifest.
    let might_match = |value: i32| -> Result<bool> {
        let filter = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::GreaterThanOrEq,
            Reference::new("id"),
            Datum::int(value),
        ))
        .bind(schema.clone(), case_sensitive)?;

        ManifestEvaluator::builder(filter)
            .build()
            .eval(&manifest_file)
    };

    // The literal (85) lies above the upper bound (79); the previous message
    // stated the comparison the wrong way round.
    assert!(
        !might_match(INT_MAX_VALUE + 6)?,
        "Should not read: id range above upper bound (85 > 79)"
    );

    assert!(might_match(INT_MAX_VALUE)?, "Should read: one possible id");

    Ok(())
}
1288
1289 #[test]
1290 fn test_eq() -> Result<()> {
1291 let case_sensitive = true;
1292 let schema = create_schema()?;
1293 let partitions = create_partitions();
1294 let manifest_file = create_manifest_file(partitions);
1295
1296 let filter = Predicate::Binary(BinaryExpression::new(
1297 PredicateOperator::Eq,
1298 Reference::new("id"),
1299 Datum::int(INT_MIN_VALUE - 25),
1300 ))
1301 .bind(schema.clone(), case_sensitive)?;
1302 assert!(
1303 !ManifestEvaluator::builder(filter)
1304 .build()
1305 .eval(&manifest_file)?,
1306 "Should not read: id below lower bound"
1307 );
1308
1309 let filter = Predicate::Binary(BinaryExpression::new(
1310 PredicateOperator::Eq,
1311 Reference::new("id"),
1312 Datum::int(INT_MIN_VALUE),
1313 ))
1314 .bind(schema.clone(), case_sensitive)?;
1315 assert!(
1316 ManifestEvaluator::builder(filter)
1317 .build()
1318 .eval(&manifest_file)?,
1319 "Should read: id equal to lower bound"
1320 );
1321
1322 Ok(())
1323 }
1324
1325 #[test]
1326 fn test_not_eq() -> Result<()> {
1327 let case_sensitive = true;
1328 let schema = create_schema()?;
1329 let partitions = create_partitions();
1330 let manifest_file = create_manifest_file(partitions);
1331
1332 let filter = Predicate::Binary(BinaryExpression::new(
1333 PredicateOperator::NotEq,
1334 Reference::new("id"),
1335 Datum::int(INT_MIN_VALUE - 25),
1336 ))
1337 .bind(schema.clone(), case_sensitive)?;
1338 assert!(
1339 ManifestEvaluator::builder(filter)
1340 .build()
1341 .eval(&manifest_file)?,
1342 "Should read: id below lower bound"
1343 );
1344
1345 Ok(())
1346 }
1347
1348 #[test]
1349 fn test_in() -> Result<()> {
1350 let case_sensitive = true;
1351 let schema = create_schema()?;
1352 let partitions = create_partitions();
1353 let manifest_file = create_manifest_file(partitions);
1354
1355 let filter = Predicate::Set(SetExpression::new(
1356 PredicateOperator::In,
1357 Reference::new("id"),
1358 FnvHashSet::from_iter(vec![
1359 Datum::int(INT_MIN_VALUE - 25),
1360 Datum::int(INT_MIN_VALUE - 24),
1361 ]),
1362 ))
1363 .bind(schema.clone(), case_sensitive)?;
1364 assert!(
1365 !ManifestEvaluator::builder(filter)
1366 .build()
1367 .eval(&manifest_file)?,
1368 "Should not read: id below lower bound (5 < 30, 6 < 30)"
1369 );
1370
1371 let filter = Predicate::Set(SetExpression::new(
1372 PredicateOperator::In,
1373 Reference::new("id"),
1374 FnvHashSet::from_iter(vec![
1375 Datum::int(INT_MIN_VALUE - 1),
1376 Datum::int(INT_MIN_VALUE),
1377 ]),
1378 ))
1379 .bind(schema.clone(), case_sensitive)?;
1380 assert!(
1381 ManifestEvaluator::builder(filter)
1382 .build()
1383 .eval(&manifest_file)?,
1384 "Should read: id equal to lower bound (30 == 30)"
1385 );
1386
1387 Ok(())
1388 }
1389
1390 #[test]
1391 fn test_not_in() -> Result<()> {
1392 let case_sensitive = true;
1393 let schema = create_schema()?;
1394 let partitions = create_partitions();
1395 let manifest_file = create_manifest_file(partitions);
1396
1397 let filter = Predicate::Set(SetExpression::new(
1398 PredicateOperator::NotIn,
1399 Reference::new("id"),
1400 FnvHashSet::from_iter(vec![
1401 Datum::int(INT_MIN_VALUE - 25),
1402 Datum::int(INT_MIN_VALUE - 24),
1403 ]),
1404 ))
1405 .bind(schema.clone(), case_sensitive)?;
1406 assert!(
1407 ManifestEvaluator::builder(filter)
1408 .build()
1409 .eval(&manifest_file)?,
1410 "Should read: id below lower bound (5 < 30, 6 < 30)"
1411 );
1412
1413 Ok(())
1414 }
1415
1416 #[test]
1417 fn test_starts_with() -> Result<()> {
1418 let case_sensitive = false;
1419 let schema = create_schema()?;
1420 let partitions = create_partitions();
1421 let manifest_file = create_manifest_file(partitions);
1422
1423 let filter = Predicate::Binary(BinaryExpression::new(
1424 PredicateOperator::StartsWith,
1425 Reference::new("some_nulls"),
1426 Datum::string("a"),
1427 ))
1428 .bind(schema.clone(), case_sensitive)?;
1429 assert!(
1430 ManifestEvaluator::builder(filter)
1431 .build()
1432 .eval(&manifest_file)?,
1433 "Should read: range matches"
1434 );
1435
1436 let filter = Predicate::Binary(BinaryExpression::new(
1437 PredicateOperator::StartsWith,
1438 Reference::new("some_nulls"),
1439 Datum::string("zzzz"),
1440 ))
1441 .bind(schema.clone(), case_sensitive)?;
1442 assert!(
1443 !ManifestEvaluator::builder(filter)
1444 .build()
1445 .eval(&manifest_file)?,
1446 "Should skip: range doesn't match"
1447 );
1448
1449 Ok(())
1450 }
1451
1452 #[test]
1453 fn test_not_starts_with() -> Result<()> {
1454 let case_sensitive = false;
1455 let schema = create_schema()?;
1456 let partitions = create_partitions();
1457 let manifest_file = create_manifest_file(partitions);
1458
1459 let filter = Predicate::Binary(BinaryExpression::new(
1460 PredicateOperator::NotStartsWith,
1461 Reference::new("some_nulls"),
1462 Datum::string("a"),
1463 ))
1464 .bind(schema.clone(), case_sensitive)?;
1465 assert!(
1466 ManifestEvaluator::builder(filter)
1467 .build()
1468 .eval(&manifest_file)?,
1469 "Should read: range matches"
1470 );
1471
1472 let filter = Predicate::Binary(BinaryExpression::new(
1473 PredicateOperator::NotStartsWith,
1474 Reference::new("no_nulls_same_value_a"),
1475 Datum::string("a"),
1476 ))
1477 .bind(schema.clone(), case_sensitive)?;
1478 assert!(
1479 !ManifestEvaluator::builder(filter)
1480 .build()
1481 .eval(&manifest_file)?,
1482 "Should not read: all values start with the prefix"
1483 );
1484
1485 Ok(())
1486 }
1487
1488 #[test]
1489 fn test_manifest_evaluator_builder_with_rewrite() -> Result<()> {
1490 let case_sensitive = true;
1491 let schema = create_schema()?;
1492 let partitions = create_partitions();
1493 let manifest_file = create_manifest_file(partitions);
1494
1495 let filter = Predicate::Binary(BinaryExpression::new(
1498 PredicateOperator::LessThan,
1499 Reference::new("id"),
1500 Datum::int(25), ))
1502 .not()
1503 .bind(schema.clone(), case_sensitive)?;
1504
1505 let evaluator = ManifestEvaluator::builder(filter.clone())
1507 .with_rewrite_not(false)
1508 .build();
1509 assert!(
1510 evaluator.eval(&manifest_file).is_err(),
1511 "Should error: NOT is not supported without rewrite"
1512 );
1513
1514 let evaluator = ManifestEvaluator::builder(filter)
1516 .with_rewrite_not(true)
1517 .build();
1518 let result = evaluator.eval(&manifest_file)?;
1519 assert!(
1520 result,
1521 "Should read: NOT(id < 25) becomes (id >= 25), which matches our range [30, 79]"
1522 );
1523
1524 let simple_filter = Predicate::Binary(BinaryExpression::new(
1526 PredicateOperator::GreaterThan,
1527 Reference::new("id"),
1528 Datum::int(20),
1529 ))
1530 .bind(schema, case_sensitive)?;
1531
1532 let evaluator = ManifestEvaluator::builder(simple_filter).build();
1533 assert!(
1534 evaluator.eval(&manifest_file)?,
1535 "Should read: simple predicate without NOT works by default"
1536 );
1537
1538 Ok(())
1539 }
1540}