1use fnv::FnvHashSet;
19
20use super::bound_predicate_visitor::{BoundPredicateVisitor, visit};
21use crate::expr::{BoundPredicate, BoundReference};
22use crate::spec::{DataFile, Datum, PrimitiveLiteral, Struct};
23use crate::{Error, ErrorKind, Result};
24
/// Evaluates a bound partition filter against the partition values of a [`DataFile`].
///
/// The filter is supplied once at construction time and then reused for every
/// call to `eval`.
#[derive(Debug)]
pub(crate) struct ExpressionEvaluator {
    /// The bound predicate that `eval` walks for each data file.
    partition_filter: BoundPredicate,
}
35
36impl ExpressionEvaluator {
37 pub(crate) fn new(partition_filter: BoundPredicate) -> Self {
39 Self { partition_filter }
40 }
41
42 pub(crate) fn eval(&self, data_file: &DataFile) -> Result<bool> {
47 let mut visitor = ExpressionEvaluatorVisitor::new(data_file.partition());
48
49 visit(&mut visitor, &self.partition_filter)
50 }
51}
52
/// Visitor state used while walking a bound predicate: it carries the partition
/// [`Struct`] that predicate references are resolved against.
#[derive(Debug)]
struct ExpressionEvaluatorVisitor<'a> {
    /// Borrowed partition values in which each referenced field is looked up.
    partition: &'a Struct,
}
61
62impl<'a> ExpressionEvaluatorVisitor<'a> {
63 fn new(partition: &'a Struct) -> Self {
65 Self { partition }
66 }
67}
68
69impl BoundPredicateVisitor for ExpressionEvaluatorVisitor<'_> {
70 type T = bool;
71
72 fn always_true(&mut self) -> Result<bool> {
73 Ok(true)
74 }
75
76 fn always_false(&mut self) -> Result<bool> {
77 Ok(false)
78 }
79
80 fn and(&mut self, lhs: bool, rhs: bool) -> Result<bool> {
81 Ok(lhs && rhs)
82 }
83
84 fn or(&mut self, lhs: bool, rhs: bool) -> Result<bool> {
85 Ok(lhs || rhs)
86 }
87
88 fn not(&mut self, _inner: bool) -> Result<bool> {
89 Err(Error::new(
90 ErrorKind::Unexpected,
91 "The evaluation of expressions should not be performed against Predicates that contain a Not operator. Ensure that \"Rewrite Not\" gets applied to the originating Predicate before binding it.",
92 ))
93 }
94
95 fn is_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<bool> {
96 match reference.accessor().get(self.partition)? {
97 Some(_) => Ok(false),
98 None => Ok(true),
99 }
100 }
101
102 fn not_null(
103 &mut self,
104 reference: &BoundReference,
105 _predicate: &BoundPredicate,
106 ) -> Result<bool> {
107 match reference.accessor().get(self.partition)? {
108 Some(_) => Ok(true),
109 None => Ok(false),
110 }
111 }
112
113 fn is_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<bool> {
114 match reference.accessor().get(self.partition)? {
115 Some(datum) => Ok(datum.is_nan()),
116 None => Ok(false),
117 }
118 }
119
120 fn not_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<bool> {
121 match reference.accessor().get(self.partition)? {
122 Some(datum) => Ok(!datum.is_nan()),
123 None => Ok(true),
124 }
125 }
126
127 fn less_than(
128 &mut self,
129 reference: &BoundReference,
130 literal: &Datum,
131 _predicate: &BoundPredicate,
132 ) -> Result<bool> {
133 match reference.accessor().get(self.partition)? {
134 Some(datum) => Ok(&datum < literal),
135 None => Ok(false),
136 }
137 }
138
139 fn less_than_or_eq(
140 &mut self,
141 reference: &BoundReference,
142 literal: &Datum,
143 _predicate: &BoundPredicate,
144 ) -> Result<bool> {
145 match reference.accessor().get(self.partition)? {
146 Some(datum) => Ok(&datum <= literal),
147 None => Ok(false),
148 }
149 }
150
151 fn greater_than(
152 &mut self,
153 reference: &BoundReference,
154 literal: &Datum,
155 _predicate: &BoundPredicate,
156 ) -> Result<bool> {
157 match reference.accessor().get(self.partition)? {
158 Some(datum) => Ok(&datum > literal),
159 None => Ok(false),
160 }
161 }
162
163 fn greater_than_or_eq(
164 &mut self,
165 reference: &BoundReference,
166 literal: &Datum,
167 _predicate: &BoundPredicate,
168 ) -> Result<bool> {
169 match reference.accessor().get(self.partition)? {
170 Some(datum) => Ok(&datum >= literal),
171 None => Ok(false),
172 }
173 }
174
175 fn eq(
176 &mut self,
177 reference: &BoundReference,
178 literal: &Datum,
179 _predicate: &BoundPredicate,
180 ) -> Result<bool> {
181 match reference.accessor().get(self.partition)? {
182 Some(datum) => Ok(&datum == literal),
183 None => Ok(false),
184 }
185 }
186
187 fn not_eq(
188 &mut self,
189 reference: &BoundReference,
190 literal: &Datum,
191 _predicate: &BoundPredicate,
192 ) -> Result<bool> {
193 match reference.accessor().get(self.partition)? {
194 Some(datum) => Ok(&datum != literal),
195 None => Ok(true),
196 }
197 }
198
199 fn starts_with(
200 &mut self,
201 reference: &BoundReference,
202 literal: &Datum,
203 _predicate: &BoundPredicate,
204 ) -> Result<bool> {
205 let Some(datum) = reference.accessor().get(self.partition)? else {
206 return Ok(false);
207 };
208
209 match (datum.literal(), literal.literal()) {
210 (PrimitiveLiteral::String(d), PrimitiveLiteral::String(l)) => Ok(d.starts_with(l)),
211 _ => Ok(false),
212 }
213 }
214
215 fn not_starts_with(
216 &mut self,
217 reference: &BoundReference,
218 literal: &Datum,
219 _predicate: &BoundPredicate,
220 ) -> Result<bool> {
221 Ok(!self.starts_with(reference, literal, _predicate)?)
222 }
223
224 fn r#in(
225 &mut self,
226 reference: &BoundReference,
227 literals: &FnvHashSet<Datum>,
228 _predicate: &BoundPredicate,
229 ) -> Result<bool> {
230 match reference.accessor().get(self.partition)? {
231 Some(datum) => Ok(literals.contains(&datum)),
232 None => Ok(false),
233 }
234 }
235
236 fn not_in(
237 &mut self,
238 reference: &BoundReference,
239 literals: &FnvHashSet<Datum>,
240 _predicate: &BoundPredicate,
241 ) -> Result<bool> {
242 match reference.accessor().get(self.partition)? {
243 Some(datum) => Ok(!literals.contains(&datum)),
244 None => Ok(true),
245 }
246 }
247}
248
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::Arc;

    use fnv::FnvHashSet;
    use predicate::SetExpression;

    use super::ExpressionEvaluator;
    use crate::Result;
    use crate::expr::visitors::inclusive_projection::InclusiveProjection;
    use crate::expr::{
        BinaryExpression, Bind, BoundPredicate, Predicate, PredicateOperator, Reference,
        UnaryExpression, predicate,
    };
    use crate::spec::{
        DataContentType, DataFile, DataFileFormat, Datum, Literal, NestedField, PartitionSpec,
        PartitionSpecRef, PrimitiveType, Schema, SchemaRef, Struct, Transform, Type,
        UnboundPartitionField,
    };

    /// Builds a schema with a single optional column `a` of the given primitive
    /// type, plus a partition spec that identity-partitions on that column.
    fn create_partition_spec(r#type: PrimitiveType) -> Result<(PartitionSpecRef, SchemaRef)> {
        let schema = Schema::builder()
            .with_fields(vec![Arc::new(NestedField::optional(
                1,
                "a",
                Type::Primitive(r#type),
            ))])
            .build()?;

        let spec = PartitionSpec::builder(schema.clone())
            .with_spec_id(1)
            .add_unbound_field(
                UnboundPartitionField::builder()
                    .source_id(1)
                    .name("a".to_string())
                    .field_id(1)
                    .transform(Transform::Identity)
                    .build(),
            )
            .unwrap()
            .build()
            .unwrap();

        Ok((Arc::new(spec), schema.into()))
    }

    /// Projects a row-level bound predicate onto the partition spec, rewrites any
    /// `Not` nodes and binds the result against the partition schema.
    fn create_partition_filter(
        partition_spec: PartitionSpecRef,
        schema: &Schema,
        predicate: &BoundPredicate,
        case_sensitive: bool,
    ) -> Result<BoundPredicate> {
        let partition_type = partition_spec.partition_type(schema).unwrap();
        let partition_fields = partition_type.fields().to_owned();

        let partition_schema = Schema::builder()
            .with_schema_id(partition_spec.spec_id())
            .with_fields(partition_fields)
            .build()?;

        let mut inclusive_projection = InclusiveProjection::new((*partition_spec).clone().into());

        let partition_filter = inclusive_projection
            .project(predicate)?
            .rewrite_not()
            .bind(Arc::new(partition_schema), case_sensitive)?;

        Ok(partition_filter)
    }

    /// Builds an `ExpressionEvaluator` over the partition filter derived from `predicate`.
    fn create_expression_evaluator(
        partition_spec: PartitionSpecRef,
        schema: &Schema,
        predicate: &BoundPredicate,
        case_sensitive: bool,
    ) -> Result<ExpressionEvaluator> {
        let partition_filter =
            create_partition_filter(partition_spec, schema, predicate, case_sensitive)?;

        Ok(ExpressionEvaluator::new(partition_filter))
    }

    /// Builds a minimal Parquet data file whose only varying content is `partition`.
    ///
    /// Keeping the `DataFile` literal in one place means a new `DataFile` field
    /// only has to be added here rather than in every fixture.
    fn create_data_file(partition: Struct) -> DataFile {
        DataFile {
            content: DataContentType::Data,
            file_path: "/test/path".to_string(),
            file_format: DataFileFormat::Parquet,
            partition,
            record_count: 1,
            file_size_in_bytes: 1,
            column_sizes: HashMap::new(),
            value_counts: HashMap::new(),
            null_value_counts: HashMap::new(),
            nan_value_counts: HashMap::new(),
            lower_bounds: HashMap::new(),
            upper_bounds: HashMap::new(),
            key_metadata: None,
            split_offsets: None,
            equality_ids: None,
            sort_order_id: None,
            partition_spec_id: 0,
            first_row_id: None,
            referenced_data_file: None,
            content_offset: None,
            content_size_in_bytes: None,
        }
    }

    /// Data file whose single partition value is the float `1.0`.
    fn create_data_file_float() -> DataFile {
        create_data_file(Struct::from_iter([Some(Literal::float(1.0))]))
    }

    /// Data file whose single partition value is the string `"test str"`.
    fn create_data_file_string() -> DataFile {
        create_data_file(Struct::from_iter([Some(Literal::string("test str"))]))
    }

    /// Runs the full pipeline shared by every test: build the schema and spec for
    /// a column of type `r#type`, bind `predicate` (case-sensitively), derive the
    /// partition filter and evaluate it against `data_file`.
    fn evaluate(r#type: PrimitiveType, predicate: Predicate, data_file: &DataFile) -> Result<bool> {
        let case_sensitive = true;
        let (partition_spec, schema) = create_partition_spec(r#type)?;
        let bound_predicate = predicate.bind(schema.clone(), case_sensitive)?;

        let expression_evaluator =
            create_expression_evaluator(partition_spec, &schema, &bound_predicate, case_sensitive)?;

        expression_evaluator.eval(data_file)
    }

    #[test]
    fn test_expr_or() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::LessThan,
            Reference::new("a"),
            Datum::float(1.0),
        ))
        .or(Predicate::Binary(BinaryExpression::new(
            PredicateOperator::GreaterThanOrEq,
            Reference::new("a"),
            Datum::float(0.4),
        )));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_and() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::LessThan,
            Reference::new("a"),
            Datum::float(1.1),
        ))
        .and(Predicate::Binary(BinaryExpression::new(
            PredicateOperator::GreaterThanOrEq,
            Reference::new("a"),
            Datum::float(0.4),
        )));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_not_in() -> Result<()> {
        let predicate = Predicate::Set(SetExpression::new(
            PredicateOperator::NotIn,
            Reference::new("a"),
            FnvHashSet::from_iter([Datum::float(0.9), Datum::float(1.2), Datum::float(2.4)]),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_in() -> Result<()> {
        let predicate = Predicate::Set(SetExpression::new(
            PredicateOperator::In,
            Reference::new("a"),
            FnvHashSet::from_iter([Datum::float(1.0), Datum::float(1.2), Datum::float(2.4)]),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_not_starts_with() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::NotStartsWith,
            Reference::new("a"),
            Datum::string("not"),
        ));

        let result = evaluate(PrimitiveType::String, predicate, &create_data_file_string())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_starts_with() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::StartsWith,
            Reference::new("a"),
            Datum::string("test"),
        ));

        let result = evaluate(PrimitiveType::String, predicate, &create_data_file_string())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_not_eq() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::NotEq,
            Reference::new("a"),
            Datum::float(0.9),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_eq() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::Eq,
            Reference::new("a"),
            Datum::float(1.0),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_greater_than_or_eq() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::GreaterThanOrEq,
            Reference::new("a"),
            Datum::float(1.0),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_greater_than() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::GreaterThan,
            Reference::new("a"),
            Datum::float(0.9),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_less_than_or_eq() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::LessThanOrEq,
            Reference::new("a"),
            Datum::float(1.0),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_less_than() -> Result<()> {
        let predicate = Predicate::Binary(BinaryExpression::new(
            PredicateOperator::LessThan,
            Reference::new("a"),
            Datum::float(1.1),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_is_not_nan() -> Result<()> {
        let predicate = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::NotNan,
            Reference::new("a"),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_is_nan() -> Result<()> {
        let predicate = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::IsNan,
            Reference::new("a"),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(!result);

        Ok(())
    }

    #[test]
    fn test_expr_is_not_null() -> Result<()> {
        let predicate = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::NotNull,
            Reference::new("a"),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }

    #[test]
    fn test_expr_is_null() -> Result<()> {
        let predicate = Predicate::Unary(UnaryExpression::new(
            PredicateOperator::IsNull,
            Reference::new("a"),
        ));

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(!result);

        Ok(())
    }

    #[test]
    fn test_expr_always_false() -> Result<()> {
        let predicate = Predicate::AlwaysFalse;

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(!result);

        Ok(())
    }

    #[test]
    fn test_expr_always_true() -> Result<()> {
        let predicate = Predicate::AlwaysTrue;

        let result = evaluate(PrimitiveType::Float, predicate, &create_data_file_float())?;

        assert!(result);

        Ok(())
    }
}
809}