// iceberg/test_utils.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Test utilities.
19//! This module is pub just for internal testing.
20//! It is subject to change and is not intended to be used by external users.
21
22use arrow_array::RecordBatch;
23use expect_test::Expect;
24use itertools::Itertools;
25
26/// Snapshot testing to check the resulting record batch.
27///
28/// - `expected_schema/data`: put `expect![[""]]` as a placeholder,
29/// and then run test with `UPDATE_EXPECT=1 cargo test` to automatically update the result,
30/// or use rust-analyzer (see [video](https://github.com/rust-analyzer/expect-test)).
31/// Check the doc of [`expect_test`] for more details.
32/// - `ignore_check_columns`: Some columns are not stable, so we can skip them.
33/// - `sort_column`: The order of the data might be non-deterministic, so we can sort it by a column.
34pub fn check_record_batches(
35 record_batches: Vec<RecordBatch>,
36 expected_schema: Expect,
37 expected_data: Expect,
38 ignore_check_columns: &[&str],
39 sort_column: Option<&str>,
40) {
41 assert!(!record_batches.is_empty(), "Empty record batches");
42
43 // Combine record batches using the first batch's schema
44 let first_batch = record_batches.first().unwrap();
45 let record_batch =
46 arrow_select::concat::concat_batches(&first_batch.schema(), &record_batches).unwrap();
47
48 let mut columns = record_batch.columns().to_vec();
49 if let Some(sort_column) = sort_column {
50 let column = record_batch.column_by_name(sort_column).unwrap();
51 let indices = arrow_ord::sort::sort_to_indices(column, None, None).unwrap();
52 columns = columns
53 .iter()
54 .map(|column| arrow_select::take::take(column.as_ref(), &indices, None).unwrap())
55 .collect_vec();
56 }
57
58 expected_schema.assert_eq(&format!(
59 "{}",
60 record_batch.schema().fields().iter().format(",\n")
61 ));
62 expected_data.assert_eq(&format!(
63 "{}",
64 record_batch
65 .schema()
66 .fields()
67 .iter()
68 .zip_eq(columns)
69 .map(|(field, column)| {
70 if ignore_check_columns.contains(&field.name().as_str()) {
71 format!("{}: (skipped)", field.name())
72 } else {
73 format!("{}: {:?}", field.name(), column)
74 }
75 })
76 .format(",\n")
77 ));
78}