Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: don't panic in IPC reader if struct child arrays have different lengths #6417

Merged
merged 3 commits into from
Sep 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ fn create_array(
// still work
for struct_field in struct_fields {
let child = create_array(reader, struct_field, variadic_counts, require_alignment)?;
struct_arrays.push((struct_field.clone(), child));
struct_arrays.push(child);
}
let null_count = struct_node.null_count() as usize;
let struct_array = if struct_arrays.is_empty() {
Expand All @@ -162,9 +162,11 @@ fn create_array(
)
} else if null_count > 0 {
// create struct array from fields, arrays and null data
StructArray::from((struct_arrays, null_buffer))
let len = struct_node.length() as usize;
let nulls = BooleanBuffer::new(null_buffer, 0, len).into();
StructArray::try_new(struct_fields.clone(), struct_arrays, Some(nulls))?
} else {
StructArray::from(struct_arrays)
StructArray::try_new(struct_fields.clone(), struct_arrays, None)?
};
Ok(Arc::new(struct_array))
}
Expand Down Expand Up @@ -2235,4 +2237,50 @@ mod tests {

assert_eq!(batch, roundtrip_batch);
}

#[test]
fn test_invalid_struct_array_ipc_read_errors() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I verified that this test panics without the code changes in this PR, as expected:

called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Incorrect array length for StructArray field \"b\", expected 4 got 3")
stack backtrace:
   0: rust_begin_unwind
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/std/src/panicking.rs:665:5
   1: core::panicking::panic_fmt
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/panicking.rs:74:14
   2: core::result::unwrap_failed
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/result.rs:1679:5
   3: core::result::Result<T,E>::unwrap
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/result.rs:1102:23
   4: arrow_array::array::struct_array::StructArray::new
             at /Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/struct_array.rs:90:9
   5: <arrow_array::array::struct_array::StructArray as core::convert::From<alloc::vec::Vec<(alloc::sync::Arc<arrow_schema::field::Field>,alloc::sync::Arc<dyn arrow_array::array::Array>)>>>::from
             at /Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/struct_array.rs:401:9
   6: arrow_ipc::reader::create_array
             at ./src/reader.rs:167:17
   7: arrow_ipc::reader::read_record_batch_impl

let a_field = Field::new("a", DataType::Int32, false);
let b_field = Field::new("b", DataType::Int32, false);

let schema = Arc::new(Schema::new(vec![Field::new_struct(
"s",
vec![a_field.clone(), b_field.clone()],
false,
)]));

let a_array_data = ArrayData::builder(a_field.data_type().clone())
.len(4)
.add_buffer(Buffer::from_slice_ref([1, 2, 3, 4]))
.build()
.unwrap();
let b_array_data = ArrayData::builder(b_field.data_type().clone())
.len(3)
.add_buffer(Buffer::from_slice_ref([5, 6, 7]))
.build()
.unwrap();

let struct_data_type = schema.field(0).data_type();

let invalid_struct_arr = unsafe {
make_array(
ArrayData::builder(struct_data_type.clone())
.len(4)
.add_child_data(a_array_data)
.add_child_data(b_array_data)
.build_unchecked(),
)
};

let batch = RecordBatch::try_new(schema.clone(), vec![invalid_struct_arr]).unwrap();

let mut buf = Vec::new();
let mut writer = crate::writer::FileWriter::try_new(&mut buf, schema.as_ref()).unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();

let mut reader = FileReader::try_new(std::io::Cursor::new(buf), None).unwrap();
let err = reader.next().unwrap().unwrap_err();
assert!(matches!(err, ArrowError::InvalidArgumentError(_)));
}
}
Loading