diff --git a/src/ffi/array.rs b/src/ffi/array.rs index b1c77d7366..ef32139e4d 100644 --- a/src/ffi/array.rs +++ b/src/ffi/array.rs @@ -209,7 +209,7 @@ unsafe fn get_buffer_ptr( let ptr = *buffers.add(index); if ptr.is_null() { return Err(Error::oos(format!( - "An array of type {data_type:?} + "An array of type {data_type:?} with a non-zero length must have a non-null buffer {index}" ))); } @@ -235,9 +235,14 @@ unsafe fn create_buffer( owner: InternalArrowArray, index: usize, ) -> Result> { + let len = buffer_len(array, data_type, index)?; + + if len == 0 { + return Ok(Buffer::new()); + } + let ptr = get_buffer_ptr(array, data_type, index)?; - let len = buffer_len(array, data_type, index)?; let offset = buffer_offset(array, data_type, index); let bytes = Bytes::from_foreign(ptr, len, BytesAllocator::InternalArrowArray(owner)); @@ -258,9 +263,12 @@ unsafe fn create_bitmap( // we can use the null count directly is_validity: bool, ) -> Result { + let len: usize = array.length.try_into().expect("length to fit in `usize`"); + if len == 0 { + return Ok(Bitmap::new()); + } let ptr = get_buffer_ptr(array, data_type, index)?; - let len: usize = array.length.try_into().expect("length to fit in `usize`"); let offset: usize = array.offset.try_into().expect("offset to fit in `usize`"); let bytes_len = bytes_for(offset + len); let bytes = Bytes::from_foreign(ptr, bytes_len, BytesAllocator::InternalArrowArray(owner)); diff --git a/src/io/ipc/mod.rs b/src/io/ipc/mod.rs index e618a92687..32cf2a9f55 100644 --- a/src/io/ipc/mod.rs +++ b/src/io/ipc/mod.rs @@ -80,7 +80,8 @@ pub mod append; pub mod read; pub mod write; -const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; +const ARROW_MAGIC_V1: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; +const ARROW_MAGIC_V2: [u8; 4] = [b'F', b'E', b'A', b'1']; pub(crate) const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; /// Struct containing `dictionary_id` and nested `IpcField`, allowing users diff --git a/src/io/ipc/read/file.rs b/src/io/ipc/read/file.rs index 341cdfeba6..7267520b64 100644 --- a/src/io/ipc/read/file.rs +++ b/src/io/ipc/read/file.rs @@ -8,7 +8,7 @@ use crate::datatypes::Schema; use crate::error::{Error, Result}; use crate::io::ipc::IpcSchema; -use super::super::{ARROW_MAGIC, CONTINUATION_MARKER}; +use super::super::{ARROW_MAGIC_V1, ARROW_MAGIC_V2, CONTINUATION_MARKER}; use super::common::*; use super::schema::fb_to_schema; use super::Dictionaries; @@ -151,7 +151,7 @@ fn read_footer_len(reader: &mut R) -> Result<(u64, usize)> { reader.read_exact(&mut footer)?; let footer_len = i32::from_le_bytes(footer[..4].try_into().unwrap()); - if footer[4..] != ARROW_MAGIC { + if footer[4..] != ARROW_MAGIC_V1 { return Err(Error::from(OutOfSpecKind::InvalidFooter)); } let footer_len = footer_len @@ -215,7 +215,10 @@ pub fn read_file_metadata(reader: &mut R) -> Result(reader: &mut R) -> Re reader.read_exact(&mut footer).await?; let footer_len = i32::from_le_bytes(footer[..4].try_into().unwrap()); - if footer[4..] != ARROW_MAGIC { + if footer[4..] != ARROW_MAGIC_V1 { return Err(Error::from(OutOfSpecKind::InvalidFooter)); } footer_len diff --git a/src/io/ipc/write/file_async.rs b/src/io/ipc/write/file_async.rs index 02dd5a4c7f..2cd03e34c7 100644 --- a/src/io/ipc/write/file_async.rs +++ b/src/io/ipc/write/file_async.rs @@ -11,7 +11,7 @@ use super::schema::serialize_schema; use super::{default_ipc_fields, schema_to_bytes, Record}; use crate::datatypes::*; use crate::error::{Error, Result}; -use crate::io::ipc::{IpcField, ARROW_MAGIC}; +use crate::io::ipc::{IpcField, ARROW_MAGIC_V1}; type WriteOutput = (usize, Option, Vec, Option); @@ -105,7 +105,7 @@ where } async fn start(mut writer: W, encoded: EncodedData) -> Result> { - writer.write_all(&ARROW_MAGIC[..]).await?; + writer.write_all(&ARROW_MAGIC_V1[..]).await?; writer.write_all(&[0, 0]).await?; let (meta, data) = write_message(&mut writer, encoded).await?; @@ -149,7 +149,7 @@ where writer .write_all(&(footer.len() as i32).to_le_bytes()) .await?; - writer.write_all(&ARROW_MAGIC).await?; + writer.write_all(&ARROW_MAGIC_V1).await?; writer.close().await?; Ok((0, None, vec![], None)) diff --git a/src/io/ipc/write/writer.rs b/src/io/ipc/write/writer.rs index 1637f8ea7c..3a61a57b90 100644 --- a/src/io/ipc/write/writer.rs +++ b/src/io/ipc/write/writer.rs @@ -4,7 +4,7 @@ use arrow_format::ipc::planus::Builder; use super::{ super::IpcField, - super::ARROW_MAGIC, + super::ARROW_MAGIC_V1, common::{DictionaryTracker, EncodedData, WriteOptions}, common_sync::{write_continuation, write_message}, default_ipc_fields, schema, schema_to_bytes, @@ -114,7 +114,7 @@ impl FileWriter { return Err(Error::oos("The IPC file can only be started once")); } // write magic to header - self.writer.write_all(&ARROW_MAGIC[..])?; + self.writer.write_all(&ARROW_MAGIC_V1[..])?; // create an 8-byte boundary after the header self.writer.write_all(&[0, 0])?; // write the schema, set the written bytes to the schema @@ -205,7 +205,7 @@ impl FileWriter { self.writer.write_all(footer_data)?; self.writer .write_all(&(footer_data.len() as i32).to_le_bytes())?; - self.writer.write_all(&ARROW_MAGIC)?; + self.writer.write_all(&ARROW_MAGIC_V1)?; self.writer.flush()?; self.state = State::Finished;