diff --git a/core/src/value/mod.rs b/core/src/value/mod.rs index 9eec4e498..49667346b 100644 --- a/core/src/value/mod.rs +++ b/core/src/value/mod.rs @@ -46,8 +46,8 @@ pub trait DicomValueType: HasLength { /// /// `I` is the complex type for nest data set items, which should usually /// implement [`HasLength`]. -/// `P` is the encapsulated pixel data provider, which should usually -/// implement `AsRef<[u8]>`. +/// `P` is the encapsulated pixel data fragment type, +/// which should usually implement `AsRef<[u8]>`. /// /// [`HasLength`]: ../header/trait.HasLength.html #[derive(Debug, Clone, PartialEq)] diff --git a/object/src/lazy/element.rs b/object/src/lazy/element.rs new file mode 100644 index 000000000..8b60e7692 --- /dev/null +++ b/object/src/lazy/element.rs @@ -0,0 +1,527 @@ +use std::collections::BTreeMap; + +use super::{PositionToValueSnafu, ReadValueSnafu, ReadFragmentSnafu, UnloadedFragmentSnafu}; +use dicom_core::{DataDictionary, DataElementHeader, DicomValue, Length, Tag, header::HasLength}; +use dicom_dictionary_std::StandardDataDictionary; +use dicom_parser::StatefulDecode; +use snafu::{OptionExt, ResultExt}; +use smallvec::SmallVec; + +use crate::{ + mem::{InMemElement, InMemFragment}, + util::ReadSeek, + InMemDicomObject, +}; + +type Result = std::result::Result; + +/// A DICOM element, which may be loaded in memory or not. +/// +/// This type alone does not have the means to load the element's value. +/// A byte source must be provided whenever a load is attempted. +#[derive(Debug, Clone)] +pub struct MaybeElement { + header: DataElementHeader, + position: u64, + value: MaybeValue, +} + +impl MaybeElement +where + D: DataDictionary, + D: Clone, +{ + /// Create a new lazy element with the given properties, + /// without loading its value in memory. 
+ pub fn new_unloaded(header: DataElementHeader, position: u64) -> Self { + MaybeElement { + header, + position, + value: MaybeValue::Unloaded, + } + } + + /// Create a new lazy element with the given properties, + /// already loaded with an in-memory value. + pub fn new_loaded(header: DataElementHeader, position: u64, value: LoadedValue) -> Self { + MaybeElement { + header, + position, + value: MaybeValue::Loaded { + value, + dirty: false, + }, + } + } + + /// Ensure that the value is loaded in memory, + /// fetching it from the given source if necessary. + /// + /// The operation is a no-op if the value is already loaded. + pub fn load(&mut self, source: &mut S) -> Result<()> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + match &mut self.value { + MaybeValue::Loaded { .. } => Ok(()), + MaybeValue::PixelSequence { fragments, .. } => { + // load each fragment individually + for fragment in fragments { + if fragment.data.is_some() { + continue; + } + source.seek(fragment.position).context(PositionToValueSnafu)?; + let mut data = Vec::with_capacity(fragment.length as usize); + source.read_to_vec(fragment.length, &mut data).context(ReadFragmentSnafu)?; + fragment.data = Some(data); + } + Ok(()) + }, + MaybeValue::Unloaded => { + source.seek(self.position).context(PositionToValueSnafu)?; + let value = source + .read_value_preserved(&self.header) + .context(ReadValueSnafu)?; + self.value = MaybeValue::Loaded { + value: DicomValue::from(value), + dirty: false, + }; + Ok(()) + } + } + } + + /// Convert the lazy element into an in-memory element, + /// loading it from the given source if necessary. + pub fn into_mem(mut self, source: &mut S) -> Result> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + self.load(source)?; + + let value = self.value.into_mem(source)?; + + Ok(InMemElement::new(self.header.tag, self.header.vr, value)) + } +} + +/// A DICOM value which may be loaded in memory or not. 
+/// +/// Loading the value can only be done through the respective [`LazyElement`]. +/// +#[derive(Debug, Clone)] +pub enum MaybeValue { + /// A DICOM value that is at least partially loaded in memory. + /// + /// + /// Its nested DICOM data sets or fragments might not be all loaded + /// in the case of sequences. + Loaded { + /// the value proper + value: LoadedValue, + dirty: bool, + }, + /// a DICOM value that is a pixel sequence, + /// where each fragment can be loaded independently + PixelSequence { + /// the offset table for each pixel data frame + offset_table: SmallVec<[u32; 2]>, + /// the sequence of fragments + fragments: SmallVec<[MaybeFragment; 2]>, + }, + /// a DICOM value which is not loaded, + /// and so is unreachable from here + Unloaded, +} + +impl MaybeValue +where + D: DataDictionary, + D: Clone, +{ + /// Return a reference to the loaded value, + /// or `None` if the value is not loaded. + pub fn value(&self) -> Option<&LoadedValue> { + match self { + MaybeValue::Loaded { value, .. } => Some(value), + MaybeValue::PixelSequence { fragments, .. } => todo!("retrieving pixel sequences"), + MaybeValue::Unloaded => None, + } + } + + /// Check whether the element is loaded at this level. + /// + /// **Note:** + /// this method does not check + /// whether nested data sets or any pixel data fragments + /// are fully loaded. + pub fn is_loaded(&self) -> bool { + match self { + MaybeValue::Loaded { .. } => true, + MaybeValue::PixelSequence { .. } => true, + MaybeValue::Unloaded => false, + } + } + + /// **Pre-condition:** the value must be fully loaded. + fn into_mem(self, source: &mut S) -> Result, InMemFragment>> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + match self { + MaybeValue::Loaded { value, .. 
} => { + match value { + DicomValue::Primitive(primitive) => { + // accept primitive value as is + Ok(DicomValue::from(primitive)) + } + DicomValue::PixelSequence { + offset_table, + fragments, + } => { + // every fragment must already hold its data: + // a fragment without data makes the conversion fail with `UnloadedFragment` + let fragments: Result> = fragments.into_iter() + .enumerate() + .map(|(i, f)| f.data.context(UnloadedFragmentSnafu { index: i as u32 })) + .collect(); + // accept pixel sequence as is + Ok(DicomValue::PixelSequence { + offset_table, + fragments: fragments?, + }) + } + DicomValue::Sequence { items, size } => { + // recursively turn each item into memory + let items: Result<_> = items + .into_iter() + .map(|item| item.into_mem(source)) + .collect(); + let items = items?; + Ok(DicomValue::Sequence { items, size }) + } + } + } + // NOTE(review): `MaybeValue::PixelSequence` also ends up in this arm; + // `MaybeElement::load` fills its fragments in place and keeps the variant, + // so `MaybeElement::into_mem` panics for such elements -- confirm this is intended + _ => panic!("Value should be loaded"), + } + } +} + +/// A fragment of a pixel sequence, +/// which may be loaded in memory or not. +#[derive(Debug, Clone)] +pub struct MaybeFragment { + /// The offset of the fragment data relative to the original source + position: u64, + /// The number of data bytes in this fragment + length: u32, + /// The actual data proper, + /// which might not be loaded. + data: Option>, +} + +/// Type definition for a value which has been loaded into memory, +/// at least partially, +/// at one level. +/// +/// If it is a primitive value, +/// then it is sure to be all in memory. +/// In the case of a sequence, +/// the nested objects may or may not be loaded. +/// In the case of a pixel sequence, +/// each fragment may be loaded in memory or not. +pub type LoadedValue = DicomValue, MaybeFragment>; + +/// A DICOM object nested in a lazy DICOM object. +/// +/// The type parameter `S` represents the borrowed stateful reader, +/// implementing `StatefulDecode`. +/// `D` is for the element dictionary. +#[derive(Debug, Clone)] +pub struct LazyNestedObject { + /// the element dictionary + entries: BTreeMap>, + /// the data attribute dictionary + dict: D, + /// The length of the DICOM object in bytes. 
+ /// It is usually undefined, unless it is part of an item + /// in a sequence with a specified length in its item header. + len: Length, +} + +impl HasLength for LazyNestedObject { + fn length(&self) -> Length { + self.len + } +} + +impl LazyNestedObject +where + D: DataDictionary, + D: Clone, +{ + /// Load each element in the object. + pub fn load_all(&mut self, source: &mut S) -> Result<()> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + for elem in &mut self.entries.values_mut() { + elem.load(&mut *source)?; + } + Ok(()) + } + + /// Load each element in the object and turn it into an in-memory object. + pub fn into_mem(mut self, source: &mut S) -> Result> + where + S: StatefulDecode, + ::Reader: ReadSeek, + D: DataDictionary, + D: Clone, + { + self.load_all(&mut *source)?; + + let entries: Result<_> = self.entries.into_values() + .map(|elem| elem.into_mem(&mut *source).map(|elem| (elem.header().tag, elem))) + .collect(); + + Ok(InMemDicomObject::from_parts(entries?, self.dict, self.len)) + } +} + +#[cfg(test)] +mod tests { + use byteordered::Endianness; + use dicom_core::DataElementHeader; + use dicom_core::Length; + use dicom_core::Tag; + use dicom_core::VR; + use dicom_core::dicom_value; + use dicom_dictionary_std::StandardDataDictionary; + use dicom_encoding::decode::basic::BasicDecoder; + use dicom_encoding::decode::explicit_le::ExplicitVRLittleEndianDecoder; + use dicom_encoding::decode::implicit_le::ImplicitVRLittleEndianDecoder; + use dicom_encoding::text::SpecificCharacterSet; + use dicom_parser::StatefulDecode; + use dicom_parser::StatefulDecoder; + + use crate::mem::InMemElement; + use crate::InMemDicomObject; + + use super::MaybeElement; + use super::LazyNestedObject; + use super::MaybeValue; + + #[test] + fn lazy_element_single() { + let data_in = [ + 0x10, 0x00, 0x10, 0x00, // Tag(0x0010, 0x0010) + 0x08, 0x00, 0x00, 0x00, // Length: 8 + b'D', b'o', b'e', b'^', b'J', b'o', b'h', b'n', + ]; + + // Create a stateful reader for the data + let 
decoder = ImplicitVRLittleEndianDecoder::default(); + let text = SpecificCharacterSet::Default; + let mut cursor = std::io::Cursor::new(data_in); + let mut parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + // Create an unloaded lazy element (actual value starts at 8) + let mut lazy_element: MaybeElement = MaybeElement { + header: DataElementHeader::new(Tag(0x0010, 0x0010), VR::PN, Length(8)), + position: 8, + value: MaybeValue::Unloaded, + }; + + // Load the lazy element + lazy_element + .load(&mut parser) + .expect("Failed to load lazy element"); + match lazy_element.value { + MaybeValue::Unloaded => panic!("element should be loaded"), + MaybeValue::PixelSequence { .. } => unreachable!("element is not a pixel sequence"), + MaybeValue::Loaded { value, dirty } => { + assert_eq!(value.to_str().unwrap(), "Doe^John"); + assert_eq!(dirty, false); + } + } + } + + #[test] + fn lazy_element_somewhere_in_middle() { + let data_in = [ + // 30 bytes of irrelevant data + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30 + // actual element is here + 0x10, 0x00, 0x10, 0x00, // Tag(0x0010, 0x0010) + 0x08, 0x00, 0x00, 0x00, // Length: 8 + b'D', b'o', b'e', b'^', b'J', b'o', b'h', b'n', + // 10 more bytes of irrelevant data (@ 46) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 66 + ]; + + // Create a stateful reader for the data + let decoder = ImplicitVRLittleEndianDecoder::default(); + let text = SpecificCharacterSet::Default; + let mut cursor = std::io::Cursor::new(data_in); + let mut parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + // move cursor to the end (simulating a full file read) + parser.seek(66).expect("Failed to seek to end of file"); + + // Create an unloaded lazy element + let mut lazy_element: MaybeElement = MaybeElement { + header: DataElementHeader::new(Tag(0x0010, 0x0010), VR::PN, 
Length(8)), + position: 38, + value: MaybeValue::Unloaded, + }; + + // Load the lazy element + lazy_element + .load(&mut parser) + .expect("Failed to load lazy element"); + match lazy_element.value { + MaybeValue::Unloaded => panic!("element should be loaded"), + MaybeValue::PixelSequence { .. } => unreachable!("element is not a pixel sequence"), + MaybeValue::Loaded { value, dirty } => { + assert_eq!(value.to_str().unwrap(), "Doe^John"); + assert_eq!(dirty, false); + } + } + } + #[test] + fn lazy_nested_object() { + static DATA_IN: &[u8] = &[ + // SequenceStart: (0008,2218) ; len = 54 (#=3) + 0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + // -- 12, -- + // ItemStart: len = 46 + 0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00, + // -- 20, -- + // ElementHeader: (0008,0100) CodeValue; len = 8 + 0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00, // PrimitiveValue + b'T', b'-', b'D', b'1', b'2', b'1', b'3', b' ', + // -- 36, -- + // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4 + 0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00, // PrimitiveValue + b'S', b'R', b'T', b' ', + // -- 48, -- + // (0008,0104) CodeMeaning; len = 10 + 0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00, // PrimitiveValue + b'J', b'a', b'w', b' ', b'r', b'e', b'g', b'i', b'o', b'n', + // -- 66 -- + // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0 + 0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // ElementHeader: (2050,0020) PresentationLUTShape; len = 8 + 0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00, // PrimitiveValue + b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y', + ]; + + // Create a stateful reader for the data + let decoder = ExplicitVRLittleEndianDecoder::default(); + let text = SpecificCharacterSet::Default; + let mut cursor = std::io::Cursor::new(DATA_IN); + let mut parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + // move cursor to the end 
(simulating a full file read) + parser.seek(94).expect("Failed to seek to end of file"); + + // construct accurate nested object, unloaded + let mut nested_object: LazyNestedObject = LazyNestedObject { + entries: vec![ + // CodeValue element + ( + Tag(0x0008, 0x0100), + MaybeElement::new_unloaded( + DataElementHeader::new(Tag(0x0008, 0x0100), VR::SH, Length(8)), + 28, + ), + ), + // CodingSchemeDesignator element + ( + Tag(0x0008, 0x0102), + MaybeElement::new_unloaded( + DataElementHeader::new(Tag(0x0008, 0x0102), VR::SH, Length(4)), + 44, + ), + ), + // CodeMeaning element + ( + Tag(0x0008, 0x0104), + MaybeElement::new_unloaded( + DataElementHeader::new(Tag(0x0008, 0x0104), VR::LO, Length(10)), + 56, + ), + ), + ] + .into_iter() + .collect(), + dict: Default::default(), + len: Length(46), + }; + + // load nested object + nested_object + .load_all(&mut parser) + .expect("Failed to load nested object"); + + for e in nested_object.entries.values() { + assert!(e.value.is_loaded()); + } + + // turn it into an in-memory DICOM object, + // test with ground truth + let inmem = nested_object + .into_mem(&mut parser) + .expect("Failed to load all object into memory"); + + let gt: InMemDicomObject = InMemDicomObject::from_element_iter(vec![ + InMemElement::new( + Tag(0x0008, 0x0100), + VR::SH, + dicom_value!(Strs, ["T-D1213 "]), + ), + InMemElement::new(Tag(0x0008, 0x0102), VR::SH, dicom_value!(Strs, ["SRT "])), + InMemElement::new( + Tag(0x0008, 0x0104), + VR::LO, + dicom_value!(Strs, ["Jaw region"]), + ), + ]); + + assert_eq_elements(&inmem, >); + } + + /// Assert that two objects are equal + /// by traversing their elements in sequence + /// and checking that those are equal. 
+ fn assert_eq_elements(obj1: &InMemDicomObject, obj2: &InMemDicomObject) + where + D: std::fmt::Debug, + { + // iterate through all elements in both objects + // and check that they are equal + for (e1, e2) in std::iter::Iterator::zip(obj1.into_iter(), obj2) { + assert_eq!(e1, e2); + } + } +} diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs new file mode 100644 index 000000000..25c1926a4 --- /dev/null +++ b/object/src/lazy/mod.rs @@ -0,0 +1,357 @@ +//! This module contains the implementation for a lazily evaluated DICOM object. +//! +//! In a lazy DICOM object, larger DICOM elements +//! may be skipped during the decoding process, +//! and thus not be immediately available in memory. +//! A pointer to the original data source is kept for future access, +//! so that the element is fetched and its value is decoded on demand. + +use dicom_dictionary_std::StandardDataDictionary; +use dicom_transfer_syntax_registry::TransferSyntaxRegistry; +use std::collections::BTreeMap; +use std::fs::File; +use std::io::Read; +use std::path::Path; + +use crate::lazy::record::DataSetTableBuilder; +use crate::{meta::FileMetaTable, util::ReadSeek, FileDicomObject}; +use dicom_core::dictionary::DataDictionary; +use dicom_core::header::HasLength; +use dicom_core::{Length, Tag}; +use dicom_encoding::text::SpecificCharacterSet; +use dicom_encoding::transfer_syntax::TransferSyntaxIndex; +use dicom_parser::DynStatefulDecoder; +use dicom_parser::{ + dataset::lazy_read::LazyDataSetReader, stateful::decode::Error as StatefulDecodeError, +}; +use dicom_parser::{dataset::read::Error as ParserError, StatefulDecode}; +use snafu::{Backtrace, ResultExt, Snafu}; + +use self::element::LoadedValue; +pub use self::element::{LazyNestedObject, MaybeElement, MaybeValue}; +use self::record::{DataSetTable, RecordBuildingDataSetReader}; + +pub(crate) mod element; +pub mod record; + +/// The type of a pixel data fragment. 
+pub type InMemFragment = Vec; + +type ParserResult = std::result::Result; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Could not open file '{}'", filename.display()))] + OpenFile { + filename: std::path::PathBuf, + backtrace: Backtrace, + source: std::io::Error, + }, + #[snafu(display("Could not read from file '{}'", filename.display()))] + ReadFile { + filename: std::path::PathBuf, + backtrace: Backtrace, + source: std::io::Error, + }, + /// Could not parse meta group data set + ParseMetaDataSet { + #[snafu(backtrace)] + source: crate::meta::Error, + }, + /// Could not create data set parser + CreateParser { + #[snafu(backtrace)] + source: dicom_parser::dataset::lazy_read::Error, + }, + /// Could not read data set token + ReadToken { + #[snafu(backtrace)] + source: dicom_parser::dataset::lazy_read::Error, + }, + #[snafu(display("Could not write to file '{}'", filename.display()))] + WriteFile { + filename: std::path::PathBuf, + backtrace: Backtrace, + source: std::io::Error, + }, + /// Could not write object preamble + WritePreamble { + backtrace: Backtrace, + source: std::io::Error, + }, + #[snafu(display("Unknown data attribute named `{}`", name))] + NoSuchAttributeName { name: String, backtrace: Backtrace }, + #[snafu(display("Missing element value"))] + MissingElementValue { backtrace: Backtrace }, + #[snafu(display("Unsupported transfer syntax `{}`", uid))] + UnsupportedTransferSyntax { uid: String, backtrace: Backtrace }, + /// Could not position data source to value + PositionToValue { source: StatefulDecodeError }, + /// Could not read value from data source + ReadValue { source: StatefulDecodeError }, + /// Could not read fragment from data source + ReadFragment { source: StatefulDecodeError }, + /// Could not read pixel data offset table + ReadOffsetTable { source: StatefulDecodeError }, + #[snafu(display("Unexpected token {:?}", token))] + UnexpectedToken { + token: dicom_parser::dataset::LazyDataTokenRepr, + backtrace: Backtrace, + 
}, + #[snafu(display( + "Pixel data fragment #{} was expected to be loaded, but was not", + index + ))] + UnloadedFragment { index: u32, backtrace: Backtrace }, + /// Premature data set end + PrematureEnd { backtrace: Backtrace }, + #[snafu(display("No such data element with tag {}", tag))] + NoSuchDataElementTag { tag: Tag, backtrace: Backtrace }, +} + +pub type Result = std::result::Result; + +/// The options for opening a DICOM file +/// as a lazily evaluated object. +#[derive(Debug, Default, Clone, PartialEq)] +#[non_exhaustive] +pub struct OpenFileOptions { + /// the data dictionary to use + pub dictionary: D, + /// the transfer syntax registry to use + pub ts_index: T, +} + +/// A DICOM object which fetches elements from a data source on demand. +#[derive(Debug, Clone)] +pub struct LazyDicomObject { + /// the binary source to fetch DICOM data from + source: S, + /// the element dictionary at this level + entries: BTreeMap>, + /// the full record table + records: DataSetTable, + /// the data element dictionary + dict: D, + /// The length of the DICOM object in bytes. + /// It is usually undefined, unless it is part of an item + /// in a sequence with a specified length in its item header. + len: Length, +} + +pub type LazyFileDicomObject = FileDicomObject, D>>; + +/// A temporary reference to a DICOM element which fetches its value on demand. +#[derive(Debug)] +pub struct LazyElement<'a, S: 'a, D> { + source: &'a mut S, + elem: &'a mut MaybeElement, +} + +impl<'a, S, D> LazyElement<'a, S, D> +where + S: StatefulDecode, + ::Reader: ReadSeek, + D: Clone + DataDictionary, +{ + pub fn to_value(self) -> Result> { + self.elem.load(self.source)?; + + todo!() + } +} + +impl LazyFileDicomObject { + /// Load a new lazy DICOM object from a file + pub fn from_file

(path: P) -> Result + where + P: AsRef, + { + Self::from_file_with( + path, + OpenFileOptions::<_, TransferSyntaxRegistry>::default(), + ) + } +} + +impl LazyFileDicomObject { + /// Load a new lazy DICOM object from a file, + /// using the given options. + pub fn from_file_with(path: P, options: OpenFileOptions) -> Result + where + P: AsRef, + T: TransferSyntaxIndex, + D: DataDictionary, + D: Clone, + { + let OpenFileOptions { + dictionary, + ts_index, + } = options; + + let path = path.as_ref(); + let mut file = File::open(path).with_context(|_| OpenFileSnafu { filename: path })?; + + // skip preamble + { + let mut buf = [0u8; 128]; + // skip the preamble + file.read_exact(&mut buf) + .with_context(|_| ReadFileSnafu { filename: path })?; + } + + // read metadata header + let meta = FileMetaTable::from_reader(&mut file).context(ParseMetaDataSetSnafu)?; + + // read rest of data according to metadata, feed it to object + if let Some(ts) = ts_index.get(&meta.transfer_syntax) { + let cs = SpecificCharacterSet::Default; + let dataset = + LazyDataSetReader::new_with_ts_cs(file, ts, cs).context(CreateParserSnafu)?; + + let mut builder = DataSetTableBuilder::new(); + let mut entries = BTreeMap::new(); + + let mut dataset = RecordBuildingDataSetReader::new(dataset, &mut builder); + + LazyDicomObject::build_object( + &mut dataset, + &mut entries, + dictionary.clone(), + false, + Length::UNDEFINED, + )?; + + Ok(FileDicomObject { + meta, + obj: LazyDicomObject { + source: dataset.into_inner().into_decoder(), + entries, + records: builder.build(), + dict: dictionary, + len: Length::UNDEFINED, + }, + }) + } else { + UnsupportedTransferSyntaxSnafu { + uid: meta.transfer_syntax, + } + .fail() + } + } +} + +impl LazyDicomObject +where + S: StatefulDecode, + ::Reader: ReadSeek, +{ + pub fn read_dataset(reader: LazyDataSetReader) -> Result { + Self::read_dataset_with(reader, StandardDataDictionary) + } +} + +impl LazyDicomObject +where + S: StatefulDecode, + ::Reader: ReadSeek, + D: 
DataDictionary, +{ + pub fn read_dataset_with(reader: LazyDataSetReader, dict: D) -> Result { + todo!() + } + + pub fn element<'a>(&'a mut self, tag: Tag) -> Result> { + let source = &mut self.source; + self.entries + .get_mut(&tag) + .ok_or_else(|| NoSuchDataElementTagSnafu { tag }.build()) + .map(move |elem| LazyElement { source, elem }) + } + + pub fn element_mut<'a>(&'a mut self, tag: Tag) -> Result> { + let source = &mut self.source; + self.entries + .get_mut(&tag) + .ok_or_else(|| NoSuchDataElementTagSnafu { tag }.build()) + .map(move |elem| LazyElement { source, elem }) + } +} + +impl LazyDicomObject +where + S: StatefulDecode, + ::Reader: ReadSeek, + D: DataDictionary, +{ + /// Build an object by consuming a data set parser. + fn build_object( + dataset: &mut RecordBuildingDataSetReader, + entries: &mut BTreeMap>, + dict: D, + in_item: bool, + len: Length, + ) -> Result<()> { + todo!() + } +} + +impl HasLength for LazyDicomObject +where + S: StatefulDecode, + ::Reader: ReadSeek, + D: DataDictionary, +{ + // NOTE(review): always reports an undefined length and ignores the `len` field, + // whereas `LazyNestedObject::length` returns `self.len` -- confirm this is intended + fn length(&self) -> Length { + Length::UNDEFINED + } + + // the object is empty when it has no element entries at this level + fn is_empty(&self) -> bool { + self.entries.is_empty() + } +} + +#[cfg(test)] +mod tests { + + use std::io::Cursor; + + use super::*; + use byteordered::Endianness; + use dicom_core::{dicom_value, header::VR, DataElement, DicomValue}; + use dicom_encoding::decode::{basic::BasicDecoder, implicit_le::ImplicitVRLittleEndianDecoder}; + use dicom_parser::StatefulDecoder; + + #[test] + #[ignore] + fn inmem_object_read_dataset() { + let data_in = [ + 0x10, 0x00, 0x10, 0x00, // Tag(0x0010, 0x0010) + 0x08, 0x00, 0x00, 0x00, // Length: 8 + b'D', b'o', b'e', b'^', b'J', b'o', b'h', b'n', + ]; + + let decoder = ImplicitVRLittleEndianDecoder::default(); + let text = SpecificCharacterSet::Default; + let mut cursor = Cursor::new(&data_in[..]); + let parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + let dataset = LazyDataSetReader::new(parser); + + let 
mut obj: LazyDicomObject<_, _> = LazyDicomObject::read_dataset(dataset).unwrap(); + + let patient_name = DataElement::new( + Tag(0x0010, 0x0010), + VR::PN, + DicomValue::new(dicom_value!(Strs, ["Doe^John"])), + ); + + let lazy_patient_name = obj + .element(Tag(0x0010, 0x0010)) + .expect("Failed to retrieve element"); + } +} diff --git a/object/src/lazy/record.rs b/object/src/lazy/record.rs new file mode 100644 index 000000000..9a6afb965 --- /dev/null +++ b/object/src/lazy/record.rs @@ -0,0 +1,474 @@ +//! Data structures and algorithms for DICOM data set record tables. +//! +//! A complete table of element records +//! (with some meta-information and byte positions) +//! can be obtained from a parser +//! by creating a [`DataSetTableBuilder`] +//! and invoking [`update`] on each token. +//! +//! [`update`]: DataSetTableBuilder::update +//! + +use std::{collections::BTreeMap, iter::FromIterator}; + +use dicom_core::{value::C, DataElementHeader, Length, Tag}; +use dicom_parser::{ + dataset::{lazy_read::LazyDataSetReader, LazyDataToken}, + StatefulDecode, +}; + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct DataSetTable { + table: BTreeMap, +} + +impl FromIterator for DataSetTable { + fn from_iter>(iter: T) -> Self { + DataSetTable { + table: iter + .into_iter() + .map(|record| (record.tag(), record)) + .collect(), + } + } +} + +impl DataSetTable { + pub fn new() -> Self { + Self::default() + } + + pub fn by_tag(&self, tag: Tag) -> Option<&DataSetRecord> { + self.table.get(&tag) + } +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct DataSetTableBuilder { + records: Vec, + /// current amount of data set nesting. 
+ /// 0 means push new elements to `records`, + /// 1 or more means push them to last record at the given depth + depth: u32, + last_header: Option, +} + +impl DataSetTableBuilder { + /// Create an empty table builder. + pub fn new() -> Self { + Self::default() + } + + /// Update the builder with one token from the data set reader. + /// + /// Value tokens record their position, + /// sequence and pixel sequence starts add one level of nesting, + /// and sequence ends remove one. + pub fn update(&mut self, token: &LazyDataToken) + where + D: StatefulDecode, + { + match token { + LazyDataToken::ElementHeader(..) => { + // no-op + } + LazyDataToken::LazyValue { header, decoder } => { + // record element header and position into table + let records = self.records_at(self.depth); + records.push(DataSetRecordBuilder::Element { + header: *header, + position: decoder.position(), + }) + } + LazyDataToken::SequenceStart { tag, len } => { + // add depth, create empty sequence record + let records = self.records_at(self.depth); + records.push(DataSetRecordBuilder::Sequence { + tag: *tag, + length: *len, + items: vec![], + }); + self.depth += 1; + } + LazyDataToken::ItemStart { len } => { + // create new item at record + match self.last_record_at(self.depth) { + DataSetRecordBuilder::Sequence { items, .. } => { + items.push(Default::default()); + } + DataSetRecordBuilder::PixelSequence { fragment_positions } => { + // record position if length is 0 + // (because then we have no LazyItemValue + // and the position must be recorded anyway) + if *len == Length(0) { + // Note: because the position cannot be identified from here, + // we record `None` in its place, with the assumption + // that the zero length will be checked beforehand + // and that no read is actually attempted. 
+ fragment_positions.push(None); + } + } + _ => unreachable!("Unexpected record type"), + } + } + LazyDataToken::SequenceEnd => { + // remove depth + self.depth -= 1; + } + LazyDataToken::PixelSequenceStart => { + // create new empty pixel sequence record + let records = self.records_at(self.depth); + records.push(DataSetRecordBuilder::PixelSequence { + fragment_positions: Default::default(), + }); + self.depth += 1; + } + LazyDataToken::LazyItemValue { len: _, decoder } => { + // update pixel sequence record + match self.last_record_at(self.depth) { + DataSetRecordBuilder::PixelSequence { fragment_positions } => { + // record and push position + fragment_positions.push(Some(decoder.position())); + } + _ => unreachable!("Unexpected record type"), + } + } + LazyDataToken::ItemEnd => { + // no-op + } + _ => unreachable!("unsupported token variant"), + } + } + + pub fn build(self) -> DataSetTable { + DataSetTable::from_iter(self.records.into_iter().map(DataSetRecordBuilder::build)) + } + + fn records_at(&mut self, depth: u32) -> &mut Vec { + let mut records = &mut self.records; + + for i in 0..depth { + // go in self.depth times + if let Some(DataSetRecordBuilder::Sequence { items, .. }) = records.last_mut() { + if let Some(item) = items.last_mut() { + records = &mut item.records; + } else { + unreachable!("last record at depth {} does not have any items", i); + } + } else { + unreachable!("last record at depth {} is not a sequence", i); + } + } + records + } + + fn last_record_at(&mut self, depth: u32) -> &mut DataSetRecordBuilder { + let mut records = &mut self.records; + + for _ in 1..depth { + match records.last_mut().expect("missing record") { + DataSetRecordBuilder::Sequence { items, .. } => { + let item = items.last_mut().unwrap(); + records = &mut item.records; + } + _ => unreachable!(), + } + } + + records.last_mut().expect("missing last record") + } +} + +/// A record of value positions on a persisted DICOM data set. 
+#[derive(Debug, Clone, PartialEq)] +pub enum DataSetRecord { + /// Primitive data element + Element { + /// data element header + header: DataElementHeader, + /// the byte position of the value + position: u64, + }, + /// Data element sequence + Sequence { + /// sequence element tag + tag: Tag, + /// the length according to the persisted data set + length: Length, + items: Vec, + }, + /// Encapsulated pixel sequence + PixelSequence { + /// the byte positions of each fragment in order + /// (the first fragment is the offset table), + /// `None` if the fragment is empty + fragment_positions: C>, + }, +} + +impl DataSetRecord { + pub fn tag(&self) -> Tag { + match self { + DataSetRecord::Element { header, .. } => header.tag, + DataSetRecord::Sequence { tag, .. } => *tag, + DataSetRecord::PixelSequence { .. } => Tag(0x7FE0, 0x0010), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum DataSetRecordBuilder { + /// Primitive data element + Element { + /// data element header + header: DataElementHeader, + /// the byte position of the value + position: u64, + }, + /// Data element sequence + Sequence { + /// sequence element tag + tag: Tag, + /// the length according to the persisted data set + length: Length, + items: Vec, + }, + /// Encapsulated pixel sequence + PixelSequence { + /// the byte positions of each fragment in order, + /// `None` if the fragment is empty. 
+ fragment_positions: C>, + }, +} + +impl DataSetRecordBuilder { + pub fn build(self) -> DataSetRecord { + match self { + DataSetRecordBuilder::Element { header, position } => { + DataSetRecord::Element { header, position } + } + DataSetRecordBuilder::Sequence { tag, length, items } => DataSetRecord::Sequence { + tag, + length, + items: items.into_iter().map(DataSetTableBuilder::build).collect(), + }, + DataSetRecordBuilder::PixelSequence { fragment_positions } => { + DataSetRecord::PixelSequence { fragment_positions } + } + } + } +} + +/// A lazy data set reader which updates a data set table builder +/// as it fetches new tokens. +/// +/// It still uses [`LazyDataSetReader`][1] as its underlying implementation. +/// +/// [1]: dicom_parser::dataset::lazy_read::LazyDataSetReader +#[derive(Debug)] +pub struct RecordBuildingDataSetReader<'a, S> { + builder: &'a mut DataSetTableBuilder, + reader: LazyDataSetReader, +} + +impl<'a, S> RecordBuildingDataSetReader<'a, S> +where + S: StatefulDecode, +{ + pub fn new(reader: LazyDataSetReader, builder: &'a mut DataSetTableBuilder) -> Self { + RecordBuildingDataSetReader { builder, reader } + } + + pub fn into_inner(self) -> LazyDataSetReader { + self.reader + } + + /** Advance and retrieve the next DICOM data token. + * + * If a token is obtained, + * the referenced builder is automatically updated. + * + * **Note:** For the data set to be successfully parsed, + * the resulting data tokens needs to be consumed + * if they are of a value type. 
+ */ + pub fn advance( + &mut self, + ) -> Option>> { + match self.reader.advance() { + Some(Ok(token)) => { + self.builder.update(&token); + Some(Ok(token)) + } + e @ Some(Err(_)) => e, + None => None, + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Read; + + use dicom_core::{DataElementHeader, Length, Tag, VR}; + use dicom_encoding::{ + decode::{basic::LittleEndianBasicDecoder, explicit_le::ExplicitVRLittleEndianDecoder}, + text::SpecificCharacterSet, + }; + use dicom_parser::{dataset::lazy_read::LazyDataSetReader, StatefulDecoder}; + + use crate::lazy::record::{DataSetRecord, DataSetTable}; + + use super::DataSetTableBuilder; + + fn validate_create_table_explicit_vr(source: R, gt: &DataSetTable) + where + R: Read, + { + let stateful_decoder = StatefulDecoder::new( + source, + ExplicitVRLittleEndianDecoder::default(), + LittleEndianBasicDecoder::default(), + SpecificCharacterSet::Default, + ); + + let mut dataset_reader = LazyDataSetReader::new(stateful_decoder); + + let mut b = DataSetTableBuilder::new(); + + while let Some(token) = dataset_reader.advance() { + let token = token.unwrap(); + b.update(&token); + token.skip().unwrap(); + } + + let table = b.build(); + + assert_eq!(&table, gt); + } + + #[test] + fn lazy_record_from_sequence_explicit() { + #[rustfmt::skip] + static DATA: &[u8] = &[ + 0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions + b'S', b'Q', // VR + 0x00, 0x00, // reserved + 0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (#= 2) + // -- 12 -- + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x14, 0x00, 0x00, 0x00, // item length: 20 (#= 2) + // -- 20 -- + 0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialFormat, len = 2, value = 1 + // -- 30 -- + 0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6014) RegionDataType, len = 2, value = 2 + // -- 40 -- + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x0a, 0x00, 0x00, 0x00, // item length: 10 (#= 1) + // -- 
48 -- + 0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialFormat, len = 2, value = 4 + // -- 58 -- + 0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4 + b'T', b'E', b'S', b'T', // value = "TEST" + ]; + + let sequence_record: DataSetRecord = DataSetRecord::Sequence { + tag: Tag(0x0018, 0x6011), + length: Length(46), + items: vec![ + vec![ + DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0018, 0x6012), + vr: VR::US, + len: Length(2), + }, + position: 28, + }, + DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0018, 0x6014), + vr: VR::US, + len: Length(2), + }, + position: 38, + }, + ] + .into_iter() + .collect(), + vec![DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0018, 0x6012), + vr: VR::US, + len: Length(2), + }, + position: 56, + }] + .into_iter() + .collect(), + ], + }; + + let ground_truth: DataSetTable = vec![ + sequence_record, + DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0020, 0x4000), + vr: VR::LT, + len: Length(4), + }, + position: 66, + }, + ] + .into_iter() + .collect(); + + validate_create_table_explicit_vr(DATA, &ground_truth); + } + + #[test] + fn lazy_record_from_encapsulated_pixel_data() { + #[rustfmt::skip] + static DATA: &[u8] = &[ + 0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData + b'O', b'B', // VR + 0x00, 0x00, // reserved + 0xff, 0xff, 0xff, 0xff, // length: undefined + // -- 12 -- Pixel Item 0: empty offset table + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x00, 0x00, 0x00, 0x00, // item length: 0 + // -- 20 -- First fragment of pixel data + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x20, 0x00, 0x00, 0x00, // item length: 32 + // -- 28 -- Pixel Item 1: Compressed Fragment + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + // -- 
60 -- Second fragment of pixel data + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x10, 0x00, 0x00, 0x00, // item length: 16 + // -- 68 -- Pixel Item 2: Compressed Fragment + 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, + 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, + // -- 84 -- End of pixel data + 0xfe, 0xff, 0xdd, 0xe0, // sequence end tag + 0x00, 0x00, 0x00, 0x00, + // -- 92 -- padding + 0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding + b'O', b'B', // VR + 0x00, 0x00, // reserved + 0x08, 0x00, 0x00, 0x00, // length: 8 + // -- 104 -- + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + + let ground_truth = vec![ + DataSetRecord::PixelSequence { + fragment_positions: smallvec::smallvec![None, Some(28), Some(68)], + }, + DataSetRecord::Element { + header: DataElementHeader::new(Tag(0xFFFC, 0xFFFC), VR::OB, Length(8)), + position: 104, + }, + ] + .into_iter() + .collect(); + + validate_create_table_explicit_vr(DATA, &ground_truth); + } +} diff --git a/object/src/lib.rs b/object/src/lib.rs index 4b003a921..9e916da1a 100644 --- a/object/src/lib.rs +++ b/object/src/lib.rs @@ -108,6 +108,7 @@ //! # run().unwrap(); //! ``` pub mod file; +pub mod lazy; pub mod mem; pub mod meta; #[deprecated( diff --git a/object/src/mem.rs b/object/src/mem.rs index 07ebcd441..71883be9d 100644 --- a/object/src/mem.rs +++ b/object/src/mem.rs @@ -378,6 +378,20 @@ where D: DataDictionary, D: Clone, { + /// Create an in-memory DICOM object from its constituent parts. + /// + /// This is currently crate-only because + /// it is useful for converting between DICOM object implementations, + /// but can produce inconsistent objects + /// if used with incoherent parameters. + pub(crate) fn from_parts(entries: BTreeMap>, dict: D, len: Length) -> Self { + InMemDicomObject { + entries, + dict, + len, + } + } + /// Create a new empty object, using the given dictionary for name lookup. 
pub fn new_empty_with_dict(dict: D) -> Self { InMemDicomObject { @@ -388,11 +402,11 @@ where } /// Construct a DICOM object from an iterator of structured elements. - pub fn from_element_source_with_dict(iter: I, dict: D) -> Result + pub fn from_element_source_with_dict(iter: I, dict: D) -> Result where - I: IntoIterator>>, + I: IntoIterator, E>>, { - let entries: Result<_> = iter.into_iter().map_ok(|e| (e.tag(), e)).collect(); + let entries: Result<_, E> = iter.into_iter().map_ok(|e| (e.tag(), e)).collect(); Ok(InMemDicomObject { entries: entries?, dict, diff --git a/parser/src/dataset/lazy_read.rs b/parser/src/dataset/lazy_read.rs index f7fc56d38..c92cbbcd1 100644 --- a/parser/src/dataset/lazy_read.rs +++ b/parser/src/dataset/lazy_read.rs @@ -210,13 +210,17 @@ where }) } - /** Advance and retrieve the next DICOM data token. - * - * **Note:** For the data set to be successfully parsed, - * the resulting data tokens needs to be consumed - * if they are of a value type. - */ - pub fn next(&mut self) -> Option>> { + /// Retrieve the inner stateful decoder from this data set reader. + pub fn into_decoder(self) -> S { + self.parser + } + + /// Advance and retrieve the next DICOM data token. + /// + /// **Note:** For the data set to be successfully parsed, + /// the resulting data tokens need to be consumed + /// if they are of a value type. 
+ pub fn advance(&mut self) -> Option>> { if self.hard_break { return None; } @@ -478,7 +482,7 @@ mod tests { let mut dset_reader = LazyDataSetReader::new(parser); let mut gt_iter = ground_truth.into_iter(); - while let Some(res) = dset_reader.next() { + while let Some(res) = dset_reader.advance() { let gt_token = gt_iter.next().expect("ground truth is shorter"); let token = res.expect("should parse without an error"); let token = token.into_owned().unwrap(); @@ -910,14 +914,21 @@ mod tests { 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, - // -- 60 -- End of pixel data + // -- 60 -- Second fragment of pixel data + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x10, 0x00, 0x00, 0x00, // item length: 16 + // -- 68 -- Compressed Fragment + 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, + 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, + // -- 84 -- End of pixel data 0xfe, 0xff, 0xdd, 0xe0, // sequence end tag 0x00, 0x00, 0x00, 0x00, - // -- 68 -- padding + // -- 92 -- padding 0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding b'O', b'B', // VR 0x00, 0x00, // reserved 0x08, 0x00, 0x00, 0x00, // length: 8 + // -- 104 -- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ]; @@ -929,6 +940,9 @@ mod tests { DataToken::ItemStart { len: Length(32) }, DataToken::ItemValue(vec![0x99; 32]), DataToken::ItemEnd, + DataToken::ItemStart { len: Length(16) }, + DataToken::ItemValue(vec![0xbb; 16]), + DataToken::ItemEnd, DataToken::SequenceEnd, DataToken::ElementHeader(DataElementHeader::new( Tag(0xfffc, 0xfffc), @@ -1025,7 +1039,7 @@ mod tests { let mut dset_reader = LazyDataSetReader::new(parser); let mut gt_iter = ground_truth.into_iter(); - while let Some(res) = dset_reader.next() { + while let Some(res) = dset_reader.advance() { let token = res.expect("should parse without an error"); let gt_token = gt_iter.next().expect("ground truth is shorter"); match token { @@ -1078,7 
+1092,7 @@ mod tests { let mut dset_reader = LazyDataSetReader::new(parser); let token = dset_reader - .next() + .advance() .expect("Expected token 1") .expect("Failed to read token 1"); @@ -1090,7 +1104,7 @@ mod tests { }; let token = dset_reader - .next() + .advance() .expect("Expected token 2") .expect("Failed to read token 2"); @@ -1110,7 +1124,7 @@ mod tests { ); let token = dset_reader - .next() + .advance() .expect("Expected token 3") .expect("Failed to read token 3"); @@ -1122,7 +1136,7 @@ mod tests { }; let token = dset_reader - .next() + .advance() .expect("Expected token 4") .expect("Failed to read token 4"); @@ -1142,7 +1156,7 @@ mod tests { ); assert!( - dset_reader.next().is_none(), + dset_reader.advance().is_none(), "unexpected number of tokens remaining" ); } diff --git a/parser/src/dataset/mod.rs b/parser/src/dataset/mod.rs index 6808217f0..828a01f03 100644 --- a/parser/src/dataset/mod.rs +++ b/parser/src/dataset/mod.rs @@ -200,14 +200,14 @@ impl LazyDataToken where D: decode::StatefulDecode, { - pub fn skip(self) -> Result<()> { + pub fn skip(self) -> crate::stateful::decode::Result<()> { match self { LazyDataToken::LazyValue { header, mut decoder, - } => decoder.skip_bytes(header.len.0).context(SkipValueSnafu), + } => decoder.skip_bytes(header.len.0), LazyDataToken::LazyItemValue { len, mut decoder } => { - decoder.skip_bytes(len).context(SkipValueSnafu) + decoder.skip_bytes(len) } _ => Ok(()), // do nothing }