Skip to content

Commit

Permalink
Read heterogeneous nonlist PLY properties as arrays
Browse files Browse the repository at this point in the history
Summary:
In the original implementation, I had considered PLY elements whose properties have a mixture of types to be rare and basically unimportant, so the implementation for them was very naive.

If we want to support pointcloud PLY files, we need to handle at least the subcase where there are no lists efficiently because this seems to be very common there.

Reviewed By: nikhilaravi, gkioxari

Differential Revision: D22573315

fbshipit-source-id: db6f29446d4e555a2e2b37d38c8e4450d061465b
  • Loading branch information
bottler authored and facebook-github-bot committed Jan 7, 2021
1 parent 89532a8 commit 3b9fbfc
Show file tree
Hide file tree
Showing 2 changed files with 267 additions and 56 deletions.
261 changes: 213 additions & 48 deletions pytorch3d/io/ply_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@


"""This module implements utility functions for loading and saving meshes."""
import itertools
import struct
import sys
import warnings
Expand Down Expand Up @@ -232,13 +233,20 @@ def _read_ply_fixed_size_element_ascii(f, definition: _PlyElementType):
Given an element which has no lists and one type, read the
corresponding data.
For example
element vertex 8
property float x
property float y
property float z
Args:
f: file-like object being read.
definition: The element object which describes what we are reading.
Returns:
2D numpy array corresponding to the data. The rows are the different
values. There is one column for each property.
1-element list containing a 2D numpy array corresponding to the data.
The rows are the different values. There is one column for each property.
"""
np_type = _PLY_TYPES[definition.properties[0].data_type].np_type
old_offset = f.tell()
Expand All @@ -251,11 +259,62 @@ def _read_ply_fixed_size_element_ascii(f, definition: _PlyElementType):
)
if not len(data): # np.loadtxt() seeks even on empty data
f.seek(old_offset)
if definition.count and data.shape[1] != len(definition.properties):
if data.shape[1] != len(definition.properties):
raise ValueError("Inconsistent data for %s." % definition.name)
if data.shape[0] != definition.count:
raise ValueError("Not enough data for %s." % definition.name)
return data
return [data]


def _read_ply_nolist_element_ascii(f, definition: _PlyElementType):
    """
    Given an element which has no lists and multiple types, read the
    corresponding data, by loading all the data as float64 and converting
    the relevant parts later.

    For example, given

        element vertex 8
        property float x
        property float y
        property float z
        property uchar red
        property uchar green
        property uchar blue

    the output will have two arrays, the first containing (x,y,z)
    and the second (red,green,blue).

    Args:
        f: file-like object being read.
        definition: The element object which describes what we are reading.

    Returns:
        List of 2D numpy arrays corresponding to the data. There is one
        array for each run of adjacent properties sharing a data type; its
        rows are the different values and its columns are those properties.

    Raises:
        ValueError: if the number of columns read differs from the number of
            properties, or the number of rows read differs from
            definition.count.
    """
    start_offset = f.tell()
    with warnings.catch_warnings():
        # An element with no remaining data makes np.loadtxt warn; the
        # shape checks below report that case instead.
        warnings.filterwarnings(
            "ignore", message=".* Empty input file.*", category=UserWarning
        )
        data = np.loadtxt(
            f, dtype=np.float64, comments=None, ndmin=2, max_rows=definition.count
        )
    if not len(data):  # np.loadtxt() seeks even on empty data
        f.seek(start_offset)
    if data.shape[1] != len(definition.properties):
        raise ValueError("Inconsistent data for %s." % definition.name)
    if data.shape[0] != definition.count:
        raise ValueError("Not enough data for %s." % definition.name)

    # Split the float64 table into one block of columns per run of
    # consecutive properties with the same type, converting each block to
    # the numpy type registered for that PLY type.
    pieces = []
    column_start = 0
    type_runs = itertools.groupby(prop.data_type for prop in definition.properties)
    for type_name, run in type_runs:
        column_stop = column_start + sum(1 for _ in run)
        np_type = _PLY_TYPES[type_name].np_type
        pieces.append(data[:, column_start:column_stop].astype(np_type))
        column_start = column_stop
    return pieces


def _try_read_ply_constant_list_ascii(f, definition: _PlyElementType):
Expand All @@ -264,6 +323,28 @@ def _try_read_ply_constant_list_ascii(f, definition: _PlyElementType):
corresponding data assuming every value has the same length.
If the data is ragged, return None and leave f undisturbed.
For example, if the element is
element face 2
property list uchar int vertex_index
and the data is
4 0 1 2 3
4 7 6 5 4
then the function will return
[[0, 1, 2, 3],
[7, 6, 5, 4]]
but if the data is
4 0 1 2 3
3 6 5 4
then the function will return None.
Args:
f: file-like object being read.
definition: The element object which describes what we are reading.
Expand Down Expand Up @@ -349,8 +430,12 @@ def _read_ply_element_ascii(f, definition: _PlyElementType):
each occurrence of the element, and the inner lists have one value per
property.
"""
if not definition.count:
return []
if definition.is_constant_type_fixed_size():
return _read_ply_fixed_size_element_ascii(f, definition)
if definition.is_fixed_size():
return _read_ply_nolist_element_ascii(f, definition)
if definition.try_constant_list():
data = _try_read_ply_constant_list_ascii(f, definition)
if data is not None:
Expand All @@ -372,75 +457,123 @@ def _read_ply_element_ascii(f, definition: _PlyElementType):
return data


def _read_raw_array(f, aim: str, length: int, dtype=np.uint8, dtype_size=1):
"""
Read [length] elements from a file.
Args:
f: file object
aim: name of target for error message
length: number of elements
dtype: numpy type
dtype_size: number of bytes per element.
Returns:
new numpy array
"""

if isinstance(f, BytesIO):
# np.fromfile is faster but won't work on a BytesIO
needed_bytes = length * dtype_size
bytes_data = bytearray(needed_bytes)
n_bytes_read = f.readinto(bytes_data)
if n_bytes_read != needed_bytes:
raise ValueError("Not enough data for %s." % aim)
data = np.frombuffer(bytes_data, dtype=dtype)
else:
data = np.fromfile(f, dtype=dtype, count=length)
if data.shape[0] != length:
raise ValueError("Not enough data for %s." % aim)
return data


def _read_ply_fixed_size_element_binary(
f, definition: _PlyElementType, big_endian: bool
):
"""
Given an element which has no lists and one type, read the
corresponding data.
For example
element vertex 8
property float x
property float y
property float z
Args:
f: file-like object being read.
definition: The element object which describes what we are reading.
big_endian: (bool) whether the document is encoded as big endian.
Returns:
2D numpy array corresponding to the data. The rows are the different
values. There is one column for each property.
1-element list containing a 2D numpy array corresponding to the data.
The rows are the different values. There is one column for each property.
"""
ply_type = _PLY_TYPES[definition.properties[0].data_type]
np_type = ply_type.np_type
type_size = ply_type.size
needed_length = definition.count * len(definition.properties)
if isinstance(f, BytesIO):
# np.fromfile is faster but won't work on a BytesIO
needed_bytes = needed_length * type_size
bytes_data = bytearray(needed_bytes)
n_bytes_read = f.readinto(bytes_data)
if n_bytes_read != needed_bytes:
raise ValueError("Not enough data for %s." % definition.name)
data = np.frombuffer(bytes_data, dtype=np_type)
else:
data = np.fromfile(f, dtype=np_type, count=needed_length)
if data.shape[0] != needed_length:
raise ValueError("Not enough data for %s." % definition.name)
data = _read_raw_array(f, definition.name, needed_length, np_type, type_size)

if (sys.byteorder == "big") != big_endian:
data = data.byteswap()
return data.reshape(definition.count, len(definition.properties))
return [data.reshape(definition.count, len(definition.properties))]


def _read_ply_element_struct(f, definition: _PlyElementType, endian_str: str):
def _read_ply_element_binary_nolists(f, definition: _PlyElementType, big_endian: bool):
"""
Given an element which has no lists, read the corresponding data. Uses the
struct library.
Given an element which has no lists, read the corresponding data as a list
of numpy arrays, one for each set of adjacent columns with the same type.
For example, given
element vertex 8
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
Note: It looks like struct would also support lists where
type=size_type=char, but it is hard to know how much data to read in that
case.
the output will have two arrays, the first containing (x,y,z)
and the second (red,green,blue).
Args:
f: file-like object being read.
definition: The element object which describes what we are reading.
endian_str: ">" or "<" according to whether the document is big or
little endian.
big_endian: (bool) whether the document is encoded as big endian.
Returns:
2D numpy array corresponding to the data. The rows are the different
values. There is one column for each property.
List of 2D numpy arrays corresponding to the data. The rows are the different
values.
"""
format = "".join(
_PLY_TYPES[property.data_type].struct_char for property in definition.properties
)
format = endian_str + format
pattern = struct.Struct(format)
size = pattern.size
size = sum(_PLY_TYPES[prop.data_type].size for prop in definition.properties)
needed_bytes = size * definition.count
bytes_data = f.read(needed_bytes)
if len(bytes_data) != needed_bytes:
raise ValueError("Not enough data for %s." % definition.name)
data = [pattern.unpack_from(bytes_data, i * size) for i in range(definition.count)]
return data
data = _read_raw_array(f, definition.name, needed_bytes).reshape(-1, size)
offset = 0
pieces = []
for dtype, it in itertools.groupby(p.data_type for p in definition.properties):
count = sum(1 for _ in it)
bytes_each = count * _PLY_TYPES[dtype].size
end_offset = offset + bytes_each

# what we want to do is
# piece = data[:, offset:end_offset].view(_PLY_TYPES[dtype].np_type)
# but it fails in the general case
# because of https://github.com/numpy/numpy/issues/9496.
piece = np.lib.stride_tricks.as_strided(
data[:1, offset:end_offset].view(_PLY_TYPES[dtype].np_type),
shape=(definition.count, count),
strides=(data.strides[0], _PLY_TYPES[dtype].size),
)

if (sys.byteorder == "big") != big_endian:
piece = piece.byteswap()
pieces.append(piece)
offset = end_offset
return pieces


def _try_read_ply_constant_list_binary(
Expand All @@ -451,6 +584,28 @@ def _try_read_ply_constant_list_binary(
corresponding data assuming every value has the same length.
If the data is ragged, return None and leave f undisturbed.
For example, if the element is
element face 2
property list uchar int vertex_index
and the data is
4 0 1 2 3
4 7 6 5 4
then the function will return
[[0, 1, 2, 3],
[7, 6, 5, 4]]
but if the data is
4 0 1 2 3
3 6 5 4
then the function will return None.
Args:
f: file-like object being read.
definition: The element object which describes what we are reading.
Expand All @@ -460,8 +615,6 @@ def _try_read_ply_constant_list_binary(
If every element has the same size, 2D numpy array corresponding to the
data. The rows are the different values. Otherwise None.
"""
if definition.count == 0:
return []
property = definition.properties[0]
endian_str = ">" if big_endian else "<"
length_format = endian_str + _PLY_TYPES[property.list_size_type].struct_char
Expand Down Expand Up @@ -515,18 +668,20 @@ def _read_ply_element_binary(f, definition: _PlyElementType, big_endian: bool) -
each occurrence of the element, and the inner lists have one value per
property.
"""
endian_str = ">" if big_endian else "<"
if not definition.count:
return []

if definition.is_constant_type_fixed_size():
return _read_ply_fixed_size_element_binary(f, definition, big_endian)
if definition.is_fixed_size():
return _read_ply_element_struct(f, definition, endian_str)
return _read_ply_element_binary_nolists(f, definition, big_endian)
if definition.try_constant_list():
data = _try_read_ply_constant_list_binary(f, definition, big_endian)
if data is not None:
return data

# We failed to read the element as a lump, must process each line manually.
endian_str = ">" if big_endian else "<"
property_structs = []
for property in definition.properties:
initial_type = property.list_size_type or property.data_type
Expand Down Expand Up @@ -606,6 +761,7 @@ def _load_ply_raw(f, path_manager: PathManager) -> Tuple[_PlyHeader, dict]:
elements: A dictionary of element names to values. If an element is
regular, in the sense of having no lists or being one
uniformly-sized list, then the value will be a 2D numpy array.
If it has no lists but more than one type, it will be a list of arrays.
If not, it is a list of the relevant property values.
"""
with _open_file(f, path_manager, "rb") as f:
Expand Down Expand Up @@ -670,11 +826,20 @@ def load_ply(f, path_manager: Optional[PathManager] = None):
if face is None:
raise ValueError("The ply file has no face element.")

if len(vertex) and (
not isinstance(vertex, np.ndarray) or vertex.ndim != 2 or vertex.shape[1] != 3
):
if not isinstance(vertex, list) or len(vertex) > 1:
raise ValueError("Invalid vertices in file.")
verts = _make_tensor(vertex, cols=3, dtype=torch.float32)

if len(vertex):
vertex0 = vertex[0]
if len(vertex0) and (
not isinstance(vertex0, np.ndarray)
or vertex0.ndim != 2
or vertex0.shape[1] != 3
):
raise ValueError("Invalid vertices in file.")
else:
vertex0 = []
verts = _make_tensor(vertex0, cols=3, dtype=torch.float32)

face_head = next(head for head in header.elements if head.name == "face")
if len(face_head.properties) != 1 or face_head.properties[0].list_size_type is None:
Expand Down
Loading

0 comments on commit 3b9fbfc

Please sign in to comment.