Skip to content

Commit

Permalink
update data preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
stnoah1 committed Nov 22, 2021
1 parent a3115de commit e8eb418
Show file tree
Hide file tree
Showing 24 changed files with 1,025,654 additions and 184 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
data
data_temp
*.sh
wandb
*_old.py
Expand Down
Binary file added data/NW-UCLA/val_label.pkl
Binary file not shown.
436 changes: 436 additions & 0 deletions data/ntu/get_raw_denoised_data.py

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions data/ntu/get_raw_skes_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import os.path as osp
import os
import numpy as np
import pickle
import logging


def get_raw_bodies_data(skes_path, ske_name, frames_drop_skes, frames_drop_logger):
"""
Get raw bodies data from a skeleton sequence.
Each body's data is a dict that contains the following keys:
- joints: raw 3D joints positions. Shape: (num_frames x 25, 3)
- colors: raw 2D color locations. Shape: (num_frames, 25, 2)
- interval: a list which stores the frame indices of this body.
- motion: motion amount (only for the sequence with 2 or more bodyIDs).
Return:
a dict for a skeleton sequence with 3 key-value pairs:
- name: the skeleton filename.
- data: a dict which stores raw data of each body.
- num_frames: the number of valid frames.
"""
ske_file = osp.join(skes_path, ske_name + '.skeleton')
assert osp.exists(ske_file), 'Error: Skeleton file %s not found' % ske_file
# Read all data from .skeleton file into a list (in string format)
print('Reading data from %s' % ske_file[-29:])
with open(ske_file, 'r') as fr:
str_data = fr.readlines()

num_frames = int(str_data[0].strip('\r\n'))
frames_drop = []
bodies_data = dict()
valid_frames = -1 # 0-based index
current_line = 1

for f in range(num_frames):
num_bodies = int(str_data[current_line].strip('\r\n'))
current_line += 1

if num_bodies == 0: # no data in this frame, drop it
frames_drop.append(f) # 0-based index
continue

valid_frames += 1
joints = np.zeros((num_bodies, 25, 3), dtype=np.float32)
colors = np.zeros((num_bodies, 25, 2), dtype=np.float32)

for b in range(num_bodies):
bodyID = str_data[current_line].strip('\r\n').split()[0]
current_line += 1
num_joints = int(str_data[current_line].strip('\r\n')) # 25 joints
current_line += 1

for j in range(num_joints):
temp_str = str_data[current_line].strip('\r\n').split()
joints[b, j, :] = np.array(temp_str[:3], dtype=np.float32)
colors[b, j, :] = np.array(temp_str[5:7], dtype=np.float32)
current_line += 1

if bodyID not in bodies_data: # Add a new body's data
body_data = dict()
body_data['joints'] = joints[b] # ndarray: (25, 3)
body_data['colors'] = colors[b, np.newaxis] # ndarray: (1, 25, 2)
body_data['interval'] = [valid_frames] # the index of the first frame
else: # Update an already existed body's data
body_data = bodies_data[bodyID]
# Stack each body's data of each frame along the frame order
body_data['joints'] = np.vstack((body_data['joints'], joints[b]))
body_data['colors'] = np.vstack((body_data['colors'], colors[b, np.newaxis]))
pre_frame_idx = body_data['interval'][-1]
body_data['interval'].append(pre_frame_idx + 1) # add a new frame index

bodies_data[bodyID] = body_data # Update bodies_data

num_frames_drop = len(frames_drop)
assert num_frames_drop < num_frames, \
'Error: All frames data (%d) of %s is missing or lost' % (num_frames, ske_name)
if num_frames_drop > 0:
frames_drop_skes[ske_name] = np.array(frames_drop, dtype=np.int)
frames_drop_logger.info('{}: {} frames missed: {}\n'.format(ske_name, num_frames_drop,
frames_drop))

# Calculate motion (only for the sequence with 2 or more bodyIDs)
if len(bodies_data) > 1:
for body_data in bodies_data.values():
body_data['motion'] = np.sum(np.var(body_data['joints'], axis=0))

return {'name': ske_name, 'data': bodies_data, 'num_frames': num_frames - num_frames_drop}


def get_raw_skes_data():
# # save_path = './data'
# # skes_path = '/data/pengfei/NTU/nturgb+d_skeletons/'
# stat_path = osp.join(save_path, 'statistics')
#
# skes_name_file = osp.join(stat_path, 'skes_available_name.txt')
# save_data_pkl = osp.join(save_path, 'raw_skes_data.pkl')
# frames_drop_pkl = osp.join(save_path, 'frames_drop_skes.pkl')
#
# frames_drop_logger = logging.getLogger('frames_drop')
# frames_drop_logger.setLevel(logging.INFO)
# frames_drop_logger.addHandler(logging.FileHandler(osp.join(save_path, 'frames_drop.log')))
# frames_drop_skes = dict()

skes_name = np.loadtxt(skes_name_file, dtype=str)

num_files = skes_name.size
print('Found %d available skeleton files.' % num_files)

raw_skes_data = []
frames_cnt = np.zeros(num_files, dtype=np.int)

for (idx, ske_name) in enumerate(skes_name):
bodies_data = get_raw_bodies_data(skes_path, ske_name, frames_drop_skes, frames_drop_logger)
raw_skes_data.append(bodies_data)
frames_cnt[idx] = bodies_data['num_frames']
if (idx + 1) % 1000 == 0:
print('Processed: %.2f%% (%d / %d)' % \
(100.0 * (idx + 1) / num_files, idx + 1, num_files))

with open(save_data_pkl, 'wb') as fw:
pickle.dump(raw_skes_data, fw, pickle.HIGHEST_PROTOCOL)
np.savetxt(osp.join(save_path, 'raw_data', 'frames_cnt.txt'), frames_cnt, fmt='%d')

print('Saved raw bodies data into %s' % save_data_pkl)
print('Total frames: %d' % np.sum(frames_cnt))

with open(frames_drop_pkl, 'wb') as fw:
pickle.dump(frames_drop_skes, fw, pickle.HIGHEST_PROTOCOL)

if __name__ == '__main__':
save_path = './'

skes_path = '../nturgbd_raw/nturgb+d_skeletons/'
stat_path = osp.join(save_path, 'statistics')
if not osp.exists('./raw_data'):
os.makedirs('./raw_data')

skes_name_file = osp.join(stat_path, 'skes_available_name.txt')
save_data_pkl = osp.join(save_path, 'raw_data', 'raw_skes_data.pkl')
frames_drop_pkl = osp.join(save_path, 'raw_data', 'frames_drop_skes.pkl')

frames_drop_logger = logging.getLogger('frames_drop')
frames_drop_logger.setLevel(logging.INFO)
frames_drop_logger.addHandler(logging.FileHandler(osp.join(save_path, 'raw_data', 'frames_drop.log')))
frames_drop_skes = dict()

get_raw_skes_data()

with open(frames_drop_pkl, 'wb') as fw:
pickle.dump(frames_drop_skes, fw, pickle.HIGHEST_PROTOCOL)

Empty file.
Loading

0 comments on commit e8eb418

Please sign in to comment.