Skip to content

Commit

Permalink
added 2983 class task
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ubuntu committed Aug 15, 2024
1 parent 97c1735 commit 93cb70f
Showing 1 changed file with 96 additions and 17 deletions.
113 changes: 96 additions & 17 deletions examples/graphbolt/rgcn/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,33 +56,33 @@ def build_yaml_helper(path, dataset_size, in_memory=True):
"data": [
{
"in_memory": in_memory,
"path": "set/validation_indices.npy",
"path": "set/validation_indices_19.npy",
"name": "seeds",
"format": "numpy",
},
{
"in_memory": in_memory,
"path": "set/validation_labels.npy",
"path": "set/validation_labels_19.npy",
"name": "labels",
"format": "numpy",
},
],
"type": "paper",
}
],
"name": "node_classification",
"name": "node_classification_19",
"train_set": [
{
"data": [
{
"in_memory": in_memory,
"path": "set/train_indices.npy",
"path": "set/train_indices_19.npy",
"name": "seeds",
"format": "numpy",
},
{
"in_memory": in_memory,
"path": "set/train_labels.npy",
"path": "set/train_labels_19.npy",
"name": "labels",
"format": "numpy",
},
Expand All @@ -95,21 +95,82 @@ def build_yaml_helper(path, dataset_size, in_memory=True):
"data": [
{
"in_memory": in_memory,
"path": "set/test_indices.npy",
"path": "set/test_indices_19.npy",
"name": "seeds",
"format": "numpy",
},
{
"in_memory": in_memory,
"path": "set/test_labels.npy",
"path": "set/test_labels_19.npy",
"name": "labels",
"format": "numpy",
},
],
"type": "paper",
}
],
}
},
{
"num_classes": 2983,
"validation_set": [
{
"data": [
{
"in_memory": in_memory,
"path": "set/validation_indices_2983.npy",
"name": "seeds",
"format": "numpy",
},
{
"in_memory": in_memory,
"path": "set/validation_labels_2983.npy",
"name": "labels",
"format": "numpy",
},
],
"type": "paper",
}
],
"name": "node_classification_2K",
"train_set": [
{
"data": [
{
"in_memory": in_memory,
"path": "set/train_indices_2983.npy",
"name": "seeds",
"format": "numpy",
},
{
"in_memory": in_memory,
"path": "set/train_labels_2983.npy",
"name": "labels",
"format": "numpy",
},
],
"type": "paper",
}
],
"test_set": [
{
"data": [
{
"in_memory": in_memory,
"path": "set/test_indices_2983.npy",
"name": "seeds",
"format": "numpy",
},
{
"in_memory": in_memory,
"path": "set/test_labels_2983.npy",
"name": "labels",
"format": "numpy",
},
],
"type": "paper",
}
],
},
],
"feature_data": [
{
Expand Down Expand Up @@ -390,7 +451,7 @@ def download_dataset(path, dataset_type, dataset_size):
}


def split_data(label_path, set_dir, dataset_size):
def split_data(label_path, set_dir, dataset_size, class_num):
"""This is for splitting the labels into three sets: train, validation, and test sets."""
# labels = np.memmap(label_path, dtype='int32', mode='r', shape=(num_nodes[dataset_size]["paper"], 1))
labels = np.load(label_path)
Expand All @@ -415,14 +476,24 @@ def split_data(label_path, set_dir, dataset_size):
print(validation_labels, len(validation_labels))
print(test_labels, len(test_labels))

gb.numpy_save_aligned(f"{set_dir}/train_indices.npy", train_indices)
gb.numpy_save_aligned(
f"{set_dir}/validation_indices.npy", validation_indices
f"{set_dir}/train_indices_{class_num}.npy", train_indices
)
gb.numpy_save_aligned(
f"{set_dir}/validation_indices_{class_num}.npy", validation_indices
)
gb.numpy_save_aligned(
f"{set_dir}/test_indices_{class_num}.npy", test_indices
)
gb.numpy_save_aligned(
f"{set_dir}/train_labels_{class_num}.npy", train_labels
)
gb.numpy_save_aligned(
f"{set_dir}/validation_labels_{class_num}.npy", validation_labels
)

Check warning on line 493 in examples/graphbolt/rgcn/download.py (GitHub Actions / lintrunner; view workflow job for this annotation):
UFMT format: run `lintrunner -a` to apply this patch.
gb.numpy_save_aligned(
f"{set_dir}/test_labels_{class_num}.npy", test_labels
)
gb.numpy_save_aligned(f"{set_dir}/test_indices.npy", test_indices)
gb.numpy_save_aligned(f"{set_dir}/train_labels.npy", train_labels)
gb.numpy_save_aligned(f"{set_dir}/validation_labels.npy", validation_labels)
gb.numpy_save_aligned(f"{set_dir}/test_labels.npy", test_labels)


def add_edges(edges, source, dest, dataset_size):
Expand Down Expand Up @@ -480,7 +551,6 @@ def process_label(file_path, num_class, dataset_size):
assert new_array.shape[0] == 227130858
assert np.array_equal(array, new_array)
else:
assert num_class == 19
# new_array = np.memmap(file_path, dtype='int32', mode='r', shape=(num_nodes[dataset_size]["paper"], 1))
new_array = np.load(file_path)
assert new_array.shape[0] == num_nodes[dataset_size]["paper"]
Expand Down Expand Up @@ -547,7 +617,16 @@ def process_dataset(path, dataset_size):
set_dir = processed_dir + "/" + "set"
os.makedirs(name=set_dir, exist_ok=True)
split_data(
label_path=label_file_19, set_dir=set_dir, dataset_size=dataset_size
label_path=label_file_19,
set_dir=set_dir,
dataset_size=dataset_size,
class_num=19,
)
split_data(
label_path=label_file_2K,
set_dir=set_dir,
dataset_size=dataset_size,
class_num=2983,
)

# Step 3: Move edge files
Expand Down

0 comments on commit 93cb70f

Please sign in to comment.