-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_utils.py
35 lines (33 loc) · 997 Bytes
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np
import torch as th
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from six.moves import cPickle as pl
import networkx as nx
def data_to_graph(data, vocab_obj):
    """Convert one raw sample into index arrays and dependency graphs.

    Args:
        data: Either a mapping with the keys ``"text"``, ``"summary"`` and
            ``"dep"``, or a ``str`` path to a pickle file containing such a
            mapping. ``"text"`` is iterated as sentences, each of which is
            iterated as tokens; ``"summary"`` is iterated as tokens;
            ``"dep"`` is iterated as one edge collection per sentence.
        vocab_obj: Object indexable by token (``vocab_obj[token]``) that
            yields the token's integer id. It must also resolve the special
            tokens ``"<START>"`` and ``"<END>"``.

    Returns:
        A 2-tuple ``(sentences, idx_summ)``:

        * ``sentences`` is a list of ``(idx, graph)`` tuples, one per
          sentence, where ``idx`` is an integer ``numpy`` array holding
          ``<START>``, the sentence's token ids and ``<END>``, and ``graph``
          is a ``networkx.DiGraph`` with nodes ``0 .. len(idx) - 1``.
        * ``idx_summ`` is the same kind of array built from the summary.
    """
    if isinstance(data, str):
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file -- only pass paths to trusted data.
        # The context manager guarantees the handle is closed, even when
        # unpickling raises.
        with open(data, "rb") as fh:
            data = pl.load(fh)

    start_id = vocab_obj["<START>"]
    end_id = vocab_obj["<END>"]

    idx_sent = [
        np.array(
            [start_id] + [vocab_obj[w] for w in sent] + [end_id],
            dtype="int",
        )
        for sent in data["text"]
    ]
    idx_summ = np.array(
        [start_id] + [vocab_obj[w] for w in data["summary"]] + [end_id],
        dtype="int",
    )

    # One graph per sentence is created up front so that every sentence gets
    # a graph even if data["dep"] has fewer entries than data["text"].
    dep_graphs = [nx.DiGraph() for _ in idx_sent]
    for g, deps, idx in zip(dep_graphs, data["dep"], idx_sent):
        # Nodes cover every position of the padded sequence, including the
        # <START>/<END> slots.
        # NOTE(review): presumably the edge endpoints in ``deps`` are already
        # expressed in these padded positions -- confirm against the producer
        # of data["dep"].
        g.add_nodes_from(range(len(idx)))
        g.add_edges_from(deps)

    return list(zip(idx_sent, dep_graphs)), idx_summ