Skip to content

Commit

Permalink
Merge branch 'wsy000718-HGCL' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
lazishu2000 committed Jul 18, 2023
2 parents ab4b815 + e47c239 commit b440998
Show file tree
Hide file tree
Showing 11 changed files with 775 additions and 1 deletion.
19 changes: 19 additions & 0 deletions openhgnn/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -858,3 +858,22 @@ mlp_inference_bool = 1
neg_alpha = 0
load_json = 0

[HGCL]
batch = 8192
epochs = 400
wu1 = 0.8
wu2 = 0.2
wi1 = 0.8
wi2 = 0.2
lr = 0.055
topk = 10
hide_dim = 32
metareg = 0.15
ssl_temp = 0.5
ssl_ureg = 0.04
ssl_ireg = 0.05
ssl_reg = 0.01
ssl_beta = 0.32
rank = 3
Layers = 2
reg = 0.043
20 changes: 20 additions & 0 deletions openhgnn/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,26 @@ def __init__(self, file_path, model, dataset, task, gpu):
self.compress_ratio = conf.getfloat("SHGP", 'compress_ratio')
self.cuda = conf.getint("SHGP", 'cuda')

elif model == 'HGCL':
self.lr = conf.getfloat("HGCL", "lr")
self.batch = conf.getint("HGCL", "batch")
self.wu1 = conf.getfloat('HGCL', "wu1")
self.wu2 = conf.getfloat("HGCL", "wu2")
self.wi1 = conf.getfloat("HGCL", "wi1")
self.wi2 = conf.getfloat("HGCL", "wi2")
self.epochs = conf.getint("HGCL", "epochs")
self.topk = conf.getint("HGCL", "topk")
self.hide_dim = conf.getint("HGCL", "hide_dim")
self.reg = conf.getfloat("HGCL", "reg")
self.metareg = conf.getfloat("HGCL", "metareg")
self.ssl_temp = conf.getfloat("HGCL", "ssl_temp")
self.ssl_ureg = conf.getfloat("HGCL", "ssl_ureg")
self.ssl_ireg = conf.getfloat("HGCL", "ssl_ireg")
self.ssl_reg = conf.getfloat("HGCL", "ssl_reg")
self.ssl_beta = conf.getfloat("HGCL", "ssl_beta")
self.rank = conf.getint("HGCL", "rank")
self.Layers = conf.getint("HGCL", "Layers")

if hasattr(self, 'device'):
self.device = th.device(self.device)
elif gpu == -1:
Expand Down
114 changes: 114 additions & 0 deletions openhgnn/dataset/HGCLDataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import torch as t
from dgl.data import DGLDataset
from dgl.data.utils import download, extract_archive
from dgl.data.utils import load_graphs
import os
import numpy as np
import dgl
import pickle


class HGCLDataset(DGLDataset):
    """DGL dataset exposing a pre-built HGCL heterogeneous graph.

    On construction the archive ``<name>.zip`` is fetched (if it is not
    already present in the current directory), extracted, and the graph
    list stored in ``./<name>/graph.bin`` is loaded.

    Parameters
    ----------
    name : str
        One of ``'Epinions'``, ``'CiaoDVD'`` or ``'Yelp'``.
    raw_dir : str, optional
        Accepted for interface compatibility; currently ignored (see the
        note in ``__init__``).
    force_reload : bool
        Forwarded to ``DGLDataset``.
    verbose : bool
        Forwarded to ``DGLDataset``.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported dataset names.
    """

    _prefix = 'https://s3.cn-north-1.amazonaws.com.cn/dgl-data/'
    _urls = {

    }
    # Dataset names for which an archive is expected under ``_prefix``.
    _supported_names = ('Epinions', 'CiaoDVD', 'Yelp')

    def __init__(self, name, raw_dir=None, force_reload=False, verbose=True):
        # Raise explicitly instead of using ``assert`` so the check is not
        # stripped when Python runs with -O.
        if name not in self._supported_names:
            raise ValueError(
                'Unsupported HGCL dataset {!r}; expected one of {}'.format(
                    name, list(self._supported_names)))
        self.data_path = './{}.zip'.format(name)
        self.g_path = './{}/graph.bin'.format(name)
        # NOTE(review): the caller-supplied ``raw_dir`` is discarded here.
        # ``data_path`` and ``g_path`` above are hard-coded relative to the
        # current directory, and './' keeps download()/extraction in agreement
        # with them. Honour ``raw_dir`` only together with those two paths.
        raw_dir = './'
        url = self._prefix + 'dataset/{}.zip'.format(name)

        super(HGCLDataset, self).__init__(name=name,
                                          url=url,
                                          raw_dir=raw_dir,
                                          force_reload=force_reload,
                                          verbose=verbose)

    @staticmethod
    def _coo_endpoints(matrix):
        """Return the (row, col) indices of a sparse matrix as two tensors."""
        coo = matrix.tocoo()
        return t.tensor(coo.row), t.tensor(coo.col)

    def create_graph(self):
        '''
        Build the heterograph from the raw pickles and save it as ``graph.bin``.

        raw_dataset url : https://drive.google.com/drive/folders/1s6LGibPnal6gMld5t63aK4J7hnVkNeDs

        Reads ``data.pkl``, ``distanceMat_addIUUI.pkl`` and ``ICI.pkl`` from
        below ``self.data_path``, writes ``graph.bin`` next to them and points
        ``self.g_path`` at the file that was written.
        '''
        # NOTE(review): ``self.data_path`` is set to './<name>.zip' in
        # __init__, so these paths only resolve if a directory of that name
        # exists. Confirm where the raw pickles are meant to live.
        base_dir = self.data_path

        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file; only run this on raw data obtained from a trusted source.
        with open(os.path.join(base_dir, 'data.pkl'), 'rb') as fs:
            data = pickle.load(fs)
        with open(os.path.join(base_dir, 'distanceMat_addIUUI.pkl'), 'rb') as fs:
            distanceMat = pickle.load(fs)
        with open(os.path.join(base_dir, 'ICI.pkl'), 'rb') as fs:
            itemMat = pickle.load(fs)

        # Only the training matrix and the test pairs of the 5-tuple are used.
        trainMat, testdata, _, _, _ = data
        # The item-item relation is taken from ICI.pkl (itemMat), so the item
        # distance matrix stored in this tuple is not used.
        userDistanceMat, _, uiDistanceMat = distanceMat

        # Columns 0 and 1 of the test data are used as (user, item) endpoints.
        testdata = np.array(testdata)

        graph_data = {
            ('user', 'interact_train', 'item'): self._coo_endpoints(trainMat),
            ('user', 'distance', 'user'): self._coo_endpoints(userDistanceMat),
            ('item', 'distance', 'item'): self._coo_endpoints(itemMat),
            ('user+item', 'distance', 'user+item'): self._coo_endpoints(uiDistanceMat),
            ('user', 'interact_test', 'item'): (t.tensor(testdata[:, 0]), t.tensor(testdata[:, 1]))
        }
        g = dgl.heterograph(graph_data)
        graph_file = os.path.join(base_dir, 'graph.bin')
        dgl.save_graphs(graph_file, g)
        self.g_path = graph_file

    def download(self):
        """Fetch the archive unless it is already on disk, then extract it."""
        # download raw data to local disk
        if not os.path.exists(self.data_path):  # pragma: no cover
            download(self.url, path=self.raw_dir)
        # Extraction runs whether or not the archive had to be downloaded.
        extract_archive(self.data_path, os.path.join(self.raw_dir, self.name))

    def process(self):
        """Load the graph list stored at ``self.g_path`` into ``self._g``."""
        # load_graphs returns a pair; only its first element is kept.
        g, _ = load_graphs(self.g_path)
        self._g = g

    def __getitem__(self, idx):
        """Return the ``idx``-th entry of the loaded graph list."""
        return self._g[idx]

    def __len__(self):
        """Return the number of examples, which is always 1."""
        return 1

    def save(self):
        """No-op: nothing is written to ``self.save_path``."""
        pass

    def load(self):
        """No-op: nothing is read from ``self.save_path``."""
        pass

    def has_cache(self):
        """Report that no processed cache exists.

        ``save``/``load`` are no-ops, so there is never a cache to reuse.
        The original returned ``None`` implicitly; ``False`` is the explicit
        equivalent.
        """
        return False
23 changes: 23 additions & 0 deletions openhgnn/dataset/RecommendationDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .multigraph import MultiGraphDataset
from ..sampler.negative_sampler import Uniform_exclusive
from . import AcademicDataset
from .HGCLDataset import HGCLDataset

# Additional libraries for KGAT
import time
Expand Down Expand Up @@ -51,7 +52,29 @@ def get_train_data(self):
def get_labels(self):
return self.label

@register_dataset('hgcl_recommendation')
class HGCLRecommendation(RecommendationDataset):
    """Recommendation dataset backed by an ``HGCLDataset`` graph.

    Parameters
    ----------
    dataset_name : str
        Name forwarded to ``HGCLDataset``.
    *args, **kwargs
        Forwarded to the parent initializer.
    """

    def __init__(self, dataset_name, *args, **kwargs):
        # ``super(RecommendationDataset, self)`` starts the MRO lookup *after*
        # RecommendationDataset, so RecommendationDataset.__init__ is skipped.
        # NOTE(review): kept as written; confirm the skip is intentional.
        super(RecommendationDataset, self).__init__(*args, **kwargs)
        dataset = HGCLDataset(name=dataset_name, raw_dir='')
        # Convert the first loaded graph with ``.long()`` before storing it.
        self.g = dataset[0].long()

    def get_split(self, validation=True):
        """Randomly split edge ids into 60% train, 20% validation, 20% test.

        Parameters
        ----------
        validation : bool
            Currently unused; a validation split is always returned.

        Returns
        -------
        tuple of torch.Tensor
            ``(train_idx, val_idx, test_idx)``: disjoint slices of one random
            permutation of ``range(self.g.num_edges())``.
        """
        n_edges = self.g.num_edges()
        shuffled = th.randperm(n_edges)
        train_end = int(n_edges * 0.6)
        val_end = int(n_edges * 0.8)

        train_idx = shuffled[:train_end]
        val_idx = shuffled[train_end:val_end]
        # Fix: the test split previously reused the validation slice
        # (60%-80%), leaving the final 20% of the permutation unused.
        test_idx = shuffled[val_end:]

        return train_idx, val_idx, test_idx

    def get_train_data(self):
        """Not implemented for this dataset; returns ``None``."""
        pass

    def get_labels(self):
        """Return ``self.label``.

        NOTE(review): ``label`` is not assigned anywhere in this class;
        presumably a base class or caller sets it - verify before relying on it.
        """
        return self.label

@register_dataset('hin_recommendation')
class HINRecommendation(RecommendationDataset):
def __init__(self, dataset_name, *args, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions openhgnn/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def build_dataset(dataset, task, *args, **kwargs):
_dataset = 'kgcn_recommendation'
elif dataset in ['yelp4rec']:
_dataset = 'hin_' + task
elif dataset in ['Epinions', 'CiaoDVD', 'Yelp']:
_dataset = 'hgcl_recommendation'
elif dataset in ['dblp4Mg2vec_4', 'dblp4Mg2vec_5']:
_dataset = 'hin_' + task
elif dataset == 'demo':
Expand Down
1 change: 1 addition & 0 deletions openhgnn/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class Experiment(object):
'MeiREC': 'MeiREC_trainer',
'KGAT': 'KGAT_trainer'
'SHGP': 'SHGP_trainer'
'HGCL': 'hgcltrainer',
}
immutable_params = ['model', 'dataset', 'task']

Expand Down
Loading

0 comments on commit b440998

Please sign in to comment.