Skip to content

Commit

Permalink
Replace eval metric with lenskit TopN (#15)
Browse files Browse the repository at this point in the history
Prior values for first 10 users:
```
evaluation using the first 1 is NDCG@5 = 0.5, NDCG@10 = 0.5, RR = 0.3333333333333333
evaluation using the first 2 is NDCG@5 = 0.3391602052736161, NDCG@10 = 0.41039156802332444, RR = 0.2142073313555942
evaluation using the first 3 is NDCG@5 = 1.0, NDCG@10 = 1.0, RR = 1.0
evaluation using the first 4 is NDCG@5 = 0.9197207891481876, NDCG@10 = 0.9197207891481876, RR = 0.6666666666666666
evaluation using the first 5 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.038461538461538464
evaluation using the first 6 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.030303030303030304
evaluation using the first 7 is NDCG@5 = 0.38685280723454163, NDCG@10 = 0.38685280723454163, RR = 0.2
evaluation using the first 8 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.02658371040723982
evaluation using the first 9 is NDCG@5 = 0.43067655807339306, NDCG@10 = 0.43067655807339306, RR = 0.25
evaluation using the first 10 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.03125
```

Lenskit eval values for first 10 users:
```
evaluation using the first 1 is ndcg5 = 0.6309297535714575, ndcg10 = 0.6309297535714575, mrr = 1.0
evaluation using the first 2 is ndcg5 = 0.2807721888661444, ndcg10 = 0.35123899361230887, mrr = 1.0
evaluation using the first 3 is ndcg5 = 1.0, ndcg10 = 1.0, mrr = 1.0
evaluation using the first 4 is ndcg5 = 0.8154648767857288, ndcg10 = 0.8154648767857288, mrr = 1.0
evaluation using the first 5 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
evaluation using the first 6 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
evaluation using the first 7 is ndcg5 = 0.43067655807339306, ndcg10 = 0.43067655807339306, mrr = 1.0
evaluation using the first 8 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
evaluation using the first 9 is ndcg5 = 0.5, ndcg10 = 0.5, mrr = 1.0
evaluation using the first 10 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
```
  • Loading branch information
sophiasun0515 committed Jun 13, 2024
1 parent 9d4b28e commit e2308b2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 56 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ requires-python = ">=3.10"
keywords = []
authors = [{ name = "Karl Higley", email = "khigley@umn.edu" }]
dependencies = [
"lenskit==0.14.*",
"nltk>=3.8,<4",
"numpy>=1.26,<2",
"pandas==2.*",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import json
import os
import sys
sys.path.append('../')
from lenskit.metrics import topn
sys.path.append('src')
import torch as th
from safetensors.torch import load_file
from uuid import UUID
import numpy as np
import pandas as pd
from tqdm import tqdm
from poprox_recommender.domain import Article, ClickHistory
from poprox_concepts import Article, ClickHistory
from poprox_recommender.default import select_articles
from poprox_recommender.paths import project_root

Expand All @@ -20,7 +21,7 @@ def load_model(device_name=None):
if device_name is None:
device_name = "cuda" if th.cuda.is_available() else "cpu"

load_path = f"/home/XLL1713/POPROX/engine0/NRMS_bert/NRMS_BERT_checkpoint/model.safetensors"
load_path = f"{project_root()}/models/model.safetensors"
checkpoint = load_file(load_path)

return checkpoint, device_name
Expand All @@ -30,52 +31,11 @@ def custom_encoder(obj):
if isinstance(obj, UUID):
return str(obj)

def compute_mrr(y_pred, y_true):
    """Average the reciprocal ranks of the relevant items in a ranking.

    Only items labelled ``1`` in ``y_true`` matter; the score depends on
    where those items appear in ``y_pred``.

    Note that this sums ``1 / rank`` over *every* relevant item found in
    ``y_pred`` and divides by the number of relevant items. It is therefore
    not the classic reciprocal rank, which looks only at the first hit.

    Args:
        y_pred: Item ids in ranked order (best first).
        y_true: Strings of the form ``"<item id>-<label>"``; an item is
            relevant when its entry ends with ``"-1"``.

    Returns:
        The averaged reciprocal rank, or ``0.0`` when ``y_true`` contains no
        relevant item (the original code divided by zero in that case).
    """
    # Split on the LAST hyphen so item ids that contain '-' stay intact.
    relevant_items = {
        pair.rsplit('-', 1)[0] for pair in y_true if pair.endswith('-1')
    }
    if not relevant_items:
        return 0.0

    total = sum(
        1 / rank
        for rank, pred_item in enumerate(y_pred, start=1)
        if pred_item in relevant_items
    )
    return total / len(relevant_items)



def compute_dcg(y_pred, y_true, k=5):
    """Compute the discounted cumulative gain of the top-``k`` ranked items.

    Uses the exponential gain ``2 ** label - 1`` and the ``log2(rank + 1)``
    discount (rank starting at 1).

    Args:
        y_pred: Item ids in ranked order (best first).
        y_true: Strings of the form ``"<item id>-<label>"`` with an integer
            label.
        k: Number of leading items of ``y_pred`` to score.

    Returns:
        The DCG as a NumPy float; ``0.0`` when no item is scored.
    """
    item_label = {}
    for pair in y_true:
        # Split once, on the last hyphen, so ids containing '-' survive.
        item, label = pair.rsplit('-', 1)
        item_label[item] = int(label)

    # A ranked item that is absent from y_true contributes zero gain instead
    # of raising KeyError.
    labels = np.array(
        [item_label.get(item, 0) for item in y_pred[:k]], dtype=float
    )

    gains = 2 ** labels - 1
    discounts = np.log2(np.arange(len(labels)) + 2)

    return np.sum(gains / discounts)


def compute_ndcg(y_pred, y_true, k):
    """Compute NDCG@k: the DCG of ``y_pred`` divided by the ideal DCG.

    The ideal ranking is obtained by ordering the items of ``y_true`` by
    descending label.

    Args:
        y_pred: Item ids in ranked order (best first).
        y_true: Strings of the form ``"<item id>-<label>"`` with an integer
            label.
        k: Cut-off passed through to ``compute_dcg``.

    Returns:
        ``actual / best``, or ``0.0`` when the ideal DCG is zero (no relevant
        item), where the original produced NaN from a 0/0 division.
    """
    # Compare labels as integers: sorting the raw label strings is
    # lexicographic and would rank "10" below "2".
    ideal_pairs = sorted(
        y_true,
        key=lambda pair: int(pair.rsplit('-', 1)[1]),
        reverse=True,
    )
    ideal_items = [pair.rsplit('-', 1)[0] for pair in ideal_pairs]

    best = compute_dcg(ideal_items, y_true, k)
    if best == 0:
        return 0.0

    actual = compute_dcg(y_pred, y_true, k)
    return actual / best

def recsys_metric(recommendations, row_index, news_struuid_ID ):
# recommendations {account id (uuid): LIST[Article]}
# use the url of Article
impressions_truth = pd.read_table("/home/XLL1713/POPROX/engine0/NRMS_bert/data/mind/large/val/behaviors.tsv",
impressions_truth = pd.read_table(f"{project_root()}/data/test_mind_large/behaviors.tsv",
header='infer',
usecols=range(5),
names=[
Expand All @@ -86,12 +46,25 @@ def recsys_metric(recommendations, row_index, news_struuid_ID ):
account_id = list(recommendations.keys())[0]
recommended_list = recommendations[account_id]
recommended_list = [news_struuid_ID[item.url] for item in recommended_list]

single_mrr = compute_mrr(recommended_list, impressions_truth)
single_ndcg5 = compute_ndcg(recommended_list, impressions_truth, 5)
single_ndcg10 = compute_ndcg(recommended_list, impressions_truth, 10)

return single_ndcg5, single_ndcg10, single_mrr


recs = pd.DataFrame({
'item': recommended_list
})

truth = pd.DataFrame.from_records(
(
(row.split('-')[0], int(row.split('-')[1]))
for row in impressions_truth
),
columns=['item', 'rating']
).set_index('item')

single_rr = topn.recip_rank(recs, truth)
single_ndcg5 = topn.ndcg(recs, truth, k=5)
single_ndcg10 = topn.ndcg(recs, truth, k=10)

return single_ndcg5, single_ndcg10, single_rr

if __name__ == '__main__':
'''
Expand All @@ -100,18 +73,18 @@ def recsys_metric(recommendations, row_index, news_struuid_ID ):
MODEL, DEVICE = load_model()
TOKEN_MAPPING = 'distilbert-base-uncased' # can be modified

with open('/home/XLL1713/POPROX/poprox-recommender/tests/news_uuid_ID.json', 'r') as json_file:
with open(f"{project_root()}/data/val_mind_large/news_uuid_ID.json", 'r') as json_file:
news_struuid_ID = json.load(json_file)

# load the mind test json file
with open('/home/XLL1713/POPROX/poprox-recommender/tests/mind_test.json', 'r') as json_file:
with open(f"{project_root()}/data/val_mind_large/mind_test.json", 'r') as json_file:
mind_data = json.load(json_file)

ndcg5 = []
ndcg10 = []
mrr = []

for impression_idx in range(len(mind_data)): # one by one
for impression_idx in range(10): # one by one

request_body = mind_data[impression_idx]

Expand All @@ -135,15 +108,15 @@ def recsys_metric(recommendations, row_index, news_struuid_ID ):

single_ndcg5, single_ndcg10, single_mrr = recsys_metric(recommendations, impression_idx, news_struuid_ID )
# recommendations {account id (uuid): LIST[Article]}
print(f"----------------evaluation using the first {impression_idx + 1} is ndcg5 = {single_ndcg5}, ndcg10 = {single_ndcg10}, mrr = {single_mrr}")
print(f"----------------evaluation using the first {impression_idx + 1} is NDCG@5 = {single_ndcg5}, NDCG@10 = {single_ndcg10}, RR = {single_mrr}")

ndcg5.append(single_ndcg5)
ndcg10.append(single_ndcg10)
mrr.append(single_mrr)



print(f"Offline evaluation metrics on MIND data: ndcg5 = {np.mean(ndcg5)}, ndcg10 = {np.mean(ndcg10)}, mrr = {np.mean(mrr)}")
print(f"Offline evaluation metrics on MIND data: NDCG@5 = {np.mean(ndcg5)}, NDCG@10 = {np.mean(ndcg10)}, MRR = {np.mean(mrr)}")


#response = {"statusCode": 200, "body": json.dump(body, default=custom_encoder)}
Expand Down

0 comments on commit e2308b2

Please sign in to comment.