Replace eval metric with lenskit TopN (#15)

Prior values for first 10 users:

```
evaluation using the first 1 is NDCG@5 = 0.5, NDCG@10 = 0.5, RR = 0.3333333333333333
evaluation using the first 2 is NDCG@5 = 0.3391602052736161, NDCG@10 = 0.41039156802332444, RR = 0.2142073313555942
evaluation using the first 3 is NDCG@5 = 1.0, NDCG@10 = 1.0, RR = 1.0
evaluation using the first 4 is NDCG@5 = 0.9197207891481876, NDCG@10 = 0.9197207891481876, RR = 0.6666666666666666
evaluation using the first 5 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.038461538461538464
evaluation using the first 6 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.030303030303030304
evaluation using the first 7 is NDCG@5 = 0.38685280723454163, NDCG@10 = 0.38685280723454163, RR = 0.2
evaluation using the first 8 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.02658371040723982
evaluation using the first 9 is NDCG@5 = 0.43067655807339306, NDCG@10 = 0.43067655807339306, RR = 0.25
evaluation using the first 10 is NDCG@5 = 0.0, NDCG@10 = 0.0, RR = 0.03125
```

Lenskit eval values for first 10 users:

```
evaluation using the first 1 is ndcg5 = 0.6309297535714575, ndcg10 = 0.6309297535714575, mrr = 1.0
evaluation using the first 2 is ndcg5 = 0.2807721888661444, ndcg10 = 0.35123899361230887, mrr = 1.0
evaluation using the first 3 is ndcg5 = 1.0, ndcg10 = 1.0, mrr = 1.0
evaluation using the first 4 is ndcg5 = 0.8154648767857288, ndcg10 = 0.8154648767857288, mrr = 1.0
evaluation using the first 5 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
evaluation using the first 6 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
evaluation using the first 7 is ndcg5 = 0.43067655807339306, ndcg10 = 0.43067655807339306, mrr = 1.0
evaluation using the first 8 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
evaluation using the first 9 is ndcg5 = 0.5, ndcg10 = 0.5, mrr = 1.0
evaluation using the first 10 is ndcg5 = 0.0, ndcg10 = 0.0, mrr = 1.0
```
CCRI-POPROX · Jun 13, 2024 · e2308b2 · e2308b2
1 parent 9d4b28e
commit e2308b2
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 56 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,6 +11,7 @@ requires-python = ">=3.10"
 keywords = []
 authors = [{ name = "Karl Higley", email = "khigley@umn.edu" }]
 dependencies = [
+  "lenskit==0.14.*",
   "nltk>=3.8,<4",
   "numpy>=1.26,<2",
   "pandas==2.*",

diff --git a/src/poprox_recommender/test_json.py → src/poprox_recommender/offline_test.py b/src/poprox_recommender/test_json.py → src/poprox_recommender/offline_test.py
@@ -2,14 +2,15 @@
 import json
 import os
 import sys
-sys.path.append('../')
+from lenskit.metrics import topn
+sys.path.append('src')
 import torch as th
 from safetensors.torch import load_file
 from uuid import UUID
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-from poprox_recommender.domain import Article, ClickHistory
+from poprox_concepts import Article, ClickHistory
 from poprox_recommender.default import select_articles
 from poprox_recommender.paths import project_root
 
@@ -20,7 +21,7 @@ def load_model(device_name=None):
     if device_name is None:
         device_name = "cuda" if th.cuda.is_available() else "cpu"
 
-    load_path = f"/home/XLL1713/POPROX/engine0/NRMS_bert/NRMS_BERT_checkpoint/model.safetensors"
+    load_path = f"{project_root()}/models/model.safetensors"
     checkpoint = load_file(load_path)
 
     return checkpoint, device_name
@@ -30,52 +31,11 @@ def custom_encoder(obj):
     if isinstance(obj, UUID):
         return str(obj)
 
-def compute_mrr(y_pred, y_true):
-    # 关心原本label为1的item在y_pred中的位置 y_pred =list[排好的item id]
-    relevant_items = {item.split('-')[0] for item in y_true if item.endswith('-1')}
-    mrr = 0
-    for rank, pred_item in enumerate(y_pred, start = 1):
-        if pred_item in relevant_items:
-            mrr += (1/rank)
-    return mrr/len(relevant_items)
-
-
-
-def compute_dcg(y_pred, y_true, k=5):
-    '''
-    y_pred: 排好的item顺序
-    y_true:[item-label]
-    '''
-    item_label = {}
-    for pair in y_true:
-        item_label[pair.split('-')[0]] = int(pair.split('-')[1])
-
-    y_true = np.array([item_label[item] for item in y_pred[:k]])
-
-    gains = 2 ** y_true - 1
-    discounts = np.log2(np.arange(len(y_true)) + 2)
-
-    return np.sum(gains / discounts)
-
-
-def compute_ndcg(y_pred, y_true, k):
-    '''
-    y_pred: 排好的item顺序
-    y_true:[item-label]
-    '''
-    # y_true2 作为best的item排序
-    y_true_score = [item.split('-')[1] for item in y_true]
-    order = np.argsort(y_true_score)[::-1]
-    y_true_item = [y_true[index].split('-')[0] for index in order]
-
-    best = compute_dcg(y_true_item, y_true, k)
-    actual = compute_dcg(y_pred, y_true, k)
-    return actual / best
 
 def recsys_metric(recommendations, row_index, news_struuid_ID ):
     # recommendations {account id (uuid): LIST[Article]}
     # use the url of Article
-    impressions_truth = pd.read_table("/home/XLL1713/POPROX/engine0/NRMS_bert/data/mind/large/val/behaviors.tsv",
+    impressions_truth = pd.read_table(f"{project_root()}/data/test_mind_large/behaviors.tsv",
                                     header='infer',
                                     usecols=range(5),
                                     names=[
@@ -86,12 +46,25 @@ def recsys_metric(recommendations, row_index, news_struuid_ID ):
     account_id = list(recommendations.keys())[0]
     recommended_list = recommendations[account_id]
     recommended_list = [news_struuid_ID[item.url] for item in recommended_list]
-
-    single_mrr = compute_mrr(recommended_list, impressions_truth)
-    single_ndcg5 = compute_ndcg(recommended_list, impressions_truth, 5)
-    single_ndcg10 = compute_ndcg(recommended_list, impressions_truth, 10)
-
-    return single_ndcg5, single_ndcg10, single_mrr
+
+
+    recs = pd.DataFrame({
+        'item': recommended_list
+    })
+
+    truth = pd.DataFrame.from_records(
+        (
+            (row.split('-')[0], int(row.split('-')[1]))
+            for row in impressions_truth
+        ),
+        columns=['item', 'rating']
+    ).set_index('item')
+
+    single_rr = topn.recip_rank(recs, truth)
+    single_ndcg5 = topn.ndcg(recs, truth, k=5)
+    single_ndcg10 = topn.ndcg(recs, truth, k=10)
+
+    return single_ndcg5, single_ndcg10, single_rr
 
 if __name__ == '__main__':
     '''
@@ -100,18 +73,18 @@ def recsys_metric(recommendations, row_index, news_struuid_ID ):
     MODEL, DEVICE = load_model()
     TOKEN_MAPPING = 'distilbert-base-uncased'  # can be modified
 
-    with open('/home/XLL1713/POPROX/poprox-recommender/tests/news_uuid_ID.json', 'r') as json_file:
+    with open(f"{project_root()}/data/val_mind_large/news_uuid_ID.json", 'r') as json_file:
         news_struuid_ID = json.load(json_file)
 
     # load the mind test json file
-    with open('/home/XLL1713/POPROX/poprox-recommender/tests/mind_test.json', 'r') as json_file:
+    with open(f"{project_root()}/data/val_mind_large/mind_test.json", 'r') as json_file:
         mind_data = json.load(json_file)
 
     ndcg5 = []
     ndcg10 = []
     mrr = []
 
-    for impression_idx in range(len(mind_data)): # one by one
+    for impression_idx in range(10): # one by one
 
         request_body = mind_data[impression_idx]
 
@@ -135,15 +108,15 @@ def recsys_metric(recommendations, row_index, news_struuid_ID ):
 
         single_ndcg5, single_ndcg10, single_mrr = recsys_metric(recommendations, impression_idx, news_struuid_ID )
         # recommendations {account id (uuid): LIST[Article]}
-        print(f"----------------evaluation using the first {impression_idx + 1} is ndcg5 = {single_ndcg5}, ndcg10 = {single_ndcg10}, mrr = {single_mrr}")
+        print(f"----------------evaluation using the first {impression_idx + 1} is NDCG@5 = {single_ndcg5}, NDCG@10 = {single_ndcg10}, RR = {single_mrr}")
 
         ndcg5.append(single_ndcg5)
         ndcg10.append(single_ndcg10)
         mrr.append(single_mrr)
 
 
 
-    print(f"Offline evaluation metrics on MIND data: ndcg5 = {np.mean(ndcg5)}, ndcg10 = {np.mean(ndcg10)}, mrr = {np.mean(mrr)}")
+    print(f"Offline evaluation metrics on MIND data: NDCG@5 = {np.mean(ndcg5)}, NDCG@10 = {np.mean(ndcg10)}, MRR = {np.mean(mrr)}")
 
 
         #response = {"statusCode": 200, "body": json.dump(body, default=custom_encoder)}