Skip to content

Commit

Permalink
add dl23 passage/doc and dl22 doc to pyserini (#1804)
Browse files Browse the repository at this point in the history
This CL integrates DL22 doc and DL23 doc/passage into Pyserini.
TESTED=added load qrel and load query tests
  • Loading branch information
sahel-sh committed Mar 5, 2024
1 parent 8b21701 commit 84c6742
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pyserini/search/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
'dl22': JTopics.TREC2022_DL,
'dl22-unicoil': JTopics.TREC2022_DL_UNICOIL,
'dl22-unicoil-noexp': JTopics.TREC2022_DL_UNICOIL_NOEXP,
'dl23': JTopics.TREC2023_DL,
'msmarco-doc-dev': JTopics.MSMARCO_DOC_DEV,
'msmarco-doc-dev-unicoil': JTopics.MSMARCO_DOC_DEV_UNICOIL,
'msmarco-doc-dev-unicoil-noexp': JTopics.MSMARCO_DOC_DEV_UNICOIL_NOEXP,
Expand Down Expand Up @@ -407,7 +408,10 @@
'dl20-passage': JQrels.TREC2020_DL_PASSAGE,
'dl21-doc': JQrels.TREC2021_DL_DOC,
'dl21-passage': JQrels.TREC2021_DL_PASSAGE,
'dl22-doc': JQrels.TREC2022_DL_DOC,
'dl22-passage': JQrels.TREC2022_DL_PASSAGE,
'dl23-doc': JQrels.TREC2023_DL_DOC,
'dl23-passage': JQrels.TREC2023_DL_PASSAGE,
'msmarco-doc-dev': JQrels.MSMARCO_DOC_DEV,
'msmarco-passage-dev-subset': JQrels.MSMARCO_PASSAGE_DEV_SUBSET,
'msmarco-v2-doc-dev': JQrels.MSMARCO_V2_DOC_DEV,
Expand Down
19 changes: 19 additions & 0 deletions tests/test_load_qrels.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,12 +358,31 @@ def test_dl21(self):
self.assertFalse(isinstance(next(iter(qrels.keys())), str))

def test_dl22(self):
    # 'dl22-doc' qrels: expect 76 topic entries whose per-topic
    # collections add up to 369638 items, keyed by non-string ids.
    doc_qrels = search.get_qrels('dl22-doc')
    self.assertIsNotNone(doc_qrels)
    self.assertEqual(len(doc_qrels), 76)
    doc_total = sum(len(doc_qrels[topic_id]) for topic_id in doc_qrels)
    self.assertEqual(doc_total, 369638)
    first_doc_key = next(iter(doc_qrels.keys()))
    self.assertFalse(isinstance(first_doc_key, str))

    # 'dl22-passage' qrels: same checks, with 76 topic entries and
    # 386416 items in total.
    passage_qrels = search.get_qrels('dl22-passage')
    self.assertIsNotNone(passage_qrels)
    self.assertEqual(len(passage_qrels), 76)
    passage_total = sum(len(passage_qrels[topic_id]) for topic_id in passage_qrels)
    self.assertEqual(passage_total, 386416)
    first_passage_key = next(iter(passage_qrels.keys()))
    self.assertFalse(isinstance(first_passage_key, str))

def test_dl23(self):
    # 'dl23-doc' qrels: expect 82 topic entries whose per-topic
    # collections add up to 18034 items, keyed by non-string ids.
    doc_qrels = search.get_qrels('dl23-doc')
    self.assertIsNotNone(doc_qrels)
    self.assertEqual(len(doc_qrels), 82)
    doc_total = sum(len(doc_qrels[topic_id]) for topic_id in doc_qrels)
    self.assertEqual(doc_total, 18034)
    first_doc_key = next(iter(doc_qrels.keys()))
    self.assertFalse(isinstance(first_doc_key, str))

    # 'dl23-passage' qrels: same checks, with 82 topic entries and
    # 22327 items in total.
    passage_qrels = search.get_qrels('dl23-passage')
    self.assertIsNotNone(passage_qrels)
    self.assertEqual(len(passage_qrels), 82)
    passage_total = sum(len(passage_qrels[topic_id]) for topic_id in passage_qrels)
    self.assertEqual(passage_total, 22327)
    first_passage_key = next(iter(passage_qrels.keys()))
    self.assertFalse(isinstance(first_passage_key, str))

# Various multi-lingual test collections
def test_ntcir8_zh(self):
qrels = search.get_qrels('ntcir8-zh')
Expand Down
6 changes: 6 additions & 0 deletions tests/test_load_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,12 @@ def test_dl22(self):
self.assertEqual(len(topics), 500)
self.assertFalse(isinstance(next(iter(topics.keys())), str))

def test_dl23(self):
    # The 'dl23' topic set should load with 700 entries, and its keys
    # must not be strings.
    dl23_topics = search.get_topics('dl23')
    self.assertIsNotNone(dl23_topics)
    topic_count = len(dl23_topics)
    self.assertEqual(topic_count, 700)
    first_key = next(iter(dl23_topics.keys()))
    self.assertFalse(isinstance(first_key, str))

# Various multi-lingual test collections
def test_ntcir8_zh(self):
topics = search.get_topics('ntcir8-zh')
Expand Down

0 comments on commit 84c6742

Please sign in to comment.