Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Commit

Permalink
Abstract class for compiling crowdsourcing data with MephistoDataBrow…
Browse files Browse the repository at this point in the history
…ser (#3422)

* Added a general argument and function that will be used often with Mephisto tasks.

* Moved Mephisto task functionalities to the new AbstractDataBrowserResultsCompiler class.

* Fixed a bug: the List type was not imported.

* Moved data browser to a class variable and added the function to access/load it.

* Adding task_name in the abstract class.

* Added get_units_data method, to get unit data.

* Removed some extra parts that were added by mistake after the merge.

* Added access method for mephisto db.

* Worker name from worker id

* Debug: the parent class flags were skipped.

* Added the docstring to class.

* Removed the redundant function that was added after the merging.

* Addressed the review comments.

* Added crowdsourcing to the install requirements for unittests_37

* reformat

* Removed the changes in the test that were not necessary.

* moving class definition out of try, only imports stay in try

* specified Except Error type

Co-authored-by: mojtaba-komeili <komeili@devfair0169.h2.fair>
  • Loading branch information
mojtaba-komeili and mojtaba-komeili committed Feb 3, 2021
1 parent 042a2e3 commit 15fbf55
Showing 1 changed file with 67 additions and 2 deletions.
69 changes: 67 additions & 2 deletions parlai/crowdsourcing/utils/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,21 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

import argparse
from abc import ABC, abstractmethod
from typing import Any, Dict

from typing import Any, Dict, List
import pandas as pd

# Mephisto is an optional dependency: only its imports are guarded here. Classes that
# rely on Mephisto are still defined below and need it installed when actually used.
try:
from mephisto.abstractions.databases.local_database import LocalMephistoDB
from mephisto.data_model.unit import Unit
from mephisto.tools.data_browser import DataBrowser as MephistoDataBrowser
except ImportError:
pass


class AbstractResultsCompiler(ABC):
"""
Expand Down Expand Up @@ -79,3 +88,59 @@ def __init__(self, opt: Dict[str, Any]):
raise ValueError(
'There must be a "none_all_good" category in self.problem_buckets!'
)


class AbstractDataBrowserResultsCompiler(AbstractResultsCompiler):
    """
    Provides interface for using Mephisto's DataBrowser, DB, and their methods.

    Uses Mephisto's DataBrowser to retrieve the work units and their data. The
    Mephisto database handle and the data browser are created lazily on first
    access and then reused by every later call on the same instance.

    Mephisto is imported optionally at module level, so the accessors below only
    work when Mephisto is installed.
    """

    @classmethod
    def setup_args(cls):
        """
        Return the parent class's argument parser extended with ``--task-name``.
        """
        parser = super().setup_args()
        parser.add_argument(
            '--task-name', type=str, help='Name of the Mephisto task to open'
        )
        return parser

    def __init__(self, opt: Dict[str, Any]):
        """
        Store the task name and set up the lazily-created Mephisto handles.

        :param opt: parsed options; must contain the ``task_name`` key, otherwise
            a ``KeyError`` is raised.
        """
        # setup_args() chains to the parent's flags, so the parent constructor
        # must also run to consume them; skipping it leaves parent state unset.
        super().__init__(opt)
        self.task_name = opt["task_name"]
        # Both handles are created on demand by their accessor methods.
        self._mephisto_db = None
        self._mephisto_data_browser = None

    def get_mephisto_data_browser(self) -> MephistoDataBrowser:
        """
        Return the cached Mephisto DataBrowser, creating it on first use.
        """
        # Compare against None explicitly so the cache check never depends on
        # the truthiness of the cached object.
        if self._mephisto_data_browser is None:
            db = self.get_mephisto_db()
            self._mephisto_data_browser = MephistoDataBrowser(db=db)
        return self._mephisto_data_browser

    def get_mephisto_db(self) -> LocalMephistoDB:
        """
        Return the cached LocalMephistoDB, creating it on first use.
        """
        if self._mephisto_db is None:
            self._mephisto_db = LocalMephistoDB()
        return self._mephisto_db

    def get_worker_name(self, worker_id: str) -> str:
        """
        Gets the global (AWS) id of a worker from their Mephisto worker_id.
        """
        db = self.get_mephisto_db()
        return db.get_worker(worker_id)["worker_name"]

    def get_task_units(self, task_name: str) -> List[Unit]:
        """
        Retrieves the list of work units from the Mephisto task.

        :param task_name: name of the Mephisto task whose units are requested.
        """
        data_browser = self.get_mephisto_data_browser()
        return data_browser.get_units_for_task_name(task_name)

    def get_units_data(self, task_units: List[Unit]) -> List[dict]:
        """
        Retrieves task data for a list of Mephisto task units.

        :param task_units: units to look up, e.g. the result of get_task_units.
        :return: one data entry per unit, in the same order as ``task_units``.
        """
        data_browser = self.get_mephisto_data_browser()
        return [data_browser.get_data_from_unit(unit) for unit in task_units]

0 comments on commit 15fbf55

Please sign in to comment.