From 15fbf55f32e64722c452c907425e10fdb977f62e Mon Sep 17 00:00:00 2001 From: Mojtaba <11262163+mojtaba-komeili@users.noreply.github.com> Date: Wed, 3 Feb 2021 09:06:52 -0800 Subject: [PATCH] Abstract class for compiling crowdsourcing data with MephistoDataBrowser (#3422) * Added a general argument and function that will be used often with Mephisto tasks. * Moved Mephisto task functionalities to the new AbstractDataBrowserResultsCompiler class. * Fixed a bug, List type was not imported. * Moved data browser to a class variable and added the function to access/load it. * Adding task_name in the abstract class. * Added get_units_data method, to get unit data. * Removed some extra parts that were added by mistake after the merge. * Added access method for mephisto db. * Worker name from worker id * Debug: the parent class flags were skipped. * Added the docstring to class. * Removed the redundant function that was added after the merging. * Addressed the review comments. * Added crowdsourcing to the install requirements for unittests_37 * reformat * Removed the changes in test that were not necessary. * moving class definition out of try, only imports stay in try * specified Except Error type Co-authored-by: mojtaba-komeili --- parlai/crowdsourcing/utils/analysis.py | 69 +++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/parlai/crowdsourcing/utils/analysis.py b/parlai/crowdsourcing/utils/analysis.py index 3174d5f0c2c..90706a4cc5a 100644 --- a/parlai/crowdsourcing/utils/analysis.py +++ b/parlai/crowdsourcing/utils/analysis.py @@ -4,12 +4,21 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
+from __future__ import annotations + import argparse from abc import ABC, abstractmethod -from typing import Any, Dict - +from typing import Any, Dict, List import pandas as pd +# Defining the class only if Mephisto is installed, since it relies on Mephisto +try: + from mephisto.abstractions.databases.local_database import LocalMephistoDB + from mephisto.data_model.unit import Unit + from mephisto.tools.data_browser import DataBrowser as MephistoDataBrowser +except ImportError: + pass + class AbstractResultsCompiler(ABC): """ @@ -79,3 +88,59 @@ def __init__(self, opt: Dict[str, Any]): raise ValueError( 'There must be a "none_all_good" category in self.problem_buckets!' ) + + +class AbstractDataBrowserResultsCompiler(AbstractResultsCompiler): + """ + Provides interface for using Mephisto's DataBrowser, DB, and their methods. + + Uses Mephisto's DataBrowser to retrieve the work units and their data. + """ + + @classmethod + def setup_args(cls): + parser = super().setup_args() + parser.add_argument( + '--task-name', type=str, help='Name of the Mephisto task to open' + ) + return parser + + def __init__(self, opt): + self.task_name = opt["task_name"] + self._mephisto_db = None + self._mephisto_data_browser = None + + def get_mephisto_data_browser(self) -> MephistoDataBrowser: + if not self._mephisto_data_browser: + db = self.get_mephisto_db() + self._mephisto_data_browser = MephistoDataBrowser(db=db) + return self._mephisto_data_browser + + def get_mephisto_db(self) -> LocalMephistoDB: + if not self._mephisto_db: + self._mephisto_db = LocalMephistoDB() + return self._mephisto_db + + def get_worker_name(self, worker_id: str) -> str: + """ + Gets the global (AWS) id of a worker from their Mephisto worker_id. + """ + db = self.get_mephisto_db() + return db.get_worker(worker_id)["worker_name"] + + def get_task_units(self, task_name: str) -> List[Unit]: + """ + Retrieves the list of work units from the Mephisto task. 
+ """ + data_browser = self.get_mephisto_data_browser() + return data_browser.get_units_for_task_name(task_name) + + def get_units_data(self, task_units: List[Unit]) -> List[dict]: + """ + Retrieves task data for a list of Mephisto task units. + """ + data_browser = self.get_mephisto_data_browser() + task_data = [] + for unit in task_units: + task_data.append(data_browser.get_data_from_unit(unit)) + return task_data