Skip to content

Commit

Permalink
add limit to fix too much data problem
Browse files Browse the repository at this point in the history
  • Loading branch information
Bruno Rodrigues committed Mar 17, 2022
1 parent c08e568 commit c44ad6a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 59 deletions.
26 changes: 15 additions & 11 deletions todoist_analytics/backend/data_collector.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import numpy as np
import pandas as pd
import streamlit as st
import todoist

from todoist_analytics.frontend.colorscale import color_code_to_hex
import numpy as np


class DataCollector:
def __init__(self, token):
Expand All @@ -11,6 +12,7 @@ def __init__(self, token):
self.projects = pd.DataFrame()
self.api = todoist.TodoistAPI(self.token)
self.api.sync()
self.current_offset = 0

def get_user_timezone(self):
self.tz = self.api.state["user"]["tz_info"]["timezone"]
Expand All @@ -20,17 +22,18 @@ def _collect_active_tasks(self):

def _collect_completed_tasks(self, limit, offset):
    """Fetch one page of completed tasks and store it when it is non-empty.

    Args:
        limit: Page size, forwarded to ``self.api.completed.get_all``.
        offset: Page start, forwarded to ``self.api.completed.get_all``.
    """
    data = self.api.completed.get_all(limit=limit, offset=offset)

    # Skip empty pages: preprocessing indexes the "completed_date" column,
    # which a frame built from an empty item list does not have.
    if not data["items"]:
        return

    # Append exactly once per page so rows are never stored twice.
    self._append_to_properties(data)

def _append_to_properties(self, data):
    """Preprocess one API page and add it to the accumulated frames.

    Args:
        data: Mapping with an ``"items"`` entry (completed tasks) and a
            ``"projects"`` entry (mapping whose values become project rows).
    """
    preprocessed_items, preprocessed_projects = self._preprocess_completed_tasks(
        pd.DataFrame(data["items"]),
        pd.DataFrame.from_dict(data["projects"], orient="index"),
    )

    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in 2.0; the defaults keep the original index labels, exactly
    # as append did.
    self.items = pd.concat([self.items, preprocessed_items])
    self.projects = pd.concat([self.projects, preprocessed_projects])

def _collect_all_completed_tasks(self, limit=2000):
def _collect_all_completed_tasks(self, limit=10000):
"""
gets all the tasks and stores it
this function may take too long to complete and timeout,
Expand Down Expand Up @@ -70,16 +73,17 @@ def _collect_active_tasks(self):
]
self.active_tasks = self.active_tasks[keep_columns]
self.active_tasks = self.active_tasks.loc[self.active_tasks["checked"] == 0]

def _preprocess_completed_tasks(self, completed_tasks, projects):

projects = projects.rename({"id": "project_id"}, axis=1)

completed_tasks["datehour_completed"] = pd.to_datetime(
completed_tasks["completed_date"]
)

self.get_user_timezone()

completed_tasks["datehour_completed"] = pd.DatetimeIndex(
completed_tasks["datehour_completed"]
).tz_convert(self.tz)
Expand Down
3 changes: 2 additions & 1 deletion todoist_analytics/backend/session_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
>>> session_state.user_name
'Mary'
"""
import sys

import streamlit.report_thread as ReportThread
from streamlit.server.server import Server
import sys


class SessionState(object):
Expand Down
47 changes: 0 additions & 47 deletions todoist_analytics/backend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,53 +7,6 @@
from todoist_analytics.frontend.colorscale import color_code_to_hex


def preprocess(dc: DataCollector, timezone: str = "America/Sao_Paulo") -> DataFrame:
    """Build the analysis-ready table of completed tasks.

    Converts completion timestamps to ``timezone``, derives the completion
    date and weekday, attaches project name/color information, flags tasks
    completed on more than one distinct date as recurrent, and resolves each
    project color code to a hex string.

    Args:
        dc: Collector exposing ``items`` (completed tasks) and ``projects``
            data frames.
        timezone: Time zone name the completion timestamps are converted to.
            Defaults to the previously hard-coded "America/Sao_Paulo".

    Returns:
        A de-duplicated data frame of completed tasks with a fresh index.
    """
    # Work on a copy: the columns written below would otherwise overwrite
    # ``dc.items`` in place, and a second call on the same collector would
    # fail because "completed_date" would already hold plain dates.
    completed_tasks = dc.items.copy()
    projects = dc.projects.rename({"id": "project_id"}, axis=1)

    completed_tasks["datehour_completed"] = pd.to_datetime(
        completed_tasks["completed_date"]
    )
    completed_tasks["datehour_completed"] = pd.DatetimeIndex(
        completed_tasks["datehour_completed"]
    ).tz_convert(timezone)

    # Date and weekday are taken from the localized timestamp so they
    # reflect the requested time zone.
    localized = pd.to_datetime(completed_tasks["datehour_completed"])
    completed_tasks["completed_date"] = localized.dt.date
    completed_tasks["completed_date_weekday"] = localized.dt.day_name()

    completed_tasks = completed_tasks.merge(
        projects[["project_id", "name", "color", "inbox_project"]],
        how="left",
        on="project_id",
    )
    completed_tasks = completed_tasks.rename({"name": "project_name"}, axis=1)

    # Recurrent flag: a task id completed on more than one distinct date.
    # (The original author noted this heuristic is not a good implementation.)
    completed_date_count = completed_tasks.groupby("task_id").agg(
        {"completed_date": "nunique"}
    )
    completed_date_count["isRecurrent"] = np.where(
        completed_date_count["completed_date"] > 1, 1, 0
    )
    completed_date_count = completed_date_count.drop(columns="completed_date")

    completed_tasks = completed_tasks.merge(
        completed_date_count, left_on="task_id", right_index=True
    )

    completed_tasks["hex_color"] = completed_tasks["color"].apply(
        lambda code: color_code_to_hex[int(code)]["hex"]
    )

    return completed_tasks.drop_duplicates().reset_index(drop=True)


def create_color_palette(completed_tasks: DataFrame):
project_id_color = pd.Series(
completed_tasks.hex_color.values, index=completed_tasks.project_name
Expand Down

0 comments on commit c44ad6a

Please sign in to comment.