Skip to content

Commit 3b29d5d

Browse files
committed
feat(harvest): convert notebook to reusable service
1 parent 0ebf403 commit 3b29d5d

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed

compiler_admin/services/harvest.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from datetime import datetime, timedelta
2+
import os
3+
import sys
4+
from typing import TextIO
5+
6+
import pandas as pd
7+
8+
import compiler_admin.services.files as files
9+
10+
# Harvest CSV columns that the conversion reads from the input.
INPUT_COLUMNS = [
    "Date",
    "Client",
    "Project",
    "Notes",
    "Hours",
    "First Name",
    "Last Name",
]

# Columns written to the output CSV unless the caller supplies its own list.
OUTPUT_COLUMNS = [
    "Email",
    "Start date",
    "Start time",
    "Duration",
    "Project",
    "Task",
    "Client",
    "Billable",
    "Description",
]
15+
16+
17+
def _calc_start_time(group: pd.DataFrame):
18+
"""Start time is offset by the previous record's duration, with a default of 0 offset for the first record."""
19+
group["Start time"] = group["Start time"] + group["Duration"].shift(fill_value=pd.to_timedelta("00:00:00")).cumsum()
20+
return group
21+
22+
23+
def _duration_str(duration: timedelta) -> str:
24+
"""Use total seconds to convert to a datetime and format as a string e.g. 01:30."""
25+
return datetime.fromtimestamp(duration.total_seconds()).strftime("%H:%M")
26+
27+
28+
def _toggl_client_name():
29+
"""Gets the value of the TOGGL_CLIENT_NAME env var."""
30+
return os.environ.get("TOGGL_CLIENT_NAME")
31+
32+
33+
def convert_to_toggl(
    source_path: str | TextIO = sys.stdin,
    output_path: str | TextIO = sys.stdout,
    client_name: str | None = None,
    output_cols: list[str] = OUTPUT_COLUMNS,
) -> None:
    """Convert Harvest formatted entries in source_path to equivalent Toggl formatted entries.

    Args:
        source_path: The path to a readable CSV file of Harvest time entries; or a readable buffer of the same.

        output_path: The path to a CSV file where Toggl time entries will be written; or a writeable buffer for the same.

        client_name: The value written to the Client and Project columns of every entry. When None, the value of
            the TOGGL_CLIENT_NAME env var is used.

        output_cols: A list of column names for the output, passed through to files.write_csv.

    Returns:
        None. Either prints the resulting CSV data or writes to output_path.
    """
    if client_name is None:
        client_name = _toggl_client_name()

    # read CSV file, parsing dates
    source = files.read_csv(source_path, usecols=INPUT_COLUMNS, parse_dates=["Date"], cache_dates=True)

    # rename columns that can be imported as-is
    source.rename(columns={"Project": "Task", "Notes": "Description", "Date": "Start date"}, inplace=True)

    # update static calculated columns
    source["Client"] = client_name
    source["Project"] = client_name
    source["Billable"] = "Yes"

    # add the Email column, derived from the lower-cased first name
    source["Email"] = source["First Name"].apply(lambda x: f"{x.lower()}@compiler.la")

    # Convert numeric Hours to timedelta Duration (one vectorized call over the whole column)
    source["Duration"] = pd.to_timedelta(source["Hours"], unit="hours")

    # Default start time to 09:00
    source["Start time"] = pd.to_timedelta("09:00:00")

    user_days = (
        source
        # sort and group by email and date
        .sort_values(["Email", "Start date"])
        .groupby(["Email", "Start date"], observed=False)
        # calculate a start time within each group (excluding the groupby columns)
        .apply(_calc_start_time, include_groups=False)
    )

    # convert timedeltas to duration strings
    user_days["Duration"] = user_days["Duration"].apply(_duration_str)
    user_days["Start time"] = user_days["Start time"].apply(_duration_str)

    # reset the index to get rid of the group multi index and fold the group columns back down,
    # then re-sort by start date/time and user
    output_data = user_days.reset_index()
    output_data.sort_values(["Start date", "Start time", "Email"], inplace=True)

    files.write_csv(output_path, output_data, output_cols)

tests/services/test_harvest.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import sys
2+
from datetime import timedelta
3+
from io import StringIO
4+
5+
import numpy as np
6+
import pandas as pd
7+
import pytest
8+
9+
import compiler_admin.services.harvest
10+
from compiler_admin.services.harvest import (
11+
__name__ as MODULE,
12+
files,
13+
INPUT_COLUMNS,
14+
OUTPUT_COLUMNS,
15+
_calc_start_time,
16+
_duration_str,
17+
_toggl_client_name,
18+
convert_to_toggl,
19+
)
20+
21+
22+
@pytest.fixture(autouse=True)
def mock_environment(monkeypatch):
    """Set TOGGL_CLIENT_NAME to a known value; autouse, so it applies without being requested."""
    env_var, env_value = "TOGGL_CLIENT_NAME", "Test_Client"
    monkeypatch.setenv(env_var, env_value)
25+
26+
27+
@pytest.fixture
def spy_files(mocker):
    """Patch the harvest module's `files` attribute with a mock that wraps the real `files` module."""
    harvest_module = compiler_admin.services.harvest
    return mocker.patch.object(harvest_module, "files", wraps=files)
30+
31+
32+
@pytest.fixture
def mock_toggl_client_name(mocker):
    """Patch the harvest module's _toggl_client_name helper and return the resulting mock."""
    target = f"{MODULE}._toggl_client_name"
    return mocker.patch(target)
35+
36+
37+
def test_calc_start_time():
    """Each start time is 09:00 plus the sum of all preceding durations in the frame."""
    durations = pd.to_timedelta(np.arange(1, 6), unit="m")
    nine_am = pd.to_timedelta("09:00:00")
    df = pd.DataFrame(data={"Duration": durations, "Start time": [nine_am] * len(durations)})

    # Capture expectations before the call: if the helper modifies df in place, comparing the
    # result against df afterwards would compare an object with itself and could never fail.
    expected_columns = df.columns.copy()
    expected_durations = df["Duration"].copy()

    calc_df = _calc_start_time(df)

    assert calc_df.columns.equals(expected_columns)
    assert calc_df["Duration"].equals(expected_durations)
    assert calc_df["Start time"].to_list() == [
        # offset = 0, cumsum = 0
        pd.to_timedelta("09:00:00"),
        # offset = 1, cumsum = 1
        pd.to_timedelta("09:01:00"),
        # offset = 2, cumsum = 3
        pd.to_timedelta("09:03:00"),
        # offset = 3, cumsum = 6
        pd.to_timedelta("09:06:00"),
        # offset = 4, cumsum = 10
        pd.to_timedelta("09:10:00"),
    ]
57+
58+
59+
def test_duration_str():
    """A 1h30m15s duration is rendered as the string "01:30"."""
    formatted = _duration_str(timedelta(hours=1, minutes=30, seconds=15))

    assert isinstance(formatted, str)
    assert formatted == "01:30"
66+
67+
68+
def test_toggl_client_name(monkeypatch):
    """The helper reflects the current TOGGL_CLIENT_NAME value, including later changes to it."""
    initial = _toggl_client_name()
    assert initial == "Test_Client"

    monkeypatch.setenv("TOGGL_CLIENT_NAME", "New Test Client")
    updated = _toggl_client_name()

    assert updated == "New Test Client"
74+
75+
76+
def test_convert_to_toggl_mocked(harvest_file, spy_files, mock_toggl_client_name):
    """With client_name=None the env helper is consulted and files.read_csv/write_csv are each called once."""
    convert_to_toggl(harvest_file, client_name=None)

    mock_toggl_client_name.assert_called_once()

    # the source is read once, positionally, with the expected parsing options
    spy_files.read_csv.assert_called_once()
    read_call = spy_files.read_csv.call_args
    assert read_call.args == (harvest_file,)
    read_kwargs = read_call.kwargs
    assert read_kwargs["usecols"] == INPUT_COLUMNS
    assert read_kwargs["parse_dates"] == ["Date"]
    assert read_kwargs["cache_dates"] is True

    # the result is written once, to stdout, with the default output columns
    spy_files.write_csv.assert_called_once()
    write_args = spy_files.write_csv.call_args.args
    assert write_args[0] == sys.stdout
    assert write_args[2] == OUTPUT_COLUMNS
92+
93+
94+
def test_convert_to_toggl_sample(harvest_file, toggl_file):
    """Convert the sample Harvest file into a buffer and compare the CSV against the sample Toggl file."""
    with StringIO() as buffer:
        convert_to_toggl(harvest_file, buffer, "Test Client 123")
        csv_text = buffer.getvalue()

    assert csv_text
    assert isinstance(csv_text, str)
    assert ",".join(OUTPUT_COLUMNS) in csv_text

    sort_keys = ["Start date", "Start time", "Email"]
    expected_df = pd.read_csv(toggl_file).sort_values(sort_keys)
    actual_df = pd.read_csv(StringIO(csv_text)).sort_values(sort_keys)

    # every produced column must also exist in the sample output
    assert set(actual_df.columns.to_list()).issubset(expected_df.columns.to_list())
    assert (actual_df["Client"] == "Test Client 123").all()
    assert (actual_df["Project"] == "Test Client 123").all()

0 commit comments

Comments
 (0)