Skip to content

Commit be0f698

Browse files
committed
feat(harvest): notebook to help compare conversions
1 parent 6172918 commit be0f698

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed

notebooks/verify-harvest.ipynb

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import os\n",
10+
"\n",
11+
"import pandas as pd"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"google_file = os.environ[\"HARVEST_GOOGLE_DATA\"]\n",
21+
"local_file = os.environ[\"HARVEST_DATA\"]\n",
22+
"\n",
23+
"df_google = pd.read_csv(google_file)\n",
24+
"df_local = pd.read_csv(local_file)"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": null,
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"df_google.info()"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"df_local.info()"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": null,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"df_google.columns.to_list()"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"df_local.columns.to_list()"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": null,
66+
"metadata": {},
67+
"outputs": [],
68+
"source": [
69+
"df_local.columns.equals(df_google.columns)"
70+
]
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": null,
75+
"metadata": {},
76+
"outputs": [],
77+
"source": [
78+
"df_google[\"Hours\"].sum()"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": null,
84+
"metadata": {},
85+
"outputs": [],
86+
"source": [
87+
"df_local[\"Hours\"].sum()"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"metadata": {},
94+
"outputs": [],
95+
"source": [
96+
"len(df_google)"
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": null,
102+
"metadata": {},
103+
"outputs": [],
104+
"source": [
105+
"len(df_local)"
106+
]
107+
},
108+
{
109+
"cell_type": "code",
110+
"execution_count": null,
111+
"metadata": {},
112+
"outputs": [],
113+
"source": [
114+
"# Total hours per (Project, Last name) pair, keyed by that tuple.\n",
"# One vectorized aggregation replaces the per-key get_group() loop,\n",
"# which sliced the frame again for every group and unpacked two\n",
"# names (project, last_name) that were never used.\n",
"google_groups = df_google.groupby([\"Project\", \"Last name\"])\n",
"google_sums = google_groups[\"Hours\"].sum().to_dict()"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": null,
125+
"metadata": {},
126+
"outputs": [],
127+
"source": [
128+
"# Total hours per (Project, Last name) pair, keyed by that tuple.\n",
"# One vectorized aggregation replaces the per-key get_group() loop,\n",
"# which sliced the frame again for every group and unpacked two\n",
"# names (project, last_name) that were never used.\n",
"local_groups = df_local.groupby([\"Project\", \"Last name\"])\n",
"local_sums = local_groups[\"Hours\"].sum().to_dict()"
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": null,
139+
"metadata": {},
140+
"outputs": [],
141+
"source": [
142+
"len(local_sums) == len(google_sums)"
143+
]
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": null,
148+
"metadata": {},
149+
"outputs": [],
150+
"source": [
151+
"# Summed hours may be floats, and adding the same rows in a different\n",
"# order can change the last bits, so compare within a tolerance\n",
"# instead of with ==.\n",
"TOLERANCE = 1e-9\n",
"\n",
"for k, v in local_sums.items():\n",
"    print(k)\n",
"    assert k in google_sums, f\"{k} is missing from the Google data\"\n",
"    assert abs(v - google_sums[k]) <= TOLERANCE, f\"{k}: {v} != {google_sums[k]}\"\n",
"\n",
"# The loop above only walks the local keys; also fail when a group\n",
"# exists in the Google data but not in the local data.\n",
"only_in_google = set(google_sums) - set(local_sums)\n",
"assert not only_in_google, f\"missing from the local data: {only_in_google}\""
155+
]
156+
}
157+
],
158+
"metadata": {
159+
"kernelspec": {
160+
"display_name": "Python 3",
161+
"language": "python",
162+
"name": "python3"
163+
},
164+
"language_info": {
165+
"codemirror_mode": {
166+
"name": "ipython",
167+
"version": 3
168+
},
169+
"file_extension": ".py",
170+
"mimetype": "text/x-python",
171+
"name": "python",
172+
"nbconvert_exporter": "python",
173+
"pygments_lexer": "ipython3",
174+
"version": "3.11.6"
175+
}
176+
},
177+
"nbformat": 4,
178+
"nbformat_minor": 2
179+
}

0 commit comments

Comments
 (0)