-
Notifications
You must be signed in to change notification settings - Fork 0
/
currentlongstayers.py
112 lines (94 loc) · 3.81 KB
/
currentlongstayers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
__author__ = "David Marienburg"
__version__ = "1.1"
__LastUpdate__ = "4/9/2019"
import pandas as pd
import numpy as np
from datetime import datetime
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import asksaveasfilename
class LongStayersReport:
def __init__(self):
file = askopenfilename(
title="Open the raw Current Long Stayers report",
initialdir="//tproserver/Reports/Monthly Reports/"
)
self.entries = pd.read_excel(file)
self.entries_copy = self.entries.copy()
self.case_workers = pd.read_excel(askopenfilename(title="Open Export Report - Case Worker.xlsx"))
self.today = datetime.today()
self.save_output()
def create_los_columns(self):
# Fill blank values in the Entry Exit Exit Date column with todays date
self.entries_copy["Entry Exit Exit Date"].fillna(self.today, inplace=True)
# Create length of stay columns using the np.timedelta64 method
self.entries_copy["LOS Years"] = (
self.entries_copy["Entry Exit Exit Date"] - self.entries_copy["Entry Exit Entry Date"]
)/np.timedelta64(1, "Y")
self.entries_copy["LOS Days"] = (
self.entries_copy["Entry Exit Exit Date"] - self.entries_copy["Entry Exit Entry Date"]
)/np.timedelta64(1, "D")
# Use the pandas.groupby method to find the total length of stay for
# each participant
grouped = self.entries_copy[
["Client Uid", "LOS Years", "LOS Days"]
].groupby(by="Client Uid").sum()
# Return only rows where the total length of stay is longer than 364 days
return grouped[grouped["LOS Days"] > 364]
def show_current_location(self):
los_data = self.create_los_columns()
# Slice the self.entries dataframe so that it only shows the current
# participants who are currently entered into a shelter
current_stayers = self.entries[self.entries["Entry Exit Exit Date"].isna()]
# Merge the current stayers and los_data dataframes then return the
# using an inner merge.s
output = current_stayers[
[
"Client Uid",
"Client First Name",
"Client Last Name",
"Entry Exit Provider Id",
"Entry Exit Entry Date"
]
].merge(
los_data.reset_index(),
on="Client Uid",
how="inner"
).sort_values(by=["Entry Exit Provider Id", "LOS Years"], ascending=False)
return output
def get_newest_cm(self):
# slice the self.case_workers data frame so that it only shows the
# newest row per client
newest_cm = self.case_workers.sort_values(
by=[
"Client Unique Id",
"Case Worker Date Started"
],
ascending=False
).drop_duplicates(
subset="Client Uid",
keep="first"
)
return newest_cm[["Client Uid", "Case Worker Name"]]
def merge_cm_name(self):
location = self.show_current_location()
cm = self.get_newest_cm()
return location.merge(
cm,
on="Client Uid",
how="left"
)
def save_output(self):
final_data = self.merge_cm_name()
writer = pd.ExcelWriter(
asksaveasfilename(
title="Save the Current Long Stayers report",
defaultextension=".xlsx",
initialfile="Current Long Stayers(Processed).xlsx",
initialdir="//tproserver/Report/Monthly Reports/"
),
engine="xlsxwriter"
)
final_data.to_excel(writer, sheet_name="Current Long Stayers", index=False)
writer.save()
if __name__ == "__main__":
LongStayersReport()