-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_utils.py
344 lines (257 loc) · 13.9 KB
/
test_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
import pytest
from pandas._testing import assert_frame_equal, assert_series_equal
import pathlib
import pandas as pd
# Directory of this test module, and the absolute path of the "test_data"
# folder next to it, which holds the CSV fixtures.
PATH = pathlib.Path(__file__).parent
DATA_PATH = (PATH / "test_data").resolve()
from utils import functions as utils
from utils import config as config
# Load the CSV fixtures once at import time; the tests below use these frames
# as their inputs. DATA_PATH is already a pathlib.Path, so each file path is
# built with the "/" operator rather than by string concatenation.
df_input = pd.read_csv(DATA_PATH / "test_full_data.csv")
df_input1 = pd.read_csv(DATA_PATH / "test_data1.csv")
df_input2 = pd.read_csv(DATA_PATH / "test_data2.csv")
df_input3 = pd.read_csv(DATA_PATH / "test_data3.csv")
df_input4 = pd.read_csv(DATA_PATH / "test_data4.csv")
def test_read_cassandra():
    """
    Placeholder: no assertions have been written for this test yet.

    The test is reported as skipped instead of passing silently, so the test
    report does not suggest coverage that does not exist.
    """
    pytest.skip("test_read_cassandra is not implemented yet")
def test_get_total_counts():
    """
    Verify the five totals that utils.get_total_counts reports for df_input1:
    patients, readings, alerts, emergencies and warnings.
    """
    # The totals are unpacked in the order the function returns them.
    patients, readings, alerts, emergencies, warnings = utils.get_total_counts(df_input1)

    observed = {
        "patients": patients,
        "readings": readings,
        "alerts": alerts,
        "emergencies": emergencies,
        "warnings": warnings,
    }
    expected = {
        "patients": 100,
        "readings": 2981,
        "alerts": 2981,
        "emergencies": 2044,
        "warnings": 937,
    }
    assert observed == expected
def test_get_alert_counts():
    """
    Verify the per-alert counts that utils.get_alert_counts reports for
    df_input1: hypertension, hypothermia, hyperthermia, fever, hyperglycemia
    and tachycardia.
    """
    (hypertension, hypothermia, hyperthermia,
     fever, hyperglycemia, tachycardia) = utils.get_alert_counts(df_input1)

    # Same order as the unpacking above.
    observed = (hypertension, hypothermia, hyperthermia, fever, hyperglycemia, tachycardia)
    assert observed == (185, 642, 1, 936, 1216, 1)
def test_create_alert_table_instance():
    """
    Check that utils.create_alert_table returns a pandas DataFrame when it is
    given df_input2.
    """
    assert isinstance(utils.create_alert_table(df_input2), pd.DataFrame)
def test_create_alert_table_columns():
    """
    Check that utils.create_alert_table returns exactly the columns timestamp,
    name, phone, alert, latitude and longitude, in that order.
    """
    actual_df = utils.create_alert_table(df_input2)

    expected_columns = ["timestamp", "name", "phone", "alert", "latitude", "longitude"]
    # Only the column labels are compared, so no expected-result CSV is loaded;
    # the labels are read straight from .columns.
    assert list(actual_df.columns) == expected_columns
def test_create_alert_table_matching():
    """
    Check that utils.create_alert_table applied to df_input2 produces a frame
    equal to the one stored in result_data2.csv.
    """
    # DATA_PATH is a pathlib.Path, so join with "/" instead of concatenating strings.
    expected_df = pd.read_csv(DATA_PATH / "result_data2.csv")
    actual_df = utils.create_alert_table(df_input2)
    # pandas' own comparison helper raises AssertionError when the frames differ.
    assert_frame_equal(actual_df, expected_df)
def test_filter_dataframe_instance():
    """
    Check that utils.filter_dataframe returns a pandas DataFrame.
    """
    # Filter settings: every disease, status and gender from config, plus the
    # numeric [low, high] ranges below.
    filters = dict(
        disease=config.diseases,
        status=config.statuses,
        age=[50, 90],
        gender=config.genders,
        bmi=[19, 30],
        temperature=[34.0, 36.9],
        heartrate=[40, 200],
        bloodsugar=[35, 200],
        systolic=[100, 140],
        diastolic=[40, 100],
    )
    filtered = utils.filter_dataframe(df=df_input, **filters)
    assert isinstance(filtered, pd.DataFrame)
def test_filter_dataframe_matching_one():
    """
    Check that utils.filter_dataframe applied to df_input reproduces the frame
    stored in result_data3.csv.

    Case: all diseases, all statuses and all genders from config, ages 50-90,
    with the reading ranges passed below.
    """
    # DATA_PATH is a pathlib.Path, so join with "/" instead of concatenating strings.
    expected_df = pd.read_csv(DATA_PATH / "result_data3.csv")
    actual_df = utils.filter_dataframe(df=df_input, disease=config.diseases,
                                       status=config.statuses,
                                       age=[50, 90],
                                       gender=config.genders,
                                       bmi=[19, 30], temperature=[34.0, 36.9], heartrate=[40, 200],
                                       bloodsugar=[35, 200],
                                       systolic=[100, 140], diastolic=[40, 100])
    # Only the values are compared, not the row labels. reset_index returns a
    # new frame here instead of using inplace=True, so the object handed back by
    # filter_dataframe (which may share data with the module-level df_input) is
    # never modified by the test.
    actual_df = actual_df.reset_index(drop=True)
    expected_df = expected_df.reset_index(drop=True)
    assert_frame_equal(actual_df, expected_df)
def test_filter_dataframe_matching_two():
    """
    Check that utils.filter_dataframe applied to df_input reproduces the frame
    stored in result_data4.csv.

    Case: diabetes and hypertension only, the two "stable" statuses, males,
    ages 50-90, with the reading ranges passed below.
    """
    # DATA_PATH is a pathlib.Path, so join with "/" instead of concatenating strings.
    expected_df = pd.read_csv(DATA_PATH / "result_data4.csv")
    actual_df = utils.filter_dataframe(df=df_input,
                                       disease=['diabetes', 'hypertension'],
                                       status=['stable unhealthy', 'stable healthy'],
                                       age=[50, 90],
                                       gender=['Male'],
                                       bmi=[19, 30], temperature=[37.0, 41.8], heartrate=[40, 200],
                                       bloodsugar=[35, 200],
                                       systolic=[100, 140], diastolic=[100, 150])
    # Only the values are compared, not the row labels. reset_index returns a
    # new frame here instead of using inplace=True, so the object handed back by
    # filter_dataframe (which may share data with the module-level df_input) is
    # never modified by the test.
    actual_df = actual_df.reset_index(drop=True)
    expected_df = expected_df.reset_index(drop=True)
    assert_frame_equal(actual_df, expected_df)
def test_filter_dataframe_matching_three():
    """
    Check that utils.filter_dataframe applied to df_input reproduces the frame
    stored in result_data5.csv.

    Case: diabetes, hypertension and heart disease, three statuses, females,
    with the age and reading ranges passed below.
    """
    # DATA_PATH is a pathlib.Path, so join with "/" instead of concatenating strings.
    expected_df = pd.read_csv(DATA_PATH / "result_data5.csv")
    # NOTE(review): this case was previously described as "young ages", but the
    # age range passed is [50, 90], the same as in the other matching tests.
    # Confirm which is intended against result_data5.csv.
    actual_df = utils.filter_dataframe(df=df_input,
                                       disease=['diabetes', 'hypertension', 'heart disease'],
                                       status=['stable unhealthy', 'stable healthy', 'critical unhealthy'],
                                       age=[50, 90],
                                       gender=['Female'],
                                       bmi=[30, 40], temperature=[34.0, 36.9], heartrate=[200, 300],
                                       bloodsugar=[200, 400],
                                       systolic=[140, 220], diastolic=[40, 100])
    # Only the values are compared, not the row labels. reset_index returns a
    # new frame here instead of using inplace=True, so the object handed back by
    # filter_dataframe (which may share data with the module-level df_input) is
    # never modified by the test.
    actual_df = actual_df.reset_index(drop=True)
    expected_df = expected_df.reset_index(drop=True)
    assert_frame_equal(actual_df, expected_df)
def test_get_bmi_segment():
    """
    Check that utils.get_bmi_segment(df_input) returns the expected count for
    each BMI segment label.
    """
    # Expected counts, positionally aligned with the segment labels.
    segment_labels = ['Extremely Obese', 'Normal', 'Obese', 'Overweight']
    segment_counts = [24, 32, 30, 14]
    expected_series = pd.Series(segment_counts, index=segment_labels)

    actual_series = utils.get_bmi_segment(df_input)

    # check_names=False: the Series name is not part of the comparison.
    assert_series_equal(actual_series, expected_series, check_names=False)
def test_get_age_segment():
    """
    Check that utils.get_age_segment(df_input) returns the expected count for
    each age-band label.
    """
    expected_counts = {'18 to 39': 44, '40 to 49': 14, '50 to 59': 25, '60 to 69': 17}
    # The index is the dict's own keys, in insertion order.
    expected_series = pd.Series(data=expected_counts, index=list(expected_counts))

    actual_series = utils.get_age_segment(df_input)

    # check_names=False: the Series name is not part of the comparison.
    assert_series_equal(actual_series, expected_series, check_names=False)
def test_get_existing_gender_segments():
    """
    Check that utils.get_existing_segments(df_input, 'gender') returns the
    expected counts for the labels Female and Male.
    """
    actual_series = utils.get_existing_segments(df_input, 'gender')

    # check_names=False: the Series name is not part of the comparison.
    assert_series_equal(
        actual_series,
        pd.Series([47, 53], index=['Female', 'Male']),
        check_names=False,
    )
def test_get_existing_health_segments():
    """
    Check that utils.get_existing_segments(df_input, 'status') returns the
    expected counts for critical unhealthy, stable healthy and stable unhealthy.
    """
    # Expected counts, positionally aligned with the status labels.
    status_labels = ['critical unhealthy', 'stable healthy', 'stable unhealthy']
    status_counts = [40, 10, 50]
    expected_series = pd.Series(status_counts, index=status_labels)

    actual_series = utils.get_existing_segments(df_input, 'status')

    # check_names=False: the Series name is not part of the comparison.
    assert_series_equal(actual_series, expected_series, check_names=False)
def test_get_existing_disease_segments():
    """
    Check that utils.get_existing_segments(df_input, 'condition') returns the
    expected counts for diabetes, heart disease, hypertension and none.
    """
    expected_counts = {'diabetes': 32, 'heart disease': 28, 'hypertension': 16, 'none': 24}
    # The index is the dict's own keys, in insertion order.
    expected_series = pd.Series(data=expected_counts, index=list(expected_counts))

    actual_series = utils.get_existing_segments(df_input, 'condition')

    # check_names=False: the Series name is not part of the comparison.
    assert_series_equal(actual_series, expected_series, check_names=False)
def test_get_postcode_segment():
    """
    Check that utils.get_postcode_segment(df_input2) returns a count of 1 for
    each of the labels Dublin 15 and Dublin 3.
    """
    postcode_labels = ['Dublin 15', 'Dublin 3']
    expected_series = pd.Series([1, 1], index=postcode_labels)

    actual_series = utils.get_postcode_segment(df_input2)

    # check_names=False: the Series name is not part of the comparison.
    assert_series_equal(actual_series, expected_series, check_names=False)
def test_produce_health_stats():
    """
    Check the five values that utils.produce_health_stats returns for the
    person 'Abigail Mercer' in df_input4: index, timestamp, heart rate, body
    temperature and blood sugar.
    """
    expected = {
        "index": [0, 1, 2, 3, 4],
        "timestamp": ['2021-08-05 16:18:00', '2021-08-05 19:53:00', '2021-08-04 23:38:00',
                      '2021-08-05 21:12:00', '2021-08-05 19:27:00'],
        "heart_rate": [77, 70, 99, 130, 68],
        "body_temperature": [36.3, 35.39, 35.35, 35.08, 36.6],
        "blood_sugar": [329, 396, 396, 354, 389],
    }

    (index, timestamp, heart_rate,
     body_temperature, blood_sugar) = utils.produce_health_stats(df_input4, 'Abigail Mercer')

    # One assertion per returned value, so a failure names the value that differs.
    assert index == expected["index"]
    assert timestamp == expected["timestamp"]
    assert heart_rate == expected["heart_rate"]
    assert body_temperature == expected["body_temperature"]
    assert blood_sugar == expected["blood_sugar"]
def test_produce_blood_pressure():
    """
    Check the four values that utils.produce_blood_pressure returns for the
    person 'Abigail Mercer' in df_input4: index, timestamp, blood pressure top
    and blood pressure bottom.
    """
    (index, timestamp,
     blood_pressure_top, blood_pressure_bottom) = utils.produce_blood_pressure(df_input4, 'Abigail Mercer')

    # Each returned value is compared against its expected literal in turn.
    assert index == [0, 1, 2, 3, 4]
    assert timestamp == ['2021-08-05 16:18:00', '2021-08-05 19:53:00', '2021-08-04 23:38:00',
                         '2021-08-05 21:12:00', '2021-08-05 19:27:00']
    assert blood_pressure_top == [131, 104, 204, 123, 114]
    assert blood_pressure_bottom == [135, 68, 65, 67, 69]