-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics.py
55 lines (45 loc) · 1.62 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import logging
import joblib
import pandas as pd
from data import process_data
from model import compute_model_metrics
# Send every log record to a plain-text report file. The bare "%(message)s"
# format writes only the message text, with no level or timestamp prefix.
logging.basicConfig(
    format="%(message)s",
    level=logging.INFO,
    filename="slice_output.txt",
)
logger = logging.getLogger()

# Test data used for the evaluation below.
test = pd.read_csv("./data/test.csv")

# Persisted OneHotEncoder, LabelBinarizer and model, loaded in that order.
encoder, lb, model = map(
    joblib.load,
    ("model/OHE.pkl", "model/LB.pkl", "model/rfc_model.pkl"),
)

# Columns handed to process_data as categorical features.
cat_features = [
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
]

# Run the complete test set through process_data with training=False,
# reusing the encoder and label binarizer loaded above.
X_test, y_test, encoder, lb = process_data(
    test,
    categorical_features=cat_features,
    label="salary",
    training=False,
    encoder=encoder,
    lb=lb,
)
# Slice evaluation: for each audited categorical column, score the model on
# the subset of test rows sharing each distinct value and log the metrics.
# NOTE(review): nesting reconstructed from a copy that lost its indentation;
# the separator line is assumed to close each category, not each value.
for cat in ["workclass", "race", "sex", "relationship"]:
    logger.info("Category %s:", cat)
    logger.info("\n")
    # dropna(): NaN never compares equal to anything, so a NaN key would
    # select an empty slice that cannot be scored.
    for val in test[cat].dropna().unique():
        # The encoder/binarizer returned here are deliberately discarded so
        # that every slice is processed with the same module-level artifacts
        # instead of whatever the previous iteration handed back.
        X_slice, y_slice, _, _ = process_data(
            test[test[cat] == val],
            categorical_features=cat_features,
            label="salary",
            training=False,
            encoder=encoder,
            lb=lb,
        )
        precision, recall, fbeta, _ = compute_model_metrics(
            y_slice, model.predict(X_slice)
        )
        logger.info("%s", val)
        # Built-in round() works for NumPy scalars and plain Python floats
        # alike; the .round() method exists only on the former.
        logger.info(
            "Precision: %s, Recall: %s, fbeta: %s",
            round(precision, 3),
            round(recall, 3),
            round(fbeta, 3),
        )
        logger.info("\n")
    logger.info("_____________________________________________________________")
    logger.info("\n")