Skip to content

Commit

Permalink
Merge pull request #66 from kiraplenkin/hotfix/save-scorecard_#65
Browse files Browse the repository at this point in the history
Fix save_scorecard method
  • Loading branch information
kiraplenkin committed Apr 5, 2024
2 parents 35c49b2 + d44611d commit 7b7677a
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 26 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "woe_scoring"
version = "0.10.8"
version = "0.10.9"
description = "Weight Of Evidence Transformer and LogisticRegression model with scikit-learn API"
authors = ["Stroganov Kirill <[email protected]>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion woe_scoring/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from woe_scoring.core import CreateModel, WOETransformer

__version__ = "0.10.8"
__version__ = "0.10.9"
6 changes: 4 additions & 2 deletions woe_scoring/core/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@

from .binning.functions import (cat_processing, find_cat_features,
num_processing, prepare_data, refit)
from .model.functions import (calc_features_gini_quality, check_correlation_threshold, check_features_gini_threshold,
check_min_pct_group, find_bad_features, generate_sql, save_reports, save_scorecard_fn)
from .model.functions import (_calc_model_results_table, calc_features_gini_quality, check_correlation_threshold,
check_features_gini_threshold, check_min_pct_group, find_bad_features, generate_sql,
save_reports, save_scorecard_fn)
from .model.model import Model
from .model.selector import FeatureSelector

Expand Down Expand Up @@ -397,6 +398,7 @@ def fit(self, data: pd.DataFrame, target: Union[pd.Series, np.ndarray]) -> None:
self.feature_names_ = selected_model.feature_names_
self.model_score_ = selected_model.model_score_
self.pvalues_ = selected_model.pvalues_
self.model_results = _calc_model_results_table(selected_model)

return self.model

Expand Down
48 changes: 26 additions & 22 deletions woe_scoring/core/model/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def _calc_stats_for_feature(
result_dict["percent_of_population"].append(bin_info["pct"])
result_dict["total"].append(bin_info["total"])
result_dict["event_cnt"].append(bin_info["bad"])
result_dict["non_event_cnt"].append(bin_info["good"])
result_dict["non_event_cnt"].append(bin_info["total"] - bin_info["bad"])
result_dict["event_rate"].append(bin_info["bad_rate"])
result_dict["score_ball"].append(
_calc_score_points(
Expand Down Expand Up @@ -433,10 +433,10 @@ def _calc_stats(
Returns:
Stats."""

return Parallel(n_jobs=-1, backend="multiprocessing")(
return Parallel(n_jobs=-1, backend="threading")(
delayed(_calc_stats_for_feature)(
idx, feature, feature_names, encoder, model_results, factor, offset
).rename(feature.replace("WOE_", ""))
)
for idx, feature in enumerate(model_results.iloc[:, 0])
)

Expand All @@ -449,7 +449,7 @@ def _build_excel_sheet_with_charts(
first_plot_position: str = 'A',
second_plot_position: str = "J",
) -> None:
"""Build excel sheet with charts.
"""Build Excel sheet with charts.
Args:
feature_stats: Feature stats.
writer: Writer.
Expand All @@ -460,9 +460,7 @@ def _build_excel_sheet_with_charts(
Returns:
None."""

# Get workbook link
workbook = writer.book
# Create merge format
merge_format = workbook.add_format(
{
'bold': 1,
Expand All @@ -471,8 +469,8 @@ def _build_excel_sheet_with_charts(
'valign': 'vcenter'
}
)
const = [result for result in feature_stats if result.name == 'const']
iterator = [result for result in feature_stats if ((result is not None) and (result.name != 'const'))]
const = [result for result in feature_stats if result['feature'][0] == 'const']
iterator = [result for result in feature_stats if ((result is not None) and (result['feature'][0] != 'const'))]
scorecard_iterator = [*const, *iterator]
indexes = np.cumsum([len(result) for result in scorecard_iterator])
full_features = pd.concat(tuple(scorecard_iterator), ignore_index=True)
Expand All @@ -486,21 +484,16 @@ def _build_excel_sheet_with_charts(
area_start = index + 1

for result in iterator:
# Get dimensions of result Excel sheet and column indexes
max_row = len(result)
event_cnt = result.columns.get_loc('event_cnt') + 1
non_event_cnt = result.columns.get_loc('non_event_cnt') + 1
score_ball = result.columns.get_loc('score_ball') + 1
woe = result.columns.get_loc('WOE') + 1
event_rate = result.columns.get_loc('event_rate') + 1
# Set sheet name, transfer data to sheet
sheet_name = result.name
sheet_name = result['feature'][0]
result.to_excel(writer, sheet_name=sheet_name)
# Get worksheet link
worksheet = writer.sheets[sheet_name]
# Create stacked column chart
chart_events = workbook.add_chart({'type': 'column', 'subtype': 'stacked'})
# Add event and non-event counts to chart
chart_events.add_series(
{
'name': 'event_cnt ',
Expand All @@ -513,7 +506,6 @@ def _build_excel_sheet_with_charts(
'values': [sheet_name, 1, non_event_cnt, max_row, non_event_cnt]
}
)
# Create separate line chart for combination
woe_line = workbook.add_chart({'type': 'line'})
woe_line.add_series(
{
Expand All @@ -523,17 +515,14 @@ def _build_excel_sheet_with_charts(
'y2_axis': True,
}
)
# Combine charts
chart_events.combine(woe_line)
# Create column chart for score_ball
chart_score_ball = workbook.add_chart({'type': 'column'})
chart_score_ball.add_series(
{
'name': 'score_ball ',
'values': [sheet_name, 1, score_ball, max_row, score_ball]
}
)
# Create separate line chart for combination
event_rate_line = workbook.add_chart({'type': 'line'})
event_rate_line.add_series(
{
Expand All @@ -543,21 +532,17 @@ def _build_excel_sheet_with_charts(
'y2_axis': True,
}
)
# Combine charts
chart_score_ball.combine(event_rate_line)
# Change size and legend of charts
chart_events.set_size({'width': width, 'height': height})
chart_events.set_legend({'position': 'bottom'})
chart_score_ball.set_size({'width': width, 'height': height})
chart_score_ball.set_legend({'position': 'bottom'})
# Merge first 3 columns
worksheet.merge_range(1, 1, max_row, 1, result.iloc[1, 0], merge_format)
worksheet.set_column(1, 1, 20)
worksheet.merge_range(1, 2, max_row, 2, result.iloc[1, 1], merge_format)
worksheet.set_column(2, 2, 10)
worksheet.merge_range(1, 3, max_row, 3, result.iloc[1, 2], merge_format)
worksheet.set_column(3, 3, 10)
# Insert charts
worksheet.insert_chart(f'{first_plot_position}{max_row + 3}', chart_events)
worksheet.insert_chart(f'{second_plot_position}{max_row + 3}', chart_score_ball)

Expand Down Expand Up @@ -603,3 +588,22 @@ def save_scorecard_fn(
writer.save()
except Exception as e:
print(f"Problem with saving: {e}")


def _calc_model_results_table(model: Model) -> pd.DataFrame:
    """Build a table of the fitted model's coefficients and p-values.

    The first row describes the intercept under the name ``const``; it is
    followed by one row per entry of ``model.feature_names_``, in order.

    Args:
        model: Fitted model exposing ``intercept_``, ``feature_names_``,
            ``coef_`` and ``pvalues_``. ``coef_`` and ``pvalues_`` are read
            by the position of the corresponding feature name.

    Returns:
        DataFrame with the columns ``index`` (feature name without the
        ``WOE_`` prefix), ``coef`` and ``P>|z|`` on a default integer index.
    """
    woe_prefix = "WOE_"

    # The intercept row comes first; its p-value is recorded as 0.
    names = ["const"]
    coefs = [model.intercept_]
    pvalues = [0]

    for position, feature_name in enumerate(model.feature_names_):
        # Strip the prefix only when it is present, so that a name without
        # it is kept intact instead of losing its first four characters.
        if feature_name.startswith(woe_prefix):
            feature_name = feature_name[len(woe_prefix):]
        names.append(feature_name)
        coefs.append(model.coef_[position])
        pvalues.append(model.pvalues_[position])

    # A frame built from plain lists already has a fresh RangeIndex.
    return pd.DataFrame({"index": names, "coef": coefs, "P>|z|": pvalues})
1 change: 1 addition & 0 deletions woe_scoring/core/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def __init__(
self.feature_names_ = List[str]
self.model_score_ = float
self.pvalues_ = List[float]
self.model_results = None

def get_model(self, data: pd.DataFrame, target: Union[pd.Series, np.ndarray]) -> callable:
"""
Expand Down

0 comments on commit 7b7677a

Please sign in to comment.