-
Notifications
You must be signed in to change notification settings - Fork 1
/
main_task_analysis.py
115 lines (98 loc) · 5.67 KB
/
main_task_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from GraphConfigurator import GraphConfigurator
from AnalysisConfigurator import AnalysisConfigurator
import PreprocessSelector
from constructors.EventGraphConstructor import EventGraphConstructor
from constructors.HighLevelEventConstructor import HighLevelEventConstructor
from constructors.ClusterConstructor import ClusterConstructor
from TaskClusterModule import TaskClusterModule
from VariantVisualizer import VariantVisualizer
from DFGVisualizer import DFGVisualizer
# --------------------------- BEGIN CONFIG ----------------------------- #
# TO START, pick the two names that drive the whole run:
#   - graph: name of the graph; all settings related to this graph name
#     are configured in "graph_confs.py"
#   - pattern_subset_description: analysis description (patterns to be analyzed
#     and number of clusters); all analysis parameters are configured in "analysis_confs.py"
graph = "bpic2017_case_attr"
pattern_subset_description = "P_freq_geq_10__C_20"
# configurator objects queried by every step further down in this script
gc = GraphConfigurator(graph)
ac = AnalysisConfigurator(pattern_subset_description)
# --------------------------- CONSTRUCTION ----------------------------- #
# IF STARTING FROM SCRATCH (without event graph constructed in neo4j)
# (1) create graph in Neo4j (with same password as specified in "graph_confs.py")
#     and allocate enough memory: set dbms.memory.heap.max_size=20G
# (2) install APOC plugin
# (3) specify path to import directory of neo4j database
#     (every backslash is escaped explicitly; a raw string is not an option
#     because the path has to end with a backslash):
path_to_neo4j_import_directory = (
    'C:\\Users\\s111402\\.Neo4jDesktop\\relate-data\\dbmss\\'
    'dbms-95e392fb-324f-40c5-a2ec-c7cdfd0eb78e\\import\\'
)
# (4) set "step_preprocess" and "step_construct_event_graph" to true:
step_preprocess = False
step_construct_event_graph = False
# IF EVENT GRAPH IS ALREADY CONSTRUCTED:
# (5) set "step_construct_high_level_events" to true to construct high level events
#     and set "step_construct_clusters" to true to perform clustering and construct clusters:
step_construct_high_level_events = False
step_construct_clusters = False
# IF EVENT GRAPH, HIGH LEVEL EVENTS AND CLUSTER CONSTRUCTS ARE IN PLACE,
# THE FOLLOWING STEPS CAN BE EXECUTED
# --------------- VISUALIZATION of TASK CLUSTER VARIANTS --------------- #
# the first step adds to each task instance node an identifier for its variant,
# i.e., each task instance with the same path/action sequence has the same variant ID
# this value is based on the frequency of the variant (i.e., if a task instance has
# variant ID=1, its variant/action sequence is the most frequent occurring in the data)
# this is used for the next step to better identify the visualized variants
step_add_task_instance_ids = False
step_visualize_task_variants_colored = True
# --------------- PROCESS VISUALIZATION using TASK DFGs ---------------- #
step_create_intra_task_DFG = False
step_create_inter_task_DFG = False
# entity_type and df_show_threshold are passed to the inter-task DFG step
# and to the concept drift comparison step
entity_type = 'case'
df_show_threshold = 1.0
print_description = False
# time window handed to the inter-task and resource-specific DFG steps
# start_end_date = None
start_end_date = ['2016-01-01', '2016-06-30']
# start_end_date = ['2016-08-01', '2017-02-01']
step_create_DFG_concept_drift_comparison = False
# list of time windows handed to the concept drift comparison step
start_end_dates = [['2016-01-01', '2016-06-30'], ['2016-08-01', '2017-02-01']]
step_create_inter_task_DFG_resource_specific = False
# "resources" is handed to both the inter-task and the resource-specific DFG step
# resources = None
resources = ["User_29", "User_113"]
# the remaining three values are only handed to the resource-specific DFG step
resources_lists_over = [["User_29"], ["User_113"]]
df_show_threshold_over = 5
df_show_threshold_under = 5
# ------------------------------ END CONFIG ---------------------------- #
# [1.a] CONSTRUCTION
# Each step below only runs when its "step_..." flag from the config section is set.
if step_preprocess:
    # obtain the preprocessor for this graph and run it
    preprocessor = PreprocessSelector.get_preprocessor(
        graph,
        gc.get_filename(),
        gc.get_column_names(),
        gc.get_separator(),
        gc.get_timestamp_format(),
        path_to_neo4j_import_directory,
    )
    preprocessor.preprocess()

if step_construct_event_graph:
    event_graph_constructor = EventGraphConstructor(gc.get_password(), path_to_neo4j_import_directory, graph)
    event_graph_constructor.construct()

if step_construct_high_level_events:
    high_level_event_constructor = HighLevelEventConstructor(
        gc.get_password(),
        graph,
        gc.get_entity_labels(),
        gc.get_action_lifecycle_labels(),
    )
    high_level_event_constructor.construct()

# [1.b] CLUSTERING
if step_construct_clusters:
    tcm = TaskClusterModule(
        graph,
        gc.get_password(),
        ac.get_analysis_directory(),
        ac.get_pattern_filter_description(),
        ac.get_pattern_filter_cypher(),
        ac.get_encoding(),
        ac.get_num_clusters(),
    )
    cc = ClusterConstructor(
        gc.get_password(),
        graph,
        gc.get_entity_labels(),
        gc.get_action_lifecycle_labels(),
    )
    # existing cluster constructs are removed before the new ones are built
    cc.remove_cluster_constructs()
    clustered_patterns = tcm.get_patterns_clustered()
    cc.construct_clusters(clustered_patterns)

# [2] VISUALIZATION of TASK CLUSTER VARIANTS
if step_add_task_instance_ids:
    task_instance_id_setter = HighLevelEventConstructor(
        gc.get_password(),
        graph,
        gc.get_entity_labels(),
        gc.get_action_lifecycle_labels(),
    )
    task_instance_id_setter.set_task_instance_ids()

if step_visualize_task_variants_colored:
    vv = VariantVisualizer(graph=graph, analysis_directory=ac.get_analysis_directory())
    vv.visualize_variants_colored()

# [3] PROCESS VISUALIZATION using TASK DFGs
# NOTE: the visualizer is created regardless of which DFG steps are enabled.
dfg_vis = DFGVisualizer(
    graph,
    gc.get_password(),
    gc.get_name_data_set(),
    gc.get_entity_labels(),
    gc.get_action_lifecycle_labels(),
    ac.get_analysis_directory(),
    ac.get_exclude_clusters(),
)

if step_create_inter_task_DFG:
    dfg_vis.visualize_inter_task_DFG(
        entity_type,
        df_show_threshold,
        start_end_date=start_end_date,
        resources=resources,
        print_description=print_description,
    )

if step_create_intra_task_DFG:
    # the argument 14 is hard-coded here; it is not part of the config section
    dfg_vis.visualize_intra_task_DFG(14)

if step_create_DFG_concept_drift_comparison:
    dfg_vis.visualize_cluster_DFG_concept_drift_comparison(entity_type, df_show_threshold, start_end_dates)

if step_create_inter_task_DFG_resource_specific:
    dfg_vis.visualize_cluster_DFG_resources(
        df_show_threshold_under,
        df_show_threshold_over,
        resources_lists_over,
        start_end_date=start_end_date,
        resources=resources,
    )