-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
118 lines (90 loc) · 3.09 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""
Parses logs written by dfhack/scripts/logPaths
"""
import os
import time
import pandas as pd
from rich.progress import track
from snapshot import make_snapshot
GOAL_KEY = {}
def parse(dir: str):
    """
    Parse all logs found in *dir* and build aggregated snapshots.

    Parameters:
        dir: directory containing the raw log files.

    Returns:
        tuple (logData, logCount) regardless of parsing results, where
        logData maps snapshot names to DataFrames and logCount is the
        number of newly parsed logs.
    """
    # ingest every not-yet-seen log under dir
    log_data, log_count = read_logs(dir)
    # write the aggregated snapshot file from the parsed data
    make_snapshot(log_data)
    return log_data, log_count
def read_logs(dir: str):
    """
    Read every log file in *dir* that has not been parsed before.

    Parameters:
        dir: directory containing the raw log files.

    Returns:
        tuple (logData, logCount) where logData maps
        snapshotName -> DataFrame of that log's rows and
        logCount is the number of newly parsed logs.
    """
    # names of logs newly parsed this run, appended to scriptLog.txt below
    scriptLog = []
    # every candidate log file name in the passed dir
    logNames = os.listdir(dir)
    # key: snapshotName, value: logDf
    logData = {}
    logCount = 0
    if logNames:
        total = 0
        for log in track(logNames, description="Parsing logs..."):
            # skip logs already recorded in scriptLog.txt
            if not test_log(log):
                # strip the file suffix to get the snapshot name
                snapshotName = log.replace(".txt", "")
                # os.path.join works whether or not dir ends with a
                # separator (plain "dir + log" concatenation did not)
                logDf = pd.read_csv(
                    os.path.join(dir, log), header=None, index_col=False
                )
                # logs have no header row; name the positional columns
                logDf = logDf.rename(
                    columns={0: "path_id", 1: "path_goal", 2: "x", 3: "y", 4: "z"}
                )
                # tag every row with the snapshot it came from
                logDf["snapshot"] = snapshotName
                logData[snapshotName] = logDf
                # remember the raw file name so later runs skip it
                scriptLog.append(log)
                total += 1
        logCount += total
        # report how many logs were processed this run
        print(f"Processed {total} new logs...")
    # if new logs found, update user
    if logCount > 0:
        print("Writing data file names to cache...")
    else:
        print("No new logs detected...")
    # append the newly parsed names to the cache file (no-op when empty)
    with open("scriptLog.txt", "a") as f:
        f.writelines(logName + "\n" for logName in scriptLog)
    return logData, logCount
def test_log(log: str):
"""
Accepts a log, where log = the name of a
log in data/, and tests if exists in scriptLog.txt
Returns True if already parsed, else False
"""
# check if log has already been read
parsed = False
with open("scriptLog.txt", "r") as f:
lines = f.readlines()
for row in lines:
if row.find(log) != -1:
# log name was found
parsed = True
# returns True if log exists in scriptLog, else False
return parsed