-
Notifications
You must be signed in to change notification settings - Fork 14
/
parsers.py
470 lines (422 loc) · 19.2 KB
/
parsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
# -*- coding: utf-8 -*-
################################################################################
# Copyright (c), AiiDA team and individual contributors. #
# All rights reserved. #
# This file is part of the AiiDA-wannier90 code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-wannier90 #
# For further information on the license, see the LICENSE.txt file #
################################################################################
from __future__ import absolute_import
import io
import os
import six
from six.moves import range
from aiida.parsers import Parser
from aiida.common import exceptions as exc
# Public API of this module. ``band_parser_legacy`` is added here because it
# is a public (non-underscore) function defined below, on par with
# ``band_parser``; omitting it from the star-import surface was an oversight.
__all__ = (
    'Wannier90Parser',
    'band_parser',
    'band_parser_legacy',
    'raw_wout_parser',
)
class Wannier90Parser(Parser):
    """
    Wannier90 output parser. Will parse the centres, spreads and, if
    available, the Imaginary/Real ratio of the Wannier functions.
    Will also check if the output converged.
    """

    def __init__(self, node):
        """Check that ``node`` was produced by a ``Wannier90Calculation``.

        :param node: the calculation node whose retrieved outputs are parsed
        :raises aiida.common.exceptions.OutputParsingError: if ``node`` does
            not come from a ``Wannier90Calculation``
        """
        from .calculations import Wannier90Calculation

        # check for valid input
        if not issubclass(node.process_class, Wannier90Calculation):
            # BUGFIX: the message used to read "Input must calc must be a ..."
            # and formatted type(node.process_class), which prints the
            # metaclass (e.g. ABCMeta) rather than the actual process class.
            raise exc.OutputParsingError(
                "Input calc must be a "
                "Wannier90Calculation, it is instead {}".format(
                    node.process_class
                )
            )
        super(Wannier90Parser, self).__init__(node)

    @staticmethod
    def _get_seedname_from_input_filename(input_filename):
        """
        Return the seedname given the input filename.

        :param input_filename: the name of the main Wannier90 input file,
            expected to end with ``.win``
        :return: the filename with the trailing ``.win`` stripped
        :raises ValueError: if the input filename does not end with ``.win``
        """
        input_suffix = '.win'
        if input_filename.endswith(input_suffix):
            return input_filename[:-len(input_suffix)]
        raise ValueError(
            "The input filename '{}' does not end with '{}', so I don't know how to get the seedname"
            .format(input_filename, input_suffix)
        )

    def parse(self, **kwargs):  # pylint: disable=too-many-locals,inconsistent-return-statements; # noqa: MC0001
        """
        Parse the retrieved folder and store the results.

        Emits the outputs ``output_parameters`` (always, when stdout is
        present), ``nnkp_file`` (if the .nnkp file was retrieved to the
        temporary folder) and ``interpolated_bands`` (if the _band.* files
        are present).

        :return: an exit code on failure (missing retrieved folder, .werr
            file present, stdout missing, or an 'Exiting......' message in
            stdout), ``None`` on success
        """
        from aiida.orm import Dict, SinglefileData

        # None if unset
        temporary_folder = kwargs.get('retrieved_temporary_folder')

        seedname = self._get_seedname_from_input_filename(
            self.node.get_options()['input_filename']
        )
        output_file_name = "{}.wout".format(seedname)
        error_file_name = "{}.werr".format(seedname)
        nnkp_file_name = "{}.nnkp".format(seedname)

        # Check that the retrieved folder is there
        try:
            out_folder = self.retrieved
        except exc.NotExistent:
            return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

        # A .werr file is only written by Wannier90 on error
        if error_file_name in out_folder.list_object_names():
            self.logger.error(
                'Errors were found please check the retrieved '
                '{} file'.format(error_file_name)
            )
            return self.exit_codes.ERROR_WERR_FILE_PRESENT

        exiting_in_stdout = False
        try:
            with out_folder.open(output_file_name) as handle:
                out_file = handle.readlines()
                # Wannier90 doesn't always write the .werr file on error,
                # so also scan stdout for the exit marker
                if any('Exiting......' in line for line in out_file):
                    exiting_in_stdout = True
        except OSError:
            self.logger.error("Standard output file could not be found.")
            return self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING

        # Expose the .nnkp file (retrieved only temporarily) as a node
        if temporary_folder is not None:
            nnkp_temp_path = os.path.join(temporary_folder, nnkp_file_name)
            if os.path.isfile(nnkp_temp_path):
                with io.open(nnkp_temp_path, 'rb') as handle:
                    node = SinglefileData(file=handle)
                self.out('nnkp_file', node)

        # Tries to parse the bands
        try:
            with out_folder.open('{}_band.dat'.format(seedname)) as fil:
                band_dat = fil.readlines()
            with out_folder.open('{}_band.kpt'.format(seedname)) as fil:
                band_kpt = fil.readlines()
        except IOError:
            # IOError: _band.* files not present; bands were not requested
            pass
        else:
            structure = self.node.inputs.structure
            ## TODO: should we catch exceptions here?
            try:
                with out_folder.open(
                    '{}_band.labelinfo.dat'.format(seedname)
                ) as fil:
                    band_labelinfo = fil.readlines()
            except IOError:  # use legacy parser for wannier90 < 3.0
                try:
                    kpoint_path = self.node.inputs.kpoint_path
                    special_points = kpoint_path.get_dict()
                except (exc.NotExistent, KeyError):
                    # exc.NotExistent: no input kpoint_path
                    # KeyError: no get_dict()
                    pass
                else:
                    output_bandsdata, band_warnings = band_parser_legacy(
                        band_dat, band_kpt, special_points, structure
                    )
                    self.out('interpolated_bands', output_bandsdata)
            else:
                output_bandsdata, band_warnings = band_parser(
                    band_dat, band_kpt, band_labelinfo, structure
                )
                self.out('interpolated_bands', output_bandsdata)

        # Parse the stdout and return the parsed data
        wout_dictionary = raw_wout_parser(out_file)
        try:
            # band_warnings is only bound if one of the band parsers ran
            wout_dictionary['warnings'].extend(band_warnings)
        except (KeyError, NameError):
            # KeyError: wout_dictionary does not contain warnings
            # NameError: no band_warnings
            pass
        output_data = Dict(dict=wout_dictionary)
        self.out('output_parameters', output_data)

        if exiting_in_stdout:
            return self.exit_codes.ERROR_EXITING_MESSAGE_IN_STDOUT
def raw_wout_parser(wann_out_file): # pylint: disable=too-many-locals,too-many-statements # noqa: disable=MC0001
    '''
    Parse a .wout file and return certain key parameters, such as the
    centres and spreads of the Wannier functions, the Im/Re ratios,
    certain warnings, and labels indicating output files produced.

    :param wann_out_file: the .wout file, as a list of strings (one per line)
    :return out: a dictionary of parameters that can be stored as parameter data
    '''
    w90_conv = False #Used to assess convergence of MLWF procedure use conv_tol and conv_window>1
    out = {}
    out.update({'warnings': []})
    # NOTE(review): reassigning ``i`` inside the body does NOT advance the
    # enumerate iterator, so after an inner ``while`` section the outer loop
    # resumes from where it left off and may re-visit lines already consumed.
    # The parsing is tolerant of this, but keep it in mind before refactoring.
    for i, line in enumerate(wann_out_file):
        # checks for any warnings: every line containing 'Warning' is kept verbatim
        if 'Warning' in line:
            # Certain warnings get a special flag
            out['warnings'].append(line)
        # From the 'initial' part of the output, only sections which indicate
        # whether certain files have been written, e.g. 'Write r^2_nm to file'
        # the units used, e.g. 'Length Unit', that will guide the parser
        # e.g. 'Number of Wannier Functions', or which supplament warnings
        # not directly provided, e.g. unconvergerged wannierization needs
        # some logic in AiiDa to determine whether it met the convergence
        # target or not...
        # Parses some of the MAIN parameters: scan the table that follows the
        # 'MAIN' banner until its closing '-----' separator line
        if 'MAIN' in line:
            i += 1
            line = wann_out_file[i]
            while '-----' not in line:
                line = wann_out_file[i]
                if 'Number of Wannier Functions' in line:
                    # second-to-last token is the value (last is the table border)
                    out.update({'number_wfs': int(line.split()[-2])})
                if 'Length Unit' in line:
                    out.update({'length_units': line.split()[-2]})
                    if (out['length_units'] != 'Ang'):
                        out['warnings'].append(
                            'Units not Ang, '
                            'be sure this is OK!'
                        )
                if 'Output verbosity (1=low, 5=high)' in line:
                    out.update({'output_verbosity': int(line.split()[-2])})
                    if out['output_verbosity'] != 1:
                        out['warnings'].append(
                            'Parsing is only supported '
                            'if output verbosity is set to 1'
                        )
                if 'Post-processing' in line:
                    # 'T'/'F' string as printed by Wannier90
                    out.update({'preprocess_only': line.split()[-2]})
                i += 1
        # Parses some of the WANNIERISE parameters (same table-scan pattern)
        if 'WANNIERISE' in line:
            i += 1
            line = wann_out_file[i]
            while '-----' not in line:
                line = wann_out_file[i]
                if 'Convergence tolerence' in line:
                    out.update({
                        'convergence_tolerance': float(line.split()[-2])
                    })
                if 'Write r^2_nm to file' in line:
                    out.update({'r2mn_writeout': line.split()[-2]})
                    if out['r2mn_writeout'] != 'F':
                        out['warnings'].append(
                            'The r^2_nm file has been selected '
                            'to be written, but this is not yet supported!'
                        )
                if 'Write xyz WF centres to file' in line:
                    out.update({'xyz_writeout': line.split()[-2]})
                    if out['xyz_writeout'] != 'F':
                        out['warnings'].append(
                            'The xyz_WF_center file has '
                            'been selected to be written, but this is not '
                            'yet supported!'
                        )
                i += 1
        if 'Wannierisation convergence criteria satisfied' in line:
            w90_conv = True
        # Reading the final WF, also checks to see if they converged or not
        if 'Final State' in line:
            # Originally wanted to implement automatic convergence check
            # but parsing this using the version below fails depending
            # on the convergence settings used in the aiida.win file
            # Final_check_line = wann_out_file[i-2]
            # if 'Wannierisation convergence criteria satisfied' \
            # not in Final_check_line:
            # Final_Delta = float(Final_check_line.split()[-3])
            # if abs(Final_Delta) > out['convergence_tolerance']:
            # out['Warnings'] += ['Wannierization not converged within '
            # 'specified tolerance!']
            # NOTE(review): this relies on 'number_wfs' having been parsed in
            # the MAIN section above; a truncated .wout would raise KeyError.
            num_wf = out['number_wfs']
            wf_out = []
            end_wf_loop = i + num_wf + 1
            # One line per Wannier function, formatted like:
            #   WF centre and spread    1  (  x, y, z )   spread
            for i in range(i + 1, end_wf_loop):
                line = wann_out_file[i]
                wf_out_i = {'wf_ids': '', 'wf_centres': '', 'wf_spreads': ''}
                #wf_out_i['wf_ids'] = int(line.split()[-7])
                wf_out_i['wf_ids'] = int(line.split('(')[0].split()[-1])
                wf_out_i['wf_spreads'] = float(line.split(')')[1].strip())
                #wf_out_i['wf_spreads'] = float(line.split()[-1])
                # Each coordinate is parsed independently so that a single
                # malformed component (e.g. '*****' overflow) does not lose
                # the whole centre
                try:
                    x = float(
                        line.split('(')[1].split(')')[0].split(',')[0].strip()
                    )
                except (ValueError, IndexError):
                    # To avoid that the crasher completely fails, we set None as a fallback
                    x = None
                try:
                    y = float(
                        line.split('(')[1].split(')')[0].split(',')[1].strip()
                    )
                except (ValueError, IndexError):
                    y = None
                try:
                    z = float(
                        line.split('(')[1].split(')')[0].split(',')[2].strip()
                    )
                except (ValueError, IndexError):
                    z = None
                coord = (x, y, z)
                wf_out_i['wf_centres'] = coord
                wf_out.append(wf_out_i)
            out.update({'wannier_functions_output': wf_out})
            # The spread decomposition (Omega terms) follows shortly after
            # the per-WF block
            for i in range(i + 2, i + 6):
                line = wann_out_file[i]
                if 'Omega I' in line:
                    out.update({'Omega_I': float(line.split()[-1])})
                if 'Omega D' in line:
                    out.update({'Omega_D': float(line.split()[-1])})
                if 'Omega OD' in line:
                    out.update({'Omega_OD': float(line.split()[-1])})
                if 'Omega Total' in line:
                    out.update({'Omega_total': float(line.split()[-1])})
        if ' Maximum Im/Re Ratio' in line:
            # Attach the ratio to the matching WF entry; the .wout id is
            # 1-based, the list is 0-based
            wann_functions = out['wannier_functions_output']
            wann_id = int(line.split()[3])
            wann_function = wann_functions[wann_id - 1]
            wann_function.update({'im_re_ratio': float(line.split()[-1])})
    if not w90_conv:
        out['warnings'].append(
            'Wannierisation finished because num_iter was reached.'
        )
    return out
def band_parser(band_dat, band_kpt, band_labelinfo, structure):  # pylint: disable=too-many-locals
    """
    Parse the band output data to construct a BandsData object, which is then
    returned. Used for wannier90 >= 3.0.

    :param band_dat: list of str, each storing one line of the SEEDNAME_band.dat file
    :param band_kpt: list of str, each storing one line of the SEEDNAME_band.kpt file
    :param band_labelinfo: list of str, each storing one line of the
        SEEDNAME_band.labelinfo.dat file
    :param structure: the input structure, used to set the k-points cell
    :return: tuple of (BandsData constructed from the input params, list of
        warning strings)
    """
    import numpy as np
    from aiida.orm import BandsData
    from aiida.orm import KpointsData

    warnings = []

    # imports the data
    out_kpt = np.genfromtxt(band_kpt, skip_header=1, usecols=(0, 1, 2))
    out_dat = np.genfromtxt(band_dat, usecols=1)

    # reshape the flat energy column into (num_kpoints, num_bands); the .dat
    # file lists all k-points for one band before the next, hence order="F"
    out_dat = out_dat.reshape(
        len(out_kpt), (len(out_dat) // len(out_kpt)), order="F"
    )

    labels_dict = {}
    for line_idx, line in enumerate(band_labelinfo, start=1):
        if not line.strip():
            continue
        try:
            # label, idx, xval, kx, ky, kz = line.split()
            label, idx, _, _, _, _ = line.split()
        except ValueError:
            # BUGFIX: previously written as warnings.append((...)).format(...),
            # which called .format on the None returned by append and raised
            # AttributeError whenever a malformed line was encountered
            warnings.append(
                'Wrong number of items in line {} of the labelinfo file - '
                'I will not assign that label'.format(line_idx)
            )
            continue
        try:
            idx = int(idx)
        except ValueError:
            # BUGFIX: same misplaced-.format issue as above
            warnings.append(
                "Invalid value for the index in line {} of the labelinfo file, "
                "it's not an integer - I will not assign that label".
                format(line_idx)
            )
            continue
        # I use a dictionary because there are cases in which there are
        # two lines for the same point (e.g. when I do a zero-length path,
        # from a point to the same point, just to have that value)
        # Note the -1 because in fortran indices are 1-based, in Python are
        # 0-based
        labels_dict[idx - 1] = label
    labels = [(key, labels_dict[key]) for key in sorted(labels_dict)]

    bands = BandsData()
    k = KpointsData()
    k.set_cell_from_structure(structure)
    k.set_kpoints(out_kpt, cartesian=False)
    bands.set_kpointsdata(k)
    bands.set_bands(out_dat, units='eV')
    bands.labels = labels
    return bands, warnings
def band_parser_legacy(band_dat, band_kpt, special_points, structure): # pylint: disable=too-many-locals
    """
    Parse the bands output data, along with the special points retrieved
    from the input kpoints, to construct a BandsData object which is then
    returned. Cannot handle discontinuities in the kpath; if two points are
    assigned to the same spot only one will be passed. Used for wannier90 < 3.0.

    :param band_dat: list of str with each str stores one line of aiida_band.dat file
    :param band_kpt: list of str with each str stores one line of aiida_band.kpt file
    :param special_points: special points to add labels to the bands a dictionary in
        the form expected in the input as described in the wannier90 documentation
    :param structure: the input structure, used to set the k-points cell
    :return: BandsData object constructed from the input params,
        and a list contains warnings.
    """
    import numpy as np
    from aiida.orm import BandsData
    from aiida.orm import KpointsData
    warnings = []
    warnings.append((
        "Note: no file named SEEDNAME_band.labelinfo.dat found. "
        "You are probably using a version of Wannier90 before 3.0. "
        "There, the labels associated with each k-points were not printed in output "
        "and there were also cases in which points were not calculated "
        "(see issue #195 on the Wannier90 GitHub page). "
        "I will anyway try to do my best to assign labels, "
        "but the assignment might be wrong "
        "(especially if there are path discontinuities)."
    ))
    # imports the data
    out_kpt = np.genfromtxt(band_kpt, skip_header=1, usecols=(0, 1, 2))
    out_dat = np.genfromtxt(band_dat, usecols=1)
    # reshape the flat energy column into (num_kpoints, num_bands);
    # order="F" because the .dat file loops over k-points fastest
    out_dat = out_dat.reshape(
        len(out_kpt), (len(out_dat) // len(out_kpt)), order="F"
    )
    # finds expected points of discontinuity: consecutive path segments
    # whose end label differs from the next segment's start label
    kpath = special_points['path']
    cont_break = [(i, (kpath[i - 1][1], kpath[i][0]))
                  for i in range(1, len(kpath))
                  if kpath[i - 1][1] != kpath[i][0]]
    # finds the special points: every (kpt_index, label) pair whose coords
    # match a special point
    special_points_dict = special_points['point_coords']
    # We set atol to 1e-5 because in the kpt file the coords are printed with fixed precision
    labels = [
        (i, k) for k in special_points_dict for i in range(len(out_kpt))
        if all(
            np.isclose(special_points_dict[k], out_kpt[i], rtol=0, atol=1.e-5)
        )
    ]
    labels.sort()
    # Checks and appends labels if discontinuity
    appends = []
    for x in cont_break:
        # two cases the break is before or the break is after
        # NOTE(review): x[0] indexes the path segments but is used here to
        # index ``labels``; this assumes one label entry per path vertex —
        # verify against real legacy outputs before changing.
        # if the break is before
        if labels[x[0]][1] != x[1][0]:
            # checks to see if the discontinuity was already there
            # NOTE(review): this compares a (index, label) tuple with a label
            # string, so it is always False — likely intended to be
            # labels[x[0] - 1][1]; left as-is to preserve behavior.
            if labels[x[0] - 1] == x[1][0]:
                continue
            insert_point = x[0]
            new_label = x[1][0]
            kpoint = labels[x[0]][0] - 1
            appends += [[insert_point, new_label, kpoint]]
        # if the break is after
        if labels[x[0]][1] != x[1][1]:
            # checks to see if the discontinuity was already there
            # NOTE(review): same tuple-vs-string comparison as above.
            if labels[x[0] + 1] == x[1][1]:
                continue
            insert_point = x[0] + 1
            new_label = x[1][1]
            kpoint = labels[x[0]][0] + 1
            appends += [[insert_point, new_label, kpoint]]
    appends.sort()
    # Insert the extra labels; the running offset ``i`` accounts for the
    # list growing as earlier insertions are applied
    for i, append in enumerate(appends):
        labels.insert(append[0] + i, (append[2], six.text_type(append[1])))
    bands = BandsData()
    k = KpointsData()
    k.set_cell_from_structure(structure)
    k.set_kpoints(out_kpt, cartesian=False)
    bands.set_kpointsdata(k)
    bands.set_bands(out_dat, units='eV')
    bands.labels = labels
    return bands, warnings