-
Notifications
You must be signed in to change notification settings - Fork 11
/
eval_model.py
440 lines (355 loc) · 15.8 KB
/
eval_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# Python 2 compatibility
from __future__ import print_function
from __future__ import division
# Standard libraries
import os
from os.path import splitext, basename
import multiprocessing
import warnings
import numpy
from scipy.interpolate import UnivariateSpline
try:
import yaml
except ImportError:
print('Warning: YAML must be installed to read input file.')
raise
from pyked.chemked import ChemKED, DataPoint
# Local imports
from .utils import units
from .simulation import Simulation
min_deviation = 0.10
"""float: minimum allowable standard deviation for experimental data"""
def create_simulations(dataset, properties):
    """Set up individual simulations for each ignition delay value.
    Parameters
    ----------
    dataset : str
        Filename of the dataset; used to build per-case simulation IDs.
    properties : pyked.chemked.ChemKED
        ChemKED object with full set of experimental properties
    Returns
    -------
    simulations : list
        List of :class:`Simulation` objects for each simulation
    """
    # All cases from one dataset share the same ID prefix (file name sans extension).
    id_prefix = splitext(basename(dataset))[0]
    simulations = []
    for idx, case in enumerate(properties.datapoints):
        # Common metadata for this case
        sim_meta = {'data-file': dataset,
                    'id': id_prefix + '_' + str(idx),
                    }
        simulations.append(Simulation(properties.experiment_type,
                                      properties.apparatus.kind,
                                      sim_meta,
                                      case,
                                      ))
    return simulations
def simulation_worker(sim_tuple):
    """Worker for multiprocessing of simulation cases.
    Parameters
    ----------
    sim_tuple : tuple
        Contains Simulation object and other parameters needed to setup
        and run case.
    Returns
    -------
    sim : ``Simulation``
        Simulation case with calculated ignition delay.
    """
    sim, model_file, model_spec_key, path, restart = sim_tuple

    sim.setup_case(model_file, model_spec_key, path)
    sim.run_case(restart)

    # Rebuild a fresh Simulation from the run's results before returning it to
    # the parent process — presumably to drop unpicklable solver state; confirm.
    return Simulation(sim.kind, sim.apparatus, sim.meta, sim.properties)
def estimate_std_dev(indep_variable, dep_variable, min_deviation=0.10):
    """Estimate standard deviation of data via deviation from a spline fit.

    Parameters
    ----------
    indep_variable : ndarray, list(float)
        Independent variable (e.g., temperature, pressure)
    dep_variable : ndarray, list(float)
        Dependent variable (e.g., ignition delay)
    min_deviation : float
        Minimum allowable standard deviation. Optional; default = 0.10,
        matching the historical module-level ``min_deviation`` constant.

    Returns
    -------
    standard_dev : float
        Standard deviation of difference between data and best-fit line
    """
    assert len(indep_variable) == len(dep_variable), \
        'independent and dependent variables not the same length'

    # Work on float copies: the duplicate-averaging step below writes into
    # dep_variable, and previously mutated the caller's array in place.
    indep_variable = numpy.array(indep_variable, dtype=float)
    dep_variable = numpy.array(dep_variable, dtype=float)

    # ensure no repetition of independent variable by taking average of associated
    # dependent variables and removing duplicates
    vals, count = numpy.unique(indep_variable, return_counts=True)
    for val in vals[count > 1]:
        idx, = numpy.where(indep_variable == val)
        dep_variable[idx[0]] = numpy.mean(dep_variable[idx])
        dep_variable = numpy.delete(dep_variable, idx[1:])
        indep_variable = numpy.delete(indep_variable, idx[1:])

    # ensure data sorted based on independent variable to avoid some problems
    # (UnivariateSpline requires strictly increasing x)
    order = numpy.argsort(indep_variable)
    indep_variable = indep_variable[order]
    dep_variable = dep_variable[order]

    # spline fit of the data
    num_pts = len(indep_variable)
    if num_pts in (1, 2):
        # Fit of data will be perfect
        return min_deviation
    elif num_pts == 3:
        # cubic spline needs at least 4 points; fall back to quadratic
        spline = UnivariateSpline(indep_variable, dep_variable, k=2)
    else:
        spline = UnivariateSpline(indep_variable, dep_variable)

    standard_dev = numpy.std(dep_variable - spline(indep_variable))

    if standard_dev < min_deviation:
        print('Standard deviation of {:.2f} too low, '
              'using {:.2f}'.format(standard_dev, min_deviation))
        standard_dev = min_deviation

    return standard_dev
def get_changing_variable(cases):
    """Identify variable changing across multiple cases.

    Checks temperature and pressure; if both vary across the cases, a
    ``RuntimeWarning`` is issued and temperature is used.

    Parameters
    ----------
    cases : list(pyked.chemked.DataPoint)
        List of DataPoint with experimental case data.

    Returns
    -------
    variable : list(float)
        List of floats representing changing experimental variable.
    """
    changing_var = None
    for var_name in ['temperature', 'pressure']:
        if var_name == 'temperature':
            variable = [case.temperature for case in cases]
        else:
            variable = [case.pressure for case in cases]

        if all(x == variable[0] for x in variable):
            continue

        if changing_var is None:
            changing_var = var_name
        else:
            # Both temperature and pressure vary across the cases.
            # (A premature ``break`` previously made this warning unreachable.)
            warnings.warn('Warning: multiple changing variables. '
                          'Using temperature.',
                          RuntimeWarning
                          )
            changing_var = 'temperature'

    # Temperature is default
    if changing_var is None:
        changing_var = 'temperature'

    # Quantities with uncertainty expose the plain magnitude via ``.value``.
    if changing_var == 'temperature':
        variable = [case.temperature.value.magnitude if hasattr(case.temperature, 'value')
                    else case.temperature.magnitude
                    for case in cases
                    ]
    else:
        variable = [case.pressure.value.magnitude if hasattr(case.pressure, 'value')
                    else case.pressure.magnitude
                    for case in cases
                    ]

    return variable
def evaluate_model(model_name, spec_keys_file, dataset_file,
                   data_path='data', model_path='models',
                   results_path='results', model_variant_file=None,
                   num_threads=None, print_results=False, restart=False,
                   skip_validation=False,
                   ):
    """Evaluates the ignition delay error of a model for a given dataset.

    Parameters
    ----------
    model_name : str
        Chemical kinetic model filename
    spec_keys_file : str
        Name of YAML file identifying important species
    dataset_file : str
        Name of file with list of data files
    data_path : str
        Local path for data files. Optional; default = 'data'
    model_path : str
        Local path for model file. Optional; default = 'models'
    results_path : str
        Local path for creating results files. Optional; default = 'results'
    model_variant_file : str
        Name of YAML file identifying ranges of conditions for variants of the
        kinetic model. Optional; default = ``None``
    num_threads : int
        Number of CPU threads to use for performing simulations in parallel.
        Optional; default = ``None``, in which case the available number of
        cores minus one is used.
    print_results : bool
        If ``True``, print results of the model evaluation to screen.
    restart : bool
        If ``True``, process saved results. Mainly intended for testing/development.
    skip_validation : bool
        If ``True``, skips validation of ChemKED files.

    Returns
    -------
    output : dict
        Dictionary with all information about model evaluation results.
    """
    # Create results_path if it doesn't exist
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    # Dict to translate species names into those used by models
    with open(spec_keys_file, 'r') as f:
        model_spec_key = yaml.safe_load(f)

    # Keys for models with variants depending on pressure or bath gas
    model_variant = None
    if model_variant_file:
        with open(model_variant_file, 'r') as f:
            model_variant = yaml.safe_load(f)

    # Read dataset list
    with open(dataset_file, 'r') as f:
        dataset_list = f.read().splitlines()

    error_func_sets = numpy.zeros(len(dataset_list))
    dev_func_sets = numpy.zeros(len(dataset_list))

    # Dictionary with all output data
    output = {'model': model_name, 'datasets': []}

    # If number of threads not specified, use either max number of available
    # cores minus 1, or use 1 if multiple cores not available.
    if not num_threads:
        num_threads = multiprocessing.cpu_count()-1 or 1

    # Loop through all datasets
    for idx_set, dataset in enumerate(dataset_list):
        dataset_meta = {'dataset': dataset, 'dataset_id': idx_set}

        # Create individual simulation cases for each datapoint in this set
        properties = ChemKED(os.path.join(data_path, dataset), skip_validation=skip_validation)
        simulations = create_simulations(dataset, properties)

        ignition_delays_exp = numpy.zeros(len(simulations))
        ignition_delays_sim = numpy.zeros(len(simulations))

        #############################################
        # Determine standard deviation of the dataset
        #############################################
        # Quantities with uncertainty expose the plain magnitude via ``.value``.
        ign_delay = [case.ignition_delay.to('second').value.magnitude
                     if hasattr(case.ignition_delay, 'value')
                     else case.ignition_delay.to('second').magnitude
                     for case in properties.datapoints
                     ]
        # get variable that is changing across datapoints
        variable = get_changing_variable(properties.datapoints)
        # for ignition delay, use logarithm of values
        standard_dev = estimate_std_dev(variable, numpy.log(ign_delay))
        dataset_meta['standard deviation'] = float(standard_dev)

        #######################################################
        # Need to check if Ar or He in reactants but not model,
        # and if so skip this dataset (for now).
        #######################################################
        if ((any(['Ar' in spec for case in properties.datapoints
                  for spec in case.composition]
                 )
             and 'Ar' not in model_spec_key[model_name]
             ) or
            (any(['He' in spec for case in properties.datapoints
                  for spec in case.composition]
                 )
             and 'He' not in model_spec_key[model_name]
             )
            ):
            warnings.warn('Warning: Ar or He in dataset, but not in model. Skipping.',
                          RuntimeWarning
                          )
            # NaN marks the skipped set; nanmean below ignores it.
            error_func_sets[idx_set] = numpy.nan
            continue

        # Use available number of processors minus one,
        # or one process if single core.
        pool = multiprocessing.Pool(processes=num_threads)

        # setup all cases
        jobs = []
        for idx, sim in enumerate(simulations):
            # special treatment based on pressure for Princeton model (and others)
            if model_variant and model_name in model_variant:
                model_mod = ''
                if 'bath gases' in model_variant[model_name]:
                    # find any bath gases requiring special treatment
                    bath_gases = set(model_variant[model_name]['bath gases'])
                    gases = bath_gases.intersection(
                        set([c['species-name'] for c in sim.properties.composition])
                        )

                    # If only one bath gas present, use that. If multiple, use the
                    # predominant species. If none of the designated bath gases
                    # are present, just use the first one (shouldn't matter.)
                    if len(gases) > 1:
                        # BUG FIX: ``max_mole`` was never updated, so the last
                        # gas with a nonzero amount was chosen instead of the
                        # predominant one.
                        # NOTE(review): dict-style access here differs from the
                        # attribute access (sim.properties.composition[...])
                        # used below — confirm against the pyked version in use.
                        max_mole = 0.
                        sp = ''
                        for g in gases:
                            amount = float(sim.properties['composition'][g])
                            if amount > max_mole:
                                max_mole = amount
                                sp = g
                    elif len(gases) == 1:
                        sp = gases.pop()
                    else:
                        # If no designated bath gas present, use any.
                        sp = bath_gases.pop()
                    model_mod += model_variant[model_name]['bath gases'][sp]

                if 'pressures' in model_variant[model_name]:
                    # pressure to atm
                    pres = sim.properties.pressure.to('atm').magnitude

                    # choose closest pressure
                    # better way to do this?
                    i = numpy.argmin(numpy.abs(numpy.array(
                        [float(n)
                         for n in list(model_variant[model_name]['pressures'])
                         ]
                        ) - pres))
                    pres = list(model_variant[model_name]['pressures'])[i]
                    model_mod += model_variant[model_name]['pressures'][pres]

                model_file = os.path.join(model_path, model_name + model_mod)
            else:
                model_file = os.path.join(model_path, model_name)

            jobs.append([sim, model_file, model_spec_key[model_name], results_path, restart])

        # run all cases
        jobs = tuple(jobs)
        results = pool.map(simulation_worker, jobs)

        # not adding more proceses, and ensure all finished
        pool.close()
        pool.join()

        dataset_meta['datapoints'] = []

        for idx, sim in enumerate(results):
            sim.process_results()

            if hasattr(sim.properties.ignition_delay, 'value'):
                ignition_delay = sim.properties.ignition_delay.value
            else:
                ignition_delay = sim.properties.ignition_delay
            if hasattr(ignition_delay, 'nominal_value'):
                ignition_delay = ignition_delay.nominal_value * units.second

            dataset_meta['datapoints'].append(
                {'experimental ignition delay': str(ignition_delay),
                 'simulated ignition delay': str(sim.meta['simulated-ignition-delay']),
                 'temperature': str(sim.properties.temperature),
                 'pressure': str(sim.properties.pressure),
                 'composition': [{'InChI': sim.properties.composition[spec].InChI,
                                  'species-name': sim.properties.composition[spec].species_name,
                                  'amount': str(sim.properties.composition[spec].amount.magnitude),
                                  } for spec in sim.properties.composition],
                 'composition type': sim.properties.composition_type,
                 })
            ignition_delays_exp[idx] = ignition_delay.magnitude
            ignition_delays_sim[idx] = sim.meta['simulated-ignition-delay'].magnitude

        # calculate error function for this dataset: mean squared log-difference,
        # normalized by the dataset's estimated standard deviation
        error_func = numpy.power(
            (numpy.log(ignition_delays_sim) -
             numpy.log(ignition_delays_exp)) / standard_dev, 2
            )
        error_func = numpy.nanmean(error_func)
        error_func_sets[idx_set] = error_func
        dataset_meta['error function'] = float(error_func)

        # signed deviation function (retains direction of model error)
        dev_func = (numpy.log(ignition_delays_sim) -
                    numpy.log(ignition_delays_exp)
                    ) / standard_dev
        dev_func = numpy.nanmean(dev_func)
        dev_func_sets[idx_set] = dev_func
        dataset_meta['absolute deviation'] = float(dev_func)

        output['datasets'].append(dataset_meta)

        if print_results:
            print('Done with ' + dataset)

    # Overall error function
    error_func = numpy.nanmean(error_func_sets)
    if print_results:
        print('overall error function: ' + repr(error_func))
        print('error standard deviation: ' + repr(numpy.nanstd(error_func_sets)))

    # Absolute deviation function
    abs_dev_func = numpy.nanmean(dev_func_sets)
    if print_results:
        print('absolute deviation function: ' + repr(abs_dev_func))

    output['average error function'] = float(error_func)
    output['error function standard deviation'] = float(numpy.nanstd(error_func_sets))
    output['average deviation function'] = float(abs_dev_func)

    # Write data to YAML file
    with open(splitext(basename(model_name))[0] + '-results.yaml', 'w') as f:
        yaml.dump(output, f)

    return output