# Source code for asaplib.io.cscope
"""
Adaptors for generating ChemiScope compatible inputs
"""
import warnings
import numpy as np
import json
import gzip
IGNORED_ASE_ARRAYS = ['positions', 'numbers']
def _typetransform(data):
"""Ensure data is a list of JSON serialisable objects"""
assert isinstance(data, list) and len(data) > 0
if isinstance(data[0], str):
return list(map(str, data))
elif isinstance(data[0], bytes):
return list(map(lambda u: u.decode('utf8'), data))
else:
try:
return [float(value) for value in data]
except ValueError:
raise Exception('unsupported type in value')
def _linearize(name, value):
    """
    Flatten one property description into JSON-ready 1D entries.

    ``value`` is a dictionary with a ``'target'`` and a ``'values'`` entry.
    Lists and 1D numpy arrays produce a single entry called ``name``; a 2D
    numpy array produces one entry per column, called ``name[1]``,
    ``name[2]``, ... Values are converted with ``_typetransform``.

    :return: dictionary mapping property names to ``{'target', 'values'}``
    :raises Exception: for arrays with more than two dimensions, or when the
                       values are neither a list nor a numpy array
    """
    values = value['values']

    def _entry(column):
        # 'target' is looked up lazily, only when an entry is really built
        return {
            'target': value['target'],
            'values': _typetransform(column),
        }

    if isinstance(values, list):
        return {name: _entry(values)}

    if not isinstance(values, np.ndarray):
        raise Exception(f'unknown type for value {name}')

    ndim = len(values.shape)
    if ndim == 1:
        return {name: _entry(list(values))}
    if ndim == 2:
        # one property per column, numbered starting from 1
        return {
            f'{name}[{column + 1}]': _entry(list(values[:, column]))
            for column in range(values.shape[1])
        }
    raise Exception('unsupported ndarray value')
def _frame_to_json(frame):
    """
    Describe a single structure as a JSON-ready dictionary.

    The result holds the number of atoms (``size``), the atomic symbols
    (``names``) and the cartesian coordinates split in ``x``, ``y`` and ``z``
    lists of floats. The flattened ``cell`` is only added when none of the
    cell lengths is zero.
    """
    structure = {
        'size': len(frame),
        'names': list(frame.symbols),
        'x': list(map(float, frame.positions[:, 0])),
        'y': list(map(float, frame.positions[:, 1])),
        'z': list(map(float, frame.positions[:, 2])),
    }

    # A cell is written only if all of its lengths differ from zero
    all_lengths_set = (frame.cell.lengths() != [0.0, 0.0, 0.0]).all()
    if all_lengths_set:
        structure['cell'] = list(np.concatenate(frame.cell))
    return structure
def _generate_environments(frames, cutoff):
environments = []
for frame_id, frame in enumerate(frames):
for center in range(len(frame)):
environments.append({
'structure': frame_id,
'center': center,
'cutoff': cutoff,
})
return environments
def write_chemiscope_input(filename,
                           frames,
                           meta=None,
                           extra=None,
                           cutoff=None):
    """
    Write the json file expected by the default chemiscope visualizer at
    ``filename``.

    :param str filename: name of the file to use to save the json data. It
                         must end with '.json' or '.json.gz'; in the latter
                         case a gzip compressed file will be written
    :param list frames: list of `ase.Atoms`_ objects containing all the
                        structures
    :param dict meta: optional metadata of the dataset, see below
    :param dict extra: optional dictionary of additional properties, see below
    :param float cutoff: optional. If present, and if the dataset contains at
                         least one ``target = "atom"`` property, will be used
                         to generate atom-centered environments
    :raises Exception: if ``filename`` does not end with '.json' or '.json.gz'

    The dataset metadata should be given in the ``meta`` dictionary, the
    possible keys are:

    .. code-block:: python

        meta = {
            'name': '...',         # str, dataset name
            'description': '...',  # str, dataset description
            'authors': [           # list of str, dataset authors, OPTIONAL
                '...',
            ],
            'references': [        # list of str, references for this dataset,
                '...',             # OPTIONAL
            ],
        }

    Any other key in ``meta`` is ignored with a warning. If no (or an empty)
    name is given, ``filename`` is used as the dataset name.

    The written JSON file will contain all the properties defined on the
    `ase.Atoms`_ objects. Values in ``ase.Atoms.arrays`` are mapped to
    ``target = "atom"`` properties; while values in ``ase.Atoms.info`` are
    mapped to ``target = "structure"`` properties. The only exceptions are
    ``ase.Atoms.arrays["positions"]`` and ``ase.Atoms.arrays["numbers"]``,
    which are always ignored. If you want to have the atomic numbers as a
    property, you should add it to ``extra`` manually.

    List, tuple or array values in ``ase.Atoms.info`` are split in one
    property per element, named ``name[0]``, ``name[1]``, ... Multi-column
    entries of ``ase.Atoms.arrays`` are split in one property per column,
    named ``atomic-name[0]``, ``atomic-name[1]``, ...

    Additional properties can be added with the ``extra`` parameter. This
    parameter should be a dictionary containing one entry for each property.
    Each entry contains a ``target`` attribute (``'atom'`` or ``'structure'``)
    and a set of values. ``values`` can be a Python list of float or string; a
    1D numpy array of numeric values; or a 2D numpy array of numeric values. In
    the latter case, multiple properties will be generated along the second
    axis. For example, passing

    .. code-block:: python

        extra = {
            'cheese': {
                'target': 'atom',
                'values': np.zeros((300, 4))
            }
        }

    will generate four properties named ``cheese[1]``, ``cheese[2]``,
    ``cheese[3]``, and ``cheese[4]``, each containing 300 values.

    .. _`ase.Atoms`: https://wiki.fysik.dtu.dk/ase/ase/atoms.html

    :NOTE:
        Adapted from: https://github.com/cosmo-epfl/chemiscope/blob/master/utils/chemiscope_input.py
    """
    if not filename.endswith(('.json', '.json.gz')):
        raise Exception('filename should end with .json or .json.gz')

    # ---- dataset metadata -------------------------------------------------
    known_meta = ('name', 'description', 'authors', 'references')
    data = {'meta': {}}
    if meta is not None:
        if 'name' in meta:
            data['meta']['name'] = str(meta['name'])
        if 'description' in meta:
            data['meta']['description'] = str(meta['description'])
        if 'authors' in meta:
            data['meta']['authors'] = [str(a) for a in meta['authors']]
        if 'references' in meta:
            data['meta']['references'] = [str(r) for r in meta['references']]
        for key in meta:
            if key not in known_meta:
                warnings.warn('ignoring unexpected metadata: {}'.format(key))
    # Fall back to the file name when the name is missing or empty
    if not data['meta'].get('name'):
        data['meta']['name'] = filename

    # ---- user supplied properties ----------------------------------------
    properties = {}
    if extra is not None:
        for name, value in extra.items():
            properties.update(_linearize(name, value))

    # ---- properties coming from the ase.Atoms objects ---------------------
    from_frames = {}

    def _values_of(name, target):
        """Return the value list of ``name``, creating the entry if needed"""
        entry = from_frames.setdefault(name, {'target': target, 'values': []})
        return entry['values']

    # target: structure properties
    # TODO: this needs to be updated, as ASAP stores arrays for each atom in
    # the INFO?
    for frame in frames:
        for name, value in frame.info.items():
            if isinstance(value, (list, tuple, np.ndarray)):
                # one structure property per element of the sequence
                for idx, item in enumerate(value):
                    _values_of(f'{name}[{idx}]', 'structure').append(item)
            else:
                _values_of(name, 'structure').append(value)

    # target: atom properties
    # NOTE(review): an array sharing its name with an info entry is merged
    # into the existing 'structure' entry -- confirm this can not happen.
    has_atomic = False
    for frame in frames:
        for name, value in frame.arrays.items():
            if name in IGNORED_ASE_ARRAYS:
                continue
            has_atomic = True
            if len(value.shape) > 1:
                # one atom property per column of the array
                for idx, column in enumerate(value.T):
                    _values_of(f'atomic-{name}[{idx}]', 'atom').extend(column)
            else:
                _values_of(name, 'atom').extend(value)

    for name, value in from_frames.items():
        properties.update(_linearize(name, value))

    data['properties'] = properties
    data['structures'] = [_frame_to_json(frame) for frame in frames]

    # Atom properties can also come from ``extra``; they need environments
    # just as much as the ones read from the frames.
    has_atomic = has_atomic or any(
        entry['target'] == 'atom' for entry in properties.values())
    if cutoff is not None and has_atomic:
        data['environments'] = _generate_environments(frames, cutoff)

    # ---- output -----------------------------------------------------------
    if filename.endswith(".gz"):
        with gzip.open(filename, 'w', 9) as file:
            file.write(json.dumps(data).encode("utf8"))
    else:
        with open(filename, 'w') as file:
            json.dump(data, file)