Source code for asaplib.io.cscope

"""
Adaptors for generating ChemiScope-compatible inputs
"""

import warnings
import numpy as np
import json
import gzip

IGNORED_ASE_ARRAYS = ['positions', 'numbers']


def _typetransform(data):
    """Ensure data is a list of JSON serialisable objects"""
    assert isinstance(data, list) and len(data) > 0
    if isinstance(data[0], str):
        return list(map(str, data))
    elif isinstance(data[0], bytes):
        return list(map(lambda u: u.decode('utf8'), data))
    else:
        try:
            return [float(value) for value in data]
        except ValueError:
            raise Exception('unsupported type in value')
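
# Illustrative, hedged examples of how _typetransform coerces its input
# (the literal values below are hypothetical, chosen only for illustration):
#   _typetransform([1, 2, 3])        -> [1.0, 2.0, 3.0]   numeric data -> float
#   _typetransform([b'fcc', b'bcc']) -> ['fcc', 'bcc']    bytes -> utf8 str
#   _typetransform(['fcc', 'bcc'])   -> ['fcc', 'bcc']    str kept as str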


def _linearize(name, value):
    """
    Transform 2D arrays into multiple 1D arrays, converting types to fit JSON
    as needed.
    """
    data = {}
    if isinstance(value['values'], list):
        data[name] = {
            'target': value['target'],
            'values': _typetransform(value['values']),
        }
    elif isinstance(value['values'], np.ndarray):
        if len(value['values'].shape) == 1:
            data[name] = {
                'target': value['target'],
                'values': _typetransform(list(value['values'])),
            }
        elif len(value['values'].shape) == 2:
            for i in range(value['values'].shape[1]):
                data[f'{name}[{i + 1}]'] = {
                    'target': value['target'],
                    'values': _typetransform(list(value['values'][:, i])),
                }
        else:
            raise Exception('unsupported ndarray value')
    else:
        raise Exception(f'unknown type for value {name}')

    return data
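
# Illustrative, hedged example of _linearize splitting a 2D array column-wise
# (the property name and array shape below are hypothetical):
#   _linearize('cheese', {'target': 'atom', 'values': np.zeros((300, 4))})
# returns a dict with keys 'cheese[1]' ... 'cheese[4]', each entry holding
# 'target': 'atom' and a list of 300 float values (one column of the array).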


def _frame_to_json(frame):
    """Convert an ase.Atoms frame into the JSON-able structure dict used by chemiscope"""
    data = {}
    data['size'] = len(frame)
    data['names'] = list(frame.symbols)
    data['x'] = [float(value) for value in frame.positions[:, 0]]
    data['y'] = [float(value) for value in frame.positions[:, 1]]
    data['z'] = [float(value) for value in frame.positions[:, 2]]

    if (frame.cell.lengths() != [0.0, 0.0, 0.0]).all():
        data['cell'] = list(np.concatenate(frame.cell))

    return data
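
# Illustrative, hedged sketch of the dict produced by _frame_to_json for a
# hypothetical two-atom frame with a non-zero cell:
#   {
#       'size': 2,
#       'names': ['O', 'H'],
#       'x': [0.0, 0.96], 'y': [0.0, 0.0], 'z': [0.0, 0.0],
#       'cell': [10.0, 0.0, 0.0,  0.0, 10.0, 0.0,  0.0, 0.0, 10.0],
#   }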


def _generate_environments(frames, cutoff):
    """Build one atom-centered environment entry per atom in each frame"""
    environments = []
    for frame_id, frame in enumerate(frames):
        for center in range(len(frame)):
            environments.append({
                'structure': frame_id,
                'center': center,
                'cutoff': cutoff,
            })
    return environments
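
# Illustrative, hedged example: for two hypothetical frames with two atoms each
# and cutoff=3.5, _generate_environments returns four entries of the form
#   {'structure': 0, 'center': 0, 'cutoff': 3.5}, ...,
#   {'structure': 1, 'center': 1, 'cutoff': 3.5}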


def write_chemiscope_input(filename, frames, meta=None, extra=None, cutoff=None):
    """
    Write the JSON file expected by the default chemiscope visualizer at
    ``filename``.

    :param str filename: name of the file used to save the JSON data. If it
        ends with '.gz', a gzip-compressed file will be written
    :param list frames: list of `ase.Atoms`_ objects containing all the
        structures
    :param dict meta: optional metadata of the dataset, see below
    :param dict extra: optional dictionary of additional properties, see below
    :param float cutoff: optional. If present, it is used to generate
        atom-centered environments

    The dataset metadata should be given in the ``meta`` dictionary; the
    possible keys are:

    .. code-block:: python

        meta = {
            'name': '...',         # str, dataset name
            'description': '...',  # str, dataset description
            'authors': [           # list of str, dataset authors, OPTIONAL
                '...',
            ],
            'references': [        # list of str, references for this dataset,
                '...',             # OPTIONAL
            ],
        }

    The written JSON file will contain all the properties defined on the
    `ase.Atoms`_ objects. Values in ``ase.Atoms.arrays`` are mapped to
    ``target = "atom"`` properties, while values in ``ase.Atoms.info`` are
    mapped to ``target = "structure"`` properties. The only exceptions are
    ``ase.Atoms.arrays["positions"]`` and ``ase.Atoms.arrays["numbers"]``,
    which are always ignored. If you want the atomic numbers as a property,
    add them to ``extra`` manually.

    Additional properties can be added with the ``extra`` parameter. This
    parameter should be a dictionary containing one entry for each property.
    Each entry contains a ``target`` attribute (``'atom'`` or ``'structure'``)
    and a set of ``values``. ``values`` can be a Python list of float or
    string, a 1D numpy array of numeric values, or a 2D numpy array of numeric
    values. In the latter case, multiple properties will be generated along
    the second axis. For example, passing

    .. code-block:: python

        extra = {
            'cheese': {
                'target': 'atom',
                'values': np.zeros((300, 4))
            }
        }

    will generate four properties named ``cheese[1]``, ``cheese[2]``,
    ``cheese[3]``, and ``cheese[4]``, each containing 300 values.

    .. _`ase.Atoms`: https://wiki.fysik.dtu.dk/ase/ase/atoms.html

    :NOTE: Adapted from:
        https://github.com/cosmo-epfl/chemiscope/blob/master/utils/chemiscope_input.py
    """
    if not (filename.endswith('.json') or filename.endswith('.json.gz')):
        raise Exception('filename should end with .json or .json.gz')

    data = {'meta': {}}

    if meta is not None:
        if 'name' in meta:
            data['meta']['name'] = str(meta['name'])
        if 'description' in meta:
            data['meta']['description'] = str(meta['description'])
        if 'authors' in meta:
            data['meta']['authors'] = list(map(str, meta['authors']))
        if 'references' in meta:
            data['meta']['references'] = list(map(str, meta['references']))
        for key in meta.keys():
            if key not in ['name', 'description', 'authors', 'references']:
                warnings.warn('ignoring unexpected metadata: {}'.format(key))

    if 'name' not in data['meta'] or not data['meta']['name']:
        data['meta']['name'] = filename

    properties = {}
    if extra is not None:
        for name, value in extra.items():
            properties.update(_linearize(name, value))

    # Read properties coming from the ase.Atoms objects
    from_frames = {}

    # target: structure properties
    # TODO: this needs to be updated if ASAP stores per-atom arrays in the INFO dict
    def _append_value(from_frames, name, value):
        """Append value to from_frames; create the entry if it does not exist"""
        if name in from_frames:
            from_frames[name]['values'].append(value)
        else:
            from_frames.update(
                {name: {
                    'target': 'structure',
                    'values': [value]
                }})
        return

    def _extend_value(from_frames, name, value):
        """Extend the entry in from_frames; create the entry if it does not exist"""
        if name in from_frames:
            from_frames[name]['values'].extend(value)
        else:
            from_frames.update(
                {name: {
                    'target': 'atom',
                    'values': list(value)
                }})
        return

    for frame in frames:
        for name, value in frame.info.items():
            if isinstance(value, (list, tuple, np.ndarray)):
                for idx, _value in enumerate(value):
                    _name = name + f'[{idx}]'
                    _append_value(from_frames, _name, _value)
            else:
                _append_value(from_frames, name, value)

    # target: atom properties
    has_atomic = False
    for frame in frames:
        for name, value in frame.arrays.items():
            if name in IGNORED_ASE_ARRAYS:
                continue
            has_atomic = True
            if len(value.shape) > 1:
                # Iterate over the columns
                for idx, _value in enumerate(value.T):
                    _name = 'atomic-' + name + f'[{idx}]'
                    _extend_value(from_frames, _name, _value)
            else:
                _extend_value(from_frames, name, value)

    for name, value in from_frames.items():
        properties.update(_linearize(name, value))

    data['properties'] = properties
    data['structures'] = [_frame_to_json(frame) for frame in frames]

    if cutoff is not None and has_atomic:
        data['environments'] = _generate_environments(frames, cutoff)

    if filename.endswith(".gz"):
        with gzip.open(filename, 'w', 9) as file:
            file.write(json.dumps(data).encode("utf8"))
    else:
        with open(filename, 'w') as file:
            json.dump(data, file)
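

if __name__ == '__main__':
    # Minimal usage sketch, not part of the library API: the trajectory file
    # name, the 'energy' info key, and the cutoff value below are hypothetical
    # assumptions used only for illustration.
    from ase.io import read

    example_frames = read('example_trajectory.xyz', index=':')
    example_extra = {
        'energy': {
            'target': 'structure',
            'values': np.array([f.info.get('energy', 0.0) for f in example_frames]),
        }
    }
    write_chemiscope_input(
        'example_chemiscope.json.gz',
        example_frames,
        meta={'name': 'example dataset'},
        extra=example_extra,
        cutoff=3.5,
    )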