# Source code for asaplib.descriptors.atomic_to_global

"""
Methods and functions to compute global descriptors of a frame from its atomic descriptors
"""

import json

import numpy as np

from ..io import NpEncoder


class Atomic_2_Global_Descriptors:
    """
    Collection of reducers that turn the atomic descriptors of a frame into global descriptors.

    Every entry of `k_spec_dict` (tag -> specification dictionary) is bound to one
    reducer object, stored under the same tag in `self.engines`.
    """

    def __init__(self, k_spec_dict):
        """
        Object handling the reducer functions used to convert atomic descriptors into global ones

        Parameters
        ----------
        k_spec_dict: dictionaries that specify which atomic to global descriptor to use
        e.g.
        k_spec_dict = {'first_reducer': {'reducer_type': reducer_type,
                          'zeta': zeta,
                          'species': species,
                          'element_wise': element_wise}}

        The dictionary is stored by reference (not copied), and bind() adds an
        'acronym' entry to each of its specifications.
        """
        self.k_spec_dict = k_spec_dict
        # tag -> reducer object; (re)built by bind()
        self.engines = {}

        self.bind()

    def add(self, k_spec, tag):
        """
        adding the specifications of a new Atomic_2_Global_Descriptor

        Only the specification is stored here; no reducer object is created.
        Call bind() afterwards, otherwise compute() will not find an engine
        for the new tag.

        Parameters
        ----------
        k_spec: a dictionary that specify which Atomic_2_Global_Descriptor descriptor to use
        tag: the key under which the specification is stored in k_spec_dict
        """
        self.k_spec_dict[tag] = k_spec

    def pack(self):
        """
        Returns
        -------
        a JSON string of k_spec_dict (keys sorted, encoded with NpEncoder)
        """
        return json.dumps(self.k_spec_dict, sort_keys=True, cls=NpEncoder)

    def bind(self):
        """
        binds the objects that actually compute the descriptors
        these objects need to have .create(atomic_desc) method to compute the global descriptors from atomic ones

        Also writes the acronym of each reducer back into its specification
        under the key 'acronym'.
        """
        # clear up the objects
        self.engines = {}
        for tag, k_spec in self.k_spec_dict.items():
            engine = self._call(k_spec)
            self.engines[tag] = engine
            k_spec['acronym'] = engine.get_acronym()

    def _call(self, k_spec):
        """
        call the specific descriptor objects

        Parameters
        ----------
        k_spec: a specification dictionary; its 'reducer_type' entry selects the reducer class

        Returns
        -------
        the reducer object built from k_spec

        Raises
        ------
        ValueError: if k_spec has no 'reducer_type' entry
        NotImplementedError: if the 'reducer_type' is not one of
                             'average', 'sum', 'moment_average', 'moment_sum'
        """
        if "reducer_type" not in k_spec:
            raise ValueError("Did not specify the type of the global descriptor reducer.")

        reducer_type = k_spec["reducer_type"]
        if reducer_type == "average":
            return Atomic_2_Global_Average(k_spec)
        elif reducer_type == "sum":
            return Atomic_2_Global_Sum(k_spec)
        elif reducer_type == "moment_average":
            return Atomic_2_Global_Moment_Average(k_spec)
        elif reducer_type == "moment_sum":
            return Atomic_2_Global_Moment_Sum(k_spec)
        else:
            raise NotImplementedError("Unknown reducer_type: " + repr(reducer_type))

    def compute(self, atomic_desc_dict, atomic_numbers):
        """
        compute the global descriptor vector for a frame from atomic contributions

        Every bound reducer is applied to every atomic descriptor in atomic_desc_dict.

        Parameters
        ----------
        atomic_desc_dict : a dictionary. each entry contains the essential info of the descriptor ('acronym')
                          and the atomic descriptors of the frame ('atomic_descriptors').
                     see Atomic_Descriptors.compute() in .atomic_descriptors.py
        atomic_numbers: np.matrix. [N_atoms]. Atomic numbers for atoms in the frame.

        Returns
        -------
        desc_dict: a nested dictionary, first keyed by the tag of the atomic descriptor and then
                   by the tag of the reducer. each innermost entry contains the acronym
                   and a np.array [N_desc]. Global descriptors for a frame.
                   e.g. {'d1': {'r1': {'acronym': 'XXX', 'descriptors': `a np.array [N_desc]`}}}
        """
        desc_dict = {}
        for desc_tag, atomic_entry in atomic_desc_dict.items():
            atomic_desc_now = atomic_entry['atomic_descriptors']
            reduced = {}
            for tag in self.k_spec_dict:
                k_acronym, descriptors = self.engines[tag].create(atomic_desc_now, atomic_numbers)
                reduced[tag] = {
                    'descriptors': descriptors,
                    # we use a combination of the acronym of the descriptor and of the reducer function
                    'acronym': atomic_entry['acronym'] + k_acronym,
                }
            desc_dict[desc_tag] = reduced
        return desc_dict


class Atomic_2_Global_Base:
    """
    Base class of the reducers that convert atomic descriptors into global ones.

    It reads the options shared by all reducers from the specification
    dictionary and provides a placeholder create() that subclasses override.
    """

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: a dictionary with the specifications of the reducer.
                'element_wise' (optional, default False): reduce each species separately
                'species': the global species; required when 'element_wise' is true

        Raises
        ------
        ValueError: if 'element_wise' is true but 'species' is not given
        """
        self.acronym = ""
        # we have defaults here; the default is not to distinguish between different elements
        self.element_wise = bool(k_spec.get('element_wise', False))

        if self.element_wise:
            try:
                self.species = k_spec['species']
            except KeyError as err:
                raise ValueError(
                    "Cannot do element-wise operations without specifying the global species") from err
            self.acronym = "-e"

    def get_acronym(self):
        """Return the acronym of this reducer."""
        # we use an acronym for each descriptor, so it's easy to find it and refer to it
        return self.acronym

    def create(self, atomic_desc, atomic_numbers=()):
        """
        compute the global descriptor vector for a frame from atomic contributions

        This base implementation is a placeholder and does not use its arguments.

        Parameters
        ----------
        atomic_desc: a np.array [N_desc*N_atoms]. Atomic descriptors for a frame.
        atomic_numbers: np.matrix. [N_atoms]. Atomic numbers for atoms in the frame.

        Returns
        -------
        acronym: self.acronym
        desc:  an empty list in this base class; subclasses return
               a np.array [N_desc]. Global descriptors for a frame.
        """
        return self.acronym, []


class Atomic_2_Global_Average(Atomic_2_Global_Base):
    """this is the vanilla situation. We just take the average soap for all atoms"""

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: a dictionary with the specifications of the reducer;
                its 'reducer_type' must be "average"

        Raises
        ------
        ValueError: if 'reducer_type' is missing or is not "average"
        """
        super().__init__(k_spec)

        if k_spec.get("reducer_type") != "average":
            raise ValueError("reducer type is not average or cannot find the type")

        print("Using Atomic_2_Global_Average reducer ...")

    def create(self, atomic_desc, atomic_numbers=()):
        """
        average the atomic descriptors of a frame

        Parameters
        ----------
        atomic_desc: atomic descriptors for a frame; the average is taken along axis 0
        atomic_numbers: atomic numbers for atoms in the frame; only used when element_wise is set

        Returns
        -------
        acronym: self.acronym
        desc: the mean along axis 0, or, when element_wise is set,
              the output of Descriptor_By_Species averaged for each species
        """
        if self.element_wise:
            return self.acronym, Descriptor_By_Species(atomic_desc, atomic_numbers, self.species, True)
        return self.acronym, np.mean(atomic_desc, axis=0)


class Atomic_2_Global_Sum(Atomic_2_Global_Base):
    """ We just take the sum soap for all atoms"""

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: a dictionary with the specifications of the reducer;
                its 'reducer_type' must be "sum"

        Raises
        ------
        ValueError: if 'reducer_type' is missing or is not "sum"
        """
        super().__init__(k_spec)

        if k_spec.get("reducer_type") != "sum":
            raise ValueError("reducer type is not sum or cannot find the type")

        print("Using Atomic_2_Global_Sum reducer ...")
        self.acronym += "-sum"

    def create(self, atomic_desc, atomic_numbers=()):
        """
        sum the atomic descriptors of a frame

        Parameters
        ----------
        atomic_desc: atomic descriptors for a frame; the sum is taken along axis 0
        atomic_numbers: atomic numbers for atoms in the frame; only used when element_wise is set

        Returns
        -------
        acronym: self.acronym
        desc: the sum along axis 0, or, when element_wise is set,
              the output of Descriptor_By_Species summed for each species
        """
        if self.element_wise:
            return self.acronym, Descriptor_By_Species(atomic_desc, atomic_numbers, self.species, False)
        return self.acronym, np.sum(atomic_desc, axis=0)


class Atomic_2_Global_Moment_Average(Atomic_2_Global_Base):
    """
    get the global descriptor from atomic ones
    by averaging over the atomic descriptors of z th power

    Parameters
    ----------
    zeta: take the zeta th power
    """

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: a dictionary with the specifications of the reducer;
                its 'reducer_type' must be "moment_average" and it must contain 'zeta'

        Raises
        ------
        ValueError: if 'reducer_type' is missing or is not "moment_average",
                    or if 'zeta' is missing
        """
        super().__init__(k_spec)

        if k_spec.get("reducer_type") != "moment_average":
            raise ValueError("reducer type is not moment_average or cannot find the type")

        try:
            self.zeta = k_spec['zeta']
        except KeyError as err:
            raise ValueError("cannot initialize the zeta value") from err

        print("Using Atomic_2_Global_Moment_Average reducer ...")
        self.acronym += "-z-" + str(self.zeta)

    def create(self, atomic_desc, atomic_numbers=()):
        """
        average the zeta th power of the atomic descriptors of a frame

        Parameters
        ----------
        atomic_desc: atomic descriptors for a frame; raised to the power zeta with np.power
                     and then averaged along axis 0
        atomic_numbers: atomic numbers for atoms in the frame; only used when element_wise is set

        Returns
        -------
        acronym: self.acronym
        desc: the mean along axis 0 of the powered descriptors, or, when element_wise is set,
              the output of Descriptor_By_Species averaged for each species
        """
        powered = np.power(atomic_desc, self.zeta)
        if self.element_wise:
            return self.acronym, Descriptor_By_Species(powered, atomic_numbers, self.species, True)
        return self.acronym, np.mean(powered, axis=0)


class Atomic_2_Global_Moment_Sum(Atomic_2_Global_Base):
    """
    get the global descriptor from atomic ones
    by summing over the atomic descriptors of z th power

    Parameters
    ----------
    zeta: take the zeta th power
    """

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: a dictionary with the specifications of the reducer;
                its 'reducer_type' must be "moment_sum" and it must contain 'zeta'

        Raises
        ------
        ValueError: if 'reducer_type' is missing or is not "moment_sum",
                    or if 'zeta' is missing
        """
        super().__init__(k_spec)

        if k_spec.get("reducer_type") != "moment_sum":
            raise ValueError("reducer type is not moment_sum or cannot find the type")

        try:
            self.zeta = k_spec['zeta']
        except KeyError as err:
            raise ValueError("cannot initialize the zeta list") from err

        print("Using Atomic_2_Global_Moment_Sum reducer ...")
        self.acronym += "-z-" + str(self.zeta) + "-sum"

    def create(self, atomic_desc, atomic_numbers=()):
        """
        sum the zeta th power of the atomic descriptors of a frame

        Parameters
        ----------
        atomic_desc: atomic descriptors for a frame; raised to the power zeta with np.power
                     and then summed along axis 0
        atomic_numbers: atomic numbers for atoms in the frame; only used when element_wise is set

        Returns
        -------
        acronym: self.acronym
        desc: the sum along axis 0 of the powered descriptors, or, when element_wise is set,
              the output of Descriptor_By_Species summed for each species
        """
        powered = np.power(atomic_desc, self.zeta)
        if self.element_wise:
            return self.acronym, Descriptor_By_Species(powered, atomic_numbers, self.species, False)
        return self.acronym, np.sum(powered, axis=0)


def Descriptor_By_Species(atomic_desc, atomic_numbers, global_species, average_over_natom=True):
    """
    first compute the average/sum descriptors for each species,
    then concatenate them.

    Parameters
    ----------
    atomic_desc: array-like. [N_atoms, N_desc]. Atomic descriptors for a frame.
    atomic_numbers: array-like. [N_atoms]. Atomic numbers for atoms in the frame.
    global_species: a list of all atomic species in all frames
    average_over_natom: normalized by number of the atoms of the same species

    Returns
    -------
    desc: np.array [N_desc*len(global_species)]. Global descriptors for a frame.
          The block of a species that has no atom in the frame is left at zero.
    """
    atomic_desc = np.asarray(atomic_desc)
    desc_len = np.shape(atomic_desc)[1]
    # flatten once so that lists, tuples, arrays and row/column matrices are all handled alike
    numbers = np.asarray(atomic_numbers).ravel()
    reduce_rows = np.mean if average_over_natom else np.sum

    desc = np.zeros(desc_len * len(global_species), dtype=float)
    for slot, species in enumerate(global_species):
        rows = np.flatnonzero(numbers == species)
        # skip absent species: reducing an empty selection would give nan for the mean
        if rows.size > 0:
            desc[slot * desc_len:(slot + 1) * desc_len] = reduce_rows(atomic_desc[rows], axis=0)
    return desc