# Source code for asaplib.kernel.kernel_transforms

"""
Methods and functions to convert descriptors to kernels for samples

Kernels are measures of similarity, 
i.e. s(a, b) > s(a, c) if objects a and b are considered “more similar” than objects a and c.
A kernel must also be positive semi-definite.

Essentially, for each pair of samples a and b we compute k(a, b)
from their descriptor vectors d(a) and d(b).
"""
import numpy as np
import json
from .ml_kernel_operations import normalizekernel

class Descriptors_to_Kernels:
    """
    Turn descriptor matrices into a kernel matrix.

    The object holds a dictionary of kernel specifications, builds one kernel
    "engine" per specification, and sums the kernel matrices the engines
    produce.
    """

    def __init__(self, k_spec_dict=None):
        """
        Object handling the specification and the computation of kernels

        Parameters
        ----------
        k_spec_dict: dict, optional
            dictionary that specifies how to convert descriptors into a
            kernel matrix, keyed by an arbitrary tag, e.g.
            k_spec_dict = {
                "first_kernel": {"type": 'linear', "normalize": True},
                "second_kernel": {"type": 'cosine'},
                "third_kernel": {"type": 'polynomial', "d": 3, "normalize": True}
            }
            Omitting it (or passing None) starts from an empty specification.
            A dictionary that is passed in is stored by reference, and
            bind() adds an 'acronym' entry to each of its specifications.

        Notice that we can specify multiple kernels here.
        What we do is that:
        1. compute k(a,b) for all these kernel functions
        2. sum up all k(a,b).
        """
        # A literal `{}` default would be one dict shared by every instance
        # created without arguments; add()/bind() mutate it, so each instance
        # must get its own.
        self.k_spec_dict = {} if k_spec_dict is None else k_spec_dict
        # kernel (similarity measurement) objects, keyed like k_spec_dict
        self.engines = {}
        self.acronym = ""

        self.bind()

    def add(self, k_spec, tag):
        """
        adding the specification of a new kernel function

        The matching kernel object is created immediately, so compute() and
        get_acronym() work without an extra call to bind().

        Parameters
        ----------
        k_spec: dict
            specification of the kernel function; must contain a "type" key
        tag: hashable
            key under which the specification is stored

        Raises
        ------
        ValueError
            if k_spec has no "type" entry
        NotImplementedError
            if the requested type is not supported
        """
        self.k_spec_dict[tag] = k_spec
        self._bind_one(tag)
        # the cached acronym no longer covers every kernel
        self.acronym = ""

    def pack(self):
        """Return the kernel specifications as a JSON string with sorted keys."""
        # NOTE(review): NpEncoder is not imported in the visible part of this
        # module -- confirm it is in scope, otherwise this raises NameError.
        return json.dumps(self.k_spec_dict, sort_keys=True, cls=NpEncoder)

    def get_acronym(self):
        """
        Return the concatenated acronyms of all kernels.

        The string is built on first use, in the iteration order of the
        specification dictionary, and cached in self.acronym.
        """
        if self.acronym == "":
            self.acronym = "".join(
                self.engines[tag].get_acronym() for tag in self.k_spec_dict
            )
        return self.acronym

    def bind(self):
        """
        binds the objects that actually compute the kernels

        These objects need to provide a .transform(desc_a, desc_b) method
        that computes a kernel matrix from two descriptor matrices, and a
        .get_acronym() method. Each specification also gets an 'acronym'
        entry holding the acronym of its kernel object.
        """
        # clear up the objects
        self.engines = {}
        for tag in self.k_spec_dict:
            self._bind_one(tag)
        # force get_acronym() to rebuild from the fresh engines
        self.acronym = ""

    def _bind_one(self, tag):
        """Create the kernel object for one tag and record its acronym."""
        k_spec = self.k_spec_dict[tag]
        engine = self._call(k_spec)
        self.engines[tag] = engine
        k_spec['acronym'] = engine.get_acronym()

    def _call(self, k_spec):
        """
        call the specific kernel objects

        Parameters
        ----------
        k_spec: dict
            specification with a "type" of 'linear', 'polynomial' or 'cosine'

        Raises
        ------
        ValueError
            if "type" is missing
        NotImplementedError
            if the type is none of the supported ones
        """
        if "type" not in k_spec.keys():
            raise ValueError("Did not specify the type of the kernel function.")

        kernel_type = k_spec["type"]
        if kernel_type == "linear":
            return Kernel_Function_Linear(k_spec)
        elif kernel_type == "polynomial":
            return Kernel_Function_Polynomial(k_spec)
        elif kernel_type == "cosine":
            return Kernel_Function_Cosine(k_spec)
        raise NotImplementedError(
            "Unsupported kernel type: {!r}".format(kernel_type)
        )

    def compute(self, desc_a, desc_b=None):
        """
        compute the kernel matrix as the sum of all specified kernels

        Parameters
        ----------
        desc_a : array-like, one row per sample
            descriptor (design) matrix of the first set of samples
        desc_b : array-like, one row per sample, optional
            descriptor matrix of the second set of samples;
            desc_a is used when omitted

        Returns
        -------
        k_mat : array, shape=[len(desc_a), len(desc_b)]
            sum of the kernel matrices of all kernels; all zeros when no
            kernel is specified
        """
        if desc_b is None:
            desc_b = desc_a

        k_mat = np.zeros((len(desc_a), len(desc_b)), dtype=float)
        for tag in self.k_spec_dict:
            k_mat += self.engines[tag].transform(desc_a, desc_b)
        # the sum itself is not normalized
        return k_mat
class Kernel_Function_Base:
    """
    Common interface of the kernel function objects.

    A kernel function exposes a short acronym and a transform() method that
    maps two descriptor matrices to a kernel matrix. This base class is a
    placeholder: its acronym is empty and transform() yields an empty list.
    """

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: dict
            kernel specification; not used by the base class
        """
        self.acronym = ""

    def get_acronym(self):
        """Return the acronym of this kernel, used to find and refer to it."""
        return self.acronym

    def transform(self, desc_a, desc_b):
        """Placeholder: ignore both descriptor matrices and return an empty list."""
        return []
class Kernel_Function_Linear(Kernel_Function_Base):
    """Linear kernel: k(a, b) = d(a) . d(b), optionally normalized."""

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: dict
            kernel specification; the optional "normalize" entry switches on
            normalization of the kernel matrix (default False)
        """
        super().__init__(k_spec)
        self.acronym = "linear"
        # Only a missing key (or a spec that cannot be indexed by key) means
        # "use the default"; anything else should propagate.
        try:
            self.normalize = k_spec['normalize']
        except (KeyError, TypeError):
            self.normalize = False

    def transform(self, desc_a, desc_b):
        """
        Compute the linear kernel between two sets of samples.

        Parameters
        ----------
        desc_a : array, one row per sample
        desc_b : array, one row per sample (must provide .T)

        Returns
        -------
        kernel matrix np.dot(desc_a, desc_b.T), passed through
        normalizekernel() when normalization is requested and both sets
        contain the same number of samples
        """
        kernel = np.dot(desc_a, desc_b.T)
        # NOTE(review): normalization is keyed on equal sample counts only, so
        # it is silently skipped for rectangular kernels and applied to two
        # different sets that happen to have the same size -- confirm that
        # this matches what normalizekernel() expects.
        if self.normalize and len(desc_a) == len(desc_b):
            return normalizekernel(kernel)
        return kernel
class Kernel_Function_Polynomial(Kernel_Function_Base):
    """Polynomial kernel: k(a, b) = (d(a) . d(b)) ** d, optionally normalized."""

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: dict
            kernel specification; "d" (the exponent) is required, the
            optional "normalize" entry switches on normalization of the
            kernel matrix (default False)

        Raises
        ------
        KeyError
            if k_spec has no "d" entry
        """
        super().__init__(k_spec)
        self.acronym = "poly"
        self.d = k_spec['d']
        # Only a missing key (or a spec that cannot be indexed by key) means
        # "use the default"; anything else should propagate.
        try:
            self.normalize = k_spec['normalize']
        except (KeyError, TypeError):
            self.normalize = False

    def transform(self, desc_a, desc_b):
        """
        Compute the polynomial kernel between two sets of samples.

        Parameters
        ----------
        desc_a : array, one row per sample
        desc_b : array, one row per sample (must provide .T)

        Returns
        -------
        kernel matrix np.power(np.dot(desc_a, desc_b.T), d), passed through
        normalizekernel() when normalization is requested and both sets
        contain the same number of samples
        """
        kernel = np.power(np.dot(desc_a, desc_b.T), self.d)
        # NOTE(review): normalization is keyed on equal sample counts only, so
        # it is silently skipped for rectangular kernels and applied to two
        # different sets that happen to have the same size -- confirm that
        # this matches what normalizekernel() expects.
        if self.normalize and len(desc_a) == len(desc_b):
            return normalizekernel(kernel)
        return kernel
class Kernel_Function_Cosine(Kernel_Function_Base):
    """Cosine-similarity kernel, delegated to scikit-learn."""

    def __init__(self, k_spec):
        """
        Parameters
        ----------
        k_spec: dict
            kernel specification; no entry of it is read by this kernel
        """
        self.acronym = 'cos'

    def transform(self, desc_a, desc_b):
        """
        Compute the cosine similarity between two sets of samples.

        Parameters
        ----------
        desc_a : array-like, one row per sample
        desc_b : array-like, one row per sample

        Returns
        -------
        the result of sklearn.metrics.pairwise.cosine_similarity(desc_a, desc_b)
        """
        # imported here so scikit-learn is only needed when this kernel is used
        from sklearn.metrics.pairwise import cosine_similarity

        similarity = cosine_similarity(desc_a, desc_b)
        return similarity