# Source code for asaplib.fit.getscore

"""
Functions for assessing the quality of the fits
"""

import numpy as np
from scipy.stats import spearmanr

class LC_SCOREBOARD():
    """Accumulate test scores per training-set size for a learning curve.

    Scores are kept in ``self.scores``, a dict that maps every training-set
    size given at construction to the list of score records added for it.
    Each record is expected to be a mapping from metric name (e.g.
    ``'RMSE'``) to a scalar value.

    Parameters
    ----------
    train_sizes : iterable
        Training-set sizes to track. Each one becomes a key of
        ``self.scores`` with an initially empty list.
    """

    def __init__(self, train_sizes):
        self.scores = {size: [] for size in train_sizes}

    def add_score(self, Ntrain, score):
        """Append one score record for the training-set size ``Ntrain``.

        Raises
        ------
        KeyError
            If ``Ntrain`` was not among the sizes given at construction.
        """
        self.scores[Ntrain].append(score)

    def dump_all(self):
        """Return the raw ``{size: [score records]}`` dict (not a copy)."""
        return self.scores

    def fetch_all(self):
        """Return ``{metric name: fetch(metric name)}`` for every metric
        registered in the module-level ``score_func`` mapping."""
        return {sc: self.fetch(sc) for sc in score_func}

    def fetch(self, sc_name='RMSE'):
        """Summarise one metric across all training-set sizes.

        Parameters
        ----------
        sc_name : str
            Key of the metric to read from every score record.

        Returns
        -------
        numpy.ndarray
            One row per training-set size that has at least one record,
            with columns ``(size, mean score, standard deviation)``. The
            standard deviation is the population one (divides by the number
            of records). Sizes without records are skipped, so the result
            has shape ``(0, 3)`` when nothing has been recorded.
        """
        Ntrains = []
        avg_scores = []
        avg_scores_error = []
        for Ntrain, score in self.scores.items():
            if not score:
                # Nothing recorded for this size yet: there is no mean to
                # report, and dividing by len(score) == 0 would raise.
                continue
            values = np.asarray([sc[sc_name] for sc in score],
                                dtype=np.float64)
            Ntrains.append(int(Ntrain))
            avg_scores.append(values.mean())
            # values.std() sums squared deviations from the mean, so it can
            # never go negative; the E[x^2] - E[x]^2 shortcut can, through
            # rounding, and then sqrt yields NaN.
            avg_scores_error.append(values.std())
        return np.stack((Ntrains, avg_scores, avg_scores_error), axis=-1)

    def plot_learning_curve(self, sc_name='RMSE'):
        """Plot the learning curve of one metric on log-log axes.

        Parameters
        ----------
        sc_name : str
            Metric to plot; passed to :meth:`fetch`.

        Returns
        -------
        tuple
            The matplotlib ``(fig, ax)`` pair holding the error-bar plot of
            mean score versus training-set size.
        """
        # Imported here so matplotlib is only needed when plotting.
        from matplotlib import pyplot as plt
        lc_results = self.fetch(sc_name)

        fig, ax = plt.subplots()
        ax.errorbar(lc_results[:, 0], lc_results[:, 1],
                    yerr=lc_results[:, 2], linestyle='-',
                    uplims=True, lolims=True)
        ax.set_title('Learning curve')
        ax.set_xlabel('Number of training samples')
        ax.set_ylabel('Test {}'.format(sc_name))
        ax.set_xscale('log')
        ax.set_yscale('log')
        return fig, ax

def get_score(ypred, y):
    """Evaluate every registered metric on one set of predictions.

    Parameters
    ----------
    ypred
        Predicted values, passed unchanged to each metric function.
    y
        Reference values, passed unchanged to each metric function.

    Returns
    -------
    dict
        Maps each key of the module-level ``score_func`` registry to the
        value its function returns for ``(ypred, y)``.
    """
    return {name: metric(ypred, y) for name, metric in score_func.items()}

def get_r2(y_pred, y):
    """Return the coefficient of determination (R^2) of a prediction.

    R^2 is computed per output along axis 0 as
    ``1 - sum((y - y_pred)**2) / sum((y - mean(y))**2)`` and then averaged
    over all outputs with equal weight.

    Parameters
    ----------
    y_pred : array_like
        Predicted values.
    y : array_like
        Reference values; must broadcast against ``y_pred``.

    Returns
    -------
    float
        The mean R^2. When ``y`` is constant along axis 0 the denominator is
        zero, so the result follows NumPy's division-by-zero rules rather
        than being a finite score.
    """
    # Coerce so that plain Python sequences are accepted as well as arrays.
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    # Accumulate both sums in float64 whatever the input dtype is.
    residual_ss = ((y - y_pred) ** 2).sum(axis=0, dtype=np.float64)
    total_ss = ((y - np.mean(y, axis=0)) ** 2).sum(axis=0, dtype=np.float64)
    output_scores = 1 - (residual_ss / total_ss)
    return np.mean(output_scores).tolist()


def get_mae(ypred, y):
    """Return the mean absolute error between ``ypred`` and ``y``.

    Parameters
    ----------
    ypred : array_like
        Predicted values.
    y : array_like
        Reference values; must broadcast against ``ypred``.

    Returns
    -------
    float
        Mean of ``|ypred - y|`` over all elements.
    """
    # Coerce so that plain Python sequences are accepted as well as arrays.
    abs_err = np.abs(np.asarray(ypred) - np.asarray(y))
    return np.mean(abs_err).tolist()


def get_rmse(ypred, y):
    """Return the root-mean-square error between ``ypred`` and ``y``.

    Parameters
    ----------
    ypred : array_like
        Predicted values.
    y : array_like
        Reference values; must broadcast against ``ypred``.

    Returns
    -------
    float
        Square root of the mean of ``(ypred - y)**2`` over all elements.
    """
    # Coerce so that plain Python sequences are accepted as well as arrays.
    residual = np.asarray(ypred) - np.asarray(y)
    return np.sqrt(np.mean(residual ** 2)).tolist()


def get_sup(ypred, y):
    """Return the largest absolute error between ``ypred`` and ``y``.

    Parameters
    ----------
    ypred : array_like
        Predicted values.
    y : array_like
        Reference values; must broadcast against ``ypred``.

    Returns
    -------
    float or int
        Maximum of ``|ypred - y|`` over all elements, as a Python scalar of
        the kind matching the difference's dtype.
    """
    # Coerce so that plain Python sequences are accepted as well as arrays.
    abs_err = np.abs(np.asarray(ypred) - np.asarray(y))
    return np.amax(abs_err).tolist()


def get_spearman(ypred, y):
    """Return the Spearman rank correlation between ``ypred`` and ``y``.

    Parameters
    ----------
    ypred : array_like
        Predicted values, passed to ``scipy.stats.spearmanr``.
    y : array_like
        Reference values, passed to ``scipy.stats.spearmanr``.

    Returns
    -------
    float or list
        The correlation statistic converted to plain Python objects; the
        accompanying p-value is discarded.
    """
    corr, _ = spearmanr(ypred, y)
    # Go through asarray so the conversion works whether scipy hands back a
    # NumPy scalar, an array, or a plain Python float.
    return np.asarray(corr).tolist()

# Registry of the available quality metrics: short metric name -> function
# taking (ypred, y) and returning the score.
score_func = {
    "MAE": get_mae,
    "RMSE": get_rmse,
    "SUP": get_sup,
    "R2": get_r2,
    "CORR": get_spearman,
}