#!/usr/bin/python3

'''Linear regression example
'''


import numpy as np
import os
from matplotlib import pyplot as plt



def data_set_linear(count = 10, sigma = 0.1):
    '''Return a linear dataset with normally distributed noise

    @param {int} count number of points
    @param {float} sigma std deviation of the added noise
    @return {np.ndarray} x data
            {np.ndarray} y data
            {np.ndarray} unperturned y data

    '''
    # Make random values reproducible
    rstate = np.random.RandomState(0)
    x = np.linspace(0, 1, count)
    # Linear line with noise with no bias (offset)
    y = x + rstate.normal(0, sigma, len(x))
    return x, y, x

def basis(x, M):
    '''Polynomial basis function of M-th degree

    @param {float} location to evaluate the basis functions
    @param {int} M degree of the basis functions
    @return {np.1darray} basis function values at `x`

    '''

    # Include bias
    N = M+1
    ret = np.zeros(N, dtype=np.float64)
    for i in range(N):
        ret[i] = x**i
    return ret

def train(x, t, M):
    '''Determine polynomial model coefficients

    Minimization of the L2 norm of the error function
          E = 1/2 || y(x, w) - t ||_2^2
    with
          y = w0 + w1 * x + w2 * x^2 + ....

    @param {np.1darray} training observation points
    @param {np.1darray} training observed values
    @param {int} M polynomial degree (model complexity)
    @return {np.1darray} model coefficients [w0, w1, ...]

    '''

    # Include bias
    N = M + 1
    A = np.zeros(shape=(N, N), dtype=np.float64)
    b = np.zeros(N, dtype=np.float64)

    # Form LHS and RHS
    for i, x_ in enumerate(x):
        phi = basis(x_, M)
        # A is symmetric, so computation/storage can be improved
        A = A + np.outer(phi, phi)
        b += t[i] * phi
    return np.linalg.solve(A, b)

def predict(x, w):
    ''' Predict values observation values at `x`

    @param {float|np.1darray} location to evaluate model
    @param {np.1darray} model coefficients
    @return {np.array} Predicted values at @x
    '''
    x = iter(x)
    M = len(w)
    return np.array([basis(x_, M-1).dot(w) for x_ in x])

def main():
    x, t, t_unperturbed = data_set_linear()

    # Prediction values
    x_fine = np.linspace(x[0], x[-1], len(x)*2)

    # stage 1: "train" using 1st degree polynomial model
    w = train(x, t, 1)
    # stage 2: "predicted" model values using training set
    y = predict(x_fine, w)

    fig, ax = plt.subplots()
    # "Training" dataset
    ax.plot(x, t, linestyle='', marker='o', markersize='4', label='Data set')
    # Target
    ax.plot(x, t_unperturbed, linestyle='--', label=r'Target')
    # Model graph
    ax.plot(x_fine, y, linestyle='-', label=r'Model $y(x, \mathbf{w})$')

    ax.set_ylabel('t')
    ax.set_xlabel('x')
    ax.grid(True)
    ax.legend(loc='best', fancybox=True, framealpha=0.5, fontsize='medium')

    __dirname = os.path.dirname(os.path.realpath(__file__))
    fn = os.path.join(__dirname, '../img/linear-regression.svg')
    fig.patch.set_alpha(0.0)
    plt.savefig(fn,
                facecolor=fig.get_facecolor(),
                edgecolor='none',
                bbox_inches=0)

if __name__ == '__main__':
    main()