Part 1: Echo State Networks
Echo state networks (ESNs) are a kind of recurrent neural network (RNN) whose recurrent weights are randomly initialized and then left fixed. This turns out to be sufficient for reasonably good performance on some time series tasks, and because only the linear readout is trained, there is an analytical solution for training the network. The general setting is given below.
Let's say we have a single input signal $U(t)$ and a single output signal $Y(t)$. We feed the input into a system that performs computation through an encoding matrix $W_{in}$, and read the result out through a decoding matrix $W_{out}$; only $W_{out}$ will be learned. For us, the system is a very simple model parameterized primarily by a matrix $W_{res}$. We will be performing temporal signal processing, so at each time step $W_{res}$ is applied to the internal state of the reservoir. We also apply $\tanh$ activations, so we will call these states neurons and the associated internal state signal their activations. The picture to have in mind is the following:
from PIL import Image
img = Image.open('esn.png');
img
(Image from "Long-Short Term Echo State Network for Time Series Prediction" by Kaihong Zheng et al.)
Specifically, denoting the neurons of the reservoir as $R$ we will consider a network with the following internal dynamics:
$R(t) = (1-\alpha)R(t-1) + \alpha \tanh(U(t)W_{in} + R(t-1)W_{res} + \beta)$
where $\alpha$ sets the bleed-through between time steps, and $\beta$ is a bias term. In the cells below, we include the bias by concatenating a $1$ with the input data.
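Before wrapping this into functions, here is a minimal sketch of a single update step of the recurrence above; the sizes, the input value, and the seed are arbitrary and chosen only for illustration.
import numpy as np

np.random.seed(0)
inSize, resSize, alpha = 1, 5, 0.7
Win = np.random.rand(inSize+1, resSize) - 0.5   # The +1 row carries the bias.
Wres = np.random.rand(resSize, resSize) - 0.5
R = np.zeros((1, resSize))                      # Reservoir state R(t-1).
u = 0.3                                         # One input sample U(t).
# R(t) = (1-alpha)R(t-1) + alpha*tanh(U(t)W_in + R(t-1)W_res + beta),
# with the bias beta folded in by concatenating a 1 onto the input.
R = (1 - alpha)*R + alpha*np.tanh(np.dot(np.hstack((1, u)), Win) + np.dot(R, Wres))
print(R.shape)  # (1, 5)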
def ESN_init(inSize, outSize, resSize, alpha, sparsity):
    Win = np.random.rand(inSize+1, resSize) - 0.5    # Encoding matrix (the +1 row handles the bias).
    Wres = np.random.rand(resSize, resSize) - 0.5    # Matrix describing the reservoir.
    # Technical details of echo state networks: we tune the sparsity of the reservoir
    # and normalize Wres by its spectral radius.
    Wres[np.random.rand(resSize, resSize) > sparsity] = 0
    spec_rad = max(abs(np.linalg.eig(Wres)[0]))
    Wres /= spec_rad
    return Win, Wres, inSize, resSize, alpha
def reservoir(data, Win, Wres, inSize, resSize, alpha):
    """
    alpha: a bleed-through constant that controls the memory of the neurons
    """
    datamatrix = np.zeros((data.shape[0], 1+inSize+resSize))
    # Initialize the neurons to some default value.
    R = .1*(np.ones((1, resSize)) - 0.5)
    for t in range(data.shape[0]):
        u = data[t]
        R = ((1 - alpha)*R +           # Bleed-through from the previous state.
             alpha*np.tanh(            # Activation function.
                 # New input and feedback from the previous time step.
                 np.dot(np.hstack((1, u)), Win) + np.dot(R, Wres)))
        datamatrix[t] = np.append(np.append(1, u), R)
    return datamatrix
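As a quick sanity check of the shapes (with arbitrary parameters and seed), the reservoir should return one row per time step and one column each for the bias, the input, and every neuron:
import numpy as np

np.random.seed(0)
Echo = ESN_init(inSize=1, outSize=1, resSize=5, alpha=0.7, sparsity=0.9)
states = reservoir(np.random.rand(100), *Echo)
print(states.shape)  # (100, 7): 1 bias column + 1 input + 5 neurons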
Let's see what this model can do, starting with NARMA10.
NARMA10
NARMA is an acronym for Nonlinear AutoRegressive Moving Average, and NARMA10 in particular is defined by the following time series:
$y(t) = 0.3\,y(t-1) + 0.05\,y(t-1)\sum_{i=1}^{10} y(t-i) + 1.5\,U(t-1)\,U(t-10) + 0.1,$ where $U(t)$ is a uniform random variable.
It's commonly used to benchmark the performance of RNNs, especially ESNs. To do this, the reservoir is run with inputs $U(t)$ and trained on $Y(t)$ up to some time $T$; using the analytic training solution in the next text cell, the reservoir is then used to predict the values of $Y(t)$ for $t>T$. We'll use it in this notebook to showcase the performance of these networks. It defines a temporal data processing task where the input is drawn uniformly at random from $[0, 0.5]$ and processed by the nonlinear, autoregressive function above. The generator is implemented as:
def fetchData(n):
    """Return a particular instance of NARMA10, on n inputs."""
    u = 0.5 * np.random.uniform(size=(n+10))
    y = np.zeros(shape=(n+10))
    for i in range(10, n+10):
        y[i] = 0.3 * y[i-1] + 0.05 * y[i-1] * np.sum(y[i-10:i]) + 1.5 * u[i-1] * u[i-10] + 0.1
    return (u[10:], y[10:])
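As a rough check that the generator matches the recurrence above (the index and seed below are arbitrary), the returned arrays should satisfy the NARMA10 update at any index past the first ten:
import numpy as np

np.random.seed(1)
u, y = fetchData(1000)
t = 500  # Any index >= 10 works.
rhs = 0.3*y[t-1] + 0.05*y[t-1]*np.sum(y[t-10:t]) + 1.5*u[t-1]*u[t-10] + 0.1
print(np.isclose(y[t], rhs))  # True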
We have a 1D input and output signal, and we select the remaining parameters heuristically. The one technical detail in the next cell is that we choose $W_{out}$ to minimize the loss of the network on our training data. The reservoir states are simply vectors of real numbers, so if we choose the squared error as our loss function, the optimal weight matrix is given by the standard least squares solution.
$$ \begin{align} W_{out} = (X^TX)^{-1}X^TY \end{align} $$
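In the code below we compute this via np.linalg.pinv; when the data matrix has full column rank, the pseudoinverse solution coincides with the normal-equations formula above. A toy check (random matrices of arbitrary size) is:
import numpy as np

np.random.seed(2)
X = np.random.rand(200, 7)   # Tall data matrix; full column rank with probability 1.
Y = np.random.rand(200)
W_pinv = np.dot(np.linalg.pinv(X), Y)
W_normal = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y))
print(np.allclose(W_pinv, W_normal))  # True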
We also measure reservoir performance with a typical metric - the normalized root mean square error.
$$ \begin{align} NRMSE = \frac{1}{\sigma_Y}\sqrt{\overline{(Y-\hat{Y})^2}} \end{align} $$
def test_NARMA10(resSize, inSize=1, outSize=1, train_cycles=11000, test_cycles=1000,
                 alpha=0.7, sparsity=0.9):
    Echo = ESN_init(inSize, outSize, resSize, alpha, sparsity)
    data_train, Y_train = fetchData(train_cycles)
    data_test, Y_test = fetchData(test_cycles)
    train = np.array(reservoir(data_train, *Echo))
    # Standard least squares solution.
    Wout = np.dot(np.linalg.pinv(train), Y_train)
    test = reservoir(data_test, *Echo)
    Yhat = np.dot(test, Wout)
    # We drop the initial datapoints because the reservoir needs to "warm up".
    NRMSE = np.sqrt(np.divide(np.mean(np.square(Y_test[50:] - Yhat[50:])), np.var(Y_test[50:])))
    return NRMSE, Y_test, Yhat, test
Now we will just plot the estimate from the reservoir, given the input data, against the desired output in the test set. We seed our examples so that we can sensibly compare performance improvements when we're running single experiments. Additionally, in this notebook, we set the reservoir size, fairly arbitrarily, to 5. With more neurons we can expect better performance, but the effects we are interested in can be observed at this scale, which has the added benefit of being quick to compute.
import numpy as np
import matplotlib.pyplot as plt
from itertools import chain, combinations
from functools import reduce
def plot_NARMA(resSize=5):
    np.random.seed(137)
    NRMSE, Y_test, Yhat, _ = test_NARMA10(resSize)
    plt.figure(figsize=(18, 6))
    plt.yscale('log')
    plt.plot(Yhat, color='red', linewidth=5, label='Single ESN Prediction')
    plt.plot(Y_test, color='green', linestyle=":", linewidth=2, label='Target Value')
    plt.ylim(.2, 1)
    plt.ylabel("NARMA(t)")
    plt.xlabel("Test Sample (t)")
    plt.title("Test Performance")
    print(f"Normalized root mean squared error is {NRMSE}.")
    return Yhat, Y_test
Yhat1, Y_test1 = plot_NARMA()
Normalized root mean squared error is 0.7496595380852676.
This isn't too bad, but it doesn't guarantee that the reservoir is learning. One simple way to get relatively good performance is to simply output the previous time step: as long as the function doesn't change much between steps, the error stays relatively small. One way to check whether this is happening is to see how correlated the predicted change is with the actual change. We plot this below, and crop outliers out of the plot --- just for visualization! The red line is y=x.
plt.scatter(Yhat1[1:] - Yhat1[:-1], Y_test1[1:] - Y_test1[:-1])
plt.xlabel("Predicted difference between steps")
plt.ylabel("Actual difference between steps")
plt.plot(np.linspace(-.3, .3), np.linspace(-.3, .3), color='r', label="y=x")
plt.tight_layout()
plt.xlim(-.3, .3)
plt.ylim(-.3, .3)
plt.legend();
First, we see there is noise, since the cloud has some width. Second, we see it's not aligned with y=x, but we at least see correlation. Let's see if we can improve this.
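To make the comparison with the "just repeat the previous value" strategy concrete, we can also compute the NRMSE of that persistence baseline on the same test series (a rough check, reusing Y_test1 from above):
import numpy as np

target = Y_test1[1:]
persistence = Y_test1[:-1]   # Predict y(t) with y(t-1).
nrmse_persistence = np.sqrt(np.mean((target - persistence)**2) / np.var(target))
print(f"Persistence baseline NRMSE: {nrmse_persistence:.3f}")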
Powerset Signals
Given some input signal, the learner is trying to make sense of the signal and process it in some way. In this work we fix the network size and instead engineer additional features. One reason to do this is that making the network larger is either too expensive or slows down computation; I was motivated by quantum computation, where this is the case. Primarily, we are interested in understanding whether the engineered features we consider can be useful.
Given $n$ neurons, a simple way to generate new features is to take, for each subset of neurons in the power set, the pointwise product of their activation signals. This multiplication produces linearly independent signals, and it is the simplest way I can think of to quickly create a very large collection of signals. There are other ways of creating new functions from an existing collection, but one motivation for the power set is the following:
Consider two-outcome functions $f_1(x), \ldots, f_n(x) \in \{0, 1\}$ (like the outcomes of circuits executing Boolean logic). Sums of these functions are contained in their linear span. What about products? For $k > 0$ we have $f_i^{k}(x) = f_i(x)$, so the span of the power-set products contains all polynomials of these functions. Generally speaking, short of computing functions exactly, approximating them by sums of high-degree polynomials (as in a Taylor expansion) is the best we can do.
That being said, we are not in the binary setting, and there are many, many more functions we could construct. For this tutorial, we are interested in showing the effect of noise on any collection of functions we add, and so we will stick with the power set as an example.
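As a toy illustration of the idempotence argument (two made-up binary signals): squaring a 0/1 signal leaves it unchanged, so the products over subsets already span polynomials in these functions.
import numpy as np

f1 = np.array([0., 1., 1., 0., 1.])
f2 = np.array([1., 1., 0., 0., 1.])
print(np.array_equal(f1**3, f1))          # True: f_i^k = f_i for binary signals.
# A polynomial such as 2*f1 + 3*f1*f2 - f2**2 collapses onto the power-set products:
poly = 2*f1 + 3*f1*f2 - f2**2
basis = np.stack([np.ones_like(f1), f1, f2, f1*f2], axis=1)
coeffs, *_ = np.linalg.lstsq(basis, poly, rcond=None)
print(np.allclose(np.dot(basis, coeffs), poly))  # True: poly lies in the span.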
def reservoir(data, Win, Wres, inSize, resSize, alpha, powerset):
    """
    alpha: a bleed-through constant that controls the memory of the neurons
    powerset: if True, append the pointwise products of every non-empty subset of neuron signals
    """
    datamatrix = np.zeros((data.shape[0], 1+inSize+resSize))
    # Initialize the neurons to some default value.
    R = .1*(np.ones((1, resSize)) - 0.5)
    for t in range(data.shape[0]):
        u = data[t]
        R = (1 - alpha)*R + alpha*np.tanh(np.dot(np.hstack((1, u)), Win) + np.dot(R, Wres))
        datamatrix[t] = np.append(np.append(1, u), R)
    num_points = datamatrix.shape[0]
    if not powerset:
        return datamatrix
    else:
        # Add the power set of signals: for every non-empty subset of neuron columns,
        # take the pointwise product over time.
        s = list(datamatrix.T)[2:]
        power_signals = [reduce(lambda a, b: a*b, el, np.ones(num_points))
                         for el in list(chain.from_iterable(
                             combinations(s, r) for r in range(len(s)+1)))[1:]]
        power_signals = list(datamatrix.T)[0:2] + power_signals
        return np.array(power_signals).T
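A quick shape check with arbitrary parameters: with 5 neurons, the power-set version should return 2 + (2^5 - 1) = 33 columns (the empty product is dropped), versus 7 without it.
import numpy as np

np.random.seed(3)
Echo = ESN_init(inSize=1, outSize=1, resSize=5, alpha=0.7, sparsity=0.9)
print(reservoir(np.random.rand(50), *Echo, powerset=False).shape)  # (50, 7)
print(reservoir(np.random.rand(50), *Echo, powerset=True).shape)   # (50, 33)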
Moving forward, we also introduce regularization, in the spirit of ridge regression, both to avoid overfitting and to help with the fact that, as we add more signals from the power set, the data matrix that constitutes the least squares problem becomes very poorly conditioned.
def test_NARMA10(resSize, inSize=1, outSize=1, train_cycles=9000, test_cycles=1000, alpha=0.7,
                 sparsity=0.9, powerset=True):
    Echo = ESN_init(inSize, outSize, resSize, alpha, sparsity)
    data_train, Y_train = fetchData(train_cycles)
    data_test, Y_test = fetchData(test_cycles)
    train = np.array(reservoir(data_train, *Echo, powerset))
    # Regularize by adding a small value to the diagonal of the data matrix before the
    # pseudoinverse; this plays the role of ridge regression, helping with overfitting
    # and with the ill-conditioned train matrix.
    coeff = 1E-8
    ridge = np.zeros(train.shape)
    np.fill_diagonal(ridge, coeff)
    # Least squares solution on the regularized data matrix.
    Wout = np.dot(np.linalg.pinv(train + ridge), Y_train)
    test = reservoir(data_test, *Echo, powerset)
    Yhat = np.dot(test, Wout)
    NRMSE = np.sqrt(np.divide(np.mean(np.square(Y_test[50:] - Yhat[50:])), np.var(Y_test[50:])))
    return NRMSE, Y_test, Yhat, test
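For reference, the cell above regularizes by perturbing the diagonal of the data matrix before taking the pseudoinverse; a textbook ridge (Tikhonov) readout would instead regularize the normal equations. A minimal sketch of that alternative (not what we run below, shown with the same small coefficient) is:
import numpy as np

def ridge_readout(X, Y, coeff=1E-8):
    """Solve (X^T X + coeff*I) W = X^T Y for the readout weights W."""
    return np.linalg.solve(np.dot(X.T, X) + coeff*np.eye(X.shape[1]), np.dot(X.T, Y))

# Usage inside test_NARMA10 would be: Wout = ridge_readout(train, Y_train)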
Now we can look again to see if the error has decreased. Whereas we previously had 7 signals (2 + 5), we now have 33 (2 + $2^5 - 1$, since the empty product is dropped), so we expect better performance!
Yhat2, Y_test2 = plot_NARMA()
Normalized root mean squared error is 0.5437949184260549.
We can again look at the correlation: the cloud is both narrower, so there is less variation, and better aligned with y=x, meaning the reservoir is doing better at prediction.
plt.scatter(Yhat2[1:] - Yhat2[:-1], Y_test2[1:] - Y_test2[:-1])
plt.xlabel("Predicted difference between steps")
plt.ylabel("Actual difference between steps")
plt.plot(np.linspace(-.3, .3), np.linspace(-.3, .3), color='r', label="y=x")
plt.tight_layout()
plt.xlim(-.3, .3)
plt.ylim(-.3, .3)
plt.legend();
The performance gain is noticeable between the two, with the NRMSE decreasing from roughly 0.75 to 0.54. We can visualize this improvement as a shift in the histogram of the squared errors at each point in time, with the power set reservoir outperforming the standard reservoir and generally having its errors shifted left.
plt.hist((Yhat1-Y_test1)**2, bins=np.logspace(-10, .1), alpha=0.5, label='no power set')
plt.hist((Yhat2-Y_test2)**2, bins=np.logspace(-10, .1), alpha=0.5, label='power set')
plt.xscale('log')
plt.legend()
plt.title("Histogram of squared errors per test point");
This shift is small, but noticeable! We've constrained the size of the reservoirs in this notebook to keep the simulations easy, but, in general, the power set adds an extremely large set of new functions that should be able to drastically reduce the error.