Machine Learning and Applications - Neural Networks¶

David Picard¶

École des Ponts ParisTech¶

david.picard@enpc.fr¶

Natural neuron¶

In [2]:
Image('Neuron.png', width=400)
Out[2]:

Artificial Neuron (McCulloch & Pitts)¶

$$ f(\mathbf{x}) = \varphi(\langle \mathbf{w}, \mathbf{x}\rangle + \theta ) $$
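
A one-line sketch of this neuron in JAX (the choice of sigmoid for $\varphi$ is only an example):

In [ ]:
import jax
import jax.numpy as jnp

def neuron(w, theta, x, phi=jax.nn.sigmoid):
    # f(x) = phi(<w, x> + theta)
    return phi(jnp.dot(w, x) + theta)

print(neuron(jnp.array([1.0, -2.0]), 0.5, jnp.array([0.3, 0.1])))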

In [3]:
Image('a_neuron.png', width=400)
Out[3]:

Activation functions¶

Linear: $\varphi(x) = x$

Rectified Linear: $\varphi(x) = \max(0, x)$

Sigmoid: $\varphi(x) = \frac{1}{1+e^{-x}}$

Hyperbolic tangent: $\varphi(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}$

In [4]:
x = jnp.linspace(-3, 3, 100)
plt.plot(x, x, label='linear')
plt.plot(x, jax.nn.relu(x), label='relu')
plt.plot(x, jax.nn.sigmoid(x), label='sigmoid')
plt.plot(x, jnp.tanh(x), label='tanh')
Out[4]:
[<matplotlib.lines.Line2D at 0x77e488785f60>]

Training¶

Stochastic gradient descent on mini-batches from $\mathcal{A} = \{(\mathbf{x}, y)\}$, with loss function $l$

  1. Draw a random mini-batch $B = \{(\mathbf{x}_i, y_i)\} \subset \mathcal{A}$
  2. Compute the gradient estimator $$\delta = \frac{1}{\vert B\vert}\sum_{(\mathbf{x}_i, y_i) \in B}\frac{\partial l(y_i, f(\mathbf{x}_i))}{\partial \mathbf{w}}$$
  3. Apply a gradient descent step $\mathbf{w} \leftarrow \mathbf{w} - \eta \delta$ (a minimal sketch of one step follows below)
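
A minimal sketch of one such step in JAX; the linear model and squared loss below are placeholder assumptions, not the model used later in this notebook.

In [ ]:
import jax
import jax.numpy as jnp
import numpy as np

def l(w, x, y):
    # placeholder loss: squared error of a linear model f(x) = <w, x>
    return jnp.mean((jnp.dot(x, w) - y) ** 2)

def sgd_step(w, X, Y, batch_size=32, eta=0.01):
    # 1. draw a random mini-batch B from the training set A
    ind = np.random.choice(len(X), size=batch_size, replace=False)
    # 2. gradient estimator averaged over the batch
    delta = jax.grad(l)(w, X[ind], Y[ind])
    # 3. gradient descent step
    return w - eta * delta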

Small example¶

In [5]:
# Load the dataset
data = np.load('mnist.npz')
X = data['X_train']
y = data['y_train']
plt.imshow(X[0,:].reshape(28,28))
print(y[0])
5

Binary cross-entropy loss¶

With a sigmoid activation, $f(\mathbf{x}) \in (0, 1)$ can be interpreted as a probability

Minimize the cross-entropy (pushes the output towards either 0 or 1) $$\mathcal{L} = - y \log f(\mathbf{x}) - (1-y) \log (1 - f(\mathbf{x})) $$

In [6]:
X = data['X_train_bin']
y = data['y_train_bin']

def func(w, b, x):
    # logistic regression: sigmoid of a linear model
    return jax.nn.sigmoid(jnp.matmul(x, w) + b)

def xe(w, b, x, y):
    # binary cross-entropy, averaged over the batch
    fx = func(w, b, x)
    return (-y*jnp.log(fx)-(1-y)*jnp.log(1-fx)).mean()

@jax.jit
def update(w, b, x, y):
    # one gradient descent step on w and b
    dw, db = jax.grad(xe, argnums=(0,1))(w, b, x, y)
    return w - 0.01*dw, b - 0.01*db
In [7]:
w = np.random.randn(784)/784
b = 0.

loss = []
for t in range(500):
    loss.append(xe(w, b, X, y))
    w, b = update(w, b, X, y)
plt.plot(loss)
Out[7]:
[<matplotlib.lines.Line2D at 0x77e4885c63e0>]
In [8]:
def accuracy(y_pred, y_true):
    return (y_true==y_pred).mean()

y_pred = (func(w, b, X)>0.5)*1.
print('accuracy: {}'.format(accuracy(y_pred, y)))
accuracy: 1.0

Non-linearly separable problems¶

What about XOR?

In [9]:
Xor = jnp.sign(np.random.randn(200, 2)) + 0.1*np.random.randn(200,2)
yor = 1.*((Xor[:,0]*Xor[:,1])>0)

plt.scatter(Xor[:,0], Xor[:,1], c=yor)
Out[9]:
<matplotlib.collections.PathCollection at 0x77e48848b280>

Multiple layers¶

In [10]:
Image('mlp.png', width=400)
Out[10]:

Multiple Layer Perceptron¶

Define layer $i$ as the function mapping to $\mathbb{R}^d$ obtained by stacking $d$ neurons

$$ f_i(\mathbf{x}) = [\sigma(\mathbf{W}_{ij}^\top\mathbf{x}+\theta_{ij})]_{j\leq d}$$

With

  • $\mathbf{W}_{ij}, \theta_{ij}$ the weights and bias of neuron $j$ at layer $i$
  • $\sigma$ the activation function

Create a network by composing $L$ layers

$$ F(\mathbf{x}) = f_L \circ \cdots \circ f_1(\mathbf{x}) $$
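
A small sketch of this composition in JAX (the ReLU activation, layer widths and initialization below are illustrative assumptions):

In [ ]:
import jax
import jax.numpy as jnp

def layer(params, x):
    # one layer f_i: affine map followed by a pointwise non-linearity
    W, theta = params
    return jax.nn.relu(jnp.matmul(x, W) + theta)

def mlp(all_params, x):
    # F = f_L o ... o f_1: apply the layers in order
    # (a real network would use a task-specific activation on the last layer)
    for params in all_params:
        x = layer(params, x)
    return x

# illustrative widths 2 -> 8 -> 8 -> 1
key = jax.random.PRNGKey(0)
sizes = [2, 8, 8, 1]
all_params = []
for d_in, d_out in zip(sizes[:-1], sizes[1:]):
    key, k1, k2 = jax.random.split(key, 3)
    all_params.append((jax.random.normal(k1, (d_in, d_out)) / jnp.sqrt(d_in),
                       jax.random.normal(k2, (d_out,)) / jnp.sqrt(d_in)))

print(mlp(all_params, jnp.ones((5, 2))).shape)   # (5, 1)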

XOR - Exercise¶

Find weights for one hidden layer of width 2 that solve XOR

Use ReLU activation for the hidden layer and $\text{sign}$ for the output layer (one possible solution is sketched below the figure)

In [11]:
Image('2_xor.png', width=400)
Out[11]:
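
One possible hand-crafted solution (among many): place one hidden ReLU unit on each corner labelled 1 above and let the output fire when either unit is active.

In [ ]:
import jax
import jax.numpy as jnp

# weights found by inspection; many other choices work
Wh = jnp.array([[ 1., -1.],
                [ 1., -1.]])          # hidden unit 1 fires near (+1,+1), unit 2 near (-1,-1)
bh = jnp.array([-1., -1.])
wo = jnp.array([1., 1.])              # the output fires if either hidden unit is active
bo = -0.5

def xor_net(x):
    h = jax.nn.relu(jnp.matmul(x, Wh) + bh)
    return jnp.sign(jnp.matmul(h, wo) + bo)   # +1 on the corners labelled 1 above, -1 otherwise

y_hat = (xor_net(Xor) + 1) / 2        # map {-1, +1} back to {0, 1}
print('accuracy:', (y_hat == yor).mean())
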
In [ ]:
 

Training¶

ERM principle

$$ \min_{\{\mathbf{w}_i\}_i} \mathbb{E}_{(\mathbf{x},y)}\left[l(y, F(\mathbf{x}))\right] $$

Gradient descent

$$\forall i, \mathbf{w}_i \leftarrow \mathbf{w}_i - \eta \mathbb{E}_{(\mathbf{x},y)}\left[\frac{\partial l(y, F(\mathbf{x}))}{\partial\mathbf{w}_i}\right]$$

Monte-Carlo estimation with mini-batch strategy

$$ \mathbb{E}_{(\mathbf{x},y)}\left[\frac{\partial l(y, F(\mathbf{x}))}{\partial\mathbf{w}_i}\right] \approx \frac{1}{N}\sum_n \frac{\partial l(y_n, F(\mathbf{x}_n))}{\partial \mathbf{w}_i}$$

Backpropagation¶

  • Denote $\mathbf{x}_k = f_k \circ \cdots \circ f_1(\mathbf{x})$ the $k$-th intermediate output

  • Denote $g_k(\mathbf{x}_k) = f_L \circ \cdots \circ f_{k+1}(\mathbf{x}_k)$ the output computed from $\mathbf{x}_k$

  • Remark: $\forall k, F(\mathbf{x}) = g_k(\sigma(\mathbf{w}_k^\top\mathbf{x}_{k-1} + \theta_k))$

Chain rule (Leibniz notation)

$$\frac{\partial y}{\partial x} = \frac{\partial y}{\partial z}\frac{\partial z}{\partial x}$$

Backpropagation¶

Single neuron chain

In [12]:
Image('neural_chain.png', width=600)
Out[12]:

$$ \frac{\partial l(y, F(\mathbf{x}))}{\partial w_k} = \frac{\partial l(y, F(\mathbf{x}))}{\partial F(\mathbf{x})} \frac{\partial F(\mathbf{x})}{\partial w_k}$$

$$ \begin{aligned} &= l'(y, F(\mathbf{x})) \frac{\partial \sigma(w_L\mathbf{x}_{L-1} + \theta_L)}{\partial w_k}\\ &= l'(y, F(\mathbf{x})) \sigma'(\mathbf{x}_L)\frac{\partial w_L\mathbf{x}_{L-1} + \theta_L}{\partial w_k}\\ &= l'(y, F(\mathbf{x})) \sigma'(\mathbf{x}_L)w_L\frac{\partial \mathbf{x}_{L-1}}{\partial w_k} \end{aligned} $$

$$ \begin{aligned} \frac{\partial \mathbf{x}_{L-1}}{\partial w_k} &= \frac{\partial \sigma(w_{L-1}\mathbf{x}_{L-2} + \theta_{L-1})}{\partial w_k}\\ &= \sigma'(\mathbf{x}_{L-1}) w_{L-1}\frac{\partial\mathbf{x}_{L-2}}{\partial w_k} \end{aligned} $$

Recursion $$ \begin{aligned} \frac{\partial\mathbf{x}_{k+t+1}}{\partial w_k} = \sigma'(\mathbf{x}_{k+t+1})w_{k+t+1}\frac{\partial \mathbf{x}_{k+t}}{\partial w_k} \\ \frac{\partial\mathbf{x}_{k}}{\partial w_k} = \sigma'(\mathbf{x}_k)\mathbf{x}_{k-1} \end{aligned} $$

$$ \frac{\partial l(y, F(\mathbf{x}))}{\partial w_k} = l'(y, F(\mathbf{x})) \left(\prod_{t=k+1}^L\sigma'(\mathbf{x}_t)w_t\right) \sigma'(\mathbf{x}_k)\mathbf{x}_{k-1} $$

Note:

  • if $w_t \ll 1$: vanishing gradients

  • if $w_t \gg 1$: exploding gradients

  • if $\sigma'(\cdot) \approx 0$: vanishing gradients (sigmoid and tanh saturate, ReLU does not); see the toy example below
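
A toy numerical illustration of the saturation effect (a scalar chain of depth 30 with unit weights, chosen arbitrarily): the gradient through a sigmoid chain is vanishingly small, while the ReLU chain keeps a usable gradient.

In [ ]:
import jax
import jax.numpy as jnp

def chain(w, x, act, depth=30):
    # scalar chain: x -> act(w * x), repeated `depth` times
    for _ in range(depth):
        x = act(w * x)
    return x

# gradient of the chain output w.r.t. the shared weight w, at w = 1, x = 1
g_sigmoid = jax.grad(chain)(1.0, 1.0, jax.nn.sigmoid)
g_relu = jax.grad(chain)(1.0, 1.0, jax.nn.relu)
print(g_sigmoid, g_relu)   # the sigmoid chain gradient is vanishingly small, the relu one is not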

Fully connected network¶

Recursion $$ \begin{aligned} \delta_{L} &= l'(y, F(\mathbf{x}))\, \sigma'(\mathbf{x}_L) \\ \delta_k &= \sigma'(\mathbf{x}_k) \circ (\mathbf{w}_{k+1} \delta_{k+1}) \\ \frac{\partial l(y, F(\mathbf{x}))}{\partial \mathbf{w}_k} &= \mathbf{x}_{k-1}\, \delta_k^\top \end{aligned} $$ with $\delta_k$ the error backpropagated to the pre-activation of layer $k$ and $\circ$ the element-wise product

Algorithm¶

Forward pass

  • Compute and store $\forall k, \mathbf{x}_k$

Backward pass

  • Compute $l'(y, F(\mathbf{x}))$

  • $\forall k$, compute $\delta_k$

  • Update $\mathbf{w}_k$ using $l'(y, F(\mathbf{x})), \delta_k, \mathbf{x}_{k-1}$

In practice, ML libraries provide automatic differentiation (PyTorch, TensorFlow, JAX, etc.) for a set of basic operators that you compose to build $F$, so the gradients are derived for you, as in the small check below
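
A small sanity check of the recursion, assuming a 2-layer sigmoid network with squared loss (sizes and inputs chosen arbitrarily): the manual deltas match jax.grad.

In [ ]:
import jax
import jax.numpy as jnp

# toy check that the backpropagation recursion matches autograd
def fwd(V1, c1, v2, c2, x_in):
    h = jax.nn.sigmoid(x_in @ V1 + c1)      # x_1, stored during the forward pass
    o = jax.nn.sigmoid(h @ v2 + c2)         # x_2 = F(x)
    return h, o

def sq_loss(V1, c1, v2, c2, x_in, y_tgt):
    _, o = fwd(V1, c1, v2, c2, x_in)
    return 0.5 * (o - y_tgt) ** 2

key = jax.random.PRNGKey(0)
k1, k2, k3, k4 = jax.random.split(key, 4)
V1 = jax.random.normal(k1, (3, 4)); c1 = jax.random.normal(k2, (4,))
v2 = jax.random.normal(k3, (4,));   c2 = jax.random.normal(k4, ())
x_in = jnp.array([0.5, -1.0, 2.0]); y_tgt = 1.0

# manual backward pass, following the delta recursion above
h, o = fwd(V1, c1, v2, c2, x_in)
delta2 = (o - y_tgt) * o * (1 - o)          # l'(y, F(x)) * sigma'(x_2)
dv2, dc2 = delta2 * h, delta2
delta1 = (v2 * delta2) * h * (1 - h)        # sigma'(x_1) o (w_2 delta_2)
dV1, dc1 = jnp.outer(x_in, delta1), delta1  # x_0 delta_1^T

# autograd gives the same gradients
gV1, gc1, gv2, gc2 = jax.grad(sq_loss, argnums=(0, 1, 2, 3))(V1, c1, v2, c2, x_in, y_tgt)
print(jnp.allclose(dV1, gV1), jnp.allclose(dc1, gc1),
      jnp.allclose(dv2, gv2), jnp.allclose(dc2, gc2))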

XOR with MLP¶

In [13]:
def l1(w1, b1, x):
    # hidden layer: affine map + ReLU
    return jax.nn.relu(jnp.matmul(x, w1) + b1)

def func(w1, w2, b1, b2, x):
    # 2-layer MLP with sigmoid output
    x1 = l1(w1, b1, x)
    x2 = jax.nn.sigmoid(jnp.matmul(x1, w2) + b2)
    return x2

def xe(w1, w2, b1, b2, x, y):
    # binary cross-entropy
    fx = func(w1, w2, b1, b2, x)
    return (-y*jnp.log(fx)-(1-y)*jnp.log(1-fx)).mean()

@jax.jit
def update(w1, w2, b1, b2, x, y, eta=0.1):
    # one gradient descent step on all parameters
    dw1, dw2, db1, db2 = jax.grad(xe, argnums=(0,1,2,3))(w1, w2, b1, b2, x, y)
    return w1 - eta*dw1, w2 - eta*dw2, b1 - eta*db1, b2 - eta*db2
In [14]:
w1 = np.random.randn(2, 4)/10
w2 = np.random.randn(4)/10
b1 = np.random.randn(4)/10
b2 = np.random.randn(1)/10

loss = []
for t in range(2500):
    ind = np.random.choice(len(Xor), size=64, replace=False)
    loss.append(xe(w1, w2, b1, b2, Xor[ind, :], yor[ind]))
    w1, w2, b1, b2 = update(w1, w2, b1, b2, Xor[ind, :], yor[ind], eta=0.1)
plt.plot(loss)
Out[14]:
[<matplotlib.lines.Line2D at 0x77e46cd9bb20>]
In [15]:
t = 50; tx = jnp.linspace(-1.5, 1.5, t)
xv, yv = jnp.meshgrid(tx, tx, sparse=True); xv = xv.squeeze(); yv = yv.squeeze()
xx = jnp.array([[xx, yy] for yy in yv for xx in xv])
y_pred = jnp.array(func(w1, w2, b1, b2, xx)).reshape(t, t)
cmap = plt.get_cmap('PiYG')
levels=jnp.linspace(-1.5, .5, 10)
norm = matplotlib.colors.BoundaryNorm(levels, ncolors=cmap.N, clip=True)
plt.pcolormesh(xv, yv, -y_pred, shading='nearest', norm=norm);
plt.scatter(Xor[:,0], Xor[:,1], c=yor)
Out[15]:
<matplotlib.collections.PathCollection at 0x77e46cc3fd90>

Intermediate layers¶

In [16]:
t = 50; tx = jnp.linspace(-1.5, 1.5, t);
xv, yv = jnp.meshgrid(tx, tx, sparse=True); xv = xv.squeeze(); yv = yv.squeeze()
xx = jnp.array([[xx, yy] for yy in yv for xx in xv])
y_pred = jnp.array(l1(w1, b1, xx)).reshape(t, t, 4)
cmap = plt.get_cmap('PiYG')
levels=jnp.linspace(-4., 2., 100)
norm = matplotlib.colors.BoundaryNorm(levels, ncolors=cmap.N, clip=True)
plt.pcolormesh(xv, yv, -y_pred[:,:,0], shading='nearest', norm=norm);
plt.scatter(Xor[:,0], Xor[:,1], c=yor)
Out[16]:
<matplotlib.collections.PathCollection at 0x77e4883af400>
In [17]:
levels=jnp.linspace(-4., 2., 100)
norm = matplotlib.colors.BoundaryNorm(levels, ncolors=cmap.N, clip=True)
plt.pcolormesh(xv, yv, -y_pred[:,:,1], shading='nearest', norm=norm);
plt.scatter(Xor[:,0], Xor[:,1], c=yor)
Out[17]:
<matplotlib.collections.PathCollection at 0x77e46ca05c30>
In [18]:
levels=jnp.linspace(-4., 2., 100)
norm = matplotlib.colors.BoundaryNorm(levels, ncolors=cmap.N, clip=True)
plt.pcolormesh(xv, yv, -y_pred[:,:,2], shading='nearest', norm=norm);
plt.scatter(Xor[:,0], Xor[:,1], c=yor)
Out[18]:
<matplotlib.collections.PathCollection at 0x77e46c880bb0>
In [19]:
levels=jnp.linspace(-4., 2., 100)
norm = matplotlib.colors.BoundaryNorm(levels, ncolors=cmap.N, clip=True)
plt.pcolormesh(xv, yv, -y_pred[:,:,3], shading='nearest', norm=norm);
plt.scatter(Xor[:,0], Xor[:,1], c=yor)
Out[19]:
<matplotlib.collections.PathCollection at 0x77e46c8f3940>

Neural networks losses¶

Classification

  • Independent classes: binary cross-entropy with sigmoid activation

  • Exclusive classes: categorical cross-entropy with softmax activation

$$ \begin{aligned} \sigma(x_i) &= \frac{e^{x_i}}{\sum_j e^{x_j}} \\ l(y, F(\mathbf{x})) &= - \sum_i y_i \log F(\mathbf{x})[i] \end{aligned} $$
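
A short sketch of the categorical cross-entropy in JAX (the logits and one-hot labels below are illustrative; log_softmax is used for numerical stability):

In [ ]:
import jax
import jax.numpy as jnp

def softmax_xe(logits, y_onehot):
    # l(y, F(x)) = - sum_i y_i log softmax(logits)_i, averaged over the batch
    log_p = jax.nn.log_softmax(logits, axis=-1)
    return -(y_onehot * log_p).sum(axis=-1).mean()

# illustrative batch of 2 examples, 3 exclusive classes
logits = jnp.array([[2.0, 0.5, -1.0],
                    [0.1, 0.2, 3.0]])
y_onehot = jax.nn.one_hot(jnp.array([0, 2]), 3)
print(softmax_xe(logits, y_onehot))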

Regression

  • Usual $\ell_2$, $\ell_1$ losses

Neural networks capacity¶

Consider a feed-forward neural network as a directed acyclic graph $(V, E)$, and denote $\mathcal{H}(V, E, \sigma)$ the class of functions it can compute with activation $\sigma$

Theorem: $\forall n$, there exists a graph $(V, E)$ of depth 2 such that $\mathcal{H}(V, E, \text{sign})$ contains all functions from $\{\pm 1\}^n$ to $\{\pm 1\}$.

A neural network with a single hidden layer and the sign activation function can represent any binary function over binary vectors

Proof¶

  • Consider a network with a single hidden layer of $2^n$ neurons (only $k \leq 2^n$ of them are used below)
  • Let $\{u_i\}_{1 \leq i \leq k}$ be the set of $k$ input vectors that have label 1
  • Remark that $\forall i, \langle u_i, u_i\rangle = n$ and $\forall x, \forall i, x \neq u_i \Leftrightarrow \langle x, u_i \rangle \leq n-2$ (at least 1 bit differs)
  • Set $k$ neurons to $h_i(x) = \text{sign}(u_i^\top x - n +1)$, so that $h_i(x) = 1$ iff $x = u_i$, and $-1$ otherwise
  • Set the output to $$F(x) = \text{sign} \left(\sum_i h_i(x) +k - 1\right)$$ which outputs 1 iff at least one $h_i$ fires (a numerical check is sketched below)
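
The construction above can be checked numerically for a small $n$ (here $n = 3$ and a randomly drawn target function; this is an illustration, not part of the proof):

In [ ]:
import itertools
import numpy as np

n = 3
inputs = np.array(list(itertools.product([-1, 1], repeat=n)))   # all of {+-1}^n
rng = np.random.default_rng(0)
target = rng.choice([-1, 1], size=len(inputs))                  # an arbitrary binary function

U = inputs[target == 1]                   # the k positive inputs u_i
k = len(U)

def F(x):
    h = np.sign(U @ x - n + 1)            # h_i(x) = 1 iff x = u_i, else -1
    return np.sign(h.sum() + k - 1)       # fires iff at least one h_i fires

print(all(F(x) == t for x, t in zip(inputs, target)))   # True: the network realizes the target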

Neural network capacity¶

Theorem (see (Hornik et al, 1989) and (Cybenko, 1989)): $\forall n$, let $s(n)$ be the minimal integer such that there exists a graph $(V, E)$ with $\vert V \vert = s(n)$ such that the hypothesis class $\mathcal{H}(V, E, \text{sign})$ contains all the functions from $\{0, 1\}^n$ to $\{0, 1\}$. Then $s(n)$ is exponential in $n$. Similar results hold for the sigmoid activation.

MLPs with one hidden layer can approximate any function, but may require an exponential number of neurons.

VC dimension¶

Theorem: The VC dimension of $\mathcal{H}(V, E, \text{sign})$ is $\mathcal{O}(|E|\log |E|)$

The capacity of a neural network is determined more by its connectivity than by its number of neurons. Deeper networks have higher capacity.

Effect of width¶

In [20]:
def train_nn(n):
    w1 = np.random.randn(2, n)/(jnp.sqrt(n))
    w2 = np.random.randn(n)/(jnp.sqrt(n))
    b1 = np.random.randn(n)/(jnp.sqrt(n))
    b2 = np.random.randn(1)/(jnp.sqrt(n))

    loss = []
    for t in range(1000):
        loss.append(xe(w1, w2, b1, b2, Xor, yor))
        w1, w2, b1, b2 = update(w1, w2, b1, b2, Xor, yor, eta=0.1)
    return loss
In [21]:
for n in range(1, 8):
    plt.plot(train_nn(2**n), label='n={}'.format(2**n))
plt.legend()
Out[21]:
<matplotlib.legend.Legend at 0x77e46c4a68f0>

Larger NN, easier to train?

In [22]:
def train_nn2(n):
    w1 = np.random.randn(2, n)/(jnp.sqrt(n))
    w2 = np.random.randn(n)/(jnp.sqrt(n))
    b1 = np.random.randn(n)/(jnp.sqrt(n))
    b2 = np.random.randn(1)/(jnp.sqrt(n))

    w = w1          # keep a copy of the initial first-layer weights
    w_change = []
    for t in range(1000):
        ind = np.random.choice(len(Xor), size=128, replace=False)
        w1, w2, b1, b2 = update(w1, w2, b1, b2, Xor[ind, :], yor[ind], eta=0.1)
        # relative change of the first-layer weights since initialization
        w_change.append(jnp.linalg.norm(w1 - w)/jnp.linalg.norm(w))
    return w_change
In [23]:
for n in range(1, 8):
    plt.plot(train_nn2(2**n), label='n={}'.format(2**n))
plt.legend()
Out[23]:
<matplotlib.legend.Legend at 0x77e46c741bd0>

The weights of larger networks change relatively less during training than those of smaller networks?

Neural Tangent Kernel (Jacot et al, 2018)¶

Consider the loss function as a function of $\mathbf{w}$ instead of $\mathbf{x}$

$$ l_\mathbf{x}(\mathbf{w}) = l(y, F_\mathbf{w}(\mathbf{x})) $$

Approximate it using its Taylor expansion

$$l_\mathbf{x}(\mathbf{w}) \approx l_\mathbf{x}(\mathbf{w}_0) + \nabla l_\mathbf{x}(\mathbf{w}_0) ^\top (\mathbf{w} - \mathbf{w}_0) $$

Corresponds to a linear model with the non-linear mapping

$$ \phi(\mathbf{x}) = \nabla l_\mathbf{x}(\mathbf{w}_0) $$

This defines a kernel $k(\mathbf{x}, \mathbf{x}') = \langle \phi(\mathbf{x}), \phi(\mathbf{x}')\rangle$: the neural tangent kernel

This approximation tends to be better when the width grows

Some weights are highly influential ( $| \sum_i \phi(\mathbf{x}_i)[j] | \gg 0$ )

Blind spots in the kernel ( $\sum_i \phi(\mathbf{x}_i)[j] \approx 0$ ) mean that some components are barely updated

Very large models are easier to train because only a few neurons need to change to (over?)fit the training data
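
A small sketch of the tangent feature map at initialization, reusing `xe`, `Xor` and `yor` from the XOR cells above (the width n = 32 and the 5-example subset are arbitrary choices; the label enters through the loss, following the definition of $l_\mathbf{x}$ above):

In [ ]:
from jax.flatten_util import ravel_pytree

n = 32
params0 = (np.random.randn(2, n)/jnp.sqrt(n), np.random.randn(n)/jnp.sqrt(n),
           np.random.randn(n)/jnp.sqrt(n), np.random.randn(1)/jnp.sqrt(n))

def phi(x, y):
    # phi(x) = grad_w l_x(w_0), flattened into a single feature vector
    grads = jax.grad(lambda p: xe(p[0], p[1], p[2], p[3], x[None, :], y[None]))(params0)
    return ravel_pytree(grads)[0]

# tangent kernel on a few points: K[a, b] = <phi(x_a), phi(x_b)>
K = jnp.array([[phi(xa, ya) @ phi(xb, yb) for xb, yb in zip(Xor[:5], yor[:5])]
               for xa, ya in zip(Xor[:5], yor[:5])])
print(K.shape)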

Lottery ticket hypothesis¶

The Lottery Ticket Hypothesis (Malach et al, 2020)

A randomly-initialized, dense neural network contains a subnetwork that is initialized such that, when trained in isolation, it can match the test accuracy of the original network after training for at most the same number of iterations.

  • Pruning a randomly initialized network can yield a good predictor (verified empirically).

  • In practice, finding such a subnetwork by pruning is difficult; it is better to train the full model.

Double Descent (Belkin et al, 2019)¶

Do larger networks systematically overfit?

In [24]:
Xor_tr = jnp.sign(np.random.randn(200, 2))
yor_tr = 1.*((Xor_tr[:,0]*Xor_tr[:,1])>0)
Xor_tr += 0.7*np.random.randn(200,2)
Xor_te = jnp.sign(np.random.randn(200, 2))
yor_te = 1.*((Xor_te[:,0]*Xor_te[:,1])>0)
Xor_te += 0.7*np.random.randn(200,2)

plt.scatter(Xor_tr[:,0], Xor_tr[:,1], c=yor_tr)
Out[24]:
<matplotlib.collections.PathCollection at 0x77e46c04ed40>
In [25]:
n = 10000
w1 = np.random.randn(2, n)/(jnp.sqrt(n))
w2 = np.random.randn(n)/(jnp.sqrt(n))
b1 = np.random.randn(n)/(jnp.sqrt(n))
b2 = np.random.randn(1)/(jnp.sqrt(n))

loss = 0
for t in range(1000):
    loss = xe(w1, w2, b1, b2, Xor_te, yor_te)
    w1, w2, b1, b2 = update(w1, w2, b1, b2, Xor_tr, yor_tr, eta=0.1)
In [26]:
t = 50; tx = jnp.linspace(-3.5, 3.5, t)
xv, yv = jnp.meshgrid(tx, tx, sparse=True); xv = xv.squeeze(); yv = yv.squeeze()
xx = jnp.array([[xx, yy] for yy in yv for xx in xv])
y_pred = jnp.array(func(w1, w2, b1, b2, xx)).reshape(t, t)
cmap = plt.get_cmap('PiYG')
levels=jnp.linspace(-1.5, .5, 100)
norm = matplotlib.colors.BoundaryNorm(levels, ncolors=cmap.N, clip=True)
plt.pcolormesh(xv, yv, -y_pred, shading='nearest', norm=norm);
plt.scatter(Xor_tr[:,0], Xor_tr[:,1], c=yor_tr, marker='s')
plt.scatter(Xor_te[:,0], Xor_te[:,1], c=yor_te)
Out[26]:
<matplotlib.collections.PathCollection at 0x77e46cd7ea70>

Double descent phenomenon


Overfitting solutions exist, but they are difficult to reach with SGD from a well-initialized network

Metric Learning¶

So far, we have used either the natural distance on $\mathcal{X}$ or the one induced by the choice of a kernel

Can we just learn the distance?

Train a neural network $\phi(\cdot)$ such that

  • Related samples have short distances
  • Unrelated samples have larger distances

Contrastive loss¶

Linear model

$$ \phi(\mathbf{x}) = \mathbf{Px} $$

Define Positive and Negative sets $\mathcal{P}(\mathbf{x}), \mathcal{N}(\mathbf{x})$ for each example $\mathbf{x}$

$$ \min_\mathbf{P} \sum_\mathbf{x} \sum_{\mathbf{x}_p \in \mathcal{P}(\mathbf{x})} \|\mathbf{Px} - \mathbf{Px}_p\|^2 - \lambda \sum_{\mathbf{x}_n \in \mathcal{N}(\mathbf{x})} \|\mathbf{Px} - \mathbf{Px}_n\|^2 $$

In practice, we don't want to put negative examples at an infinite distance

$$ \min_\mathbf{P} \sum_\mathbf{x} \sum_{\mathbf{x}_p \in \mathcal{P}(\mathbf{x})} \|\mathbf{Px} - \mathbf{Px}_p\|^2 + \lambda \sum_{\mathbf{x}_n \in \mathcal{N}(\mathbf{x})} \max(0, \beta - \|\mathbf{Px} - \mathbf{Px}_n\|^2) $$

Push negative examples away until their distance is above the margin $\beta$

A similar argument holds for the positive set, with a margin $\alpha$:

$$ \max(0, \|\mathbf{Px} - \mathbf{Px}_p\|^2 - \alpha) $$
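
A sketch of this loss in JAX for the linear embedding $\phi(\mathbf{x}) = \mathbf{Px}$ (the pair index arrays and the hyper-parameters `alpha`, `beta`, `lam` are assumptions):

In [ ]:
import jax
import jax.numpy as jnp

def contrastive_loss(P, X, pos_pairs, neg_pairs, alpha=0.1, beta=1.0, lam=1.0):
    Z = X @ P.T                                            # embed all samples: phi(x) = P x
    d_pos = ((Z[pos_pairs[:, 0]] - Z[pos_pairs[:, 1]])**2).sum(axis=1)
    d_neg = ((Z[neg_pairs[:, 0]] - Z[neg_pairs[:, 1]])**2).sum(axis=1)
    # pull positive pairs below margin alpha, push negative pairs above margin beta
    return jax.nn.relu(d_pos - alpha).sum() + lam * jax.nn.relu(beta - d_neg).sum()

grad_P = jax.grad(contrastive_loss)   # gradient w.r.t. P, ready for gradient descent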

Large Margin Nearest Neighbor (Weinberger et al, 2009)¶

Learn a distance that enhances a nearest neighbor classifier

  • Define positives as elements of the $k$ nearest neighbors with the same label as $\mathbf{x}$ $$ \mathcal{P}(\mathbf{x}) = \{ \mathbf{x}_c \in k\text{NN}(\mathbf{x}) | y_c = y \}$$
  • Define negatives as elements of the $k$ nearest neighbors with a different label from $\mathbf{x}$ $$ \mathcal{N}(\mathbf{x}) = \{ \mathbf{x}_c \in k\text{NN}(\mathbf{x}) | y_c \neq y \}$$ (a sketch for building these sets follows below)
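
A plain-numpy sketch for building these sets (Euclidean distance; `k` is a hyper-parameter, inputs are converted to numpy arrays):

In [ ]:
import numpy as np

def knn_sets(X, y, k=5):
    X, y = np.asarray(X), np.asarray(y)
    D = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)  # pairwise squared distances
    np.fill_diagonal(D, np.inf)                         # a point is not its own neighbour
    knn = np.argsort(D, axis=1)[:, :k]                  # indices of the k nearest neighbours
    pos = [idx[y[idx] == y[i]] for i, idx in enumerate(knn)]  # same label -> P(x_i)
    neg = [idx[y[idx] != y[i]] for i, idx in enumerate(knn)]  # different label -> N(x_i)
    return pos, neg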

Neural Networks, take home¶

  • Artificial neuron: linear combination with pointwise non-linearity
  • Layer: stacked neurons
  • MLP: Layer composition

Training

  • Backpropagation: automatic differentiation

  • Stochastic gradient descent with mini-batch

  • Vanishing/exploding gradient (saturating non-linearity)

  • Non-convex optimization problem, but very effective in practice

Capacity

  • Universal approximation theorem

  • Capacity depends on connectivity (rather than size)

NTK

  • Well initialized large networks tend to behave linearly during optimization

  • Some neurons will not be updated

  • $\exists$ a good subnetwork in the random initialization - Lottery ticket

Double Descent

  • Extremely large models can avoid overfitting

  • Double descent phenomenon (overfitting difficult to reach with SGD from good init)