Below is the Python code for the perceptron algorithm. We first generate a dataset with two-dimensional inputs and a one-dimensional output. The inputs are normally distributed along both axes, and the output, y, is either +1 or -1. The training set is a list of tuples of the form ([x1, x2], y), where the first element is the input (actually a numpy array in the code, but for simplicity I write it as a list) and the second element is the output.
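For example, a single positive-class training example might look like ([0.12, 0.21], 1) and a negative-class one like ([-0.20, -0.05], -1); the specific numbers here are made up purely to illustrate the shape of the data.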
Then we feed the training set to the learning algorithm. The perceptron function initializes the weight vector, w, randomly, then iterates over the training data. At each step, we take the dot product of w and x. If the sign of this dot product agrees with the label y, the point x is on the correct side of the dividing line. If it does not (that is, if y times the dot product is negative or zero), the point is on the wrong side of the line and the classifier is wrong. To correct this mistake, we nudge w, and hence the line, slightly.
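Concretely, the update rule the code below implements is: whenever y_i * (w · x_i) <= 0, set w ← w + lrate * y_i * x_i, and otherwise leave w alone. Adding lrate * y_i * x_i increases y_i * (w · x_i) by lrate times the squared length of x_i, so each update pushes w toward classifying that point correctly.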
#! /usr/bin/env python
import math
import numpy as np
from numpy.random import normal
from pylab import plot, cla, show, norm, rand, xlabel, ylabel, title, legend, get_cmap, savefig, xlim, ylim
from random import shuffle, random
import matplotlib.pyplot as plt


def generateDataNormal(n):
    # Generate 2n labeled points: n from each of two Gaussian blobs.
    data = []
    # Positive class: both coordinates drawn from N(0.15, 0.15).
    mu, sigma = 0.15, 0.15  # mean and standard deviation
    x1 = np.random.normal(mu, sigma, n)
    y1 = np.random.normal(mu, sigma, n)
    data.extend([(np.array([x1[i], y1[i]]), 1) for i in range(n)])
    # Negative class: both coordinates drawn from N(-0.15, 0.15).
    mu, sigma = -0.15, 0.15  # mean and standard deviation
    x2 = np.random.normal(mu, sigma, n)
    y2 = np.random.normal(mu, sigma, n)
    data.extend([(np.array([x2[i], y2[i]]), -1) for i in range(n)])
    return data


def test_error(w, test_data):
    # Fraction of test points whose predicted sign disagrees with the label.
    correct = 0
    for x_i, y_i in test_data:
        y_i_prime = np.dot(w, x_i)
        if y_i * y_i_prime >= 0:
            correct += 1
    e = (len(test_data) - float(correct)) / len(test_data)
    return e


def perceptron_weights(training_data, lrate=0.2):
    # Generator version: yields a copy of w after every update (mistake).
    w = []
    xlen = len(training_data[0][0])
    for i in range(xlen):
        w.append(random())
    w = np.array(w)
    for x_i, y_i in training_data:
        y_i_prime = np.dot(w, x_i)
        if y_i * y_i_prime <= 0.0:  # misclassified: nudge w toward y_i * x_i
            w = w + lrate * y_i * x_i
            yield w.copy()


def perceptron_weight(training_data, lrate=0.2):
    # Same algorithm, but returns only the final weight vector.
    w = []
    xlen = len(training_data[0][0])
    for i in range(xlen):
        w.append(random())
    w = np.array(w)
    for x_i, y_i in training_data:
        y_i_prime = np.dot(w, x_i)
        if y_i * y_i_prime <= 0.0:
            w = w + lrate * y_i * x_i
    return w


def plot_data(training_data, ws, lrate):
    # Scatter the two classes, then draw the line orthogonal to each w.
    for x in training_data:
        if x[-1] == 1:
            plot(x[0][0], x[0][1], 'ob')
        else:
            plot(x[0][0], x[0][1], 'or')
    # Shade the lines from light grey (first update) to dark grey (last).
    colors = np.linspace(0.1, 1, len(ws))
    mymap = get_cmap("Greys")
    mycolors = mymap(colors)
    i = 0
    for w in ws:
        # Normalize w, then plot the segment between the two unit vectors
        # perpendicular to it: the decision boundary through the origin.
        n = norm(w)
        ww = w / n
        ww1 = [ww[1], -ww[0]]
        ww2 = [-ww[1], ww[0]]
        plot([ww1[0], ww2[0]], [ww1[1], ww2[1]], color=mycolors[i])
        i += 1
    xlim(-1, 1)
    ylim(-1, 1)
    savefig('perceptron_' + str(lrate) + '.png')
    cla()
    #show()


if __name__ == '__main__':
    training_data = generateDataNormal(100)
    shuffle(training_data)
    for lrate in [0.1, 0.2, 0.4, 0.8]:
        weights = [w for w in perceptron_weights(training_data, lrate)]
        plot_data(training_data, weights, lrate)
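The main block above only ever plots; test_error is defined but never called. If you also want a numeric comparison, a minimal sketch (using only the functions already defined above, and meant to be appended to the end of the __main__ block) might look like this:

    # Sketch, not part of the original script: hold out a fresh sample as a
    # test set and report the error of the final weight vector for each rate.
    test_data = generateDataNormal(100)
    for lrate in [0.1, 0.2, 0.4, 0.8]:
        w = perceptron_weight(training_data, lrate)
        print(lrate, test_error(w, test_data))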
Below are plots of how w changes on each update (that is, each time a training point is misclassified and w must be corrected) for different learning rates, denoted by the Greek letter λ (lrate in the code). A note about the color mapping for the lines (which are really the lines orthogonal to w): there is one line for each time w is updated; the first is drawn lightest and the last darkest. As you can see, small learning rates like 0.1 result in small changes in w, and larger learning rates result in correspondingly larger ones. Some care is needed when choosing λ to help guarantee convergence and to avoid thrashing or cycling of w; a simple numerical check for this is sketched after the plots. The blue circles are one class and the red circles are the other.

[Plots of the training data and the successive lines orthogonal to w, one figure per learning rate.]
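As an aside, one simple way to check numerically whether a given λ is converging or thrashing (this is an extra diagnostic sketch, not part of the code above) is to make repeated passes over the training data and count how many updates each pass still triggers:

def count_updates(training_data, lrate, epochs=10):
    # Run the same mistake-driven update for several passes over the data
    # and record how many points were misclassified (and hence triggered
    # an update) in each pass. A count of 0 means a mistake-free pass.
    w = np.array([random() for _ in range(len(training_data[0][0]))])
    counts = []
    for _ in range(epochs):
        mistakes = 0
        for x_i, y_i in training_data:
            if y_i * np.dot(w, x_i) <= 0.0:
                w = w + lrate * y_i * x_i
                mistakes += 1
        counts.append(mistakes)
    return counts

A per-pass count that falls to zero and stays there indicates the weights have settled on a separating line; one that keeps bouncing around suggests w is cycling or thrashing.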