In [1]:
import sklearn
from sklearn.neighbors import KNeighborsClassifier

#If we consider the 'k' nearest neighbours of each point on a grid of all possible feature values (e.g. age 0-100 by height 0-3m), we can colour every grid square by its predicted class. The grid squares are just for visualisation - predictions always come from the k nearest training observations.

In [2]:
# Label encoding for the target: +1 = happy customer, -1 = unhappy customer.
happy, unhappy = 1, -1

# Targets for the six training customers: first three happy, last three unhappy.
y = [happy] * 3 + [unhappy] * 3
y

Out[2]:
[1, 1, 1, -1, -1, -1]
In [3]:
# Features per customer: [time_in_store (minutes), age (years)].
# The first three rows correspond to the happy labels in y, the last three to unhappy.
x = [
    [60, 18],
    [60, 21],
    [45, 19],
    [20, 30],
    [25, 40],
    [30, 35],
]

In [4]:
# Fit a k-NN classifier (default k=5) on the six customers, then report
# training accuracy. NOTE: the original cell assigned to `modemodel` (typo)
# but scored `model`, which raises NameError on a fresh kernel — the shown
# Out[4] of 1.0 only worked via leftover kernel state.
model = KNeighborsClassifier().fit(x, y)
model.score(x, y)

Out[4]:
1.0
In [15]:
# A new customer who spent 65 minutes in the store and is 17 years old.
# Predicted label is 1 (happy), matching the nearby happy training points.
new_customer = [65, 17]
x_unknown = [new_customer]
model.predict(x_unknown)

Out[15]:
array([1])

# Example 2

In [16]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

In [22]:
# Load the breast-cancer dataset (binary target: 0/1). The original cell
# named this variable `iris`, which is misleading — it is NOT the iris data.
cancer = datasets.load_breast_cancer()

# Keep only the first two features so the decision boundary can be drawn in 2-D.
X = cancer.data[:, :2]
y = cancer.target

X[:5, :], y

Out[22]:
(array([[17.99, 10.38],
[20.57, 17.77],
[19.69, 21.25],
[11.42, 20.38],
[20.29, 14.34]]),
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,
1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]))
In [32]:
# Build and fit a k-NN classifier with k = 15 neighbours.
n_neighbors = 15
clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
clf.fit(X, y)

Out[32]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=1, n_neighbors=15, p=2,
weights='uniform')
In [27]:
# Bounds of the plotting region: each feature's range padded by 1 unit.
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1

# 1000 evenly spaced values along each axis, combined into a 1000x1000
# grid covering every (feature-1, feature-2) pair in the region.
x1_range = np.linspace(x_min, x_max, 1000)
x2_range = np.linspace(y_min, y_max, 1000)

x1_grid, x2_grid = np.meshgrid(x1_range, x2_range)
x1_grid.shape

Out[27]:
(1000, 1000)
In [33]:
# Flatten the two grids and stack them column-wise into an (n_points, 2)
# array of candidate feature pairs.
x1_x2 = np.c_[x1_grid.ravel(), x2_grid.ravel()]

# Classify every grid point, then reshape the predictions back to the grid
# shape for plotting; finally report training accuracy.
Yp = clf.predict(x1_x2).reshape(x1_grid.shape)
clf.score(X, y)

Out[33]:
0.8945518453427065
In [29]:
# Decision surface plus the training points. Two fixes vs. the original:
# - the 3-colour maps were copied from a 3-class iris example; this target is
#   binary (0/1), so only two colours per map are needed (the middle colour
#   was never used),
# - pcolormesh with C the same shape as X/Y requires shading='auto' on
#   matplotlib >= 3.3 (otherwise a deprecation warning / error).
plt.pcolormesh(x1_grid, x2_grid, Yp, shading='auto',
               cmap=ListedColormap(['plum', 'skyblue']))
plt.scatter(X[:, 0], X[:, 1], c=y,
            cmap=ListedColormap(['purple', 'blue']),
            edgecolor='k', s=20)
plt.xlabel('feature 1 (X[:, 0])')
plt.ylabel('feature 2 (X[:, 1])')
plt.title('k-NN decision surface (k = 15)');

Out[29]:
<matplotlib.collections.PathCollection at 0x1a46dcfdd8>
In [ ]: