In [5]:
#import libraries
import sklearn
import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression


# Create our x and y value arrays and plot them

In [6]:
# x = ages, y = spend per week (four sample customers)
x = np.array([18, 20, 30, 45])
y = np.array([10, 50, 40, 60])

# Plot the raw points. Seaborn requires x/y as keyword arguments:
# positional arrays were deprecated in 0.11 and removed in 0.12
# (the first positional parameter is `data`, not `x`).
sns.scatterplot(x=x, y=y)

Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a15de9eb8>

# Reshape the data & fit it to the model

In [7]:
model = LinearRegression()

# sklearn expects a 2-D feature matrix of shape (n_samples, n_features).
# reshape(-1, 1) turns the 1-D age array into a single column; the -1
# tells numpy to infer the number of rows.
x = x.reshape(-1, 1)

# fit() is the 'learning' step: it estimates the line y = a*x + b that
# best matches the data. (The bare `x.shape` expression that was here
# was dead code — only a cell's last expression is displayed.)
model.fit(x, y)

# The fitted parameters of y = a*x + b (a = gradient, b = intercept).
print(model.coef_)
print(model.intercept_)

[1.22605364]
5.363984674329487


# Feed x into the model to predict y

In [8]:
# Run the fitted model over the training ages to get the predicted
# spend for each one. `y_pred` is reused by the plotting cell below.
y_pred = model.predict(x)

# Predicted spend for each age in x (same order as the input rows).
print(y_pred)

[27.43295019 29.88505747 42.14559387 60.53639847]


# Plot the original x & y and the new predicted y (prediction as a line)

In [9]:
# Overlay the fitted regression line on the original data points.
# ravel() flattens the (n, 1) column matrix back to 1-D for plotting.
# Seaborn requires x/y as keyword arguments (positional arrays were
# deprecated in 0.11 and removed in 0.12).
sns.scatterplot(x=x.ravel(), y=y)
sns.lineplot(x=x.ravel(), y=y_pred)

Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a15eff5c0>

# Print the model accuracy score

In [25]:
# R^2 (coefficient of determination) of the fit on the training data:
# 1.0 = perfect fit, 0.0 = no better than always predicting the mean,
# and it can be negative for a worse-than-mean fit.
model.score(x,y)

Out[25]:
0.4904214559386973

# Example 2

In [41]:
from sklearn.datasets import load_breast_cancer

# Bug fix: the loader was imported but never called, so `data` was
# undefined and this cell raised NameError on a fresh kernel run.
data = load_breast_cancer()

x = data.data          # feature matrix
y = data.target        # 0/1 class labels
z = data.target_names  # human-readable names for the 0/1 codes

# NOTE(review): the targets are binary classes, so a plain linear
# regression is a rough fit here — a classifier (e.g. logistic
# regression) would be the usual choice for this dataset.
model = LinearRegression().fit(x, y)
model.score(x, y)

Out[41]:
0.7743246526421793
In [47]:
# Wrap the feature matrix in a DataFrame with named columns for easier
# inspection. (Style note: imports ideally live in the notebook's first
# cell rather than mid-document.)
import pandas as pd
df = pd.DataFrame(x, columns=data.feature_names)