Setup¶
Let's import the necessary modules: pandas and NumPy for data wrangling, Matplotlib for plotting, PyTorch for optimization, and a few sklearn utilities. We'll implement the linear regression model ourselves using PyTorch and the LBFGS optimizer.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim
We'll load the data as before.
ames = pd.read_csv('https://github.com/kcarnold/AmesHousing/blob/master/data/ames.csv.gz?raw=true', compression="gzip")
ames['price'] = ames["Sale_Price"] / 100_000 # Make `price` be in units of $100k, to be easier to interpret.
def plot_data():
    # You don't have to know how this function works.
    plt.scatter(ames['Longitude'], ames['Latitude'], c=ames["price"], s=.5)
    plt.xlabel("Longitude"); plt.ylabel("Latitude")
    plt.colorbar(label="Sale Price ($100k)")
plot_data()
We'll use location (longitude and latitude) to predict price.
feature_names = ['Longitude', 'Latitude']
X = torch.tensor(ames[feature_names].values).float()
X.shape
torch.Size([2930, 2])
Our target, called y by convention, will be the home price (we'll soon introduce a different y, but start with this one).
y = torch.tensor(ames['price'].values).float()
y.shape
torch.Size([2930])
Notice that X has two axes and thus is written in uppercase; y has one axis and thus is written in lowercase. (This is sklearn convention; other libraries are less consistent about this.)
Now let's split the data into a training set and a validation set (which sklearn calls train and test, but that's fine). random_state is how sklearn specifies the random seed (it's actually slightly more flexible than a seed).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=.2, random_state=42)
We'll verify that the shapes make sense. Note how many items are in each of the sets.
X_train.shape, y_train.shape
(torch.Size([2344, 2]), torch.Size([2344]))
X_valid.shape, y_valid.shape
(torch.Size([586, 2]), torch.Size([586]))
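As a quick arithmetic check: test_size=.2 holds out 20% of the 2930 rows, so 2930 × 0.2 = 586 homes land in the validation set and the remaining 2344 stay in the training set.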
Here's a function to plot our regression model in "data space" (i.e., what it would predict everywhere on the map).
This function is pretty customized to our specific use case, though you can get inspiration from it for use in other situations.
def plot_model(prediction_fn, fig=None, prediction_args=()):
    # Compute extents
    lat_min = ames.Latitude.min()
    lat_max = ames.Latitude.max()
    lon_min = ames.Longitude.min()
    lon_max = ames.Longitude.max()
    price_min = ames.price.min()
    price_max = ames.price.max()

    # Ask the model for predictions on a grid
    xx, yy = np.meshgrid(np.linspace(lon_min, lon_max, 250), np.linspace(lat_min, lat_max, 250))
    Z = prediction_fn(np.c_[xx.ravel(), yy.ravel()], *prediction_args).reshape(xx.shape)

    if fig is None:
        fig = plt.figure(figsize=plt.figaspect(2))

    # Top panel: show the predictions in 2D. Superimpose the original data.
    ax = fig.add_subplot(2, 1, 1)
    surf = ax.contourf(xx, yy, Z, alpha=.5, cmap=plt.cm.viridis, vmin=price_min, vmax=price_max)
    ax.scatter(ames['Longitude'], ames['Latitude'], c=ames["price"], s=1, cmap='viridis', vmin=price_min, vmax=price_max)
    ax.set(xlabel="Longitude", ylabel="Latitude", title="2D contour view")
    fig.colorbar(surf, label="Sale Price ($100k)")

    # Bottom panel: show the predictions in 3D
    ax = fig.add_subplot(2, 1, 2, projection='3d')
    ax.plot_surface(xx, yy, Z, alpha=.5, cmap=plt.cm.viridis, vmin=price_min, vmax=price_max)
    #ax.scatter(ames['Longitude'], ames['Latitude'], c=ames["price"], s=1, cmap='viridis', vmin=price_min, vmax=price_max)
    ax.set(title="3D view")
Task¶
Part A: Linear regression¶
Step A1: Fit a linear regression model to the training set (X_train, y_train).
Last time you did this, you used sklearn. This time, you'll do it using PyTorch and the LBFGS optimizer. You'll need to find the weights w and bias b that minimize the mean squared error (MSE) between the model's predictions and the actual values.
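In symbols, with $\mathbf{w}$ as the weight vector and $b$ as the bias, the model predicts
$$\hat{y}_i = \mathbf{w} \cdot \mathbf{x}_i + b,$$
and we're looking for the $\mathbf{w}$ and $b$ that minimize
$$\text{MSE} = \frac{1}{n} \sum_{i=1}^{n} \left(\hat{y}_i - y_i\right)^2.$$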
I'll walk you through this. First, we'll work on making a single prediction. To do that, we'll need some weights and a bias, which we'll initialize randomly as PyTorch tensors. Think about what shape these should be.
torch.manual_seed(42)
weights = torch.randn(..., requires_grad=True)
bias = torch.randn(1, requires_grad=True)
weights, bias
(tensor([0.3367, 0.1288], requires_grad=True), tensor([0.2345], requires_grad=True))
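If you're stuck on the shape: we have two features, so one weight per feature works, which is also what the "reset the parameters" cell further down does. A sketch of that initialization (one possible fill-in for the blank above):
torch.manual_seed(42)
weights = torch.randn(2, requires_grad=True)  # one weight per feature: Longitude and Latitude
bias = torch.randn(1, requires_grad=True)     # a single intercept term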
Now we'll make a single prediction. Think about how you can do this using a dot product.
x_i = X_train[0]
y_pred_i = ...
y_pred_i
tensor([-25.8750], grad_fn=<AddBackward0>)
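One possible way to fill in that blank (a sketch, not the only valid one): take the dot product of the feature vector with the weights and add the bias.
x_i = X_train[0]
y_pred_i = x_i @ weights + bias  # dot product of features and weights, plus the bias
y_pred_i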
Next we'll make a prediction for every item in the training set. Think about how you can do this using a matrix-vector product.
def linreg_forward(X, weights, bias):
    return X @ weights + bias
y_pred_train = linreg_forward(X_train, weights, bias)
y_pred_train.shape
torch.Size([2344])
plot_model(lambda X: linreg_forward(torch.tensor(X).float(), weights, bias).detach().numpy())
Now let's compute the loss and optimize it, as before.
def compute_mse_loss(y_true, y_pred):
    return ...
mse_loss = compute_mse_loss(..., ...)
print("MSE loss:", mse_loss.item())
MSE loss: 765.9743041992188
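A minimal sketch of how compute_mse_loss could be completed, following the usual definition of MSE (the mean of the squared differences):
def compute_mse_loss(y_true, y_pred):
    # average squared difference between predictions and true values
    return ((y_pred - y_true) ** 2).mean()

mse_loss = compute_mse_loss(y_train, y_pred_train)
print("MSE loss:", mse_loss.item())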
Now we'll use the LBFGS optimizer from PyTorch to find the best parameters. LBFGS is a quasi-Newton optimization algorithm that is particularly effective for small- to medium-sized problems.
# Reset the parameters
torch.manual_seed(42)
weights = torch.randn(2, requires_grad=True)
bias = torch.randn(1, requires_grad=True)
# Create LBFGS optimizer
optimizer = optim.LBFGS([weights, bias], lr=1, max_iter=20)
def closure():
    optimizer.zero_grad()
    y_pred = linreg_forward(X_train, weights, bias)
    loss = compute_mse_loss(y_train, y_pred)
    loss.backward()
    return loss
# Optimize
loss = optimizer.step(closure)
print("Final loss:", loss.item())
Final loss: 765.9743041992188
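Note that optimizer.step(closure) returns the loss from the first closure evaluation, so the value printed above is the loss before LBFGS updated the parameters (it matches the initial MSE). A quick sketch (assuming compute_mse_loss has been filled in) to check the training loss after optimization:
with torch.no_grad():
    y_pred_train = linreg_forward(X_train, weights, bias)
    print("Train MSE after optimization:", compute_mse_loss(y_train, y_pred_train).item())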
print("Fitted weights:", weights.detach().numpy())
print("Fitted bias:", bias.detach().numpy())
Fitted weights: [0.09082168 0.23917523]
Fitted bias: [0.23708798]
plot_model(lambda X: linreg_forward(torch.FloatTensor(X), weights, bias).detach().numpy())
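Since mean_squared_error was imported above but not yet used, here's one way to sanity-check the fitted model on the held-out validation set (a sketch):
with torch.no_grad():
    y_pred_valid = linreg_forward(X_valid, weights, bias)
print("Validation MSE:", mean_squared_error(y_valid.numpy(), y_pred_valid.numpy()))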
Analysis¶
1. What are the shapes of X, y, w, and b? Explain why each has the shape it does.
2. Suppose we were trying to predict the original, un-scaled sale price (i.e., all of our predictions would need to be multiplied by 100,000). Could you adjust the model that we just trained to do that without needing to run the optimizer again? If so, how? If not, why not?
your thoughtful answers here