# Imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns                                       # Has good utility to plot heatmap. You can use others as you see fit.
from sklearn.model_selection import train_test_split        # To split data, 0.8: Train, 0.2: Test
from sklearn.linear_model import Ridge, LinearRegression    # Use the models from sklearn, see their documentations
from sklearn.datasets import fetch_openml                   # For loading data
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error              # Metric for comparing performace
import pandas as pd

# data = fetch_openml(name="boston", version=1, as_frame=True)
# data = fetch_california_housing()


# X, y = pd.DataFrame(data.data, columns=data.feature_names), data.target
## Add cells for your code / comments

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler

# Load the Boston Housing dataset
boston = fetch_openml(name='boston', version=1, as_frame=True)
df_boston = boston.frame.copy()
df_boston.head()

X, y = boston.data, boston.target

print("Number of missing values per feature:")
print(X.isnull().sum())

Number of missing values per feature:
CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
dtype: int64

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

plt.figure(figsize=(11, 9))
corr = df_boston.corr(numeric_only=True)
sns.heatmap(
    corr,
    annot=True,
    cmap="coolwarm",
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .8}
)
plt.title("Boston Housing – Correlation matrix")
plt.show()

# 1. Distribution of the target variable
plt.figure(figsize=(8, 4))
sns.histplot(df_boston["MEDV"], kde=True)
plt.title("Distribution of MEDV (Median Home Value)")
plt.xlabel("MEDV")
plt.ylabel("Count")
plt.show()

# 2. Box‑plot of MEDV by CHAS (Charles River dummy variable)
plt.figure(figsize=(6, 4))
sns.boxplot(x="CHAS", y="MEDV", data=df_boston)
plt.title("MEDV by Proximity to Charles River (CHAS)")
plt.xlabel("Bounds River (0 = no, 1 = yes)")
plt.ylabel("MEDV")
plt.show()

# 3. Scatter + regression line: RM vs MEDV
plt.figure(figsize=(6, 4))
sns.regplot(x="RM", y="MEDV", data=df_boston, scatter_kws={"alpha":0.6})
plt.title("Relationship between RM and MEDV")
plt.xlabel("Average Number of Rooms (RM)")
plt.ylabel("MEDV")
plt.show()

# 4. Joint‑plot: LSTAT vs MEDV
sns.jointplot(
    x="LSTAT",
    y="MEDV",
    data=df_boston,
    kind="reg",
    height=6,
    marginal_kws=dict(bins=30, fill=True)
)
plt.suptitle("Joint Distribution of LSTAT vs MEDV", y=1.02)
plt.show()

# 5. Pair‑plot of a handful of interesting features
keys = ["RM", "LSTAT", "PTRATIO", "TAX", "MEDV"]
sns.pairplot(df_boston[keys], kind="scatter", diag_kind="kde", plot_kws={"alpha":0.6})
plt.suptitle("Pairwise Relationships of Selected Features", y=1.02)
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pandas as pd

# Load Boston Housing dataset
boston = fetch_openml(name='boston', version=1, as_frame=True)
X, y = boston.data, boston.target

# Preprocessing
# Check for missing values to ensure data integrity
X.isnull().sum()

# Standardize features to normalize scales for fair model treatment
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_pred)
print(f"Linear Regression Test MSE: {lr_mse:.4f}")

Linear Regression Test MSE: 24.2911

# Ridge Regression with varying alpha
alphas = np.logspace(-4, 4, 100)
train_errors = []
test_errors = []
ridge_models = []

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train, y_train)
    ridge_models.append(ridge)
    train_pred = ridge.predict(X_train)
    test_pred = ridge.predict(X_test)
    train_errors.append(mean_squared_error(y_train, train_pred))
    test_errors.append(mean_squared_error(y_test, test_pred))

# Plot test error vs alpha
plt.figure(figsize=(8, 6))
plt.semilogx(alphas, test_errors, label='Test Error')
plt.xlabel('Alpha (Regularization Parameter)')
plt.ylabel('Mean Squared Error')
plt.title('Test Error vs Alpha for Ridge Regression (Boston Dataset)')
plt.legend()
plt.grid(True)

# Mark the best alpha
grid_search = GridSearchCV(Ridge(), {'alpha': alphas}, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
best_alpha = grid_search.best_params_['alpha']
plt.axvline(x=best_alpha, color='r', linestyle='--', label=f'Best Alpha = {best_alpha:.4f}')
plt.legend()

plt.show()

# Compare performance
ridge_best = Ridge(alpha=best_alpha)
ridge_best.fit(X_train, y_train)
ridge_pred = ridge_best.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)

print(f"Linear Regression MSE: {lr_mse}")
print(f"Ridge Regression MSE (best alpha {best_alpha}): {ridge_mse}")

Linear Regression MSE: 24.29111947497352
Ridge Regression MSE (best alpha 2.310129700083163): 24.344971003742206

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pandas as pd

# Load California Housing dataset
california = fetch_california_housing(as_frame=True)
X, y = california.data, california.target

# Preprocessing
# Check for missing values
print(X.isnull().sum())

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
df_cal = california.frame.copy()
# Correlation Heatmap
plt.figure(figsize=(11, 9))
corr_cal = df_cal.corr(numeric_only=True)
sns.heatmap(corr_cal, annot=True, cmap='coolwarm', center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": 0.8})
plt.title('California Housing – Correlation matrix')
plt.show()

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
dtype: int64

# Boxplot
plt.figure(figsize=(12, 8))
X_scaled.boxplot()
plt.title('Feature Distribution (Standardized)')
plt.xticks(rotation=45)
plt.show()

# Scatter Plot (MedInc vs Target)
plt.figure(figsize=(8, 6))
plt.scatter(X['MedInc'], y, alpha=0.5)
plt.title('Median Income vs House Value (Target)')
plt.xlabel('Median Income (MedInc)')
plt.ylabel('House Value')
plt.show()

# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_pred)
print(f"Linear Regression Test MSE: {lr_mse:.4f}")

Linear Regression Test MSE: 0.5559

# Ridge Regression with varying alpha
alphas = np.logspace(-4, 4, 100)
train_errors = []
test_errors = []

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train, y_train)
    train_pred = ridge.predict(X_train)
    test_pred = ridge.predict(X_test)
    train_errors.append(mean_squared_error(y_train, train_pred))
    test_errors.append(mean_squared_error(y_test, test_pred))

# Plot test error vs alpha
plt.figure(figsize=(8, 6))
plt.semilogx(alphas, test_errors, label='Test Error')
plt.xlabel('Alpha (Regularization Parameter)')
plt.ylabel('Mean Squared Error')
plt.title('Test Error vs Alpha for Ridge Regression (California Housing)')
plt.legend()
plt.grid(True)

# Mark the best alpha
grid_search = GridSearchCV(Ridge(), {'alpha': alphas}, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
best_alpha = grid_search.best_params_['alpha']
plt.axvline(x=best_alpha, color='r', linestyle='--', label=f'Best Alpha = {best_alpha:.4f}')
plt.legend()

plt.show()

# Ridge with best alpha
ridge_best = Ridge(alpha=best_alpha)
ridge_best.fit(X_train, y_train)
ridge_pred = ridge_best.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)

# Print results
print(f"Linear Regression MSE: {lr_mse}")
print(f"Ridge Regression MSE (best alpha {best_alpha}): {ridge_mse}")

Linear Regression MSE: 0.5558915986952442
Ridge Regression MSE (best alpha 0.6280291441834259): 0.5558661968199002

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from itertools import product

class RegressionNN(nn.Module):
    def __init__(self, input_size, hidden_layers=3, hidden_units=256, dropout=0.2):
        super(RegressionNN, self).__init__()
        layers = []
        layers.append(nn.Linear(input_size, hidden_units))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout))
        for _ in range(hidden_layers - 1):
            layers.append(nn.Linear(hidden_units, hidden_units))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(hidden_units, 1))
        self.model = nn.Sequential(*layers)
    def forward(self, x):
        return self.model(x)

def create_data_loaders(features, targets, batch_size=64, test_frac=0.2, random_state=42):
    """
    Split data into train/test sets, standardize features,
    and return a DataLoader for training plus raw test tensors.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets,
        test_size=test_frac,
        random_state=random_state
    )

    # Fit scaler on training features
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    # Convert to torch tensors
    X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_train_t = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)
    X_test_t  = torch.tensor(X_test_scaled,  dtype=torch.float32)
    y_test_t  = torch.tensor(y_test.values.reshape(-1, 1),  dtype=torch.float32)

    train_dataset = TensorDataset(X_train_t, y_train_t)
    train_loader  = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, X_test_t, y_test_t

def train_model(model, train_loader, num_epochs=50, learning_rate=1e-3):
    """
    Train the given model on the data from train_loader
    for num_epochs epochs using Adam + MSE loss.
    Returns the trained model on CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    for epoch in range(num_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            Xb, yb = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            loss = criterion(model(Xb), yb)
            loss.backward()
            optimizer.step()

    model.eval()
    return model.cpu()


def compute_mse(model, X, y):
    """
    Compute mean squared error of model predictions vs. targets.
    """
    with torch.no_grad():
        preds = model(X).numpy()
    return mean_squared_error(y.numpy(), preds)


def run_experiment(
    data_df,
    target_column,
    hidden_depths=[3],
    dropout_values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    epochs=50,
    batch_size=64,
    dataset_label="Dataset"
):
    """
    For each combination of hidden layer depth and dropout rate,
    train a RegressionNN and record test MSE.
    """
    X = data_df.drop(columns=target_column)
    y = data_df[target_column]

    train_loader, X_test, y_test = create_data_loaders(X, y, batch_size=batch_size)

    results = {}
    for depth, dropout in product(hidden_depths, dropout_values):
        model = RegressionNN(
            input_size=X.shape[1],
            hidden_layers=depth,
            hidden_units=256,
            dropout=dropout
        )
        trained = train_model(model, train_loader, num_epochs=epochs)
        test_mse = compute_mse(trained, X_test, y_test)
        results[(depth, dropout)] = test_mse
        print(
            f"{dataset_label}: depth={depth:2d}, dropout={dropout:.1f} → "
            f"Test MSE={test_mse:.4f}"
        )

    return results

def plot_results_vs_dropout(results, depths_fixed, title):
    """
    Plot Test MSE as a function of dropout rate for a fixed hidden depth.
    """
    dropout_rates = sorted({dr for (_, dr) in results})
    mses = [results[(depths_fixed, dr)] for dr in dropout_rates]

    plt.figure()
    plt.plot(dropout_rates, mses, marker='o')
    plt.xlabel("Dropout Rate")
    plt.ylabel("Test MSE")
    plt.title(title)
    plt.grid(True)
    plt.show()


def plot_heatmap(results, depths, dropouts, title):
    """
    Display a heatmap of Test MSEs with depths on y-axis and dropouts on x-axis.
    """
    import seaborn as sns

    matrix = np.array([
        [results[(d, dr)] for dr in dropouts]
        for d in depths
    ])
    plt.figure(figsize=(8, 5))
    sns.heatmap(
        matrix, annot=True, fmt=".3f",
        xticklabels=dropouts, yticklabels=depths,
        cbar_kws={"label": "Test MSE"}
    )
    plt.xlabel("Dropout Rate")
    plt.ylabel("Hidden Depth")
    plt.title(title)
    plt.show()

from sklearn.datasets import fetch_openml, fetch_california_housing

boston_df = fetch_openml(name="boston", version=1, as_frame=True).frame
results_boston = run_experiment(
    boston_df, "MEDV",
    hidden_depths=[3],
    dataset_label="Boston"
)
plot_results_vs_dropout(results_boston, depths_fixed=3, title="Boston (Depth=3): MSE vs. Dropout")

Boston: depth= 3, dropout=0.0 → Test MSE=10.8994
Boston: depth= 3, dropout=0.1 → Test MSE=12.2643
Boston: depth= 3, dropout=0.2 → Test MSE=12.0886
Boston: depth= 3, dropout=0.3 → Test MSE=12.0007
Boston: depth= 3, dropout=0.4 → Test MSE=13.0902
Boston: depth= 3, dropout=0.5 → Test MSE=12.8894

cal_df = fetch_california_housing(as_frame=True).frame
results_california = run_experiment(
    cal_df, "MedHouseVal",
    hidden_depths=[3],
    epochs=30,
    dataset_label="California"
)
plot_results_vs_dropout(results_california, depths_fixed=3, title="California (Depth=3): MSE vs. Dropout")

California: depth= 3, dropout=0.0 → Test MSE=0.2673
California: depth= 3, dropout=0.1 → Test MSE=0.2729
California: depth= 3, dropout=0.2 → Test MSE=0.2755
California: depth= 3, dropout=0.3 → Test MSE=0.2710
California: depth= 3, dropout=0.4 → Test MSE=0.2835
California: depth= 3, dropout=0.5 → Test MSE=0.2981

depth_list = list(range(3, 30, 4))  # [3, 7, 11, 15, 19, 23, 27]
results_boston_grid = run_experiment(boston_df, "MEDV", hidden_depths=depth_list, dataset_label="Boston")
results_cal_grid   = run_experiment(cal_df, "MedHouseVal", hidden_depths=depth_list, epochs=30, dataset_label="California")

Boston: depth= 3, dropout=0.0 → Test MSE=11.1413
Boston: depth= 3, dropout=0.1 → Test MSE=10.8427
Boston: depth= 3, dropout=0.2 → Test MSE=11.2182
Boston: depth= 3, dropout=0.3 → Test MSE=12.6766
Boston: depth= 3, dropout=0.4 → Test MSE=11.8547
Boston: depth= 3, dropout=0.5 → Test MSE=14.3177
Boston: depth= 7, dropout=0.0 → Test MSE=9.2964
Boston: depth= 7, dropout=0.1 → Test MSE=12.1937
Boston: depth= 7, dropout=0.2 → Test MSE=11.8023
Boston: depth= 7, dropout=0.3 → Test MSE=13.3088
Boston: depth= 7, dropout=0.4 → Test MSE=31.4095
Boston: depth= 7, dropout=0.5 → Test MSE=52.4906
Boston: depth=11, dropout=0.0 → Test MSE=14.4352
Boston: depth=11, dropout=0.1 → Test MSE=14.5069
Boston: depth=11, dropout=0.2 → Test MSE=11.1659
Boston: depth=11, dropout=0.3 → Test MSE=18.3767
Boston: depth=11, dropout=0.4 → Test MSE=25.6484
Boston: depth=11, dropout=0.5 → Test MSE=32.9384
Boston: depth=15, dropout=0.0 → Test MSE=10.2544
Boston: depth=15, dropout=0.1 → Test MSE=13.6943
Boston: depth=15, dropout=0.2 → Test MSE=11.4868
Boston: depth=15, dropout=0.3 → Test MSE=17.7181
Boston: depth=15, dropout=0.4 → Test MSE=49.6707
Boston: depth=15, dropout=0.5 → Test MSE=32.9973
Boston: depth=19, dropout=0.0 → Test MSE=9.7452
Boston: depth=19, dropout=0.1 → Test MSE=10.8222
Boston: depth=19, dropout=0.2 → Test MSE=20.2272
Boston: depth=19, dropout=0.3 → Test MSE=22.5210
Boston: depth=19, dropout=0.4 → Test MSE=23.0125
Boston: depth=19, dropout=0.5 → Test MSE=52.2882
Boston: depth=23, dropout=0.0 → Test MSE=10.2233
Boston: depth=23, dropout=0.1 → Test MSE=12.5758
Boston: depth=23, dropout=0.2 → Test MSE=14.9917
Boston: depth=23, dropout=0.3 → Test MSE=18.1890
Boston: depth=23, dropout=0.4 → Test MSE=25.5785
Boston: depth=23, dropout=0.5 → Test MSE=49.3337
Boston: depth=27, dropout=0.0 → Test MSE=10.4761
Boston: depth=27, dropout=0.1 → Test MSE=13.1210
Boston: depth=27, dropout=0.2 → Test MSE=13.8490
Boston: depth=27, dropout=0.3 → Test MSE=73.8254
Boston: depth=27, dropout=0.4 → Test MSE=74.4163
Boston: depth=27, dropout=0.5 → Test MSE=73.3806
California: depth= 3, dropout=0.0 → Test MSE=0.2671
California: depth= 3, dropout=0.1 → Test MSE=0.2636
California: depth= 3, dropout=0.2 → Test MSE=0.2684
California: depth= 3, dropout=0.3 → Test MSE=0.2861
California: depth= 3, dropout=0.4 → Test MSE=0.2906
California: depth= 3, dropout=0.5 → Test MSE=0.2941
California: depth= 7, dropout=0.0 → Test MSE=0.2696
California: depth= 7, dropout=0.1 → Test MSE=0.2756
California: depth= 7, dropout=0.2 → Test MSE=0.2782
California: depth= 7, dropout=0.3 → Test MSE=0.2830
California: depth= 7, dropout=0.4 → Test MSE=0.3061
California: depth= 7, dropout=0.5 → Test MSE=0.3345
California: depth=11, dropout=0.0 → Test MSE=0.2779
California: depth=11, dropout=0.1 → Test MSE=0.2803
California: depth=11, dropout=0.2 → Test MSE=0.2908
California: depth=11, dropout=0.3 → Test MSE=0.2876
California: depth=11, dropout=0.4 → Test MSE=0.3720
California: depth=11, dropout=0.5 → Test MSE=0.4102
California: depth=15, dropout=0.0 → Test MSE=0.2737
California: depth=15, dropout=0.1 → Test MSE=0.2771
California: depth=15, dropout=0.2 → Test MSE=0.3034
California: depth=15, dropout=0.3 → Test MSE=0.3359
California: depth=15, dropout=0.4 → Test MSE=0.4499
California: depth=15, dropout=0.5 → Test MSE=0.7113

plot_heatmap(results_boston_grid, depth_list, [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], "Boston: MSE Heatmap")
plot_heatmap(results_cal_grid,   depth_list, [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], "California: MSE Heatmap")

def calculate_linear_params(n_features):
  return n_features + 1 # N weights + 1 bias

def calculate_nn_params(input_size, hidden_layers, hidden_units):

  input_layer_params = (input_size * hidden_units) + hidden_units

  # there are (hidden_layers - 1) such connections
  hidden_layer_params = (hidden_layers - 1) * ((hidden_units * hidden_units) + hidden_units)

  output_layer_params = (hidden_units * 1) + 1

  total_params = input_layer_params + hidden_layer_params + output_layer_params

  return total_params



n_features_boston = 13
n_features_california = 8

# --- Define NN Architecture Parameters Used ---
nn_hidden_units = 256
nn_depths = range(3, 27 + 1, 4) # Depths tested: 3, 7, 11, 15, 19, 23, 27


print("--- Linear/Ridge Regression Parameters ---")
params_lr_boston = calculate_linear_params(n_features_boston)
print(f"Boston Housing (N={n_features_boston}): {params_lr_boston} parameters")

params_lr_california = calculate_linear_params(n_features_california)
print(f"California Housing (N={n_features_california}): {params_lr_california} parameters")
print("-" * 40)



print(f"--- Neural Network Parameters (Hidden Units = {nn_hidden_units}) ---")

print("\nBoston Housing (N=13):")
print("-" * 20)
print("| Depth (L) | Total Parameters |")
print("|-----------|------------------|")
for depth in nn_depths:
  params = calculate_nn_params(n_features_boston, depth, nn_hidden_units)
  print(f"| {depth:<9} | {params:>16,} |")

print("\nCalifornia Housing (N=8):")
print("-" * 20)
print("| Depth (L) | Total Parameters |")
print("|-----------|------------------|")
for depth in nn_depths:
  params = calculate_nn_params(n_features_california, depth, nn_hidden_units)
  print(f"| {depth:<9} | {params:>16,} |")

--- Linear/Ridge Regression Parameters ---
Boston Housing (N=13): 14 parameters
California Housing (N=8): 9 parameters
----------------------------------------
--- Neural Network Parameters (Hidden Units = 256) ---

Boston Housing (N=13):
--------------------
| Depth (L) | Total Parameters |
|-----------|------------------|
| 3         |          135,425 |
| 7         |          398,593 |
| 11        |          661,761 |
| 15        |          924,929 |
| 19        |        1,188,097 |
| 23        |        1,451,265 |
| 27        |        1,714,433 |

California Housing (N=8):
--------------------
| Depth (L) | Total Parameters |
|-----------|------------------|
| 3         |          134,145 |
| 7         |          397,313 |
| 11        |          660,481 |
| 15        |          923,649 |
| 19        |        1,186,817 |
| 23        |        1,449,985 |
| 27        |        1,713,153 |

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT	MEDV
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1	296.0	15.3	396.90	4.98	24.0
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242.0	17.8	396.90	9.14	21.6
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2	242.0	17.8	392.83	4.03	34.7
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3	222.0	18.7	394.63	2.94	33.4
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3	222.0	18.7	396.90	5.33	36.2

Assignment 4¶

Question 1: Regularization¶

(a) Boston Housing Dataset Preprocessing and Analysis¶

Strong Predictors of House Value¶

Features with Little Direct Effect¶

Geographic Coordinates Are Not Predictive by Themselves¶

Multicollinearity to Watch For¶

Feature Distribution (Standardized) – First Plot (Boxplot)¶

Median Income vs House Value (Target) – Second Plot (Scatterplot)¶

Question 2: Regularization in Neural Networks¶

Question 3: Number of Parameters¶

Your answer here¶