Importing Libraries¶
# Mount Google Drive at /content/drive (this import only exists inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')
# Downloading all the required libraries
# Importing all the required libraries
import os
import numpy as np
import pandas as pd
import re
import cv2
import string
import networkx as nx
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from matplotlib import style
from glob import glob
from natsort import natsorted
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
# Apply matplotlib's ggplot style sheet to the figures drawn below.
style.use('ggplot')
# Side length, in pixels, that classification images are resized to before flattening.
IMG_SIZE = 32
# Number of classes passed to the one-vs-all classifier.
NUM_CLASSES = 11
Mounted at /content/drive
def show_image_grid(images, M, N, title='Title', figsize=8):
    """Display a batch of BGR images on an M x N grid of subplots.

    Args:
        images: NumPy array of shape (num_images, height, width, channels)
            in OpenCV's BGR channel order. An array with fewer than four
            dimensions is treated as a single image and gets a leading
            batch axis.
        M: Number of grid rows.
        N: Number of grid columns.
        title: Figure-level title.
        figsize: Base figure size in inches. Single-row and single-column
            grids use a quarter of it for their short side.

    Images beyond the first ``M * N`` are not drawn; grid cells without an
    image are left blank.
    """
    if M == 1:
        fig_width, fig_height = figsize, figsize // 4
    elif N == 1:
        fig_width, fig_height = figsize // 4, figsize
    else:
        fig_width, fig_height = figsize, figsize

    # squeeze=False always returns a 2-D array of axes, so 1x1, 1xN and Mx1
    # grids need no special-case indexing.
    fig, axes = plt.subplots(M, N, figsize=(fig_width, fig_height), squeeze=False)
    if images.ndim < 4:
        images = np.expand_dims(images, axis=0)

    fig.suptitle(title)
    # axes.flat walks the grid row by row, i.e. index == row * N + col.
    for index, ax in enumerate(axes.flat):
        if index < images.shape[0]:
            ax.imshow(cv2.cvtColor(images[index], cv2.COLOR_BGR2RGB))
        ax.axis('off')
    plt.tight_layout()
    plt.show()
    # Close this figure explicitly. Calling plt.clf() here would create a new
    # empty current figure, which notebooks then render as
    # "<Figure size 640x480 with 0 Axes>".
    plt.close(fig)
Q1: Food Classification using Logistic Regression [7 Marks]¶
Given 11 classes of food, complete the boilerplate code for food classification using logistic regression.
You can take help from following resource:
- [Link]
Part A: Learning Model [3 marks]¶
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or NumPy array of logits.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    # 1 / (1 + e^-z) == exp(-log(1 + e^-z)) == exp(-logaddexp(0, -z)).
    # logaddexp never exponentiates a large positive number, so very negative
    # logits no longer trigger overflow warnings from np.exp(-z).
    return np.exp(-np.logaddexp(0, -z))
def compute_loss(y_true, y_pred):
    """Return the mean binary cross-entropy between targets and probabilities.

    Args:
        y_true: Array of target values.
        y_pred: Array of predicted probabilities, same shape as ``y_true``.

    Returns:
        The cross-entropy averaged over all elements.
    """
    # Keep probabilities strictly inside (0, 1) so neither log() sees zero.
    tiny = 1e-15
    probs = np.clip(y_pred, tiny, 1 - tiny)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    return -np.mean(positive_term + negative_term)
class LogisticRegressionOvA:
    """One-vs-all logistic regression trained with full-batch gradient descent.

    One independent binary classifier is fitted per class; prediction picks
    the class whose classifier produces the largest score.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        """Store the hyper-parameters; model parameters are created by ``fit``.

        Args:
            learning_rate: Gradient-descent step size.
            epochs: Number of full-batch updates performed per class.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None  # (num_classes, num_features) array once fitted
        self.classes = None  # array of class ids 0..num_classes-1 once fitted
        self.bias = None  # (num_classes,) array once fitted

    def fit(self, X, y, num_classes):
        """
        Train logistic regression for each class using one-vs-all approach.

        Args:
            X: Array of shape (num_samples, num_features).
            y: Array of integer class labels in ``range(num_classes)``.
            num_classes: Number of binary classifiers to train.
        """
        num_samples, num_features = X.shape
        self.weights = np.zeros((num_classes, num_features))
        self.bias = np.zeros(num_classes)
        self.classes = np.arange(num_classes)
        inv_n = 1 / num_samples

        for class_idx in range(num_classes):
            # Current class is the positive label, every other class negative.
            y_binary = (y == class_idx).astype(int)
            for _ in range(self.epochs):
                linear_model = np.dot(X, self.weights[class_idx]) + self.bias[class_idx]
                error = sigmoid(linear_model) - y_binary
                # Gradients of the mean binary cross-entropy.
                dw = inv_n * np.dot(X.T, error)
                db = inv_n * np.sum(error)
                self.weights[class_idx] -= self.learning_rate * dw
                self.bias[class_idx] -= self.learning_rate * db

    def predict(self, X):
        """
        Predict class labels using one-vs-all approach.

        Args:
            X: Array of shape (num_samples, num_features).

        Returns:
            Array of shape (num_samples,) with the index of the best class.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("LogisticRegressionOvA.predict() called before fit().")
        logits = np.dot(X, self.weights.T) + self.bias
        # The sigmoid is monotonic, so the argmax of the logits is the argmax
        # of the probabilities. Using the raw logits avoids spurious ties when
        # several sigmoids saturate to exactly 1.0 in floating point.
        return np.argmax(logits, axis=1)
def load_images_from_folder(folder_path, class_names=None, img_size=None):
    """Load a class-per-subfolder image dataset as flattened float vectors.

    Args:
        folder_path: Directory containing one sub-directory per class.
        class_names: Sub-directory names in label order; a class's position
            in this sequence is its integer label. Defaults to the eleven
            food categories used by this notebook.
        img_size: Side length images are resized to. Defaults to ``IMG_SIZE``.

    Returns:
        Tuple ``(images, labels)``: ``images`` holds one flattened float32
        BGR image per row scaled to [0, 1]; ``labels`` holds the matching
        integer class ids.
    """
    if class_names is None:
        class_names = ["Bread", "Dairy product", "Dessert", "Egg", "Fried food", "Meat",
                       "Noodles-Pasta", "Rice", "Seafood", "Soup", "Vegetable-Fruit"]
    if img_size is None:
        img_size = IMG_SIZE

    images = []
    labels = []
    print('Loading datasets...')
    for class_id, class_name in enumerate(tqdm(class_names)):
        # os.path.join copes with folder_path ending (or not) in a separator.
        pattern = os.path.join(folder_path, class_name, "*")
        for img_path in natsorted(glob(pattern)):
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread returns None instead of raising when a file is
                # missing, unreadable or not an image; skip it rather than
                # crash inside cv2.resize.
                print(f"Skipping unreadable file: {img_path}")
                continue
            img = cv2.resize(img, (img_size, img_size))
            images.append(np.float32(img.flatten()) / 255.0)
            labels.append(class_id)
    return np.array(images), np.array(labels)
# Load the flattened images and their integer class labels.
X, y = load_images_from_folder("/content/drive/My Drive/ES670MM/dataset/C/food11/")
# Hold out 10% of the samples for testing, stratified by class label.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
# Standardize features with statistics fitted on the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print('\n Dataset shape: ', X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Train the one-vs-all classifier and report accuracy on the held-out split.
model = LogisticRegressionOvA(learning_rate=0.01, epochs=1000)
model.fit(X_train, y_train, NUM_CLASSES)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
Loading datasets...
100%|██████████| 11/11 [01:19<00:00, 7.26s/it]
Dataset shape: (2671, 3072) (297, 3072) (2671,) (297,) Model Accuracy: 0.26
Part B: Weights Visualization [2 marks]¶
Once we train our Logistic Regression model, we will have trained weights of shape [num_classes, feature_size]. The feature size is 32 x 32 x 3 = 3072. Your job is to visualize the features being learned using the weight matrix, one plot per class, so with 11 classes there will be 11 plots in total.
import numpy as np
def display_class_weights(weight_tensor, img_dim=32, n_classes=11):
    """
    Convert each class's weight vector into a 3-channel image,
    normalize to [0,255], and show all of them on one grid.

    Args:
        weight_tensor: Indexable of per-class weight vectors, each holding
            ``img_dim * img_dim * 3`` values.
        img_dim: Height and width of the reconstructed weight image.
        n_classes: Number of leading classes to visualize.

    Raises:
        ValueError: If ``n_classes`` is smaller than 1.
    """
    if n_classes < 1:
        raise ValueError("n_classes must be at least 1")

    class_images = []
    for cls in range(n_classes):
        # reshape the flat weight vector into (H, W, C)
        img = np.asarray(weight_tensor[cls]).reshape((img_dim, img_dim, 3))
        # scale pixels to cover the full 0-255 range; the epsilon guards
        # against a constant weight vector
        min_val, max_val = img.min(), img.max()
        norm = (img - min_val) / (max_val - min_val + 1e-8)
        class_images.append((norm * 255).astype(np.uint8))

    # Size the grid from the class count (at most 4 columns) so no class is
    # silently dropped; 11 classes still give the 3 x 4 layout.
    cols = min(n_classes, 4)
    rows = (n_classes + cols - 1) // cols

    show_image_grid(
        np.stack(class_images, axis=0),
        rows,
        cols,
        title="Per-Class Weight Maps",
        figsize=10,
    )
# Visualize the trained model's weight matrix, one weight map per class.
display_class_weights(model.weights, IMG_SIZE, NUM_CLASSES)
<Figure size 640x480 with 0 Axes>
Part C: Image Retrieval [2 marks]¶
Similar to Assignment A, your task is to write image retrieval code using the features learned by Logistic Regression (the W matrix). Here, instead of mean images, we have the learned weights for each class, each of dimension 3072. Discuss how the accuracy you obtain here varies from that of the simple mean-based image classification.
Learned weights (W) work better because they focus on details that actually help tell objects apart, while mean images just average everything (like mixing all cat/dog photos into blurry blobs).
1. Learned Patterns vs. Average Looks¶
- W matrices: contain specific tricks to spot differences (e.g., "cat ears vs. dog nose").
- Mean images: mix all photos of a class, which loses important details (like blending whiskers and fur).
2. Smart Feature Focus vs. Simple Pixel Matching¶
- W weights: highlight the key patterns (edges/textures that matter).
- Mean images: compare raw pixels, so they might match random similar colors instead of actual objects.
def image_lookup_fn(mean_images, query_image, mean_img_classes):
    """Return the class whose reference vector best matches the query image.

    The query is resized, flattened, scaled to [0, 1] and compared with every
    reference vector by cosine similarity.

    Args:
        mean_images: Array of shape (num_classes, num_features) holding one
            flattened reference vector per class (mean images or learned
            weights). ``num_features`` must equal ``side * side * 3``.
        query_image: 3-channel image array as returned by ``cv2.imread``.
        mean_img_classes: Sequence mapping a row index of ``mean_images`` to
            the class identifier that is returned.

    Returns:
        The entry of ``mean_img_classes`` for the most similar reference.

    Raises:
        ValueError: If ``query_image`` is None or the reference vectors do
            not describe a square 3-channel image.
    """
    if query_image is None:
        raise ValueError("query_image is None; the image could not be loaded")

    references = np.asarray(mean_images, dtype=np.float64)
    n_features = references.shape[1]
    # Recover the image side length from the feature count instead of
    # hard-coding 32, so the query always matches the reference vectors.
    side = int(round((n_features / 3) ** 0.5))
    if side * side * 3 != n_features:
        raise ValueError(
            f"reference vectors of length {n_features} are not a square 3-channel image"
        )

    # Resize and flatten the query image to match the training image shape.
    query_flat = cv2.resize(query_image, (side, side)).flatten().astype(np.float32) / 255.0
    query_flat /= (np.linalg.norm(query_flat) + 1e-8)
    # NOTE(review): the query is only scaled to [0, 1]. If the reference
    # vectors were learned on standardized features, the same standardization
    # is not applied here; confirm whether that is intended.

    # Cosine similarity against every class at once.
    reference_norms = np.linalg.norm(references, axis=1) + 1e-8
    scores = np.dot(references, query_flat) / reference_norms
    # argmax returns the first maximum, like a strict ">" comparison loop.
    best_match_idx = int(np.argmax(scores))
    return mean_img_classes[best_match_idx]
# Read every search image in natural filename order; the ground-truth class is
# the part of the file name before the first underscore.
search_images, search_img_classes = [], []
search_pattern = '/content/drive/My Drive/ES670MM/dataset/A/images/search_images/*'
for files in tqdm(natsorted(glob(search_pattern))):
    file_stem = os.path.splitext(os.path.basename(files))[0]
    search_images.append(cv2.imread(files, 1))
    search_img_classes.append(file_stem.split('_')[0])
search_images = np.array(search_images)
show_image_grid(search_images, 10, 10, 'Search Images', figsize=8)
100%|██████████| 110/110 [00:02<00:00, 50.32it/s]
<Figure size 640x480 with 0 Axes>
# Assign every search image to the class whose weight vector it matches best.
pred_classes = []
cluster_images = {}
class_ids = np.arange(NUM_CLASSES)
for query_image in tqdm(search_images):
    pred_class = image_lookup_fn(model.weights, query_image, class_ids)
    pred_classes.append(pred_class)
    cluster_images.setdefault(pred_class, []).append(query_image)

# Show each predicted cluster on its own grid.
for class_name, img_lst in cluster_images.items():
    img_lst = np.array(img_lst)
    num_imgs = img_lst.shape[0]
    # At most 10 columns, and as many rows as needed so that M * N >= num_imgs.
    # The old n//10 + 1 by n%10 + 1 grid could have fewer cells than images
    # (e.g. 10 images gave a 2 x 1 grid) and dropped the rest.
    N = min(num_imgs, 10)
    M = (num_imgs + N - 1) // N
    show_image_grid(img_lst, M, N, f'Search Class: {class_name}', figsize=8)
100%|██████████| 110/110 [00:00<00:00, 1998.36it/s]
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
Q2: Image Segmentation using Page Rank [3 Marks]¶
Given a list of grayscale images, your task is to perform image segmentation using Page Rank algorithm. You have to write Page Rank algorithm from scratch.
def image_to_graph(image):
    """Build an 8-connected pixel graph from a grayscale image.

    Every pixel becomes a node ``(row, col)`` with an ``intensity`` attribute.
    Neighbouring pixels are joined by an edge whose ``weight`` is
    ``exp(-|I_a - I_b| / 255)``, so similar pixels are strongly connected.

    Args:
        image: 2-D NumPy array of pixel intensities.

    Returns:
        ``networkx.Graph`` whose nodes were inserted in row-major order.
    """
    height, width = image.shape
    # Do the arithmetic in floating point: on uint8 pixels the subtraction
    # and negation wrap around (e.g. 10 - 20 == 246), which produced overflow
    # warnings and wrong edge weights.
    pixels = image.astype(np.float64)

    G = nx.Graph()
    for row in range(height):
        for col in range(width):
            G.add_node((row, col), intensity=image[row, col])

    # The graph is undirected and the weight is symmetric, so visiting only
    # the right / lower-left / lower / lower-right neighbours adds each of
    # the 8-neighbourhood edges exactly once.
    forward_offsets = ((0, 1), (1, -1), (1, 0), (1, 1))
    for row in range(height):
        for col in range(width):
            for d_row, d_col in forward_offsets:
                n_row, n_col = row + d_row, col + d_col
                if 0 <= n_row < height and 0 <= n_col < width:
                    difference = abs(pixels[row, col] - pixels[n_row, n_col])
                    G.add_edge((row, col), (n_row, n_col), weight=np.exp(-difference / 255.0))
    return G
def pagerank(G, alpha=0.85, weight='weight', max_iter=100, tol=1e-6):
    """Compute weighted PageRank scores by power iteration.

    Args:
        G: Graph object providing ``number_of_nodes()``, ``nodes()``,
            ``neighbors(node)`` and ``G[u][v]`` edge-attribute dicts
            (e.g. a ``networkx.Graph``).
        alpha: Damping factor; ``1 - alpha`` is spread uniformly over nodes.
        weight: Edge-attribute key holding the edge weight. Edges without
            that key count as weight 1.0.
        max_iter: Maximum number of iterations.
        tol: Stop once the summed absolute change of all scores in one
            iteration drops below this value.

    Returns:
        Dict mapping each node to its PageRank score (empty for an empty
        graph).
    """
    num_nodes = G.number_of_nodes()
    if num_nodes == 0:
        return {}

    nodes = list(G.nodes())
    # Set equal initial PageRank values for all nodes.
    page_rank = {node: 1.0 / num_nodes for node in nodes}

    # A node's total outgoing weight never changes between iterations, so it
    # is computed once here instead of once per edge per iteration.
    out_weight = {
        node: sum(G[node][nbr].get(weight, 1.0) for nbr in G.neighbors(node))
        for node in nodes
    }
    teleport = (1 - alpha) / num_nodes

    for _ in range(max_iter):
        total_change = 0
        new_rank = {}
        for node in nodes:
            # Sum the rank each neighbour sends along its edge to this node.
            neighbor_contributions = 0.0
            for neighbor in G.neighbors(node):
                outgoing_total = out_weight[neighbor]
                if outgoing_total > 0:  # Avoid division by zero
                    edge_weight = G[neighbor][node].get(weight, 1.0)
                    neighbor_contributions += page_rank[neighbor] * edge_weight / outgoing_total
            new_rank[node] = teleport + alpha * neighbor_contributions
            total_change += abs(new_rank[node] - page_rank[node])
        # new_rank is rebuilt from scratch every iteration, so it can be
        # adopted directly without copying.
        page_rank = new_rank
        if total_change < tol:
            break
    return page_rank
def segment_using_pagerank(G, image_shape):
    """Segment an image by thresholding the PageRank score of its pixels.

    Args:
        G: Pixel graph whose nodes iterate in row-major pixel order, so the
            scores can be reshaped straight into ``image_shape``.
        image_shape: ``(height, width)`` of the source image.

    Returns:
        Tuple ``(pr_values, segmentation)``: the scores min-max normalized to
        [0, 1], and a uint8 mask that is 255 where a pixel's score lies above
        the 70th percentile and 0 elsewhere.
    """
    pr = pagerank(G, alpha=0.85, weight='weight')  # Compute PageRank
    pr_values = np.array([pr[node] for node in G.nodes()]).reshape(image_shape)

    # Min-max normalize. When every score is identical the range is zero;
    # use an all-zero map instead of dividing by zero and producing NaNs.
    lowest, highest = pr_values.min(), pr_values.max()
    score_range = highest - lowest
    if score_range > 0:
        pr_values = (pr_values - lowest) / score_range
    else:
        pr_values = np.zeros_like(pr_values)

    # Threshold the top 30% highest ranked pixels as foreground
    threshold = np.percentile(pr_values, 70)
    segmentation = (pr_values > threshold).astype(np.uint8) * 255
    return pr_values, segmentation
def load_image_page_rank(image_path, size=(128, 128)):
    """Load an image, resize it and convert it to grayscale.

    Args:
        image_path: Path of the image file.
        size: Target size passed to ``cv2.resize``.

    Returns:
        2-D grayscale image array.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising on a missing or
        # undecodable file; fail here with the offending path rather than
        # with an opaque error inside cv2.resize.
        raise ValueError(f"Could not read image: {image_path!r}")
    img = cv2.resize(img, size)
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
# Load all search images as grayscale and keep the slice [11:22] for segmentation.
page_rank_paths = natsorted(glob('/content/drive/My Drive/ES670MM/dataset/A/images/search_images/*'))
src_images = np.array([load_image_page_rank(path) for path in tqdm(page_rank_paths)])
src_images = src_images[11:22]
# Repeat the single gray channel three times so the grid helper, which expects
# 3-channel images, can draw them.
src_images_3ch = np.repeat(src_images[..., np.newaxis], 3, axis=-1)
show_image_grid(src_images_3ch, 3, 3, 'Search Images', figsize=8)
100%|██████████| 110/110 [00:00<00:00, 140.03it/s]
<Figure size 640x480 with 0 Axes>
# Segment each selected image and show the input, its PageRank score map and
# the resulting mask side by side.
for img in src_images:
    G = image_to_graph(img)
    pr_values, segmentation = segment_using_pagerank(G, img.shape)

    # (data, colormap, title) for the three panels, left to right.
    panels = (
        (img, 'gray', "Original Image"),
        (pr_values, 'jet', "PageRank Scores"),
        (segmentation, 'gray', "Segmented Image"),
    )
    plt.figure(figsize=(12, 6))
    for position, (panel_data, panel_cmap, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(panel_data, cmap=panel_cmap)
        plt.grid(False)
        plt.title(panel_title)
    plt.show()
<ipython-input-11-976354ea1260>:16: RuntimeWarning: overflow encountered in scalar negative weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0) <ipython-input-11-976354ea1260>:16: RuntimeWarning: overflow encountered in scalar subtract weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0)