Importing Libraries¶

In [ ]:
# Mounting google drive
from google.colab import drive
drive.mount('/content/drive')

# Downloading all the required libraries


# Importing all the required libraries
import os
import numpy as np
import pandas as pd
import re
import cv2
import string
import networkx as nx
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from matplotlib import style
from glob import glob
from natsort import natsorted
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
style.use('ggplot')

IMG_SIZE = 32
NUM_CLASSES = 11
Mounted at /content/drive
In [ ]:
def show_image_grid(images, M, N, title='Title', figsize=8):
    # Assuming 'images' is a numpy array of shape (num_images, height, width, channels)
    if M==1:
        row_size = figsize
        col_size = figsize//4
    elif N==1:
        row_size = figsize//4
        col_size = figsize
    else:
        row_size, col_size = figsize, figsize

    fig, axes = plt.subplots(M, N, figsize=(row_size, col_size))

    if len(images.shape) < 4:
        images = np.expand_dims(images.copy(), axis=0)

    fig.suptitle(title)
    for i in range(M):
        for j in range(N):
            if M==1 and N==1:
                ax = axes
            elif M == 1 or N==1:
                ax = axes[max(i, j)]
            else:
                ax = axes[i, j]
            index = i * N + j
            if index < images.shape[0]:
                ax.imshow(cv2.cvtColor(images[index], cv2.COLOR_BGR2RGB))
            ax.axis('off')
    plt.tight_layout()
    plt.show()
    plt.clf()

Q1: Food Classification using Logistic Regression [7 Marks]¶

Given a 11 classes of food, you need to complete the boilerplate code for Food Classification using Logistic Regression.

You can take help from following resource:

  1. [Link]

Part A: Learning Model [3 marks]¶

In [ ]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def compute_loss(y_true, y_pred):
    # Write your code here
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1-epsilon)
    return -np.mean(y_true * np.log(y_pred) + (1-y_true) * np.log(1-y_pred))


class LogisticRegressionOvA:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.classes = None
        self.bias = None

    def fit(self, X, y, num_classes):
        """
        Train logistic regression for each class using one-vs-all approach.
        """
        num_samples, num_features = X.shape
        self.weights = np.zeros((num_classes, num_features))
        self.bias = np.zeros(num_classes)

        for class_idx in range(num_classes):
            y_binary = (y == class_idx).astype(int)
            for _ in range(self.epochs):
                linear_model = np.dot(X, self.weights[class_idx]) + self.bias[class_idx]
                y_pred = sigmoid(linear_model)

                # Compute gradients
                dw = (1 / num_samples) * np.dot(X.T, (y_pred - y_binary))
                db = (1 / num_samples) * np.sum(y_pred - y_binary)

                # Update weights
                self.weights[class_idx] -= self.learning_rate * dw
                self.bias[class_idx] -= self.learning_rate * db

    def predict(self, X):
        """
        Predict class labels using one-vs-all approach.
        """
        linear_model = np.dot(X, self.weights.T) + self.bias
        y_pred = sigmoid(linear_model)
        return np.argmax(y_pred, axis=1)
In [ ]:
def load_images_from_folder(folder_path):
    images = []
    labels = []
    print('Loading datasets...')
    for class_id, class_name in enumerate(tqdm(["Bread", "Dairy product", "Dessert", "Egg", "Fried food", "Meat", "Noodles-Pasta", "Rice", "Seafood", "Soup", "Vegetable-Fruit"])):
        for img_path in natsorted(glob(f"{folder_path}/{class_name}/*")):  # Modify extension if needed
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            images.append(np.float32(img.flatten())/255.0)
            labels.append(class_id)
    return np.array(images), np.array(labels)
In [ ]:
X, y = load_images_from_folder("/content/drive/My Drive/ES670MM/dataset/C/food11/")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print('\n Dataset shape: ', X_train.shape, X_test.shape, y_train.shape, y_test.shape)
model = LogisticRegressionOvA(learning_rate=0.01, epochs=1000)
model.fit(X_train, y_train, NUM_CLASSES)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
Loading datasets...
100%|██████████| 11/11 [01:19<00:00,  7.26s/it]
 Dataset shape:  (2671, 3072) (297, 3072) (2671,) (297,)
Model Accuracy: 0.26

Part B: Weights Visualization [2 marks]¶

Once we train our Logistic Regression model, we will have the trained weights of shape [num_classes, feature_size]. We know that feature size is equal to (32 x 32 x 3 = 3072). Now your job is to visualize the features that is being learned using weight matrix. So there will be total 10 plots.

In [ ]:
import numpy as np

def display_class_weights(weight_tensor, img_dim=32, n_classes=11):
    """
    Convert each class’s weight vector into an RGB image,
    normalize to [0,255], and collect for grid display.
    """
    class_images = []

    for cls in range(n_classes):
        # reshape the flat weight vector into (H, W, C)
        raw = weight_tensor[cls]
        img = raw.reshape((img_dim, img_dim, 3))

        # scale pixels to cover full 0–255 range
        min_val, max_val = img.min(), img.max()
        norm = (img - min_val) / (max_val - min_val + 1e-8)
        uint8_img = (norm * 255).astype(np.uint8)

        class_images.append(uint8_img)

    # determine grid size (one extra slot if needed)
    rows, cols = 3, 4
    img_array = np.stack(class_images, axis=0)

    # invoke your grid-display utility
    show_image_grid(
        img_array,
        rows,
        cols,
        title="Per-Class Weight Maps",
        figsize=10
    )


# Example call
display_class_weights(model.weights, IMG_SIZE, NUM_CLASSES)
No description has been provided for this image
<Figure size 640x480 with 0 Axes>

Part C: Image Retrieval [2 marks]¶

Similar Assignment A, your task is to write image retrival code using the feature learned using Logistic Regression (W matrix). Here instead of mean images we have the learned weights for each class of dim 3072. Discuss how the accuracy you obtain here vary from the simple mean based image classification.

Learned weights (W) work better because they focus on details that actually help tell objects apart, while mean images just average everything (like mixing all cat/dog photos into blurry blobs).

1. Learned Patterns vs. Average Looks¶

  • W matrices: Contain specific tricks to spot differences (e.g., "cat ears vs. dog nose")
  • Mean images: Mix all class photos → lose important details (like blending whiskers and fur)

2. Smart Feature Focus vs. Simple Pixel Matching¶

  • W weights: Highlight key patterns (edges/textures that matter)
  • Mean images: Compare raw pixels (might match random similar colors instead of actual objects)
In [ ]:
def image_lookup_fn(mean_images, query_image, mean_img_classes):
    best_match_idx = np.random.randint(0, 10) # default

    # Write your code here
    # You have to search among mean_images which one is closest to the query image
    # return the class of mean image which has the highest matching score
    best_score = -np.inf
    best_match_idx = 0

    # Resize and flatten query image to match training image shape
    query_resized = cv2.resize(query_image, (32, 32))
    query_flat = query_resized.flatten().astype(np.float32) / 255.0

    # Normalize the query vector
    query_flat /= (np.linalg.norm(query_flat) + 1e-8)

    for idx, class_weight in enumerate(mean_images):
        # Normalize the weight vector
        class_weight_norm = class_weight / (np.linalg.norm(class_weight) + 1e-8)

        # Compute cosine similarity
        score = np.dot(query_flat, class_weight_norm)

        if score > best_score:
            best_score = score
            best_match_idx = idx

    return mean_img_classes[best_match_idx]
In [ ]:
search_images = []
search_img_classes = []

for files in tqdm(natsorted(glob('/content/drive/My Drive/ES670MM/dataset/A/images/search_images/*'))):
    search_images.append(cv2.imread(files, 1))
    search_img_classes.append(os.path.splitext(os.path.basename(files))[0].split('_')[0])

search_images = np.array(search_images)
show_image_grid(search_images, 10, 10, 'Search Images', figsize=8)
100%|██████████| 110/110 [00:02<00:00, 50.32it/s] 
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [ ]:
pred_classes = []
cluster_images = {}

for query_image in tqdm(search_images):
    pred_classes.append(image_lookup_fn(model.weights, query_image, np.arange(NUM_CLASSES)))
    if pred_classes[-1] in cluster_images:
        cluster_images[pred_classes[-1]].append(query_image)
    else:
        cluster_images[pred_classes[-1]] = [query_image]

for class_name, img_lst in cluster_images.items():
    img_lst = np.array(img_lst)
    M = img_lst.shape[0]//10 + 1
    N = img_lst.shape[0]%10 + 1
    show_image_grid(img_lst, M, N, f'Search Class: {class_name}', figsize=8)
100%|██████████| 110/110 [00:00<00:00, 1998.36it/s]
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>

Q2: Image Segmentation using Page Rank [3 Marks]¶

Given a list of grayscale images, your task is to perform image segmentation using Page Rank algorithm. You have to write Page Rank algorithm from scratch.

In [ ]:
def image_to_graph(image):
    height, width = image.shape
    G = nx.Graph()

    for y in range(height):
        for x in range(width):
            G.add_node((y, x), intensity=image[y, x])

    for y in range(height):
        for x in range(width):
            current_pixel = (y, x)
            neighbors = [(y+dy, x+dx) for dy in [-1, 0, 1] for dx in [-1, 0, 1]
                         if 0 <= y+dy < height and 0 <= x+dx < width and (dy, dx) != (0, 0)]

            for neighbor in neighbors:
                weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0)
                G.add_edge(current_pixel, neighbor, weight=weight)

    return G
In [ ]:
def pagerank(G, alpha=0.85, weight='weight', max_iter=100, tol=1e-6):
    # Initialize PR scores uniformly
    # Set equal initial PageRank values for all nodes
    num_nodes = G.number_of_nodes()
    page_rank = {n: 1.0/num_nodes for n in G.nodes()}

    for _ in range(max_iter):
        total_change = 0
        new_rank = {}

        # Calculate new ranks using neighbor contributions
        for node in G.nodes():
            # Sum weighted contributions from neighbors
            neighbor_contributions = 0.0
            for neighbor in G.neighbors(node):
                # Get edge weight (default 1.0 if unspecified)
                edge_weight = G[neighbor][node].get(weight, 1.0)
                # Calculate neighbor's total outgoing weight
                outgoing_total = sum(G[neighbor][nbr].get(weight, 1.0)
                                   for nbr in G.neighbors(neighbor))

                if outgoing_total > 0:  # Avoid division by zero
                    neighbor_contributions += page_rank[neighbor] * edge_weight / outgoing_total

            # Update with damping factor (alpha)
            new_rank[node] = (1 - alpha)/num_nodes + alpha * neighbor_contributions
            total_change += abs(new_rank[node] - page_rank[node])

        # Update rankings and check convergence
        page_rank = new_rank.copy()
        if total_change < tol:
            break
    return page_rank
In [ ]:
def segment_using_pagerank(G, image_shape):
    pr = pagerank(G, alpha=0.85, weight='weight')  # Compute PageRank

    pr_values = np.array([pr[node] for node in G.nodes()]).reshape(image_shape)

    # Normalize scores
    pr_values = (pr_values - pr_values.min()) / (pr_values.max() - pr_values.min())

    # Threshold the top 30% highest ranked pixels as foreground
    threshold = np.percentile(pr_values, 70)
    segmentation = (pr_values > threshold).astype(np.uint8) * 255

    return pr_values, segmentation
In [ ]:
def load_image_page_rank(image_path, size=(128, 128)):
    img = cv2.imread(image_path)
    img = cv2.resize(img, size)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    return img
In [ ]:
src_images = []

for files in tqdm(natsorted(glob('/content/drive/My Drive/ES670MM/dataset/A/images/search_images/*'))):
    src_images.append(load_image_page_rank(files))

src_images = np.array(src_images)[11:22]
show_image_grid(np.tile(np.expand_dims(src_images, axis=-1), (1, 1, 3)), 3, 3, 'Search Images', figsize=8)
100%|██████████| 110/110 [00:00<00:00, 140.03it/s]
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [ ]:
for img in src_images:
    G = image_to_graph(img)
    pr_values, segmentation = segment_using_pagerank(G, img.shape)

    # Visualization
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 3, 1)
    plt.grid(False)
    plt.imshow(img, cmap='gray')
    plt.title("Original Image")

    plt.subplot(1, 3, 2)
    plt.imshow(pr_values, cmap='jet')
    plt.grid(False)
    plt.title("PageRank Scores")

    plt.subplot(1, 3, 3)
    plt.imshow(segmentation, cmap='gray')
    plt.grid(False)
    plt.title("Segmented Image")

    plt.show()
<ipython-input-11-976354ea1260>:16: RuntimeWarning: overflow encountered in scalar negative
  weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0)
<ipython-input-11-976354ea1260>:16: RuntimeWarning: overflow encountered in scalar subtract
  weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image