# Mounting google drive
# Colab-only step: exposes the user's Drive under /content/drive, which is
# the root of every dataset path used later in this notebook.
from google.colab import drive
drive.mount('/content/drive')

# Downloading all the required libraries


# Importing all the required libraries
import os
import numpy as np
import pandas as pd
import re
import cv2
import string
import networkx as nx
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from matplotlib import style
from glob import glob
from natsort import natsorted
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
# Use matplotlib's 'ggplot' style sheet for the figures drawn below.
style.use('ggplot')

# Side length (in pixels) that training images are resized to before flattening.
IMG_SIZE = 32
# Number of classes handed to the one-vs-all classifier.
NUM_CLASSES = 11

Mounted at /content/drive

def show_image_grid(images, M, N, title='Title', figsize=8):
    """Display up to ``M * N`` images on a grid of M rows by N columns.

    Args:
        images: numpy array of shape (num_images, height, width, channels).
            An array with fewer than four dimensions is treated as a single
            image. Each image is converted with ``cv2.COLOR_BGR2RGB`` before
            drawing, so 3-channel BGR data is expected.
        M: Number of grid rows.
        N: Number of grid columns.
        title: Figure title.
        figsize: Base figure size in inches. A single-row grid is drawn
            ``figsize`` wide and ``figsize // 4`` tall, a single-column grid
            the other way round, anything else as a square.

    Grid cells beyond the number of images are left blank.
    """
    # Never let the short side collapse to 0 inches for small figsize values.
    short_side = max(figsize // 4, 1)
    if M == 1:
        width, height = figsize, short_side
    elif N == 1:
        width, height = short_side, figsize
    else:
        width, height = figsize, figsize

    # squeeze=False always yields a 2-D array of axes, so the 1x1, 1xN and
    # Mx1 layouts need no special-case indexing.
    fig, axes = plt.subplots(M, N, figsize=(width, height), squeeze=False)

    if images.ndim < 4:
        images = np.expand_dims(images, axis=0)

    fig.suptitle(title)
    # axes.flat walks the grid row by row, i.e. index == row * N + col.
    for index, ax in enumerate(axes.flat):
        if index < images.shape[0]:
            ax.imshow(cv2.cvtColor(images[index], cv2.COLOR_BGR2RGB))
        ax.axis('off')
    plt.tight_layout()
    plt.show()
    # Close this figure explicitly. Calling plt.clf() here would create a new
    # empty current figure once show() has released the old one, which then
    # appears as a stray "<Figure ... with 0 Axes>" output.
    plt.close(fig)

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        A numpy scalar for scalar input, otherwise an array shaped like ``z``,
        with values in [0, 1].

    The result is computed from ``exp(-|z|)``, which never exceeds 1, so
    large-magnitude inputs cannot overflow the exponential.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + e^-z).  For z < 0: e^z / (1 + e^z).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a numpy scalar.
    return result[()] if result.ndim == 0 else result

def compute_loss(y_true, y_pred):
    """Return the mean binary cross-entropy of predictions against labels.

    Args:
        y_true: Array of 0/1 targets.
        y_pred: Array of predicted probabilities, same shape as ``y_true``.

    Predictions are clipped to [1e-15, 1 - 1e-15] so neither logarithm is
    evaluated at zero.
    """
    tiny = 1e-15
    probs = np.clip(y_pred, tiny, 1-tiny)
    positive_term = y_true * np.log(probs)
    negative_term = (1-y_true) * np.log(1-probs)
    return -np.mean(positive_term + negative_term)


class LogisticRegressionOvA:
    """Multi-class logistic regression trained one-vs-all.

    One independent binary classifier is fitted per class with full-batch
    gradient descent; prediction picks the class with the largest score.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate  # gradient-descent step size
        self.epochs = epochs                # full-batch updates per class
        self.weights = None                 # (num_classes, num_features) once fitted
        self.classes = None                 # class indices 0..num_classes-1 once fitted
        self.bias = None                    # (num_classes,) once fitted

    def fit(self, X, y, num_classes):
        """
        Train logistic regression for each class using one-vs-all approach.

        Args:
            X: Feature matrix of shape (num_samples, num_features).
            y: Integer labels in ``range(num_classes)``, one per sample.
            num_classes: Number of binary classifiers to train.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        num_samples, num_features = X.shape
        self.weights = np.zeros((num_classes, num_features))
        self.bias = np.zeros(num_classes)
        self.classes = np.arange(num_classes)

        scale = 1 / num_samples
        for class_idx in range(num_classes):
            # Binary target: 1 for this class, 0 for every other class.
            y_binary = (y == class_idx).astype(int)
            for _ in range(self.epochs):
                linear_model = np.dot(X, self.weights[class_idx]) + self.bias[class_idx]
                y_pred = sigmoid(linear_model)

                # Gradient of the mean cross-entropy w.r.t. weights and bias.
                error = y_pred - y_binary
                dw = scale * np.dot(X.T, error)
                db = scale * np.sum(error)

                # Update weights
                self.weights[class_idx] -= self.learning_rate * dw
                self.bias[class_idx] -= self.learning_rate * db

    def predict(self, X):
        """
        Predict class labels using one-vs-all approach.

        Args:
            X: Feature matrix of shape (num_samples, num_features).

        Returns:
            Array of shape (num_samples,) holding the index of the
            highest-scoring class for each sample.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("predict() called before fit()")
        # The sigmoid is monotonic, so argmax over raw logits selects the same
        # class, but without the ties that appear once several sigmoids
        # saturate to exactly 1.0 in floating point.
        logits = np.dot(X, self.weights.T) + self.bias
        return np.argmax(logits, axis=1)

def load_images_from_folder(folder_path, class_names=None):
    """Load one sub-folder of images per class as flattened feature vectors.

    Args:
        folder_path: Directory containing one sub-directory per class.
        class_names: Sub-directory names in label order; the position of a
            name is its integer label. Defaults to the 11 food categories
            used by this notebook.

    Returns:
        ``(images, labels)``: ``images`` holds one float32 row per image
        (resized to IMG_SIZE x IMG_SIZE, flattened, scaled to [0, 1]);
        ``labels`` holds the matching class indices.

    Files that OpenCV cannot decode are skipped with a printed notice.
    """
    if class_names is None:
        class_names = ["Bread", "Dairy product", "Dessert", "Egg", "Fried food", "Meat",
                       "Noodles-Pasta", "Rice", "Seafood", "Soup", "Vegetable-Fruit"]
    images = []
    labels = []
    print('Loading datasets...')
    for class_id, class_name in enumerate(tqdm(class_names)):
        for img_path in natsorted(glob(f"{folder_path}/{class_name}/*")):  # Modify extension if needed
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising;
                # without this check cv2.resize would crash on the first
                # non-image file matched by the '*' pattern.
                print(f"Skipping unreadable file: {img_path}")
                continue
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            images.append(np.float32(img.flatten())/255.0)
            labels.append(class_id)
    return np.array(images), np.array(labels)

# Load the food11 images from Drive as flattened, [0, 1]-scaled feature rows.
X, y = load_images_from_folder("/content/drive/My Drive/ES670MM/dataset/C/food11/")
# Hold out 10% for testing; stratify=y keeps the class proportions in both splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# Fit the scaler on the training split only, then apply the same transform
# to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print('\n Dataset shape: ', X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Train one binary logistic-regression classifier per class (one-vs-all).
model = LogisticRegressionOvA(learning_rate=0.01, epochs=1000)
model.fit(X_train, y_train, NUM_CLASSES)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Loading datasets...

100%|██████████| 11/11 [01:19<00:00,  7.26s/it]

 Dataset shape:  (2671, 3072) (297, 3072) (2671,) (297,)
Model Accuracy: 0.26

import numpy as np

def display_class_weights(weight_tensor, img_dim=32, n_classes=11):
    """
    Show each class's weight vector as an image tile.

    Args:
        weight_tensor: Indexable by class; each entry is a flat vector of
            ``img_dim * img_dim * 3`` weights.
        img_dim: Side length of the square image the weights correspond to.
        n_classes: Number of leading entries of ``weight_tensor`` to draw.

    Every vector is reshaped to (img_dim, img_dim, 3), min-max normalized to
    [0, 255] independently of the other classes, and drawn with
    ``show_image_grid`` on a grid four tiles wide.
    """
    class_images = []

    for cls in range(n_classes):
        # reshape the flat weight vector into (H, W, C)
        img = weight_tensor[cls].reshape((img_dim, img_dim, 3))

        # scale pixels to cover the full 0-255 range; the small constant
        # avoids dividing by zero when all weights are equal
        min_val, max_val = img.min(), img.max()
        norm = (img - min_val) / (max_val - min_val + 1e-8)
        class_images.append((norm * 255).astype(np.uint8))

    # Four tiles per row, with as many rows as needed so no class is dropped
    # (ceil division; gives the familiar 3 x 4 grid for 11 classes).
    cols = 4
    rows = max(1, (n_classes + cols - 1) // cols)
    img_array = np.stack(class_images, axis=0)

    show_image_grid(
        img_array,
        rows,
        cols,
        title="Per-Class Weight Maps",
        figsize=10
    )


# Example call
# Draws the weights of the `model` trained above, one tile per class.
display_class_weights(model.weights, IMG_SIZE, NUM_CLASSES)

<Figure size 640x480 with 0 Axes>

def image_lookup_fn(mean_images, query_image, mean_img_classes):
    """Return the class whose reference vector best matches the query image.

    Args:
        mean_images: 2-D array-like with one flattened reference image (or
            weight vector) per row.
        query_image: Image array of shape (H, W) or (H, W, C) with pixel
            values on a 0-255 scale.
        mean_img_classes: Sequence giving the class for each row of
            ``mean_images``.

    Returns:
        The entry of ``mean_img_classes`` for the row with the highest cosine
        similarity to the query; the first such row wins ties.

    Raises:
        ValueError: If ``mean_images`` is empty or its row length does not
            correspond to a square image with the query's channel count.

    NOTE(review): the query is only rescaled to [0, 1]. When the reference
    rows are weights learned on StandardScaler-transformed features (as in
    this notebook), the query is not in that same feature space - confirm
    that this is intended.
    """
    references = np.asarray(mean_images)
    if references.ndim != 2 or references.shape[0] == 0:
        raise ValueError("mean_images must be a non-empty 2-D array of flattened images")

    # Derive the target side length from the reference vectors instead of
    # hard-coding 32, so the lookup follows whatever size they were built at.
    channels = query_image.shape[2] if query_image.ndim == 3 else 1
    side = int(round(np.sqrt(references.shape[1] / channels)))
    if side * side * channels != references.shape[1]:
        raise ValueError(
            f"reference length {references.shape[1]} does not match a square "
            f"image with {channels} channel(s)"
        )

    if query_image.shape[:2] != (side, side):
        query_image = cv2.resize(query_image, (side, side))
    query_flat = query_image.flatten().astype(np.float32) / 255.0

    # Unit-normalize both sides so the dot product is the cosine similarity;
    # the small constant guards against all-zero vectors.
    query_flat /= (np.linalg.norm(query_flat) + 1e-8)
    ref_norms = np.linalg.norm(references, axis=1) + 1e-8
    scores = (references / ref_norms[:, None]).dot(query_flat)

    return mean_img_classes[int(np.argmax(scores))]

search_images = []
search_img_classes = []

# Read every file in the search_images folder, in natural sort order.
for files in tqdm(natsorted(glob('/content/drive/My Drive/ES670MM/dataset/A/images/search_images/*'))):
    search_images.append(cv2.imread(files, 1))  # flag 1 is cv2.IMREAD_COLOR
    # Label = the part of the file name (without extension) before the first underscore.
    search_img_classes.append(os.path.splitext(os.path.basename(files))[0].split('_')[0])

# NOTE(review): stacking assumes every file decoded successfully and all
# images share one shape - confirm for this dataset.
search_images = np.array(search_images)
show_image_grid(search_images, 10, 10, 'Search Images', figsize=8)

100%|██████████| 110/110 [00:02<00:00, 50.32it/s]

<Figure size 640x480 with 0 Axes>

pred_classes = []
cluster_images = {}

# Assign every search image to the class whose weight vector it matches best,
# and group the images by predicted class.
for query_image in tqdm(search_images):
    pred_classes.append(image_lookup_fn(model.weights, query_image, np.arange(NUM_CLASSES)))
    cluster_images.setdefault(pred_classes[-1], []).append(query_image)

# Show each cluster in rows of at most 10 images. The row count is the
# ceiling of count / columns so the grid always has room for every image
# (count // 10 + 1 rows by count % 10 + 1 columns does not: 10 images would
# get a 2 x 1 grid and only two of them would be drawn).
for class_name, img_lst in cluster_images.items():
    img_lst = np.array(img_lst)
    N = min(img_lst.shape[0], 10)
    M = (img_lst.shape[0] + N - 1) // N
    show_image_grid(img_lst, M, N, f'Search Class: {class_name}', figsize=8)

100%|██████████| 110/110 [00:00<00:00, 1998.36it/s]

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

def image_to_graph(image):
    """Build an 8-connected pixel graph from a single-channel image.

    Args:
        image: 2-D array of pixel intensities (unpacking ``image.shape`` into
            height and width fails for anything else).

    Returns:
        An ``nx.Graph`` with one node per pixel, keyed ``(row, col)`` and
        carrying an ``intensity`` attribute. Each pixel is joined to its up to
        eight neighbours by an edge of weight ``exp(-|difference| / 255)``,
        so similar neighbours get weights close to 1.
    """
    image = np.asarray(image)
    height, width = image.shape
    # Compute differences in float. On the raw uint8 pixels, both the
    # subtraction and the negation wrap around modulo 256 (numpy reports this
    # as "overflow encountered in scalar subtract/negative"), which silently
    # produces wrong edge weights.
    pixels = image.astype(np.float64)
    G = nx.Graph()

    # Nodes are added row by row, so G.nodes() iterates in row-major order.
    for y in range(height):
        for x in range(width):
            G.add_node((y, x), intensity=image[y, x])

    offsets = [(dy, dx) for dy in (-1, 0, 1) for dx in (-1, 0, 1) if (dy, dx) != (0, 0)]
    for y in range(height):
        for x in range(width):
            for dy, dx in offsets:
                row, col = y + dy, x + dx
                if 0 <= row < height and 0 <= col < width:
                    weight = np.exp(-abs(pixels[y, x] - pixels[row, col]) / 255.0)
                    G.add_edge((y, x), (row, col), weight=weight)

    return G

def pagerank(G, alpha=0.85, weight='weight', max_iter=100, tol=1e-6):
    """Compute weighted PageRank scores by power iteration.

    Args:
        G: Undirected graph. Only ``len(G)``, iteration over nodes and
            ``G[node]`` (a mapping neighbour -> edge-attribute dict) are used,
            so an ``nx.Graph`` or an equivalent dict-of-dicts both work.
        alpha: Damping factor; each node receives ``(1 - alpha) / n`` plus
            ``alpha`` times the contributions of its neighbours.
        weight: Edge-attribute key holding the edge weight; edges without it
            count as 1.0.
        max_iter: Maximum number of iterations.
        tol: Stop once the summed absolute change of all scores in one
            iteration falls below this value.

    Returns:
        Dict mapping each node to its score (empty for an empty graph).
    """
    num_nodes = len(G)
    if num_nodes == 0:
        return {}

    # Total edge weight leaving each node. It depends only on the graph, so
    # compute it once here rather than for every edge in every iteration.
    outgoing_total = {
        node: sum(data.get(weight, 1.0) for data in G[node].values())
        for node in G
    }

    # Start from a uniform distribution.
    page_rank = {node: 1.0/num_nodes for node in G}
    teleport = (1 - alpha)/num_nodes

    for _ in range(max_iter):
        total_change = 0
        new_rank = {}

        for node in G:
            # Each neighbour passes on its score in proportion to the share
            # of its outgoing weight that sits on the edge to this node.
            neighbor_contributions = 0.0
            for neighbor in G[node]:
                total = outgoing_total[neighbor]
                if total > 0:  # Avoid division by zero
                    edge_weight = G[neighbor][node].get(weight, 1.0)
                    neighbor_contributions += page_rank[neighbor] * edge_weight / total

            new_rank[node] = teleport + alpha * neighbor_contributions
            total_change += abs(new_rank[node] - page_rank[node])

        # Update rankings and check convergence
        page_rank = new_rank
        if total_change < tol:
            break
    return page_rank

def segment_using_pagerank(G, image_shape, foreground_percentile=70):
    """Segment an image by thresholding the PageRank scores of its pixel graph.

    Args:
        G: Pixel graph whose ``G.nodes()`` order matches a row-major walk of
            the image (the scores are reshaped in that order).
        image_shape: Shape to give the score map; must hold one entry per node.
        foreground_percentile: Pixels scoring strictly above this percentile
            are marked as foreground. The default of 70 keeps roughly the top
            30% of pixels.

    Returns:
        ``(pr_values, segmentation)``: the score map min-max normalized to
        [0, 1], and a uint8 mask that is 255 for foreground and 0 elsewhere.
    """
    pr = pagerank(G, alpha=0.85, weight='weight')  # Compute PageRank

    pr_values = np.array([pr[node] for node in G.nodes()]).reshape(image_shape)

    # Normalize scores. When every pixel has the same score the range is zero;
    # use an all-zero map instead of dividing 0 by 0 and propagating NaNs.
    lowest = pr_values.min()
    value_range = pr_values.max() - lowest
    if value_range > 0:
        pr_values = (pr_values - lowest) / value_range
    else:
        pr_values = np.zeros_like(pr_values)

    threshold = np.percentile(pr_values, foreground_percentile)
    segmentation = (pr_values > threshold).astype(np.uint8) * 255

    return pr_values, segmentation

def load_image_page_rank(image_path, size=(128, 128)):
    """Load an image, resize it and convert it to grayscale.

    Args:
        image_path: Path of the image file to read.
        size: Target size handed to ``cv2.resize``.

    Returns:
        The resized single-channel grayscale image.

    Raises:
        OSError: If the file cannot be read or decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure; report the offending path here
        # rather than letting cv2.resize fail on None further down.
        raise OSError(f"Could not read image: {image_path}")
    img = cv2.resize(img, size)
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

src_images = []

# Load every search image as a resized grayscale array (128x128 by default).
for files in tqdm(natsorted(glob('/content/drive/My Drive/ES670MM/dataset/A/images/search_images/*'))):
    src_images.append(load_image_page_rank(files))

# Keep images 11..21 for the segmentation demo.
# NOTE(review): that is 11 images, but the 3x3 grid below only has room for 9.
src_images = np.array(src_images)[11:22]
# Repeat the single gray channel three times so the grid helper, which
# converts BGR to RGB, receives 3-channel images.
show_image_grid(np.tile(np.expand_dims(src_images, axis=-1), (1, 1, 3)), 3, 3, 'Search Images', figsize=8)

100%|██████████| 110/110 [00:00<00:00, 140.03it/s]

<Figure size 640x480 with 0 Axes>

# For each selected image: build its pixel graph, rank the pixels with
# PageRank, and show the original, the score map and the mask side by side.
for img in src_images:
    G = image_to_graph(img)
    pr_values, segmentation = segment_using_pagerank(G, img.shape)

    # Visualization
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 3, 1)
    plt.grid(False)  # switch grid lines off for this panel
    plt.imshow(img, cmap='gray')
    plt.title("Original Image")

    plt.subplot(1, 3, 2)
    plt.imshow(pr_values, cmap='jet')
    plt.grid(False)
    plt.title("PageRank Scores")

    plt.subplot(1, 3, 3)
    plt.imshow(segmentation, cmap='gray')
    plt.grid(False)
    plt.title("Segmented Image")

    plt.show()

<ipython-input-11-976354ea1260>:16: RuntimeWarning: overflow encountered in scalar negative
  weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0)
<ipython-input-11-976354ea1260>:16: RuntimeWarning: overflow encountered in scalar subtract
  weight = np.exp(-abs(image[y, x] - image[neighbor]) / 255.0)

Importing Libraries

Q1: Food Classification using Logistic Regression [7 Marks]

Part A: Learning Model [3 marks]

Part B: Weights Visualization [2 marks]

Part C: Image Retrieval [2 marks]

1. Learned Patterns vs. Average Looks

2. Smart Feature Focus vs. Simple Pixel Matching

Q2: Image Segmentation using Page Rank [3 Marks]