!pip install faiss # Install the faiss library

import faiss # Import the faiss library

d = 128  # Dimension of vectors
M = 32    # Number of neighbors in each layer
index = faiss.IndexHNSWFlat(d, M)

xb = ...  # Your dataset of vectors (numpy array)
index.add(xb)

efConstruction = 200  # Number of neighbors considered during index construction
efSearch = 50         # Number of neighbors considered during search
index.hnsw.efConstruction = efConstruction
index.hnsw.efSearch = efSearch

xq = ...  # Your query set (numpy array)
k = 5     # Number of nearest neighbors to retrieve
distances, indices = index.search(xq[:1000], k)

import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

class HNSWGraph:
    def __init__(self, M, ef_construction, ef_search):
        self.M = M  # Maximum number of neighbors
        self.ef_construction = ef_construction  # Construction time parameter
        self.ef_search = ef_search  # Search time parameter
        self.data = []  # Data points
        self.graph = {}  # Graph structure

    def add_data_point(self, data_point):
        self.data.append(data_point)
        self.graph[tuple(data_point.flatten())] = []

    def construct_graph(self):
        for i, data_point in enumerate(self.data):
            self._add_neighbors(tuple(data_point.flatten()), self.ef_construction)

    def _add_neighbors(self, data_point, ef):
        neighbors = self._get_neighbors(data_point, ef)
        self.graph[data_point] = neighbors

    def _get_neighbors(self, data_point, ef):
        distances = [(np.linalg.norm(np.array(data_point) - other), tuple(other.flatten())) for other in self.data if not np.array_equal(np.array(data_point), other)]
        distances.sort()
        return [x[1] for x in distances[:ef]]

    def search(self, query, k):
        query_tuple = tuple(query.flatten())
        if query_tuple not in self.graph:
            self.add_data_point(query)
            self._add_neighbors(query_tuple, self.ef_construction)

        visited = set()
        candidates = []
        self._search(query_tuple, k, visited, candidates, self.ef_search)

        # Ensure only k neighbors are returned
        unique_candidates = list(set(candidates))
        return sorted(unique_candidates, key=lambda x: np.linalg.norm(np.array(query) - np.array(x)))[:k]

    def _search(self, query, k, visited, candidates, ef):
        if len(candidates) >= k:
            return
        for neighbor in self.graph[query]:
            if neighbor not in visited:
                visited.add(neighbor)
                distances = [(np.linalg.norm(np.array(query) - np.array(other)), tuple(other.flatten())) for other in self.data if not np.array_equal(np.array(query), other)]
                distances.sort()
                new_candidates = [x[1] for x in distances[:ef]]
                candidates.extend(new_candidates)
                self._search(neighbor, k, visited, candidates, ef)

    def get_neighbors(self, data_point):
        return self.graph[data_point]

    def visualize_graph(self):
        G = nx.Graph()
        for node, neighbors in self.graph.items():
            for neighbor in neighbors:
                if node != neighbor:
                    G.add_edge(node, neighbor)

        pos = {node: (node[0], node[1]) for node in G.nodes()}
        plt.figure(figsize=(12, 10))
        nx.draw(G, pos, with_labels=False, node_size=30, node_color='blue', edge_color='gray')
        plt.title("HNSW Graph")
        plt.show()

    def visualize_search(self, query, neighbors):
        G = nx.Graph()
        # Add edges between the query point and its k-nearest neighbors
        query_pos = tuple(query.flatten())
        G.add_node(query_pos)
        for neighbor in neighbors:
            neighbor_pos = tuple(neighbor.flatten()) if isinstance(neighbor, np.ndarray) else neighbor
            G.add_node(neighbor_pos)
            G.add_edge(query_pos, neighbor_pos)

        pos = {node: (node[0], node[1]) for node in G.nodes()}
        plt.figure(figsize=(12, 10))
        nx.draw(G, pos, with_labels=False, node_size=100, node_color='blue', edge_color='gray')

        # Plot query point and nearest neighbors
        plt.scatter([query_pos[0]], [query_pos[1]], color='red', s=500, edgecolor='black', label='Query Point')
        plt.scatter([pos[0] for pos in G.nodes() if pos != query_pos], [pos[1] for pos in G.nodes() if pos != query_pos], color='green', s=500, edgecolor='black', label='Nearest Neighbors')

        plt.legend()
        plt.title("HNSW Search")
        plt.show()

# Create a sample dataset
np.random.seed(0)
data = np.random.rand(100, 2)  # Reduced dimension to 2 for visualization

# Create an HNSW graph
graph = HNSWGraph(M=10, ef_construction=50, ef_search=10)

# Add data points to the graph
for point in data:
    graph.add_data_point(point)

# Construct the graph
graph.construct_graph()

# Search for k-nearest neighbors
query = np.random.rand(1, 2)[0]  # Reduced dimension to 2 for visualization
k = 5
neighbors = graph.search(query, k)

# Visualize the graph
graph.visualize_graph()

# Visualize the search results
graph.visualize_search(query, neighbors)

neighbors

[(0.31856895245132366, 0.6674103799636817),
 (0.26455561210462697, 0.7742336894342167),
 (0.4236547993389047, 0.6458941130666561),
 (0.24875314351995803, 0.5761573344178369),
 (0.4238550485581797, 0.6063932141279244)]

Hierarchical Navigable Small World(HNSW) Graphs¶

Foundations of HNSW¶

Categories of ANN Algorithms:¶

Key Concepts:¶

𝑃(𝑙) = 1/2^𝑙¶

Creating HNSW¶

Performance and Tuning¶

Recall vs. Search Time:¶

Memory Usage:¶

Improving Memory Usage:¶

Boosting Search Speeds:¶

HNSW Using FAISS¶

HNSW From scratch¶

Hierarchical Navigable Small World(HNSW) Graphs¶

Foundations of HNSW¶

Categories of ANN Algorithms:¶

Key Concepts:¶

𝑃(𝑙) = 1/2𝑙¶

Creating HNSW¶

Performance and Tuning¶

Recall vs. Search Time:¶

Memory Usage:¶

Improving Memory Usage:¶

Boosting Search Speeds:¶

HNSW Using FAISS¶

HNSW From scratch¶

𝑃(𝑙) = 1/2^𝑙¶