Unit 2: Hash Tables & Dictionaries

Hash Functions, Collision Resolution Schemes

Duration: 6 Hours

Learning Objectives

1. Hash Table Concept

A hash table is a data structure that implements an associative array, a structure that can map keys to values. It uses a hash function to compute an index into an array of buckets or slots.

Key Concepts:

  • Hash Function: Maps keys to array indices
  • Collision: When two keys hash to the same index
  • Load Factor: Ratio of filled slots to total slots
  • Bucket: Storage location for key-value pairs

2. Interactive Hash Function Visualization

Hash Function Demo

Enter values to see how they are hashed into the table:

Hash function: h(key) = (sum of ASCII values) % table_size

3. Hash Functions

A good hash function should distribute keys uniformly across the hash table to minimize collisions.

Common Hash Functions

#include <iostream>
#include <string>
#include <functional>

class HashFunctions {
public:
    // Division method
    static size_t divisionHash(int key, size_t tableSize) {
        return key % tableSize;
    }
    
    // Multiplication method
    static size_t multiplicationHash(int key, size_t tableSize) {
        const double A = 0.6180339887; // (sqrt(5) - 1) / 2
        double temp = key * A;
        temp = temp - floor(temp); // fractional part
        return static_cast<size_t>(floor(tableSize * temp));
    }
    
    // String hash function
    static size_t stringHash(const std::string& key, size_t tableSize) {
        size_t hash = 0;
        for (char c : key) {
            hash = (hash * 31 + c) % tableSize;
        }
        return hash;
    }
    
    // Polynomial rolling hash
    static size_t polynomialHash(const std::string& key, size_t tableSize) {
        const size_t base = 256;
        const size_t mod = 1000000007;
        
        size_t hash = 0;
        size_t power = 1;
        
        for (char c : key) {
            hash = (hash + (c * power) % mod) % mod;
            power = (power * base) % mod;
        }
        
        return hash % tableSize;
    }
    
    // STL hash function wrapper
    template<typename T>
    static size_t stlHash(const T& key, size_t tableSize) {
        std::hash<T> hasher;
        return hasher(key) % tableSize;
    }
};

4. Collision Resolution - Chaining

Chaining handles collisions by maintaining a linked list at each hash table slot.

Chaining Collision Resolution

Watch how chaining handles collisions:

Chaining Implementation

#include <iostream>
#include <vector>
#include <list>
#include <string>

template<typename K, typename V>
class HashTableChaining {
private:
    std::vector<std::list<std::pair<K, V>>> table;
    size_t tableSize;
    size_t numElements;
    
    size_t hash(const K& key) const {
        std::hash<K> hasher;
        return hasher(key) % tableSize;
    }
    
public:
    HashTableChaining(size_t size = 7) : tableSize(size), numElements(0) {
        table.resize(tableSize);
    }
    
    void insert(const K& key, const V& value) {
        size_t index = hash(key);
        auto& chain = table[index];
        
        // Check if key already exists
        for (auto& pair : chain) {
            if (pair.first == key) {
                pair.second = value; // Update existing
                return;
            }
        }
        
        // Insert new key-value pair
        chain.emplace_back(key, value);
        numElements++;
        
        // Check load factor and resize if necessary
        if (static_cast<double>(numElements) / tableSize > 0.75) {
            resize();
        }
    }
    
    bool search(const K& key, V& value) const {
        size_t index = hash(key);
        const auto& chain = table[index];
        
        for (const auto& pair : chain) {
            if (pair.first == key) {
                value = pair.second;
                return true;
            }
        }
        return false;
    }
    
    bool remove(const K& key) {
        size_t index = hash(key);
        auto& chain = table[index];
        
        for (auto it = chain.begin(); it != chain.end(); ++it) {
            if (it->first == key) {
                chain.erase(it);
                numElements--;
                return true;
            }
        }
        return false;
    }
    
    void display() const {
        for (size_t i = 0; i < tableSize; ++i) {
            std::cout << "Bucket " << i << ": ";
            for (const auto& pair : table[i]) {
                std::cout << "[" << pair.first << ":" << pair.second << "] ";
            }
            std::cout << std::endl;
        }
    }
    
private:
    void resize() {
        auto oldTable = std::move(table);
        tableSize *= 2;
        table.clear();
        table.resize(tableSize);
        numElements = 0;
        
        for (const auto& chain : oldTable) {
            for (const auto& pair : chain) {
                insert(pair.first, pair.second);
            }
        }
    }
};

5. Collision Resolution - Open Addressing

Open addressing resolves collisions by finding alternative locations within the hash table itself.

Open Addressing (Linear Probing)

Watch how linear probing handles collisions:

Open Addressing Implementation

#include <iostream>
#include <vector>
#include <optional>

template<typename K, typename V>
class HashTableOpenAddressing {
private:
    struct Entry {
        K key;
        V value;
        bool deleted = false;
        
        Entry() = default;
        Entry(const K& k, const V& v) : key(k), value(v) {}
    };
    
    std::vector<std::optional<Entry>> table;
    size_t tableSize;
    size_t numElements;
    
    size_t hash(const K& key) const {
        std::hash<K> hasher;
        return hasher(key) % tableSize;
    }
    
    // Linear probing
    size_t linearProbe(const K& key) const {
        size_t index = hash(key);
        size_t originalIndex = index;
        
        while (table[index].has_value() && 
               (table[index]->deleted || table[index]->key != key)) {
            index = (index + 1) % tableSize;
            if (index == originalIndex) break; // Table full
        }
        
        return index;
    }
    
    // Quadratic probing
    size_t quadraticProbe(const K& key, size_t attempt) const {
        size_t index = hash(key);
        return (index + attempt * attempt) % tableSize;
    }
    
public:
    HashTableOpenAddressing(size_t size = 7) : tableSize(size), numElements(0) {
        table.resize(tableSize);
    }
    
    bool insert(const K& key, const V& value) {
        if (static_cast<double>(numElements) / tableSize >= 0.5) {
            resize();
        }
        
        size_t index = linearProbe(key);
        
        if (!table[index].has_value() || table[index]->deleted) {
            table[index] = Entry(key, value);
            numElements++;
            return true;
        } else if (table[index]->key == key) {
            table[index]->value = value; // Update existing
            return true;
        }
        
        return false; // Table full
    }
    
    bool search(const K& key, V& value) const {
        size_t index = linearProbe(key);
        
        if (table[index].has_value() && 
            !table[index]->deleted && 
            table[index]->key == key) {
            value = table[index]->value;
            return true;
        }
        
        return false;
    }
    
    bool remove(const K& key) {
        size_t index = linearProbe(key);
        
        if (table[index].has_value() && 
            !table[index]->deleted && 
            table[index]->key == key) {
            table[index]->deleted = true;
            numElements--;
            return true;
        }
        
        return false;
    }
    
    void display() const {
        for (size_t i = 0; i < tableSize; ++i) {
            std::cout << "Index " << i << ": ";
            if (table[i].has_value() && !table[i]->deleted) {
                std::cout << "[" << table[i]->key << ":" << table[i]->value << "]";
            } else if (table[i].has_value() && table[i]->deleted) {
                std::cout << "[DELETED]";
            } else {
                std::cout << "[EMPTY]";
            }
            std::cout << std::endl;
        }
    }
    
private:
    void resize() {
        auto oldTable = std::move(table);
        tableSize *= 2;
        table.clear();
        table.resize(tableSize);
        numElements = 0;
        
        for (const auto& entry : oldTable) {
            if (entry.has_value() && !entry->deleted) {
                insert(entry->key, entry->value);
            }
        }
    }
};

6. Hash Table Algorithms

Insertion Algorithm (Chaining)

1 Compute hash value: index = hash(key)
2 Search for key in the chain at table[index]
3 If key exists, update value; otherwise, add new entry
4 Check load factor and resize if necessary

Search Algorithm (Open Addressing)

1 Compute initial hash value: index = hash(key)
2 If table[index] is empty, key not found
3 If table[index].key == key, return value
4 Otherwise, probe next position and repeat

7. Performance Analysis

Operation Average Case Worst Case Notes
Insert O(1) O(n) With good hash function
Search O(1) O(n) Depends on load factor
Delete O(1) O(n) Similar to search
Space O(n) Plus overhead for chains/probing

8. Complete Example Program

#include <iostream>
#include <string>

int main() {
    std::cout << "=== Unit 2: Hash Tables Demo ===" << std::endl;
    
    // Chaining hash table demo
    std::cout << "\n--- Chaining Hash Table ---" << std::endl;
    HashTableChaining<std::string, int> chainTable(7);
    
    chainTable.insert("apple", 5);
    chainTable.insert("banana", 3);
    chainTable.insert("orange", 8);
    chainTable.insert("grape", 12);
    
    chainTable.display();
    
    int value;
    if (chainTable.search("banana", value)) {
        std::cout << "Found banana: " << value << std::endl;
    }
    
    // Open addressing hash table demo
    std::cout << "\n--- Open Addressing Hash Table ---" << std::endl;
    HashTableOpenAddressing<std::string, int> openTable(7);
    
    openTable.insert("cat", 4);
    openTable.insert("dog", 6);
    openTable.insert("bird", 2);
    openTable.insert("fish", 1);
    
    openTable.display();
    
    if (openTable.search("dog", value)) {
        std::cout << "Found dog: " << value << std::endl;
    }
    
    return 0;
}