Hash Functions, Collision Resolution Schemes
A hash table is a data structure that implements an associative array, a structure that can map keys to values. It uses a hash function to compute an index into an array of buckets or slots.
Enter values to see how they are hashed into the table:
A good hash function should distribute keys uniformly across the hash table to minimize collisions.
#include <iostream>
#include <string>
#include <functional>
class HashFunctions {
public:
// Division method
static size_t divisionHash(int key, size_t tableSize) {
return key % tableSize;
}
// Multiplication method
static size_t multiplicationHash(int key, size_t tableSize) {
const double A = 0.6180339887; // (sqrt(5) - 1) / 2
double temp = key * A;
temp = temp - floor(temp); // fractional part
return static_cast<size_t>(floor(tableSize * temp));
}
// String hash function
static size_t stringHash(const std::string& key, size_t tableSize) {
size_t hash = 0;
for (char c : key) {
hash = (hash * 31 + c) % tableSize;
}
return hash;
}
// Polynomial rolling hash
static size_t polynomialHash(const std::string& key, size_t tableSize) {
const size_t base = 256;
const size_t mod = 1000000007;
size_t hash = 0;
size_t power = 1;
for (char c : key) {
hash = (hash + (c * power) % mod) % mod;
power = (power * base) % mod;
}
return hash % tableSize;
}
// STL hash function wrapper
template<typename T>
static size_t stlHash(const T& key, size_t tableSize) {
std::hash<T> hasher;
return hasher(key) % tableSize;
}
};
Chaining handles collisions by maintaining a linked list at each hash table slot.
Watch how chaining handles collisions:
#include <iostream>
#include <vector>
#include <list>
#include <string>
template<typename K, typename V>
class HashTableChaining {
private:
std::vector<std::list<std::pair<K, V>>> table;
size_t tableSize;
size_t numElements;
size_t hash(const K& key) const {
std::hash<K> hasher;
return hasher(key) % tableSize;
}
public:
HashTableChaining(size_t size = 7) : tableSize(size), numElements(0) {
table.resize(tableSize);
}
void insert(const K& key, const V& value) {
size_t index = hash(key);
auto& chain = table[index];
// Check if key already exists
for (auto& pair : chain) {
if (pair.first == key) {
pair.second = value; // Update existing
return;
}
}
// Insert new key-value pair
chain.emplace_back(key, value);
numElements++;
// Check load factor and resize if necessary
if (static_cast<double>(numElements) / tableSize > 0.75) {
resize();
}
}
bool search(const K& key, V& value) const {
size_t index = hash(key);
const auto& chain = table[index];
for (const auto& pair : chain) {
if (pair.first == key) {
value = pair.second;
return true;
}
}
return false;
}
bool remove(const K& key) {
size_t index = hash(key);
auto& chain = table[index];
for (auto it = chain.begin(); it != chain.end(); ++it) {
if (it->first == key) {
chain.erase(it);
numElements--;
return true;
}
}
return false;
}
void display() const {
for (size_t i = 0; i < tableSize; ++i) {
std::cout << "Bucket " << i << ": ";
for (const auto& pair : table[i]) {
std::cout << "[" << pair.first << ":" << pair.second << "] ";
}
std::cout << std::endl;
}
}
private:
void resize() {
auto oldTable = std::move(table);
tableSize *= 2;
table.clear();
table.resize(tableSize);
numElements = 0;
for (const auto& chain : oldTable) {
for (const auto& pair : chain) {
insert(pair.first, pair.second);
}
}
}
};
Open addressing resolves collisions by finding alternative locations within the hash table itself.
Watch how linear probing handles collisions:
#include <iostream>
#include <vector>
#include <optional>
template<typename K, typename V>
class HashTableOpenAddressing {
private:
struct Entry {
K key;
V value;
bool deleted = false;
Entry() = default;
Entry(const K& k, const V& v) : key(k), value(v) {}
};
std::vector<std::optional<Entry>> table;
size_t tableSize;
size_t numElements;
size_t hash(const K& key) const {
std::hash<K> hasher;
return hasher(key) % tableSize;
}
// Linear probing
size_t linearProbe(const K& key) const {
size_t index = hash(key);
size_t originalIndex = index;
while (table[index].has_value() &&
(table[index]->deleted || table[index]->key != key)) {
index = (index + 1) % tableSize;
if (index == originalIndex) break; // Table full
}
return index;
}
// Quadratic probing
size_t quadraticProbe(const K& key, size_t attempt) const {
size_t index = hash(key);
return (index + attempt * attempt) % tableSize;
}
public:
HashTableOpenAddressing(size_t size = 7) : tableSize(size), numElements(0) {
table.resize(tableSize);
}
bool insert(const K& key, const V& value) {
if (static_cast<double>(numElements) / tableSize >= 0.5) {
resize();
}
size_t index = linearProbe(key);
if (!table[index].has_value() || table[index]->deleted) {
table[index] = Entry(key, value);
numElements++;
return true;
} else if (table[index]->key == key) {
table[index]->value = value; // Update existing
return true;
}
return false; // Table full
}
bool search(const K& key, V& value) const {
size_t index = linearProbe(key);
if (table[index].has_value() &&
!table[index]->deleted &&
table[index]->key == key) {
value = table[index]->value;
return true;
}
return false;
}
bool remove(const K& key) {
size_t index = linearProbe(key);
if (table[index].has_value() &&
!table[index]->deleted &&
table[index]->key == key) {
table[index]->deleted = true;
numElements--;
return true;
}
return false;
}
void display() const {
for (size_t i = 0; i < tableSize; ++i) {
std::cout << "Index " << i << ": ";
if (table[i].has_value() && !table[i]->deleted) {
std::cout << "[" << table[i]->key << ":" << table[i]->value << "]";
} else if (table[i].has_value() && table[i]->deleted) {
std::cout << "[DELETED]";
} else {
std::cout << "[EMPTY]";
}
std::cout << std::endl;
}
}
private:
void resize() {
auto oldTable = std::move(table);
tableSize *= 2;
table.clear();
table.resize(tableSize);
numElements = 0;
for (const auto& entry : oldTable) {
if (entry.has_value() && !entry->deleted) {
insert(entry->key, entry->value);
}
}
}
};
Operation | Average Case | Worst Case | Notes |
---|---|---|---|
Insert | O(1) | O(n) | With good hash function |
Search | O(1) | O(n) | Depends on load factor |
Delete | O(1) | O(n) | Similar to search |
Space | O(n) | Plus overhead for chains/probing |
#include <iostream>
#include <string>
int main() {
std::cout << "=== Unit 2: Hash Tables Demo ===" << std::endl;
// Chaining hash table demo
std::cout << "\n--- Chaining Hash Table ---" << std::endl;
HashTableChaining<std::string, int> chainTable(7);
chainTable.insert("apple", 5);
chainTable.insert("banana", 3);
chainTable.insert("orange", 8);
chainTable.insert("grape", 12);
chainTable.display();
int value;
if (chainTable.search("banana", value)) {
std::cout << "Found banana: " << value << std::endl;
}
// Open addressing hash table demo
std::cout << "\n--- Open Addressing Hash Table ---" << std::endl;
HashTableOpenAddressing<std::string, int> openTable(7);
openTable.insert("cat", 4);
openTable.insert("dog", 6);
openTable.insert("bird", 2);
openTable.insert("fish", 1);
openTable.display();
if (openTable.search("dog", value)) {
std::cout << "Found dog: " << value << std::endl;
}
return 0;
}