Hi. Was trying to understand how a hash table works. The below is my attempt to implement the same in C++. I hope to improve this with further details continuously.
I am using Separate chaining here. So the hash table is implemented as a vector of lists. Each list shall grow when collisions occur. When the load factor (total number of elements / total number of buckets) goes beyond a certain value, we can rehash and recreate the hash table. This will reduce the time taken for searching / removing elements.
TODO: I need to use a better hash function here since a simple mod operation is not of much use here.
The following is the code I wrote:
Snippet
#include <iostream>
#include <list>
#include <vector>
#include <string>
using namespace std;
class Hashtable
{
vector<list<pair<int, string>>> hashTable;
int count = 0; // To make getting the isEmpty easier
int bucketCount = 11;
public:
bool isEmpty() const;
void insert(int key, string value);
string getValue(int key);
void remove(int key);
void printTable();
Hashtable()
{
hashTable.resize(bucketCount);
}
private:
void rehash();
float getLoadFactor();
int getHash(int key);
};
bool Hashtable::isEmpty() const
{
return count == 0;
}
void Hashtable::insert(int key, string value)
{
if (getLoadFactor() > 0.9f) // We can initiate the rehash after the load factor goes beyond 0.9
{
rehash();
}
auto &bucket = hashTable[getHash(key)]; // Note that the bucket is captured as a reference, else the values will not be updated
auto iterator = bucket.begin();
for (; iterator != bucket.end(); iterator++)
{
std::pair<int, string> & kvp = *iterator;
if (kvp.first == key)
{
cout << "Key already found, updating value from " << kvp.second << " to " << value << endl;
kvp.second = value;
return;
}
}
// Key was not found earlier, hence adding new kvp
bucket.push_back(make_pair(key,value));
count++;
}
string Hashtable::getValue(int key)
{
auto bucket = hashTable[getHash(key)];
for (auto kvp : bucket)
{
if (kvp.first == key) return kvp.second;
}
cout << __FUNCTION__ << ": Key not found" << endl;
return "";
}
void Hashtable::remove(int key)
{
auto &bucket = hashTable[getHash(key)]; // Note that the bucket is captured as a reference, else the values will not be updated
auto iterator = bucket.begin();
for (; iterator != bucket.end(); iterator++)
{
auto kvp = *iterator;
if (kvp.first == key)
{
bucket.erase(iterator);
count--;
return;
}
}
cout << __FUNCTION__ << ": Key not found" << endl;
}
void Hashtable::printTable()
{
cout << "The hash table has " << count << " elements\n\n";
for (auto bucket : hashTable)
{
for (auto kvp : bucket)
{
cout << kvp.first << " : " << kvp.second << endl;
}
}
}
void Hashtable::rehash()
{
cout << "Rehashing\n\n";
int newSize = (hashTable.size() * 2) + 1;
vector<list<pair<int, string>>> tempHashtable(newSize);
bucketCount = newSize; // Increase the bucket size and change it to twice the original value
for (auto bucket : hashTable)
{
for (auto kvp : bucket)
{
tempHashtable[getHash(kvp.first)].push_back(make_pair(kvp.first, kvp.second));
}
}
hashTable = tempHashtable;
}
float Hashtable::getLoadFactor()
{
return (count * 1.0f ) / bucketCount;
}
int Hashtable::getHash(int key)
{
// Let us use a very simple hash function for now
return key % bucketCount;
}
int main()
{
Hashtable ht;
ht.insert(1, "1");
ht.insert(100, "2");
ht.insert(33, "3");
ht.insert(5, "4");
ht.insert(45, "5");
ht.insert(80, "6");
ht.insert(0, "7");
ht.insert(42, "8");
ht.insert(2, "10");
ht.insert(334, "11");
ht.insert(134, "12");
ht.insert(124, "13");
ht.insert(511, "14");
ht.insert(1, "9");
ht.printTable();
string val = ht.getValue(1);
cout << "Value for key " << 1 << " is " << val << endl;
val = ht.getValue(1278);
if(val != "")
cout << "Value for key " << 1278 << " is " << val << endl;
ht.remove(1);
ht.remove(1);
}
Comentarios