Topic Overview
Hashing
Master hash functions, hash tables, collision resolution, and hashing algorithms for efficient data storage and retrieval.
Hashing is a technique that maps data of arbitrary size to fixed-size values, enabling O(1) average-case lookup, insertion, and deletion operations.
Why This Matters in Interviews
Hashing is fundamental because it enables:
- Fast lookups: O(1) average time complexity for search operations
- Problem optimization: Many problems can be solved efficiently using hash tables
- System design: Understanding hashing is crucial for distributed systems, caching, and databases
- Algorithm design: Hash-based algorithms appear frequently in interviews
Interviewers test hashing knowledge to assess your understanding of trade-offs between time and space complexity, and your ability to choose appropriate data structures.
Core Concepts
- Hash function: Maps keys to array indices
- Hash table: Data structure using hashing for fast access
- Collision: When two keys hash to the same index
- Collision resolution: Chaining vs open addressing
- Load factor: Ratio of entries to buckets (affects performance)
- Perfect hashing: A collision-free hash function, achievable only when the key set is fixed and known in advance
- Consistent hashing: Used in distributed systems
Detailed Explanation
Hash Functions
Properties of good hash function:
- Deterministic: Same input always produces same output
- Uniform distribution: Keys distributed evenly across buckets
- Fast computation: O(1) time complexity
- Avalanche effect: Small input changes cause large output changes
Common hash functions:
// Division method
// Maps an integer key into [0, size) via the remainder operator.
// Note: JS `%` is a remainder, not a true modulo — it yields negative
// values for negative keys, which would be an invalid bucket index.
// Adding `size` before the final `%` keeps the result in range.
function hashDivision(key: number, size: number): number {
  return ((key % size) + size) % size;
}
// Multiplication method (Knuth)
// Multiplies the key by an irrational constant and uses the fractional
// part of the product to pick a bucket; distribution quality is largely
// insensitive to the choice of `size`.
function hashMultiplication(key: number, size: number): number {
  const A = 0.6180339887; // (√5 - 1) / 2, Knuth's suggested constant
  // `% 1` extracts the fractional part but is negative for negative
  // keys; normalize into [0, 1) so the floored index is always valid.
  const frac = ((key * A) % 1 + 1) % 1;
  return Math.floor(size * frac);
}
// String hashing (polynomial rolling hash)
// Evaluates the string as a polynomial in `base` modulo a large prime,
// then folds the result into a bucket index in [0, size).
function hashString(s: string, size: number): number {
  const base = 31;
  const mod = 1e9 + 7;
  let coeff = 1; // base^i (mod `mod`), advanced per character
  const total = s.split('').reduce((acc, ch) => {
    const term = (ch.charCodeAt(0) * coeff) % mod;
    coeff = (coeff * base) % mod;
    return (acc + term) % mod;
  }, 0);
  return total % size;
}
// FNV-1a hash (fast, good distribution)
// 32-bit FNV-1a: XOR each character into the hash, then multiply by the
// FNV prime modulo 2^32. The multiply must be exact 32-bit arithmetic —
// a plain JS `*` on a ~2^32 hash produces a ~2^56 intermediate, which
// exceeds the 53-bit double mantissa and silently corrupts the low
// bits. Math.imul performs the exact 32-bit multiply.
// Note: hashes UTF-16 code units (charCodeAt), not UTF-8 bytes, so
// values match the reference vectors only for ASCII input.
function hashFNV(key: string): number {
  const FNV_OFFSET_BASIS = 2166136261;
  const FNV_PRIME = 16777619;
  let hash = FNV_OFFSET_BASIS;
  for (let i = 0; i < key.length; i++) {
    hash ^= key.charCodeAt(i);
    hash = Math.imul(hash, FNV_PRIME) >>> 0; // exact unsigned 32-bit multiply
  }
  return hash >>> 0;
}
Hash Table Implementation
Chaining (Separate Chaining):
// Hash table using separate chaining: each bucket is an array of
// [key, value] pairs, and the table doubles in capacity once the load
// factor passes the threshold.
class HashTableChaining<K, V> {
  private buckets: Array<Array<[K, V]>>;
  private size: number;
  private capacity: number;
  private loadFactor: number = 0.75;

  constructor(initialCapacity: number = 16) {
    this.capacity = initialCapacity;
    this.buckets = Array.from({ length: this.capacity }, () => []);
    this.size = 0;
  }

  // Folds the stringified key into a 32-bit integer (djb2-style shift
  // multiply), then maps it into [0, capacity).
  private hash(key: K): number {
    const text = String(key);
    let h = 0;
    for (let i = 0; i < text.length; i++) {
      h = (h << 5) - h + text.charCodeAt(i);
      h |= 0; // clamp to a 32-bit integer
    }
    return Math.abs(h) % this.capacity;
  }

  // Inserts or overwrites; resizes when the load factor is exceeded.
  put(key: K, value: V): void {
    const bucket = this.buckets[this.hash(key)];
    const existing = bucket.find((pair) => pair[0] === key);
    if (existing) {
      existing[1] = value; // overwrite in place
      return;
    }
    bucket.push([key, value]);
    this.size++;
    if (this.size / this.capacity > this.loadFactor) {
      this.resize();
    }
  }

  // Returns the value for `key`, or undefined when absent.
  get(key: K): V | undefined {
    const bucket = this.buckets[this.hash(key)];
    const match = bucket.find((pair) => pair[0] === key);
    return match ? match[1] : undefined;
  }

  // Deletes the entry; returns whether anything was removed.
  remove(key: K): boolean {
    const bucket = this.buckets[this.hash(key)];
    const pos = bucket.findIndex((pair) => pair[0] === key);
    if (pos === -1) {
      return false;
    }
    bucket.splice(pos, 1);
    this.size--;
    return true;
  }

  // Doubles capacity and rehashes every entry into the new buckets.
  private resize(): void {
    const previous = this.buckets;
    this.capacity *= 2;
    this.buckets = Array.from({ length: this.capacity }, () => []);
    this.size = 0;
    for (const bucket of previous) {
      bucket.forEach(([k, v]) => this.put(k, v));
    }
  }
}
// Time: O(1) average, O(n) worst case
// Space: O(n)
Open Addressing (Linear Probing):
// Open addressing with linear probing and tombstones.
//
// Fixes over the naive version:
// - put() keeps probing past tombstones until it sees the key or a
//   never-used (null) slot, so re-inserting a key that lives further
//   down the probe chain can never create a duplicate entry.
// - All probe loops are bounded by the table capacity, so a lookup for
//   a missing key cannot spin forever once every free slot has become
//   a tombstone.
// - Tombstones count toward the load factor, so heavily-churned tables
//   still trigger a cleaning resize.
class HashTableOpenAddressing<K, V> {
  // Each slot holds an entry, null (never used), or the DELETED marker.
  private buckets: Array<[K, V] | null | symbol>;
  private capacity: number;
  private size: number;           // live entries only
  private tombstones: number = 0; // DELETED markers currently in slots
  private DELETED = Symbol('DELETED');

  constructor(initialCapacity: number = 16) {
    this.capacity = initialCapacity;
    this.buckets = new Array(this.capacity).fill(null);
    this.size = 0;
  }

  // Base slot for the key; `probe` shifts linearly for collisions.
  private hash(key: K, probe: number = 0): number {
    const keyStr = String(key);
    let hash = 0;
    for (let i = 0; i < keyStr.length; i++) {
      hash = ((hash << 5) - hash) + keyStr.charCodeAt(i);
      hash = hash & hash; // force 32-bit integer
    }
    return (Math.abs(hash) + probe) % this.capacity;
  }

  put(key: K, value: V): void {
    // Tombstones occupy slots too, so they count toward the load factor.
    if (this.size + this.tombstones >= this.capacity * 0.75) {
      this.resize();
    }
    let firstTombstone = -1;
    for (let probe = 0; probe < this.capacity; probe++) {
      const index = this.hash(key, probe);
      const slot = this.buckets[index];
      if (slot === null) {
        // Key is definitely absent; reuse an earlier tombstone if any.
        const target = firstTombstone !== -1 ? firstTombstone : index;
        if (firstTombstone !== -1) this.tombstones--;
        this.buckets[target] = [key, value];
        this.size++;
        return;
      }
      if (slot === this.DELETED) {
        if (firstTombstone === -1) firstTombstone = index;
        continue; // keep probing: the key may exist further along
      }
      if ((slot as [K, V])[0] === key) {
        (slot as [K, V])[1] = value; // update in place
        return;
      }
    }
    // Full circuit without a null slot — fall back to a seen tombstone.
    if (firstTombstone !== -1) {
      this.buckets[firstTombstone] = [key, value];
      this.tombstones--;
      this.size++;
      return;
    }
    // Unreachable while resize() maintains free slots; fail loudly.
    throw new Error('HashTableOpenAddressing: table is full');
  }

  get(key: K): V | undefined {
    for (let probe = 0; probe < this.capacity; probe++) {
      const index = this.hash(key, probe);
      const slot = this.buckets[index];
      if (slot === null) return undefined; // never-used slot ends the chain
      if (slot !== this.DELETED && (slot as [K, V])[0] === key) {
        return (slot as [K, V])[1];
      }
    }
    return undefined; // bounded scan: no infinite loop on a tombstone-full table
  }

  remove(key: K): boolean {
    for (let probe = 0; probe < this.capacity; probe++) {
      const index = this.hash(key, probe);
      const slot = this.buckets[index];
      if (slot === null) return false;
      if (slot !== this.DELETED && (slot as [K, V])[0] === key) {
        this.buckets[index] = this.DELETED; // tombstone keeps probe chains intact
        this.size--;
        this.tombstones++;
        return true;
      }
    }
    return false;
  }

  // Doubles capacity and reinserts live entries; tombstones are dropped.
  private resize(): void {
    const oldBuckets = this.buckets;
    this.capacity *= 2;
    this.buckets = new Array(this.capacity).fill(null);
    this.size = 0;
    this.tombstones = 0;
    for (const entry of oldBuckets) {
      if (entry !== null && entry !== this.DELETED) {
        const [k, v] = entry as [K, V];
        this.put(k, v);
      }
    }
  }
}
Examples
Two Sum Problem
// Two Sum: indices of the two numbers that add up to `target`.
// Single pass: for each value, check whether its complement was already
// seen; the map records value → index of first occurrence.
function twoSum(nums: number[], target: number): number[] {
  const seen = new Map<number, number>();
  for (const [i, value] of nums.entries()) {
    const j = seen.get(target - value);
    if (j !== undefined) {
      return [j, i];
    }
    seen.set(value, i);
  }
  return []; // no pair found
}
// Time: O(n), Space: O(n)
Group Anagrams
// Group strings that are anagrams of one another.
// Anagrams share the same sorted-character signature, which serves as
// the map key; insertion order of first occurrences is preserved.
function groupAnagrams(strs: string[]): string[][] {
  const groups = new Map<string, string[]>();
  for (const word of strs) {
    const signature = word.split('').sort().join('');
    const bucket = groups.get(signature);
    if (bucket) {
      bucket.push(word);
    } else {
      groups.set(signature, [word]);
    }
  }
  return [...groups.values()];
}
// Time: O(n * k log k) where k is max string length
// Space: O(n * k)
Longest Consecutive Sequence
// Length of the longest run of consecutive integers in `nums`.
// Set membership makes each neighbor check O(1); only sequence starts
// (values with no predecessor in the set) begin a forward scan, so
// every value is visited a constant number of times → O(n) overall.
function longestConsecutive(nums: number[]): number {
  const values = new Set(nums);
  let best = 0;
  values.forEach((start) => {
    if (values.has(start - 1)) return; // mid-sequence: skip
    let end = start;
    while (values.has(end + 1)) {
      end++;
    }
    best = Math.max(best, end - start + 1);
  });
  return best;
}
// Time: O(n), Space: O(n)
Common Pitfalls
- Poor hash function: Causes clustering and collisions. Fix: Use well-tested hash functions
- Not handling collisions: Assuming perfect hashing. Fix: Always implement collision resolution
- Load factor too high: Degrades to O(n) performance. Fix: Resize when load factor > 0.75
- Mutable keys: Changing keys after insertion breaks hash table. Fix: Use immutable keys or rehash
- Equality/hash contract violations: Two objects that compare equal must produce the same hash code. Fix: Always override hashCode and equals together
- Not resizing: Table becomes inefficient. Fix: Implement dynamic resizing
Interview Questions
Beginner
Q: Explain how a hash table works. What is the time complexity of basic operations?
A:
Hash Table Overview:
- Hash function: Maps keys to array indices
- Buckets: Array of buckets to store key-value pairs
- Collision resolution: Handles when multiple keys hash to same index
Time Complexity:
- Insert: O(1) average, O(n) worst case (all collisions)
- Search: O(1) average, O(n) worst case
- Delete: O(1) average, O(n) worst case
Space Complexity: O(n)
How it works:
- Compute hash of key → get index
- Store/retrieve value at that index
- Handle collisions if multiple keys map to same index
Collision Resolution:
- Chaining: Store multiple entries in same bucket (linked list)
- Open addressing: Find next available slot (linear/quadratic probing)
Intermediate
Q: Implement a hash table with chaining. How do you handle resizing and what is the optimal load factor?
A:
// Chaining hash table (interview answer): buckets of [key, value]
// pairs; capacity doubles once the load factor passes the threshold.
class HashTable<K, V> {
  private buckets: Array<Array<[K, V]>>;
  private size: number;
  private capacity: number;
  private loadFactor: number = 0.75;

  constructor(initialCapacity: number = 16) {
    this.capacity = initialCapacity;
    this.buckets = Array.from({ length: this.capacity }, () => []);
    this.size = 0;
  }

  // djb2-style fold of the stringified key into [0, capacity).
  private hash(key: K): number {
    const text = String(key);
    let h = 0;
    for (let i = 0; i < text.length; i++) {
      h = (h << 5) - h + text.charCodeAt(i);
      h &= h; // keep within 32-bit integer range
    }
    return Math.abs(h) % this.capacity;
  }

  // Inserts or overwrites; resizes once the load factor is exceeded.
  put(key: K, value: V): void {
    const bucket = this.buckets[this.hash(key)];
    const existing = bucket.find((pair) => pair[0] === key);
    if (existing) {
      existing[1] = value;
      return;
    }
    bucket.push([key, value]);
    this.size++;
    if (this.size / this.capacity > this.loadFactor) {
      this.resize();
    }
  }

  // Returns the value for `key`, or undefined when absent.
  get(key: K): V | undefined {
    const match = this.buckets[this.hash(key)].find((pair) => pair[0] === key);
    return match?.[1];
  }

  // Doubles capacity and rehashes every entry.
  private resize(): void {
    const previous = this.buckets;
    this.capacity *= 2;
    this.buckets = Array.from({ length: this.capacity }, () => []);
    this.size = 0;
    for (const bucket of previous) {
      for (const [k, v] of bucket) {
        this.put(k, v);
      }
    }
  }
}
Load Factor:
- Optimal: 0.75 (balance between space and collisions)
- Too low: Wastes space
- Too high: Many collisions, degrades performance
Resizing:
- Trigger: When load factor > threshold
- New capacity: Usually double (2x)
- Rehashing: All entries must be rehashed with new capacity
Senior
Q: Design a distributed hash table system that handles node failures, data replication, and consistent hashing. How do you ensure data availability and handle rebalancing?
A:
// Distributed hash table: consistent-hash ring for key placement,
// N-way replication for availability, read-repair on replica hits.
// NOTE(review): `Node` is assumed to expose async put/get/delete —
// confirm against its declaration.
class DistributedHashTable {
  private nodes: Map<string, Node>;
  private ring: ConsistentHashRing;
  private replicationFactor: number = 3;

  constructor() {
    this.nodes = new Map();
    this.ring = new ConsistentHashRing();
  }

  addNode(nodeId: string, node: Node): void {
    this.nodes.set(nodeId, node);
    this.ring.addNode(nodeId);
    // Fire-and-forget: rebalancing proceeds in the background.
    void this.rebalance();
  }

  removeNode(nodeId: string): void {
    this.nodes.delete(nodeId);
    this.ring.removeNode(nodeId);
    void this.rebalance();
  }

  // Writes to the primary and all replicas in parallel.
  // (Previously synchronous with an un-awaited Promise.all, so write
  // failures were silently dropped; callers can now await completion.)
  async put(key: string, value: any): Promise<void> {
    const primaryNode = this.ring.getNode(key);
    const replicaNodes = this.ring.getReplicas(key, this.replicationFactor);
    const nodesToWrite = [primaryNode, ...replicaNodes];
    await Promise.all(
      nodesToWrite.map((nodeId) => this.nodes.get(nodeId)!.put(key, value))
    );
  }

  // Reads from the primary, falling back to replicas on failure; a
  // successful replica read repairs the primary's copy.
  // (Previously declared without `async` while using `await` — a
  // compile error.)
  async get(key: string): Promise<any> {
    const primaryNode = this.ring.getNode(key);
    const replicaNodes = this.ring.getReplicas(key, this.replicationFactor);
    const nodesToTry = [primaryNode, ...replicaNodes];
    for (const nodeId of nodesToTry) {
      try {
        const value = await this.nodes.get(nodeId)!.get(key);
        if (value !== null) {
          if (nodeId !== primaryNode) {
            // Read repair: restore the primary asynchronously.
            void this.nodes.get(primaryNode)!.put(key, value);
          }
          return value;
        }
      } catch {
        continue; // node failed — try the next replica
      }
    }
    return null;
  }

  // Moves only the keys whose ownership changed after a ring update.
  // (Previously a non-async void method using `await` — a compile error.)
  private async rebalance(): Promise<void> {
    const keysToMove = this.ring.getKeysToMove();
    for (const { key, oldNode, newNode } of keysToMove) {
      const value = await this.nodes.get(oldNode)!.get(key);
      if (value) {
        await this.nodes.get(newNode)!.put(key, value);
        await this.nodes.get(oldNode)!.delete(key);
      }
    }
  }
}
// Consistent hash ring with virtual nodes for even key distribution.
// Keys and node replicas hash onto the same numeric ring; a key is
// owned by the first node clockwise from its hash position.
class ConsistentHashRing {
  private ring: Map<number, string> = new Map();
  private sortedKeys: number[] = [];
  // Virtual nodes per physical node; more gives smoother distribution.
  private readonly virtualNodes: number;

  constructor(virtualNodes: number = 150) {
    this.virtualNodes = virtualNodes;
  }

  addNode(nodeId: string): void {
    for (let i = 0; i < this.virtualNodes; i++) {
      const h = this.hash(`${nodeId}:${i}`);
      // Guard against duplicate positions in sortedKeys when two
      // virtual-node labels happen to collide on the weak hash.
      if (!this.ring.has(h)) {
        this.sortedKeys.push(h);
      }
      this.ring.set(h, nodeId);
    }
    this.sortedKeys.sort((a, b) => a - b);
  }

  removeNode(nodeId: string): void {
    const removed = new Set<number>();
    for (let i = 0; i < this.virtualNodes; i++) {
      const h = this.hash(`${nodeId}:${i}`);
      // Only remove positions this node actually owns (collision guard).
      if (this.ring.get(h) === nodeId) {
        this.ring.delete(h);
        removed.add(h);
      }
    }
    // Single filter pass instead of repeated indexOf/splice scans.
    this.sortedKeys = this.sortedKeys.filter((k) => !removed.has(k));
  }

  // Owner of `key`: first ring position >= hash(key), wrapping around.
  // Throws on an empty ring (previously indexed with NaN and returned
  // undefined through a non-null assertion).
  getNode(key: string): string {
    if (this.sortedKeys.length === 0) {
      throw new Error('ConsistentHashRing: no nodes in the ring');
    }
    const index = this.findFirstGreater(this.hash(key));
    const ringKey = this.sortedKeys[index % this.sortedKeys.length];
    return this.ring.get(ringKey)!;
  }

  // Up to `count` distinct nodes clockwise from the key's position.
  // The walk is capped at one full circuit of the ring: the previous
  // unbounded loop never terminated when `count` exceeded the number
  // of distinct nodes.
  getReplicas(key: string, count: number): string[] {
    const replicas: string[] = [];
    const total = this.sortedKeys.length;
    if (total === 0) return replicas;
    let index = this.findFirstGreater(this.hash(key));
    for (let step = 0; step < total && replicas.length < count; step++) {
      const nodeId = this.ring.get(this.sortedKeys[index % total])!;
      if (!replicas.includes(nodeId)) {
        replicas.push(nodeId);
      }
      index++;
    }
    return replicas;
  }

  private hash(key: string): number {
    // Production systems use a cryptographic hash (MD5/SHA-1) here for
    // better spread; this weak hash keeps the example dependency-free.
    return this.simpleHash(key);
  }

  private simpleHash(key: string): number {
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
      hash = ((hash << 5) - hash) + key.charCodeAt(i);
      hash = hash & hash; // force 32-bit integer
    }
    return Math.abs(hash);
  }

  // Binary search: index of the first ring position >= hash, or
  // sortedKeys.length when hash is past the last position (wraps).
  private findFirstGreater(hash: number): number {
    let left = 0;
    let right = this.sortedKeys.length;
    while (left < right) {
      const mid = Math.floor((left + right) / 2);
      if (this.sortedKeys[mid] < hash) {
        left = mid + 1;
      } else {
        right = mid;
      }
    }
    return left;
  }

  // Stub: computing moved keys requires tracking key ownership, which
  // this example does not persist.
  getKeysToMove(): Array<{ key: string; oldNode: string; newNode: string }> {
    return [];
  }
}
Features:
- Consistent hashing: Minimal key movement on node changes
- Replication: Multiple copies for availability
- Rebalancing: Efficient key redistribution
- Failure handling: Read from replicas if primary fails
Key Takeaways
- Hash tables: O(1) average time for insert, search, delete
- Hash functions: Should distribute keys uniformly
- Collision resolution: Chaining (simple) vs open addressing (space efficient)
- Load factor: Keep below 0.75 for good performance
- Resizing: Double capacity and rehash when load factor exceeded
- Applications: Fast lookups, counting, caching, distributed systems
- Trade-offs: Space vs time, collision handling complexity