Skip to main content

16. Hashing

 

Hashing

  • Hashing is a technique where search time is independent of the number of items in which we are searching a data value.
  • The basic idea is to use the key itself to find the address in the memory to make searching easy.
  • The values are then stored in hash table, by using that key we can access the element in O(1) time.
  • A hash function maps a big number or string to a small integer that can be used as index in hash table.

Collision

It is possible that two different set of keys K1 and K2 will yield the same hash address. This situation is called collision. The technique to resolve collision is called collision resolution.


import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;

class HashMap<K, V> {
    private class Node {
        K key;
        V value;

        public Node(K key, V value) {
            this.key = key;
            this.value = value;
        }
    }

    private int n; // Number of key-value pairs
    private int N; // Number of buckets
    private LinkedList<Node> buckets[];

    @SuppressWarnings("unchecked")
    public HashMap() {
        this.N = 4;
        this.buckets = new LinkedList[N];
        for (int i = 0; i < N; i++)
            this.buckets[i] = new LinkedList<>();
    }

    private int hashFunction(K key) {
        return Math.abs(key.hashCode()) % N;
    }

    private int searchInLL(K key, int idx) {
        LinkedList<Node> ll = buckets[idx];
        for (int i = 0; i < ll.size(); i++)
            if (ll.get(i).key == key)
                return i;
        return -1;
    }

    @SuppressWarnings("unchecked")
    private void rehash() {
        LinkedList<Node> oldBucket[] = buckets;
        buckets = new LinkedList[N * 2];
        for (int i = 0; i < N * 2; i++)
            buckets[i] = new LinkedList<>();
        for (int i = 0; i < oldBucket.length; i++) {
            LinkedList<Node> ll = oldBucket[i];
            for (int j = 0; j < ll.size(); j++) {
                Node node = ll.get(j);
                put(node.key, node.value);
            }
        }
    }

    public void put(K key, V value) {
        int bucketIdx = hashFunction(key);
        int dataIdx = searchInLL(key, bucketIdx);

        if (dataIdx == -1) {
            buckets[bucketIdx].add(new Node(key, value));
            n++;
        } else {
            Node node = buckets[bucketIdx].get(dataIdx);
            node.value = value;
        }

        double lambda = (double) n / N;
        if (lambda > 2.0) {
            rehash();
        }
    }

    public V get(K key) {
        int bucketIdx = hashFunction(key);
        int dataIdx = searchInLL(key, bucketIdx);

        if (dataIdx == -1) {
            return null;
        } else {
            Node node = buckets[bucketIdx].get(dataIdx);
            return node.value;
        }
    }

    public boolean containsKey(K key) {
        int bucketIdx = hashFunction(key);
        int dataIdx = searchInLL(key, bucketIdx);

        if (dataIdx == -1)
            return false;
        return true;
    }

    public V remove(K key){
        int bucketIdx = hashFunction(key);
        int dataIdx = searchInLL(key, bucketIdx);

        if (dataIdx == -1) {
            return null;
        } else {
            Node node = buckets[bucketIdx].remove(dataIdx);
            n--;
            return node.value;
        }
    }
}

Comments