自己實現一個簡單的布穀鳥散列

需求

自己實現一個簡單的布穀鳥散列(Cuckoo hashtable)。

分析

布穀鳥哈希最早於 2001 年由 Rasmus Pagh 和 Flemming Friche Rodler 提出。該哈希方法是爲了解決哈希衝突的問題而提出,利用較少計算換取了較大空間。名稱源於該哈希方法的行爲類似於布穀鳥在別的鳥巢中下蛋,並將別的鳥蛋擠出的行爲。它具有佔用空間小、查詢迅速等特性,可用於 Bloom filter 和內存管理。

算法描述

算法使用多個不同哈希函數計算對應key 的位置。

  • 當多個哈希位置均爲空時,則任選其中一個位置插入
  • 當多個哈希有位置爲空時,則插入到空位置
  • 當多個哈希位置均不爲空時,隨機選擇其中一個位置,將該位置上的 key 踢出,再計算被踢出的 key 的另一個哈希值對應的位置進行插入,轉至第 2 步執行(即當再次插入的位置爲空時直接插入,仍舊不爲空時,再踢出該位置上的 key)

接口

/**
 * Minimal contract for a set-like hash table holding elements of type {@code E}.
 *
 * @param <E> the element type
 */
interface HashTable<E> {
    // ---- modification ----

    /**
     * Inserts the given element into the table.
     *
     * @param e the element to insert
     */
    void add(E e);

    /**
     * Removes the given element from the table.
     *
     * @param e the element to remove
     * @return a boolean result; the bundled implementation returns {@code true}
     *         when the element was found and removed
     */
    boolean remove(E e);

    /** Removes every element from the table. */
    void clear();

    // ---- queries ----

    /**
     * Tests whether the given element is stored in the table.
     *
     * @param e the element to look up
     * @return {@code true} if the element is present
     */
    boolean contains(E e);

    /**
     * @return the number of elements currently stored
     */
    int size();

    /**
     * @return {@code true} if the table holds no elements
     */
    boolean isEmpty();
}
/**
 * A numbered family of hash functions over elements of type {@code E}.
 *
 * @param <E> the type of value that can be hashed
 */
interface HashFamily<E> {
    /**
     * Hashes an element with one member of the family.
     *
     * @param e     the element to hash
     * @param which index of the function to apply
     * @return the raw hash value; callers in this file reduce it to a table
     *         index themselves, so it may be any {@code int}
     */
    int hash(E e, int which);

    /**
     * @return how many hash functions this family provides
     */
    int getNumOfFunctions();

    /** Replaces the current functions with a freshly generated set. */
    void generateNewFunctions();
}

實現類

/**
 * A set-like hash table that resolves collisions with cuckoo hashing.
 *
 * <p>Every element has one candidate slot per function of the supplied
 * {@link HashFamily}. An insertion takes the first free candidate slot; when all
 * candidates are occupied, a randomly chosen occupant is evicted and re-inserted
 * in turn. If an eviction chain runs too long, the table is rebuilt with new hash
 * functions or a larger array.
 *
 * <p>{@code null} marks an empty slot internally, so {@code null} elements cannot
 * be stored. This class performs no synchronization.
 *
 * <p>Requires {@code java.util.Random} and {@code java.util.StringJoiner} to be
 * imported by the enclosing file.
 *
 * @param <E> the element type
 */
class CuckooHashTable<E> implements HashTable<E> {
    /** The table is grown once its fill ratio reaches this value. */
    private static final float LOAD_FACTOR = 0.4f;

    /** Number of same-size rehashes tolerated before the table is grown instead. */
    private static final int ALLOWED_REHASHES = 1;

    private static final int DEFAULT_TABLE_SIZE = 101;

    /** Maximum evictions attempted for one insertion before rebuilding the table. */
    private static final int EVICTION_LIMIT = 100;

    /** Maximum extra draws made to avoid evicting from the slot used last time. */
    private static final int MAX_REPICK_ATTEMPTS = 5;

    private final HashFamily<? super E> hashFunctions;

    private final int numHashFunctions;

    private final Random r = new Random();

    /** Slot storage; {@code null} means the slot is empty. */
    private Object[] array;

    private int currentSize;

    /** Rehashes performed since the table was last grown by {@link #addHelper}. */
    private int rehashes = 0;

    /**
     * Creates a table with the default initial capacity.
     *
     * @param hashFunctions the hash family used to locate elements
     * @throws NullPointerException     if {@code hashFunctions} is {@code null}
     * @throws IllegalArgumentException if the family provides no functions
     */
    public CuckooHashTable(HashFamily<? super E> hashFunctions) {
        this(hashFunctions, DEFAULT_TABLE_SIZE);
    }

    /**
     * Creates a table whose capacity is the first odd prime that is at least
     * {@code size}.
     *
     * @param hashFunctions the hash family used to locate elements
     * @param size          the requested minimum capacity
     * @throws NullPointerException     if {@code hashFunctions} is {@code null}
     * @throws IllegalArgumentException if {@code size} is negative or the family
     *                                  provides no functions
     */
    public CuckooHashTable(HashFamily<? super E> hashFunctions, int size) {
        if (hashFunctions == null) {
            throw new NullPointerException("hashFunctions must not be null");
        }
        if (size < 0) {
            throw new IllegalArgumentException("size must not be negative: " + size);
        }
        int functionCount = hashFunctions.getNumOfFunctions();
        if (functionCount < 1) {
            throw new IllegalArgumentException(
                    "hash family must provide at least one function: " + functionCount);
        }
        this.hashFunctions = hashFunctions;
        this.numHashFunctions = functionCount;
        allocateArray(nextPrime(size));
        currentSize = 0;
    }

    private void allocateArray(int arraySize) {
        array = new Object[arraySize];
    }

    @Override
    public int size() {
        return currentSize;
    }

    @Override
    public boolean isEmpty() {
        return currentSize == 0;
    }

    /** Empties the table while keeping its current capacity. */
    @Override
    public void clear() {
        currentSize = 0;
        for (int i = 0; i < array.length; i++) {
            array[i] = null;
        }
    }

    /**
     * @param e the element to look up; {@code null} is never contained
     * @return {@code true} if an equal element is stored
     */
    @Override
    public boolean contains(E e) {
        return findPos(e) != -1;
    }

    /**
     * Adds {@code e} unless an equal element is already stored.
     *
     * @param e the element to add
     * @throws NullPointerException if {@code e} is {@code null}
     */
    @Override
    public void add(E e) {
        if (e == null) {
            // null is the empty-slot marker; storing it would corrupt the size count.
            throw new NullPointerException("null elements are not supported");
        }
        if (contains(e)) {
            return;
        }
        if (currentSize >= array.length * LOAD_FACTOR) {
            expand();
        }
        addHelper(e);
    }

    /**
     * Places {@code e}, which must not already be stored, evicting occupants as
     * needed. Loops until the element (or the last evicted one) finds a slot.
     */
    private void addHelper(E e) {
        while (true) {
            int lastPos = -1;
            for (int count = 0; count < EVICTION_LIMIT; count++) {
                // Use any free candidate slot if one exists.
                for (int i = 0; i < numHashFunctions; i++) {
                    int candidate = hash(e, i);
                    if (array[candidate] == null) {
                        array[candidate] = e;
                        currentSize++;
                        return;
                    }
                }
                // All candidates are taken: pick a random one to evict from, trying a
                // few times to avoid the slot we just filled (which would undo the
                // previous eviction).
                int pos;
                int attempts = 0;
                do {
                    pos = hash(e, r.nextInt(numHashFunctions));
                } while (pos == lastPos && attempts++ < MAX_REPICK_ATTEMPTS);
                E evicted = array(pos);
                array[pos] = e;
                lastPos = pos;
                e = evicted; // the evicted element is the one to place next
            }
            // The eviction chain was too long: rebuild, then retry with the element
            // still in hand (it is not in the table at this point).
            if (++rehashes > ALLOWED_REHASHES) {
                expand();
                rehashes = 0;
            } else {
                rehash();
            }
        }
    }

    /** Rebuilds the table into an array roughly {@code 1 / LOAD_FACTOR} times larger. */
    private void expand() {
        rehash((int) (array.length / LOAD_FACTOR));
    }

    /** Rebuilds the table at its current size using newly generated hash functions. */
    private void rehash() {
        hashFunctions.generateNewFunctions();
        rehash(array.length);
    }

    /**
     * Re-inserts every stored element into a fresh array of prime length at least
     * {@code newLength}.
     */
    private void rehash(int newLength) {
        Object[] oldArray = array;
        allocateArray(nextPrime(newLength));
        currentSize = 0;
        for (Object slot : oldArray) {
            if (slot != null) {
                // Only values of type E are ever written into the array.
                @SuppressWarnings("unchecked")
                E element = (E) slot;
                // Elements of the old table are distinct, so the duplicate check in
                // add() is unnecessary here.
                addHelper(element);
            }
        }
    }

    /** Reads a slot as {@code E}; safe because only {@code E} values are stored. */
    @SuppressWarnings("unchecked")
    private E array(int index) {
        return (E) array[index];
    }

    /**
     * Removes the element equal to {@code e}, if present.
     *
     * @param e the element to remove
     * @return {@code true} if an element was removed
     */
    @Override
    public boolean remove(E e) {
        int pos = findPos(e);
        if (pos == -1) {
            return false;
        }
        array[pos] = null;
        currentSize--;
        return true;
    }

    /**
     * @return the index of the slot holding an element equal to {@code e}, or
     *         {@code -1} if none of its candidate slots holds one
     */
    private int findPos(E e) {
        if (e == null) {
            return -1; // null is never stored
        }
        for (int i = 0; i < numHashFunctions; i++) {
            int pos = hash(e, i);
            Object occupant = array[pos];
            if (occupant != null && occupant.equals(e)) {
                return pos;
            }
        }
        return -1;
    }

    /** Maps hash function {@code which} of {@code e} onto a valid array index. */
    private int hash(E e, int which) {
        // floorMod keeps the result non-negative even for negative hash values.
        return Math.floorMod(hashFunctions.hash(e, which), array.length);
    }

    /**
     * @return the stored elements in slot order, formatted like {@code [a, b, c]}
     */
    @Override
    public String toString() {
        StringJoiner joiner = new StringJoiner(", ", "[", "]");
        for (Object slot : array) {
            if (slot != null) {
                joiner.add(String.valueOf(slot));
            }
        }
        return joiner.toString();
    }

    /** Returns the smallest odd prime that is greater than or equal to {@code n}. */
    private static int nextPrime(int n) {
        if (n % 2 == 0) {
            n++;
        }
        while (!isPrime(n)) {
            n += 2;
        }
        return n;
    }

    private static boolean isPrime(int n) {
        if (n < 2) {
            return false;
        }
        if (n == 2 || n == 3) {
            return true;
        }
        if (n % 2 == 0) {
            return false;
        }
        // i <= n / i instead of i * i <= n, so the bound cannot overflow.
        for (int i = 3; i <= n / i; i += 2) {
            if (n % i == 0) {
                return false;
            }
        }
        return true;
    }
}


/**
 * A family of polynomial hash functions over strings. Each function is defined by
 * its own randomly drawn multiplier.
 *
 * <p>Requires {@code java.util.Random} to be imported by the enclosing file.
 */
class StringHashFamily implements HashFamily<String> {
    /** One multiplier per hash function; redrawn by {@link #generateNewFunctions()}. */
    private final int[] multipliers;

    private final Random r = new Random();

    /**
     * Creates a family with {@code size} randomly chosen hash functions.
     *
     * @param size the number of hash functions, at least 1
     * @throws IllegalArgumentException if {@code size} is less than 1
     */
    public StringHashFamily(int size) {
        if (size < 1) {
            throw new IllegalArgumentException("size must be at least 1: " + size);
        }
        multipliers = new int[size];
        generateNewFunctions();
    }

    @Override
    public int getNumOfFunctions() {
        return multipliers.length;
    }

    /** Draws a new random multiplier for every function in the family. */
    @Override
    public void generateNewFunctions() {
        for (int i = 0; i < multipliers.length; i++) {
            // Force the multiplier to be odd: a zero multiplier would reduce the hash
            // to the last character, and even multipliers progressively discard the
            // contribution of earlier characters in 32-bit arithmetic.
            multipliers[i] = r.nextInt() | 1;
        }
    }

    /**
     * Computes the polynomial hash of {@code e} using the multiplier of function
     * {@code which}. Arithmetic wraps on {@code int} overflow, so the result may be
     * negative.
     *
     * @param e     the string to hash; must not be {@code null}
     * @param which index of the function, from 0 to {@code getNumOfFunctions() - 1}
     * @return the hash value
     */
    @Override
    public int hash(String e, int which) {
        final int multiplier = multipliers[which];
        int hashVal = 0;
        for (int i = 0; i < e.length(); i++) {
            hashVal = multiplier * hashVal + e.charAt(i);
        }
        return hashVal;
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章