Consistent Hashing

Implementation

For completeness here is a simple implementation in Java. In order for consistent hashing to be effective it is important to have a hash function that mixes well. Most implementations of Object's hashCode do not mix well - for example, they typically produce a restricted number of small integer values - so we have a HashFunction interface to allow a custom hash function to be used. MD5 hashes are recommended here.

import java.util.Collection;
import java.util.SortedMap;
import java.util.TreeMap;

public class ConsistentHash<T> {

 private final HashFunction hashFunction;
 private final int numberOfReplicas;
 private final SortedMap<Integer, T> circle = new TreeMap<Integer, T>();

 /**
  * Builds the circle from an initial collection of nodes.
  *
  * @param hashFunction     hash used to place node replicas and lookup keys on the circle
  * @param numberOfReplicas number of circle entries created for each node
  * @param nodes            nodes inserted through {@code add}, in iteration order
  */
 public ConsistentHash(HashFunction hashFunction, int numberOfReplicas,
     Collection<T> nodes) {
   this.numberOfReplicas = numberOfReplicas;
   this.hashFunction = hashFunction;

   // Initial nodes go through add(), so they get the same replica entries
   // as a node added after construction.
   for (T initialNode : nodes) {
     this.add(initialNode);
   }
 }

 /**
  * Inserts {@code numberOfReplicas} entries for the node into the circle.
  * Each entry is keyed by the hash of {@code node.toString()} followed by
  * the replica index.
  *
  * @param node node to place on the circle
  */
 public void add(T node) {
   int replica = 0;
   while (replica < numberOfReplicas) {
     String replicaKey = node.toString() + replica;
     circle.put(hashFunction.hash(replicaKey), node);
     replica++;
   }
 }

 /**
  * Deletes the circle entries whose keys are the hashes of
  * {@code node.toString()} followed by each replica index, i.e. the same
  * keys that {@code add} computes for this node.
  *
  * @param node node whose replica entries are removed
  */
 public void remove(T node) {
   int replica = 0;
   while (replica < numberOfReplicas) {
     String replicaKey = node.toString() + replica;
     circle.remove(hashFunction.hash(replicaKey));
     replica++;
   }
 }

 /**
  * Looks up the node responsible for a key.
  *
  * @param key object passed to the hash function to obtain its position
  * @return the node stored at the key's hash, otherwise the node at the first
  *         circle position at or after that hash (wrapping to the first
  *         position of the circle); {@code null} when the circle is empty
  */
 public T get(Object key) {
   if (circle.isEmpty()) {
     return null;
   }
   int hash = hashFunction.hash(key);
   if (!circle.containsKey(hash)) {
     // No entry exactly at this hash: take the first position in the tail
     // of the circle, or wrap around to the very first position.
     SortedMap<Integer, T> tailMap = circle.tailMap(hash);
     hash = tailMap.isEmpty() ? circle.firstKey() : tailMap.firstKey();
   }

   // Author's note: this lookup could be optimized considerably. Finding the
   // closest integer greater than or equal to the hash among fewer than about
   // ten thousand integers is a very simple algorithm and does not need the
   // TreeMap implementation.
   return circle.get(hash);

numberOfReplicas的經驗值在100-200之間，這就是一個物理節點對應多少個虛擬節點。如果我們把環形拉直，其實就是每個節點在數組中的位置。物理節點很少，比如10個物理節點，如果平均分佈在Integer.MIN_VALUE到Integer.MAX_VALUE之間，那麼每個節點間的區間大約有2^29這麼大，假如某一時間段的一些key的hash正好在這一範圍，那麼它們就被聚集到某一臺物理節點上。在採用了虛擬節點後，每個物理節點對應的虛擬節點和其它物理節點對應的虛擬節點是平均交叉分佈的，極大地減少了節點區間帶來的分佈聚集。

以下是一個簡單實現的測試：

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * Consistent-hash ring that maps string keys to {@link Node}s.
 *
 * <p>Every node is placed on the ring {@code virtualNums} times (virtual nodes).
 * Ring positions are kept both in a sorted map (position to node) and in a
 * sorted {@code int[]} that {@link #getNode(String)} binary-searches.
 *
 * @author axman 2011-3-30 09:24:38 PM
 */
public class NodeManager {

    private static final char[] hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

    /** Fixed charset for key bytes so ring positions do not depend on the platform default. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset.forName("UTF-8");

    /** A physical node; its {@link #toString()} text is what gets hashed onto the ring. */
    public static class Node {

        // node's fields.
        private final String ipAddr;
        private final int port;
        private final String nodeName;

        public Node(String ipAddr, int port, String nodeName) {
            this.ipAddr = ipAddr;
            this.port = port;
            this.nodeName = nodeName;
        }

        @Override
        public String toString() {
            return nodeName + ":<" + ipAddr + ":" + port + ">";
        }
    }

    /** Ring position to owning node, sorted by position. */
    private final SortedMap<Integer, Node> nodeMap = new TreeMap<Integer, Node>();

    /**
     * Sorted copy of the ring positions in {@link #nodeMap}.
     * Author's rationale: add/remove are called only once in days, months or
     * years, so a small conversion cost to {@code int[]} is paid there to speed
     * up lookups that may run tens of millions of times per second.
     */
    private int[] hashVal;

    /** Number of virtual nodes created per physical node. */
    private final int virtualNums;

    /**
     * Creates a ring containing the given nodes.
     *
     * @param nodes       initial nodes; may be empty
     * @param virtualNums number of virtual nodes per physical node
     */
    public NodeManager(Node[] nodes, int virtualNums) {
        this.virtualNums = virtualNums;
        for (Node node : nodes) {
            putVirtualNodes(node);
        }
        rebuildHashVal();
    }

    /**
     * Adds a node's virtual nodes to the ring and refreshes the lookup array.
     *
     * @param node node to add
     */
    public void add(Node node) {
        putVirtualNodes(node);
        rebuildHashVal();
    }

    /**
     * Removes the ring positions computed from the node's {@code toString()}
     * text and refreshes the lookup array.
     *
     * @param node node to remove
     */
    public void remove(Node node) {
        for (int i = 0; i < this.virtualNums; i++) {
            nodeMap.remove(hash(node.toString() + i));
        }
        rebuildHashVal();
    }

    /**
     * Finds the node owning a key: the node at the first ring position that is
     * greater than or equal to the key's hash, wrapping to the first position
     * when the hash is beyond the last one. This is a lower-bound binary search
     * over the sorted position array.
     *
     * @param key key to look up
     * @return the owning node, or {@code null} when the ring has no nodes
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public Node getNode(String key) {
        final int[] ring = hashVal;
        if (ring.length == 0) {
            return null;
        }
        final int hash = hash(key);

        // Lower bound: smallest index whose position is >= hash. A position
        // exactly equal to the hash must win, so the comparison is strict.
        int low = 0;
        int high = ring.length;
        while (low < high) {
            int mid = (low + high) >>> 1;
            if (ring[mid] < hash) {
                low = mid + 1;
            } else {
                high = mid;
            }
        }

        // Past the last position: wrap around to the start of the ring.
        int index = (low == ring.length) ? 0 : low;
        return nodeMap.get(ring[index]);
    }

    /** Inserts the {@code virtualNums} ring positions of one node into {@link #nodeMap}. */
    private void putVirtualNodes(Node node) {
        for (int i = 0; i < this.virtualNums; i++) {
            nodeMap.put(hash(node.toString() + i), node);
        }
    }

    /** Rebuilds {@link #hashVal} from the (already sorted) key set of {@link #nodeMap}. */
    private void rebuildHashVal() {
        int[] positions = new int[nodeMap.size()];
        int i = 0;
        for (Integer position : nodeMap.keySet()) {
            positions[i++] = position;
        }
        hashVal = positions;
    }

    /**
     * Hashes a key to a ring position: MD5 of the key's UTF-8 bytes, rendered
     * as lowercase hex, then {@link String#hashCode()} of that hex text.
     *
     * @throws IllegalStateException if the MD5 algorithm is unavailable
     */
    private int hash(String key) {
        java.security.MessageDigest md5;
        try {
            md5 = java.security.MessageDigest.getInstance("MD5");
        } catch (java.security.NoSuchAlgorithmException e) {
            // Failing loudly is safer than mapping every key to one position.
            throw new IllegalStateException("MD5 message digest is not available", e);
        }
        byte[] data = md5.digest(key.getBytes(UTF_8));
        char[] charArr = new char[data.length * 2];
        for (int i = 0; i < data.length; i++) {
            charArr[i * 2] = hex[data[i] >>> 4 & 0xF];
            charArr[i * 2 + 1] = hex[data[i] & 0xF];
        }
        return new String(charArr).hashCode();
    }

    /**
     * Demo: distributes 10000 keys over 10 nodes, adds an 11th node, and prints
     * for each original node how many keys it held before and after, plus the
     * fraction of its keys that moved away.
     */
    public static void main(String[] args) {
        Node[] nodes = new Node[10];
        Map<Node, List<String>> map = new HashMap<Node, List<String>>();
        Map<Node, List<String>> map1 = new HashMap<Node, List<String>>();

        // make nodes
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = new Node("10.1.33.2" + i, 80 + i, "mynode" + i);
        }
        NodeManager nm = new NodeManager(nodes, 150);

        // make keys
        String[] keys = new String[10000];
        for (int i = 0; i < keys.length; i++) {
            keys[i] = "key" + (i * 17) + "ss" + i * 19;
        }

        // make result 1
        for (String key : keys) {
            Node n = nm.getNode(key);
            List<String> l = map.get(n);
            if (l == null) {
                l = new ArrayList<String>();
                map.put(n, l);
            }
            l.add(key);
        }

        // make result 2, after adding a node
        nm.add(new Node("10.1.3.30", 90, "mynode10"));
        for (String key : keys) {
            Node n = nm.getNode(key);
            List<String> l = map1.get(n);
            if (l == null) {
                l = new ArrayList<String>();
                map1.put(n, l);
            }
            l.add(key);
        }

        // print difference
        for (Iterator<Node> i = map.keySet().iterator(); i.hasNext();) {
            Node key = i.next();
            List<String> l = map.get(key);
            List<String> l1 = map1.get(key);
            // A node may end up with no keys in the second round.
            int before = l.size();
            int after = (l1 == null) ? 0 : l1.size();
            // Author's note: the average of this difference should be n/(m+n).
            System.out.println(key + ":" + before + "," + after + "," + ((before - after) * 1d / before));
        }
    }
}