本文所引用的源碼全部來自Redis2.8.2版本。

Redis中字典dict數據結構與API相關文件是：dict.h, dict.c。

本文講解的不是很詳細，可以同時參考Redis設計與實現一書中字典部分，本文關於字典的核心代碼的註釋可以參考。

字典，簡單說就是存儲key-value鍵值數據，當然value=NULL那麼就是集合了。字典通俗來說就是C++ STL中的map，STL中的map是用red-black tree實現的，因爲map不僅能夠保證key不重複，而且key還是按照字典序存儲的，而Redis中的字典並不要求有序，因此爲了降低編碼的難度使用哈希表作爲字典的底層實現。Redis的字典是使用一個桶bucket，通過對key進行hash得到的索引值index，然後將key-value的數據存在桶的index位置，Redis處理hash碰撞的方式是鏈表，兩個不同的key hash得到相同的索引值，那麼就使用鏈表解決衝突。使用鏈表自然當存儲的數據巨大的時候，字典不免會退化成多個鏈表，效率大大降低，Redis採用rehash的方式對桶進行擴容來解決這種退化。

Redis使用的hash算法有以下兩種：

1. MurmurHash2 32 bit 算法：這種算法的分佈率和速度都非常好，具體信息請參考 MurmurHash 的主頁：http://code.google.com/p/smhasher/ 。
2. 基於 djb 算法實現的一個大小寫無關散列算法：具體信息請參考
http://www.cse.yorku.ca/~oz/hash.html 。

字典數據結構

/* A single key/value node of the dictionary. Nodes are linked through
 * `next` to form a singly linked list. */
typedef struct dictEntry {
    void *key;
    union { /* the value is kept in a union: one of the three views below */
        void *val;
        uint64_t u64; /* value viewed as an unsigned 64-bit integer */
        int64_t s64; /* value viewed as a signed 64-bit integer */
    } v;
    struct dictEntry *next; /* pointer to the next node */
} dictEntry;

/* Table of callbacks that customise how a dictionary treats its keys and
 * values. */
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key); /* hash function pointer */
    void *(*keyDup)(void *privdata, const void *key); /* key duplication function pointer */
    void *(*valDup)(void *privdata, const void *obj); /* value duplication function pointer */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2); /* key comparison function pointer */
    void (*keyDestructor)(void *privdata, void *key); /* key destructor function pointer */
    void (*valDestructor)(void *privdata, void *obj); /* value destructor function pointer */
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht { /* one hash table of the dictionary */
    dictEntry **table; /* array of dictEntry pointers, i.e. the buckets */
    unsigned long size; /* length of the pointer array, i.e. number of buckets */
    unsigned long sizemask; /* mask ANDed with a hash value to obtain a bucket index */
    unsigned long used; /* number of nodes currently stored in this table */
} dictht;

/* The dictionary itself: a callback table, private data, and two hash
 * tables. */
typedef struct dict {
    dictType *type;
    void *privdata; /* private data; presumably handed to the dictType callbacks - confirm against dict.h macros */
    dictht ht[2];   /* the two hash tables */
    int rehashidx; /* rehash index; rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running on this dictionary */
} dict;

dict數據結構中聲明了兩個字典hashtable結構dictht，ht[1]在rehash時候使用，後面具體分析。

下圖給出整個字典結構，圖片來自Redis設計與實現一書：

上圖ht[1]爲空，說明當前字典沒在Rehash狀態。

字典的API函數

函數名稱	作用	複雜度
dictCreate	創建一個新字典	O(1)
dictResize	重新規劃字典的大小	O(1)
dictExpand	擴展字典	O(1)
dictRehash	對字典進行N步漸進式Rehash	O(N)
_dictRehashStep	對字典進行1步嘗試Rehash	O(N)
dictAdd	添加一個元素	O(1)
dictReplace	替換給定key的value值	O(1)
dictDelete	刪除一個元素	O(N)
dictRelease	釋放字典	O(1)
dictFind	查找一個元素	O(N)
dictFetchValue	通過key查找value	O(N)
dictGetRandomKey	隨機返回字典中一個元素	O(1)

創建新字典

通過dictCreate函數創建一個新字典dict *dictCreate(dictType *type, void *privDataPtr)，一個空字典的示意圖如下(圖片來自Redis設計與實現一書):

上面已經提起過，ht[1]只在Rehash時使用。

字典添加元素

根據字典當前的狀態，將一個key-value元素添加到字典中可能會引起一系列複雜的操作：

如果字典未初始化（即字典的0號哈希表ht[0]的table爲空），那麼需要調用dictExpand函數對它初始化；

如果插入的元素key已經存在，那麼添加元素失敗；

如果插入元素時，引起碰撞，需要使用鏈表來處理碰撞；

如果插入元素時，引起程序滿足Rehash的條件時，先調用dictExpand函數擴展哈希表的size，然後準備漸進式Rehash操作。

字典添加元素的流程圖，來自Redis設計與實現一書

/* Expand or create the hash table.
 *
 * `size` is the requested capacity; the real bucket count is whatever
 * _dictNextPower() returns for it. Returns DICT_ERR when a rehash is already
 * running or when `size` is smaller than the number of elements currently in
 * ht[0]; returns DICT_OK otherwise. */
int dictExpand(dict *d, unsigned long size)
{
    unsigned long buckets = _dictNextPower(size); /* bucket count to allocate */
    dictht fresh; /* the new hash table */

    /* Refuse while rehashing. */
    if (dictIsRehashing(d)) return DICT_ERR;
    /* The size is invalid if it cannot hold the elements already stored. */
    if (d->ht[0].used > size) return DICT_ERR;

    /* Build the new, zero-filled table. */
    fresh.table = zcalloc(buckets * sizeof(dictEntry*));
    fresh.size = buckets;
    fresh.sizemask = buckets - 1;
    fresh.used = 0;

    if (d->ht[0].table != NULL) {
        /* ht[0] already exists: install the new table as ht[1] and start
         * incremental rehashing from bucket 0 of ht[0]. */
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        /* First initialization: not really a rehash, the new table simply
         * becomes ht[0] so that it can accept keys. */
        d->ht[0] = fresh;
    }
    return DICT_OK;
}

/* Expand the hash table if needed.
 *
 * Returns DICT_OK when nothing has to be done, otherwise the result of the
 * dictExpand() call that was issued. */
static int _dictExpandIfNeeded(dict *d)
{
    unsigned long buckets, entries;

    /* Incremental rehashing already in progress: nothing to do. */
    if (dictIsRehashing(d))
        return DICT_OK;

    /* An empty table is grown to the initial size. */
    buckets = d->ht[0].size;
    if (buckets == 0)
        return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Fewer entries than buckets: no growth needed. */
    entries = d->ht[0].used;
    if (entries < buckets)
        return DICT_OK;

    /* At least one entry per bucket. Grow to at least twice the number of
     * stored entries when resizing is allowed, or when the entries/buckets
     * ratio exceeds dict_force_resize_ratio. */
    if (dict_can_resize || entries / buckets > dict_force_resize_ratio)
        return dictExpand(d, entries * 2);

    return DICT_OK;
}

/* Returns the bucket index where `key` can be inserted, or -1 when the key
 * is already present or the table could not be expanded.
 *
 * While a rehash is in progress both tables are searched and the returned
 * index is the one computed for ht[1]; otherwise only ht[0] is examined. */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int hash, slot, t;
    dictEntry *node;

    /* Expand the hash table if needed. */
    if (_dictExpandIfNeeded(d) == DICT_ERR) return -1;

    /* Compute the key hash value. */
    hash = dictHashKey(d, key);

    t = 0;
    do {
        slot = hash & d->ht[t].sizemask;
        /* Walk the chain of this slot looking for an equal key. */
        for (node = d->ht[t].table[slot]; node; node = node->next) {
            if (dictCompareKeys(d, key, node->key)) return -1;
        }
        /* ht[1] only needs to be searched while rehashing. */
    } while (dictIsRehashing(d) && ++t <= 1);

    return slot;
}

/* Low-level insertion: creates a node for `key` and links it into the
 * dictionary without setting a value.
 *
 * Returns the new node, or NULL when _dictKeyIndex() reports -1 (the key
 * already exists or the table could not be expanded). */
dictEntry *dictAddRaw(dict *d, void *key)
{
    dictEntry *node;
    dictht *target;
    int slot;

    /* Take one incremental rehash step if a rehash is running. */
    if (dictIsRehashing(d)) {
        _dictRehashStep(d);
    }

    /* Find the slot for the new element; -1 means it cannot be added. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1) {
        return NULL;
    }

    /* While rehashing, new elements go to ht[1]; otherwise to ht[0]. */
    if (dictIsRehashing(d)) {
        target = &d->ht[1];
    } else {
        target = &d->ht[0];
    }

    /* Allocate the node and push it at the head of the slot's chain. */
    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Attach the key to the node. */
    dictSetKey(d, node, key);
    return node;
}

/* Add an element to the target hash table.
 *
 * Returns DICT_OK when the key/value pair was inserted and DICT_ERR when
 * dictAddRaw() could not create a node for `key`. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added;

    added = dictAddRaw(d, key);
    if (added) {
        /* Attach the value to the freshly created node. */
        dictSetVal(d, added, val);
        return DICT_OK;
    }
    return DICT_ERR;
}

字典Rehash解析

Rehash的觸發機制：當每次添加新元素時，都會對工作哈希表ht[0]進行檢查，如果used（哈希表中元素的數目）與size（桶的大小）比率ratio滿足以下任一條件，將激活字典的Rehash機制：ratio=used / size， ratio >= 1並且dict_can_resize 爲真；ratio 大於變量 dict_force_resize_ratio 。

Rehash執行過程：

創建一個大小至少爲ht[0].used兩倍的ht[1].table；將原ht[0].table中所有元素遷移到ht[1].table；釋放原來的ht[0].table，用ht[1]替換ht[0]，並將ht[1]重置爲空

漸進式Rehash主要由兩個函數來進行：

_dictRehashStep:當對字典進行添加、查找、刪除、隨機獲取元素都會執行一次，其每次在開始Rehash後，將ht[0].table的第一個不爲空的索引上的所有節點全部遷移到ht[1].table;

dictRehashMilliseconds:由Redis服務器常規任務程序(serverCron)執行，以毫秒爲單位，在一定時間內，以每次執行100步rehash操作。

Rehash操作核心函數：

/* Performs up to n steps of incremental rehashing. Each step moves every
 * node of the next non-empty bucket of ht[0] into ht[1].
 *
 * Returns 0 when no rehash is in progress, or when a step finds ht[0] empty
 * (ht[0] is then replaced by ht[1] and rehashing is switched off).
 * Returns 1 when all n steps ran without reaching that completion check. */
int dictRehash(dict *d, int n) {
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        if (d->ht[0].used == 0) { /* rehash finished */
            zfree(d->ht[0].table); /* release the old bucket array */
            d->ht[0] = d->ht[1]; /* ht[0] and ht[1] are structs, not pointers, so plain assignment replaces ht[0] */
            _dictReset(&d->ht[1]); /* reset ht[1]; presumably clears its table pointer - _dictReset is not shown here */
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned)d->rehashidx);
        /* Skip empty buckets until the first non-empty one. */
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        /* Head of that bucket's chain. */
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) { /* walk the chain */
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;

            /* Link the node at the head of the ht[1] bucket and update
             * both tables' counters. */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL; /* the old bucket is now empty */
        d->rehashidx++;
    }
    return 1;
}

小結

Redis中的字典數據結構使用哈希表來實現，用來存儲key-value鍵值元素；
字典使用兩個哈希表，一般只使用ht[0]，只有當Rehash時候才使用ht[1]；
哈希表採用鏈表的方式解決鍵碰撞問題；
Redis的Rehash操作是漸進式的，服務器程序會主動Rehash，在查找、添加、刪除元素等操作時也會在Rehash進行時執行一次rehash操作。

字典的內容實在太多，操作比較繁瑣，應該是Redis中最複雜的底層數據結構了，本文分析的絕對不夠深入，希望以後有時間再修改吧，暫時先這樣。到目前爲止，Redis六種內部數據結構，同時也是底層操作的實現講解全部結束，後面的文章將進入五種基本數據類型指令的實現，字符串（String）、哈希表（Hash）、列表（List）、集合（Set）、有序集合（Sorted Set）的各種指令的實現。

我自己對Redis2.8.2源碼的註釋，有時間找個機會放出來。

最後感謝黃健宏（huangz1990）的Redis設計與實現及其他對Redis2.6源碼的相關註釋對我在研究Redis2.8源碼方面的幫助。

Redis內部數據結構詳解之字典(dict)

字典數據結構

字典的API函數

創建新字典

字典添加元素

字典Rehash解析

小結

APUE學習筆記——第七章進程環境

Splay Tree源碼

APUE學習筆記——第八章進程控制

基於RABC權限管理的後臺管理項目新權限的設計思想

APUE學習筆記——第十二章線程控制

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結