以前聽說過c語言有四書五經,同事推薦的《C語言接口與實現-創建可重用軟件的技術》,剛接觸就有點懵圈,作爲菜鳥的看不懂,於是每一章節重複的看,代碼一條條的過。對於第二章原子,目前只能做到了解皮毛,這裏記錄下來以備查閱和後續補充。源代碼從博主Character_Painter的文章中扒過來的,如果有侵權請聯繫。附上鍊接http://blog.csdn.net/javagoingon/article/details/51107102。
atom.h頭文件,比較簡單,無需註釋
#ifndef ATOM_INCLUDED
#define ATOM_INCLUDED
extern int Atom_length(const char *str);
extern const char *Atom_new (const char *str, int len);
extern const char *Atom_string(const char *str);
extern const char *Atom_int (long n);
#endif
atom.c源文件
#include<stdio.h>
#include<assert.h>
#include<string.h>
#include<stdlib.h>
#define _CRT_SECURE_NO_WARNINGS
#define LONG_MAX1 1000 // 在limits.h中LONG_MAX 2147483647L LONG_MIN (-LONG_MAX-1),
#define LONG_MIN1 100
#define NELEMS(x) ((sizeof (x))/(sizeof ((x)[0]))) // 計算數組長度的方法,但數組元素作爲函數參數時,此方法不可用。
static struct atom {
struct atom *link;
int len;
char *str;
} *buckets[2048];
// 定義了一個長度爲2048的指針數組,每個元素是atom *
// 散列數組,用於計算hash表的key值,作爲buckets的下標索引值,這個理解的不深....
static unsigned long scatter[] ={ 2078917053, 143302914, 1027100827, 1953210302, 755253631, 2002600785, 1405390230, 45248011, 1099951567, 433832350, 2018585307, 438263339, 813528929, 1703199216, 618906479, 573714703, 766270699, 275680090, 1510320440, 1583583926, 1723401032, 1965443329, 1098183682, 1636505764, 980071615, 1011597961, 643279273, 1315461275, 157584038, 1069844923, 471560540, 89017443, 1213147837, 1498661368, 2042227746, 1968401469, 1353778505, 1300134328, 2013649480, 306246424, 1733966678, 1884751139, 744509763, 400011959, 1440466707, 1363416242, 973726663, 59253759, 1639096332, 336563455, 1642837685, 1215013716, 154523136, 593537720, 704035832, 1134594751, 1605135681, 1347315106, 302572379, 1762719719, 269676381, 774132919, 1851737163, 1482824219, 125310639, 1746481261, 1303742040, 1479089144, 899131941, 1169907872, 1785335569, 485614972, 907175364, 382361684, 885626931, 200158423, 1745777927, 1859353594, 259412182, 1237390611, 48433401, 1902249868, 304920680, 202956538, 348303940, 1008956512, 1337551289, 1953439621, 208787970, 1640123668, 1568675693, 478464352, 266772940, 1272929208, 1961288571, 392083579, 871926821, 1117546963, 1871172724, 1771058762, 139971187, 1509024645, 109190086, 1047146551, 1891386329, 994817018, 1247304975, 1489680608, 706686964, 1506717157, 579587572, 755120366, 1261483377, 884508252, 958076904, 1609787317, 1893464764, 148144545, 1415743291, 2102252735, 1788268214, 836935336, 433233439, 2055041154, 2109864544, 247038362, 299641085, 834307717, 1364585325, 23330161, 457882831, 1504556512, 1532354806, 567072918, 404219416, 1276257488, 1561889936, 1651524391, 618454448, 121093252, 1010757900, 1198042020, 876213618, 124757630, 2082550272, 1834290522, 1734544947, 1828531389, 1982435068, 1002804590, 1783300476, 1623219634, 1839739926, 69050267, 1530777140, 1802120822, 316088629, 1830418225, 488944891, 1680673954, 1853748387, 946827723, 1037746818, 1238619545, 1513900641, 1441966234, 367393385, 928306929, 946006977, 985847834, 1049400181, 1956764878, 36406206, 1925613800, 2081522508, 2118956479, 1612420674, 1668583807, 1800004220, 1447372094, 523904750, 1435821048, 923108080, 216161028, 1504871315, 306401572, 2018281851, 1820959944, 2136819798, 359743094, 1354150250, 1843084537, 1306570817, 244413420, 934220434, 672987810, 1686379655, 1301613820, 1601294739, 484902984, 139978006, 503211273, 294184214, 176384212, 281341425, 228223074, 147857043, 1893762099, 1896806882, 1947861263, 1193650546, 273227984, 1236198663, 2116758626, 489389012, 593586330, 275676551, 360187215, 267062626, 265012701, 719930310, 1621212876, 2108097238, 2026501127, 1865626297, 894834024, 552005290, 1404522304, 48964196, 5816381, 1889425288, 188942202, 509027654, 36125855, 365326415, 790369079, 264348929, 513183458, 536647531, 13672163, 313561074, 1730298077, 286900147, 1549759737, 1699573055, 776289160, 2143346068, 1975249606, 1136476375, 262925046, 92778659, 1856406685, 1884137923, 53392249, 1735424165, 1602280572};
//創建字符串的原子節點,添加到buckets中的桶中去,通過計算得到hash的h值添加到對應的原子atom列表中,如果已經存在此str內容,則直接返回已存在的節點內容。
const char * Atom_new(const char *str, int len)
{
unsigned long h;
int i;
struct atom *p;
assert(str);
assert(len >= 0);
for (h = 0, i = 0; i < len; i++)
{
h = (h << 1) + scatter[(unsigned char)str[i]];
}
h &= NELEMS(buckets) - 1;
// 計算出hash索引值,落在0-2047中
// buckets[h]爲鏈表頭,對每個節點p進行判斷,看內容是否與傳入參數str相同,直到找到或者p=NULL
for (p = buckets[h]; p; p = p->link)
{
if (len == p->len)
{
for (i = 0; i < len && p->str[i] == str[i];) //從0開始挨個比對節點p->str中的元素與傳入參數str中的元素是否相等,如果相等比對一下個i {
i++;
}
if (i == len)
// 如果i==len,證明p->str中的內容與傳入的str完全一致,不用重新new一個,直接返回p->str指針
{
return p->str;
}
}
// 如果在節點中未找到對應的str內容,則重新分配空間new一個,這裏分配了atom原子結構體長度+len+1,atom結鉤體是每個節點的表頭
// 包含struct atom *link, int len和char * str,如果是32位系統則長度是12,可以通過sizeof(*p)或者NELEMS打印確認,
// 長度len是傳入參數str內容的長度,+1是結尾補充'\0'的分配的1字節空間
p = malloc(sizeof (*p) + len + 1);
p->len = len;
p->str = (char *)(p + 1);
// p+1的值剛好是跳過表頭12的地址,即之前分配的len+1的空間,用於存儲str的內容。
if (len > 0)
{
memcpy(p->str, str, len);
}
p->str[len] = '\0'; p->link = buckets[h];
// 將新創建的節點插入到buckets[h]的鏈表中,插入方式爲替換鏈表head節點,新建單元作爲head
buckets[h] = p;
return p->str;
}
const char *Atom_int(long n)
{
char str[43];// 書中解釋了43爲魔數,暫時沒有去了解,這樣的設計思路值得借鑑
char *s = str + sizeof(str); unsigned long m;
// 簡單一點就是需要處理有符號書,比如char的範圍是-128~127,CHAR_MIN=-128 CHAR_MAX=127, 則如果n=-128, m=127+1,然後轉換爲字符串後再加上‘-’號 // 此處用的是long道理一樣,int和char型小於範圍小於long,所以如果遇到-128直接執行m=-n;
// 符號'-'在do while循環中不好處理,單獨通過if(n<0)來判斷,補充上符號。
if (n == LONG_MIN)
{
m = LONG_MAX + 1UL;
}else if (n < 0)
{
m = -n;
}else
{
m = n;
}
do{
*--s = m % 10 + '0';
}while ((m /= 10) > 0);
if (n < 0)
{
*--s = '-';
}
return Atom_new(s, (str + sizeof(str) - s));
}
// string類型的new,簡單不解釋
const char *Atom_string(const char *str)
{
assert(str);
return Atom_new(str, strlen(str));
}
// 遍歷查找對應str的長度,不清楚這樣做的含義,爲啥直接計算str的長度?
int Atom_length(const char *str)
{
struct atom *p;
int i;
assert(str);
for (i = 0; i < NELEMS(buckets); i++)
{
for (p = buckets[i]; p; p = p->link)
if (p->str == str)
{
return p->len;
}
}
return 0;
}
int main()
{
printf("%d\n",Atom1_length(Atom1_int(10000000)));
return 0;
}