🐬【MySQL技術導航】「索引技術」體驗前所未有的技術探險,看穿索引的本質和技術體系(上篇)

{"type":"doc","content":[{"type":"heading","attrs":{"align":null,"level":1},"content":[{"type":"text","text":"前提概要","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"本篇文章主要介紹了相關MySQL技術系列體系中,最重要的部分-索引,帶你從索引的本質(底層原理)、索引的類型、索引的原理、索引的數據結構,最後到索引的使用角度以及索引的優化,全方位360度去探索索引的奧祕!","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":1},"content":[{"type":"text","text":"數據庫類型","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"OLAP:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"聯機分析處理----對海量歷史數據進行分析,產生決策性的策略----數據倉庫—Hive","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"OLTP:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"聯機事務處理----要求很短時效內返回對應的結果----數據庫—關係型數據庫(mysql、oracle)","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":1},"content":[{"type":"text","text":"內容架構","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"磁盤角度去看索引機制(運作機制提升性能原理)","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引機制的分析和基本介紹","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引機制的分類和概念","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引本身的優缺點以及不同分類的優缺點","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":2},"content":[{"type":"text","text":"索引和磁盤的關係","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"數據讀取時主要時間開銷","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"從概念模型上來講,從磁盤讀出一條數據需要兩步:","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"將磁頭移動到數據所在的扇區,找到數據所在的頁,將這一頁數據加載到內存","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"在內存中,找到數據所在的偏移量,並返回該記錄","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"總結分析瓶頸點","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"這兩步中,第1步消耗的時間遠遠大於第2步,其原因是需要移動磁頭到給定扇區,時間開銷由尋道和延遲兩部分構成,通常在毫秒級。而從內存直接讀取數據,時間爲納秒級。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"優化的方式","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"主要的時間開銷來源於尋找數據所在的頁。","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"因此優化尋找數據所在頁的需求是非常緊急且重要的。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"數據量計算","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"對於給定大小的數據量(比如2^ 20條)、平均每條記錄所佔用的內存空間(比如2^ 7byte)和每一頁的大小(如4kb),那麼平均每頁的記錄數和所需的數據頁數就可以確定,分別是(32(2^12/2^7)條和2^15(2^20/2^5)頁)。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"傳統暴力(順序型讀寫)X","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"如果使用遍歷的方式,將每一頁加載到內存然後搜索,那麼最壞情況下需要1萬次的讀取數據頁的操作才能搜索到給定記錄。這就好像我有一本書,我要從第一頁一指翻到最後一頁,才能找到我要讀到的某一行文字,這顯然不太高效。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"索引機制(半隨機性讀寫)√","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"優化的辦法是建立索引,將數據按索引排序,對於每一條數據,我可以建立一個索引+指針來指向該數據的起始地址","attrs":{}},{"type":"text","text":"。(","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"空間來換時間","attrs":{}},{"type":"text","text":")","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"那麼我就可以將這個索引存起來,並使用指針指向該記錄。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"現在需要 2^ 20個索引,假設","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"每條索引和指針共佔8byte空間","attrs":{}},{"type":"text","text":",","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"那麼所需的索引頁爲2^ 11(2^ 20 * 8 / 4kb)頁。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"此時如果遍歷索引頁,然後再去找到對應的記錄,最壞需要2^11 + 1次讀取,顯然效率高了些","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"索引升級之多級索引化(全隨機性讀寫)","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"如果利用遞歸的思想,對索引頁再建索引(","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"是不是在內存管理中有類似的想法,對頁表建立頁表","attrs":{}},{"type":"text","text":")。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"比如,對這2^20條數據的索引建立索引。對於給定的一頁,它的第一條索引是確定的","attrs":{}},{"type":"text","text":"。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"numberedlist","attrs":{"start":1,"normalizeStart":1},"content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":1,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"那麼將第一條索引再存到另一個索引中(索引的索引),假設索引都連續,那麼對於給定的一個索引,就可以根據索引的索引找到這個索引所在的頁","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":2,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"然後找到這個索引,根據這個索引指向的數據去讀取數據。比如:檢索索引15,它在0到31之間,便可以知道它在第一個數據頁上,通過之前保存起來的指針即可訪問到該頁","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":3,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"由於更上層的索引更稀疏了,因此可以保存更多的索引,使每一頁能表示更多的數據。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":4,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"遞歸到最終只需1頁就能保存某一級索引的時候,就可以停止遞歸了","attrs":{}},{"type":"text","text":"。","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"此時根據索引來訪問數據,只需要查每一級索引中的某一頁,就可以確定下一級索引所在的頁。直到最底一層的索引,它指向了數據","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":5,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"那麼需要讀取的總頁數爲索引級數+1。這個次數遠遠小於總數據頁數","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"使用索引的好處在於","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"第一,將數據進行了分桶,對於落在桶內區間段的索引,都可以通過一個指針訪問到對應的數據頁","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"第二,索引所佔的內存要遠遠小於1條記錄所佔的空間,因此個索引頁能保存非常多的索引","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/a0/a09614c192a6c926892672f306fceeb8.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"索引機制總結","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"一句話:減少加載磁盤的次數(尋道的時間和次數),且索引佔用數據較少,所以可以存放更多的數據內存(提高檢索效率)。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"磁盤預讀","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"去磁盤讀取數據,是用多少讀取多少嗎?","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"內存和磁盤發生數據交互的時候,一般情況下有一個最小的邏輯單元:頁(Page)。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"頁一般由操作系統覺得大小,4k或8k,而我們在進行數據交互的時候,可以取頁的整數倍來讀取。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"innodb存儲引擎每次讀取數據,讀取16k","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"局部性原理:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"數據和程序都有聚集成羣的傾向,同時之前被訪問過的數據很可能再次被查詢,空間局部性,時間局部性","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"索引存儲","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"磁盤,查詢數據的時候會優先將索引加載到內存中","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引在存儲的時候,需要什麼信息?需要存儲存儲什麼字段值?","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"key:實際數據行中存儲的索引鍵。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"文件地址,所在磁盤文件地址","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"offset:偏移量","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":2},"content":[{"type":"text","text":"索引系統性介紹","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"索引的定義","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引(index)是幫助MySQL高效獲取數據的數據結構(有序)","attrs":{}},{"type":"text","text":"。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"在數據之外,數據庫系統還維護着滿足特定查找算法的數據結構,這些數據結構以某種方式引用(指向)數據,這樣就可以在這些數據結構上實現高級查找算法,這種數據結構就是索引。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"索引的介紹","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引是一種用於快速查詢和檢索數據的數據結構","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引的作用就相當於目錄的作用,可以類比字典、 火車站的車次表、圖書的目錄等","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/9e/9e54e041151b13dd45ad15cadcd163ef.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引是在存儲【引擎層】實現的,而不是在服務器層實現的,所以不同存儲引擎具有不同的索引類型和實現","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"可以簡單的理解爲“排好序的快速查找數據結構”,數據本身之外,數據庫還維護者一個滿足特定查找算法的數據結構,這些數據結構以某種方式引用(指向)數據,這樣就可以在這些數據結構上實現高級查找算法,這種數據結構,就是索引。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"索引本身也很大,不可能全部存儲在內存中,一般以索引文件的形式存儲在磁盤上,(","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"InnoDB數據表上的索引是表空間的一個組成部分","attrs":{}},{"type":"text","text":"),它們包含着對數據表裏所有記錄的引用指針。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/ab/ab8c223423bda4fdb40f2c2816bf67a1.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"左邊是數據表,一共有兩列七行記錄,最左邊的0x07格式的數據是物理地址(注意邏輯上相鄰的記錄在磁盤上也並不是一定物理相鄰的)","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"爲了加快Col 2的查找,可以維護一個右邊所示的二叉查找樹,每個節點分別包含索引鍵值和一個指向對應數據的物理地址的指針,這樣就可以運用二叉查找快速獲取到對應的數據了","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"一般來說索引本身也很大,不可能全部存儲在內存中,因此索引往往以索引文件的形式存儲在磁盤上。建立索引是數據庫中用來提高性能的最常用的方式。","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"索引優缺點","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"優勢","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"效率:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"大大提高數據檢索效率(減少了檢索的數據量以及次數),降低數據庫IO成本,這也是創建索引的最主要原因","attrs":{}},{"type":"text","text":";","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"性能:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"降低數據排序的成本,降低CPU的消耗,提高系統性能","attrs":{}},{"type":"text","text":";","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"索引大大減小了服務器需要掃描的數據量","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"索引可以幫助服務器避免排序和臨時表","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"索引可以將隨機IO變成順序IO","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"在MySQL5.1和更新的版本中,InnoDB可以在服務器端過濾掉行後就釋放鎖,但在早期的MySQL版本中,InnoDB直到事務提交時纔會解鎖。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"對不需要的元組的加鎖,會增加鎖的開銷,降低併發性。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"InnoDB僅對需要訪問的元組加鎖,而索引能夠減少InnoDB訪問的元組數。但是隻有在存儲引擎層過濾掉那些不需要的數據才能達到這種目的。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"一旦索引不允許InnoDB那樣做(即索引達不到過濾的目的),MySQL服務器只能對InnoDB返回的數據進行WHERE操作,已經無法避免對那些元組加鎖了。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"如果查詢不能使用索引,MySQL會進行全表掃描,並鎖住每一個元組,不管是否真正需要。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"劣勢","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"空間方面:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引也是一張表,保存了主鍵和索引字段,並指向實體表的記錄,所以索引也需要佔用內存(物理空間)。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"建立索引會佔用磁盤空間的索引文件。一般情況這個問題不太嚴重,但如果你在一個大表上創建了多種組合索引,索引文件的會膨脹很快","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"時間方面:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"創建索引和維護索引要耗費時間,具體地,當對錶中的數據進行增加、刪除和修改的時候,索引也要動態的維護,會降低增/改/刪的執行效率;","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"索引大大提高了查詢速度,同時卻會降低更新表的速度,如對錶進行INSERT、UPDATE和DELETE。因爲更新表時,MySQL不僅要保存數據,還要保存索引文件","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"注意要點","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"如果某個數據列包含許多重複的內容,爲它建立索引就沒有太大的實際效果。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"對於非常小的表,大部分情況下簡單的全表掃描更高效;","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"因此應該只爲最經常查詢和最經常排序的數據列建立索引","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"MySQL裏同一個數據表裏的索引總數限制爲16個。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"關於InnoDB、索引和鎖:InnoDB在二級索引上使用共享鎖(讀鎖),但訪問主鍵索引需要排他鎖(寫鎖)","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"索引的分類","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"存儲數據結構劃分","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"B-Tree索引(B-Tree或B+Tree索引),Hash索引,full-index全文索引,R-Tree索引,這裏所描述的是索引存儲時保存的形式。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"【從物理角度劃分】","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"聚集索引:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"即數據文件本身就是主鍵索引文件(InnoDB)","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"聚集索引(聚簇索引、Innodb):聚集索引即索引結構和數據一起存放的索引,主鍵索引屬於聚集索引。表中記錄的物理順序與鍵值的索引順序相同。 因爲真實數據的物理順序只有一種,所以一個表只能有一個聚集索引","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"InnoDB 引擎的表的 .ibd文件就包含了該表的索引和數據,對於InnoDB 引擎表來說,該表的索引(B+樹)的每個非葉子節點存儲索引,葉子節點存儲索引和索引對應的數據","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/d1/d1fd549fa3fd9e0793880ba8d2018ee7.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"link","attrs":{"href":"https://upload-images.jianshu.io/upload_images/26078969-3c1cf9cee4bc53b7.image?imageMogr2/auto-orient/strip%7CimageView2/2/w/850/format/webp","title":"","type":null},"content":[{"type":"text","text":"【圖片來源】","attrs":{}}]}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"非聚集索引(輔助索引):","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"即索引文件與數據文件是分離的(MyISAM),聚集索引和非聚集索引都是B+樹結構","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"非聚集索引即索引結構和數據分開存放的索引。二級索引屬於非聚集索引","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"記錄的物理順序與鍵值的索引順序不同。這也是非聚集索引與聚集索引的根本區別。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"表中記錄的物理順序與鍵值的索引順序不同。這也是非聚集索引與聚集索引的根本區別","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"非聚集索引的葉子節點並不一定存放數據的指針, 因爲二級索引的葉子節點就存放的是主鍵,根據主鍵再回表查數據","attrs":{}},{"type":"text","text":"。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"MYISAM引擎的表的.MYI 文件包含了表的索引, 該表的索引(B+樹)的每個葉子非葉子節點存儲索引, 葉子節點存儲索引和索引對應數據的指針,指向.MYD 文件的數據。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/db/dbdf86406c663c8857361f9cd4a93123.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"link","attrs":{"href":"https://upload-images.jianshu.io/upload_images/26078969-3c1cf9cee4bc53b7.image?imageMogr2/auto-orient/strip%7CimageView2/2/w/850/format/webp","title":"","type":null},"content":[{"type":"text","text":"【圖片來源】","attrs":{}}]}]},{"type":"heading","attrs":{"align":null,"level":5},"content":[{"type":"text","text":"聚集索引的優點","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"聚集索引的查詢速度非常的快,因爲整個 B+樹本身就是一顆多叉平衡樹,葉子節點也都是有序的,定位到索引的節點,就相當於定位到了數據。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":5},"content":[{"type":"text","text":"聚集索引的缺點","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"依賴於有序的數據","attrs":{}},{"type":"text","text":" :因爲 B+樹是多路平衡樹,如果索引的數據不是有序的,那麼就需要在插入時排序,如果數據是整型還好,否則類似於字符串或 UUID 這種又長又難比較的數據,插入或查找的速度肯定比較慢。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"更新代價大","attrs":{}},{"type":"text","text":" : 如果對索引列的數據被修改時,那麼對應的索引也將會被修改, 而且況聚集索引的葉子節點還存放着數據,修改代價肯定是較大的, 所以對於主鍵索引來說,主鍵一般都是不可被修改的。","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":5},"content":[{"type":"text","text":"非聚集索引的優點","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"更新代價比聚集索引要小 。因爲非聚集索引的葉子節點是不存放數據的","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":5},"content":[{"type":"text","text":"非聚集索引的缺點:","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"跟聚集索引一樣,非聚集索引也依賴於有序的數據","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"可能會二次查詢(回表) :這應該是非聚集索引最大的缺點了。當查到索引對應的指針或主鍵後,可能還需要根據指針或主鍵再到數據文件或表中查詢","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"【從功能角度劃分】","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"普通索引(單列索引):","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"每個索引只包含單個列,一個表可以有多個單列索引,僅加速查詢;","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"唯一索引:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"加速查詢 + 列值唯一(可以有null)","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"主鍵索引:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"加速查詢 + 列值唯一(不可以有null)+ 表中只有一個,在 mysql 的 InnoDB 的表中,當沒有顯示的指定表的主鍵時,InnoDB 會自動先檢查表中是否有唯一索引的字段,如果有,則選擇該字段爲默認的主鍵,否則 InnoDB 將會自動創建一個6Byte的自增主鍵","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"組合/複合索引(多列索引):","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"多列值組成一個索引,專門用於組合搜索,其效率大於索引合併(即使用多個單列索引組合搜索)","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":5},"content":[{"type":"text","text":"主鍵與唯一索引的區別","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"主鍵是一種約束,目的是對這個表的某一列進行限制;唯一索引是一種索引,目的是爲了加速查詢;","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"主鍵列不允許爲空值,而唯一索引列可以爲空值(null)","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"一個表中最多隻能有一個主鍵,但是可以包含多個唯一索引","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"主鍵一定是唯一性索引,唯一性索引並不一定就是主鍵","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":4},"content":[{"type":"text","text":"【從特性角度劃分】","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"MySQL目前主要有以下幾種索引類型:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"B+Tree 索引、哈希索引、全文索引(full-index)與空間數據索引(R-Tree)","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"B+Tree 索引:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"是大多數 MySQL 存儲引擎的默認索引類型,不需進行全表掃描,只需對樹進行搜索,所以查找速度快很多; B+ Tree 的有序性,所以除了用於查找,還可以用於排序和分組","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/b5/b5bb19746299e8df5ed3d0ee1b58774e.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"horizontalrule","attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"B+樹把數據全放在了葉子節點中,葉子節點之間使用雙向指針連接,最底層的葉子節點形成了一個雙向有序鏈表。 例如: 查詢範圍 select * from table where id between 11 and 35?","attrs":{}}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/ef/ef1bf5ada597d26e1b25e1f0d81308a5.png","alt":null,"title":null,"style":null,"href":null,"fromPaste":false,"pastePass":false}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"第一步,將磁盤一加載到內存中,發現11<28,尋找地址磁盤2","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"第二步,將磁盤二加載到內存中,發現10>11>17,尋找地址磁盤5","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"第三步,將磁盤五加載到內存中,發現11=11,讀取data","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"第四步,繼續向右查詢,讀取磁盤5,發現35=35,讀取11-35之間數據,結束 由此可見,這樣的範圍查詢比B樹速度提高了不少。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"哈希索引:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"哈希索引能以 O(1) 時間進行查找,一次定位,不需要像樹形索引逐層查找,具有極高的效率","attrs":{}},{"type":"text","text":"。","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"哈希表這種結構適用於只有等值查詢的場景,比如 Memcached 及其他一些 NoSQL 引擎。但是失去了有序性","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"對排序與組合索引效率不高;","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"只支持精確查找(等值查詢,如=、in()、<=>),無法用於部分查找和範圍查找。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"InnoDB 存儲引擎有一個特殊的功能叫“自適應哈希索引”,當某個索引值被使用的非常頻繁時,會在 B+Tree 索引之上再創建一個哈希索引,這樣就讓 B+Tree 索引具有哈希索引的一些優點,比如快速的哈希查找。","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/d6/d6aa2038ad69c84935b024c47d6328f6.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"全文索引:","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"MyISAM 存儲引擎支持全文索引,用於查找文本中的關鍵詞,而不是直接比較是否相等","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":1,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"查找條件使用 MATCH AGAINST,而不是普通的 WHERE。全文索引使用倒排索引實現,它記錄着關鍵詞到其所在文檔的映射。","attrs":{}},{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"InnoDB 存儲引擎在 MySQL 5.6.4 版本中也開始支持全文索引","attrs":{}},{"type":"text","text":"。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"空間數據索引:MyISAM 存儲引擎支持空間數據索引(R-Tree),可以用於地理數據存儲。空間數據索引會從所有維度來索引數據,可以有效地使用任意維度來進行組合查詢。必須使用 GIS 相關的函數來維護數據","attrs":{}}]}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"樹(二叉樹、紅黑樹、AVL樹、B樹、B+樹),這裏爲什麼索引默認用 B+樹,而不用B樹、二叉樹、hash和紅黑樹呢?","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"爲什麼不用B-tree:","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"B+樹的磁盤讀寫代價更低:B+樹的內部節點並沒有指向關鍵字具體信息的指針(B樹每個節點都存儲數據,B+樹只有葉子節點才存儲節點),所以查找相同數據量的情況下,B樹的高度更高,IO更頻繁。數據庫索引是存儲在磁盤上的,當數據量大時,就不能把整個索引全部加載到內存了,只能逐一加載每一個磁盤頁(對應索引樹的節點)。","attrs":{}}]}]},{"type":"listitem","attrs":{"listStyle":null},"content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"由於B+樹的數據都存儲在葉子結點中,分支結點均爲索引,方便掃庫,只需要掃一遍葉子結點即可(葉子節點使用雙向鏈表連接)。但是B樹因爲其分支結點同樣存儲着數據,我們要找到具體的數據,需要進行一次中序遍歷按序來找,所以B+樹更加適合在區間查詢的情況,通常B+樹用於數據庫索引。","attrs":{}}]}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"爲什麼不用Hash方式:","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"因爲Hash索引底層是哈希表,哈希表是一種以key-value存儲數據的結構,只適合等值查詢(等值查詢效率高),如=、in()、<=>多個數據在存儲關係上是完全沒有任何順序關係的,所以對於區間查詢是無法直接通過索引查詢的,就需要全表掃描,即不支持範圍查詢 。","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"哈希索引不支持多列聯合索引的最左匹配規則,如果有大量重複鍵值得情況下,哈希索引的效率會很低,因爲存在哈希碰撞問題。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"爲什麼不用二叉樹方式:","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"樹的高度不均勻,不能自平衡,查找效率跟數據有關(樹的高度),並且IO代價高。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"什麼不用紅黑樹","attrs":{}}]},{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"樹的高度隨着數據量增加而增加,IO代價高。數據庫系統的設計者巧妙利用了磁盤預讀原理,將一個節點的大小設爲等於一個頁,這樣每個節點只需要一次I/O就可以完全載入","attrs":{}},{"type":"text","text":"。","attrs":{}}]}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":1},"content":[{"type":"text","text":"期待下篇","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"哎 支持太多了,期待下篇的介紹說明,未完待續 ......","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}}]}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章