1. 版本
1)操作系統版本
cat /proc/version
Linux version 3.10.0-957.5.1.el7.x86_64 ([email protected]) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC) ) #1
2)數據庫版本
mysql --version
mysql Ver 14.14 Distrib 5.7.22, for linux-glibc2.12 (x86_64) using EditLine wrapper
2. 問題描述
2.1 問題發現
有如下一則 limit 查詢,正常執行時間爲800-900 ms,有什麼優化方案?
select * from test_table where merchant_id = 43 and status = 'SUCCESS' order by salary_id desc limit 900000,10;
表結構如下:
CREATE TABLE `test_table` (
`salary_id` int(11) NOT NULL AUTO_INCREMENT,
`salary_no` varchar(32) DEFAULT NULL,
`merchant_id` int(11) NOT NULL,
`customer_id` int(11) DEFAULT NULL,
`wallet_no` varchar(32) DEFAULT NULL,
`batch_num` varchar(32) NOT NULL,
`batch_id` int(11) DEFAULT NULL,
`biz_type` varchar(32) NOT NULL,
`name` varchar(32) DEFAULT NULL,
`id_card` varchar(18) NOT NULL,
`account` varchar(64) DEFAULT NULL,
`bank_name` varchar(128) DEFAULT NULL,
`apply_amount` int(11) NOT NULL DEFAULT '0',
`origin_amount` int(11) unsigned NOT NULL DEFAULT '0',
`origin_fee` int(11) unsigned NOT NULL DEFAULT '0',
`actual_amount` int(11) unsigned NOT NULL DEFAULT '0',
`actual_fee` int(11) unsigned NOT NULL DEFAULT '0',
`need_pay` tinyint(1) DEFAULT '1',
`factory_id` int(11) unsigned NOT NULL,
`factory_name` varchar(64) DEFAULT NULL,
`salary_time` datetime DEFAULT NULL,
`status` varchar(32) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`user_mobile` varchar(32) DEFAULT NULL,
`pay_note` varchar(64) DEFAULT NULL,
`remark` varchar(128) DEFAULT NULL,
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`salary_id`),
KEY `idx_id_card` (`id_card`),
KEY `idx_time` (`salary_time`),
KEY `idx_salary_no` (`salary_no`) USING BTREE,
KEY `idx_mix` (`merchant_id`,`batch_num`,`need_pay`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=773795 DEFAULT CHARSET=utf8mb4;
3.優化方案
先給出優化方案,然後再詳細解釋下爲什麼這麼做。優化方案由如下兩部分組成:
1)改寫sql如下:
SELECT *
FROM test_table a
INNER JOIN
(SELECT salary_id
FROM test_table
WHERE merchant_id = 43
AND STATUS = 'SUCCESS'
ORDER BY salary_id DESC
LIMIT 900000,
10) b ON a.salary_id = b.salary_id;
2)添加 (merchant_id,STATUS) 組合索引
alter table test_table add index idx_merchant_id_status(merchant_id,STATUS);
4.優化方案原理分析
1) 未添加索引前,兩種sql寫法的執行計劃及執行時間
mysql> explain select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10;
+----+-------------+-------------------+------+---------------+---------+---------+-------+--------+-----------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+------+---------------+---------+---------+-------+--------+-----------------------------+
| 1 | SIMPLE | test_table | ref | idx_mix | idx_mix | 4 | const | 356237 | Using where; Using filesort |
+----+-------------+-------------------+------+---------------+---------+---------+-------+--------+-----------------------------+
1 row in set (0.01 sec)
mysql> explain
-> SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 90000,
-> 10) b ON a.salary_id = b.salary_id;
+----+-------------+-------------------+--------+---------------+---------+---------+-------------+--------+-----------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+--------+---------------+---------+---------+-------------+--------+-----------------------------+
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 90010 | NULL |
| 1 | PRIMARY | a | eq_ref | PRIMARY | PRIMARY | 4 | b.salary_id | 1 | NULL |
| 2 | DERIVED | test_table | ref | idx_mix | idx_mix | 4 | const | 356237 | Using where; Using filesort |
+----+-------------+-------------------+--------+---------------+---------+---------+-------------+--------+-----------------------------+
3 rows in set (0.00 sec)
mysql> select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10
-> ;
......(結果省略)
......
......
10 rows in set (0.82 sec)
mysql> SELECT * FROM test_table a INNER JOIN (SELECT salary_id FROM test_table WHERE merchant_id = 43 AND STATUS = 'SUCCESS' ORDER BY salary_id DESC LIMIT 90000, 10) b ON a.salary_id = b.salary_id;
......(結果省略)
......
......
10 rows in set (0.52 sec)
2)添加索引後,兩種sql寫法的執行計劃及執行時間
mysql> explain select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10;
+----+-------------+-------------------+------+--------------------------------+------------------------+---------+-------------+--------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+------+--------------------------------+------------------------+---------+-------------+--------+-------------+
| 1 | SIMPLE | test_table | ref | idx_mix,idx_merchant_id_status | idx_merchant_id_status | 38 | const,const | 350286 | Using where |
+----+-------------+-------------------+------+--------------------------------+------------------------+---------+-------------+--------+-------------+
1 row in set (0.00 sec)
mysql> explain SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 90000,
-> 10) b ON a.salary_id = b.salary_id;
+----+-------------+-------------------+--------+--------------------------------+------------------------+---------+-------------+--------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+--------+--------------------------------+------------------------+---------+-------------+--------+--------------------------+
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 90010 | NULL |
| 1 | PRIMARY | a | eq_ref | PRIMARY | PRIMARY | 4 | b.salary_id | 1 | NULL |
| 2 | DERIVED | test_table | ref | idx_mix,idx_merchant_id_status | idx_merchant_id_status | 38 | const,const | 350286 | Using where; Using index |
+----+-------------+-------------------+--------+--------------------------------+------------------------+---------+-------------+--------+--------------------------+
3 rows in set (0.00 sec)
mysql> select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10
-> ;
......(搜索結果省略)
......
......
10 rows in set (0.21 sec)
mysql> SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 90000,
-> 10) b ON a.salary_id = b.salary_id;
......(搜索結果省略)
......
......
10 rows in set (0.05 sec)
##爲什麼改成子查詢加關聯查詢以後效率提高?爲什麼添加了組合索引效率就提高?
3)未添加 (merchant_id,STATUS) 組合索引前兩個sql執行計劃解析
原始 sql 走 idx_mix 索引,需要掃描 356237 條索引記錄,並回表356237次拿出記錄的所有字段(如果表中有這麼多符合條件的記錄),然後通過主鍵id進行排序(通常索引上發現一條匹配的記錄就進行一次回表,可以通過MRR進行優化),取出最後10條
NOTE:因爲sql需要根據主鍵排序並且進行limit 過濾,但是表中目前只有 idx_mix 這一個索引可用(其中 merchant_id 與隱含的主鍵之間還隔着 batch_num、need_pay 兩列,merchant_id 相同的索引記錄並不按主鍵有序),所以需要掃描所有符合merchant_id=43的記錄並進行回表,然後對所有符合條件的記錄進行排序,再進行limit過濾。如果表中有(merchant_id,STATUS)組合索引,那麼只需要掃描90010次索引並回表90010次,且無需排序(索引組織表的主鍵是有序的,所以創建的二級索引中,當二級索引所有列都做等值匹配時,最後回表時主鍵肯定是順序的)。如果表中有(merchant_id)的單列索引,那麼也無需排序,且只要找到90010條符合 status 條件的記錄就會停止,無需訪問所有的merchant_id=43的記錄。下面是添加(merchant_id,STATUS)組合索引前後,相關查詢慢日誌,體現了表掃描的具體行數(最後一條是添加了組合索引的慢查,如前文推斷只掃描了90010條記錄,當然也有可能出現掃描行數少於90010的情況)
mysql> select * from mysql.slow_log;
+----------------------------+--------------------------------+-----------------+-----------------+-----------+---------------+-------------------------+----------------+-----------+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+
| start_time | user_host | query_time | lock_time | rows_sent | rows_examined | db | last_insert_id | insert_id | server_id | sql_text | thread_id |
+----------------------------+--------------------------------+-----------------+-----------------+-----------+---------------+-------------------------+----------------+-----------+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+
| 2020-03-18 14:32:17.000000 | xxx@yyy | 00:00:00.000000 | 00:00:00.000000 | 10 | 346899 | xxx | 0 | 0 | 0 | select * from test_table where merchant_id = 43 and status = 'SUCCESS' order by salary_id desc limit 90000,10 | 33956 |
| 2020-03-18 14:32:44.000000 | xxx@yyy | 00:00:00.000000 | 00:00:00.000000 | 10 | 346919 | xxx | 0 | 0 | 0 | SELECT * FROM test_table a INNER JOIN (SELECT salary_id FROM test_table WHERE merchant_id = 43 AND STATUS = 'SUCCESS' ORDER BY salary_id DESC LIMIT 90000, 10) b ON a.salary_id = b.salary_id | 33956 |
| 2020-03-18 14:35:54.000000 | xxx@yyy | 00:00:00.000000 | 00:00:00.000000 | 10 | 90010 | xxx | 0 | 0 | 0 | select * from test_table where merchant_id = 43 and status = 'SUCCESS' order by salary_id desc limit 90000,10 | 33956 |
+----------------------------+--------------------------------+-----------------+-----------------+-----------+---------------+-------------------------+----------------+-----------+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+
4 rows in set (0.00 sec)
原始 sql 改寫後,子查詢需要掃描 356237 條索引記錄,並取符合條件的最後10條索引記錄中的主鍵跟 a 表進行關聯,通過a表的主鍵訪問10條記錄(拿出所有字段)
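## 原始 sql 改寫後多了一個關聯操作。子查詢同樣要掃描 356237 條索引記錄,並且因爲 idx_mix 不包含 status 列,仍需回表判斷 status(執行計劃的 Extra 中沒有 Using index);但參與排序的只有 salary_id 一列,不需要讓每條記錄的所有字段都參與排序,最後只需按 10 個主鍵取出完整記錄。所以效率高於改寫前的sql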
4)加了 (merchant_id,STATUS) 組合索引後兩個執行計劃解析
原始sql走 idx_merchant_id_status 索引,需要訪問 90010 條索引記錄,並回表 90010 次拿出記錄的所有字段(如果表中有這麼多符合條件的記錄),並且消除了排序過程(排序過程爲什麼會被消除前文已經解釋了),取出最後10條。
## 我們可以看到只是添加索引,並未修改語句的情況下,mysql的執行效率提高了4倍左右,這是因爲添加該組合索引後,消除了排序過程,並且掃描的記錄數和回表次數從30多萬次減少到90010次
改寫後的 sql 子查詢 需要掃描90010條記錄,並取符合條件的最後10條索引記錄中的主鍵跟 a 表進行關聯,通過a表的主鍵訪問10條記錄(拿出所有字段)。
##因爲子查詢使用了覆蓋索引,並且取消了排序,減少了掃描的行數,所以效率更高。
5. 總結分析
對比可以看到,未添加組合索引時,改寫前的sql同改寫後的sql掃描的記錄數一樣,但是改寫前的sql需要拿出相關記錄的所有字段(查出所有符合條件的記錄並排序後再進行 limit 過濾),而改寫後的子查詢只需要掃描索引、拿出salary_id字段並排序,改寫後sql執行效率提高程度和表中行的平均長度成正比。這時前者執行時間是0.82秒,後者執行時間是0.52秒。
當加上(merchant_id,STATUS)索引,對於改寫前的sql來說消除了排序,執行時間減少到0.21秒。對於改寫後的sql 來說因爲1.消除了排序,2.使用了覆蓋索引(不用回表;減少了引擎層同server層的交互),執行時間減少到0.05s
添加(merchant_id,STATUS)索引前查看兩個sql執行後的profile如下:
可以看到 Creating sort index 部分耗時最久,所以消除排序對sql執行性能影響最明顯
mysql> set profiling=1;
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql> SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 900090,
-> 10) b ON a.salary_id = b.salary_id;
Empty set (0.66 sec)
mysql>
mysql> select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 900090,10;
Empty set (0.82 sec)
mysql> show profile for query 1;
+----------------------+----------+
| Status | Duration |
+----------------------+----------+
| starting | 0.000141 |
| checking permissions | 0.000008 |
| checking permissions | 0.000008 |
| checking permissions | 0.000003 |
| checking permissions | 0.000004 |
| Opening tables | 0.001994 |
| init | 0.000109 |
| System lock | 0.000015 |
| optimizing | 0.000008 |
| optimizing | 0.000034 |
| statistics | 0.002187 |
| preparing | 0.000054 |
| Sorting result | 0.000057 |
| statistics | 0.000072 |
| preparing | 0.000020 |
| executing | 0.000018 |
| Sending data | 0.000018 |
| executing | 0.000003 |
| Sending data | 0.000003 |
| Creating sort index | 0.652778 |
| end | 0.000022 |
| query end | 0.000016 |
| closing tables | 0.000005 |
| removing tmp table | 0.001012 |
| closing tables | 0.000034 |
| freeing items | 0.000068 |
| cleaning up | 0.000274 |
+----------------------+----------+
27 rows in set, 1 warning (0.00 sec)
mysql> show profile for query 2;
+----------------------+----------+
| Status | Duration |
+----------------------+----------+
| starting | 0.000093 |
| checking permissions | 0.000014 |
| checking permissions | 0.000004 |
| Opening tables | 0.000023 |
| init | 0.000129 |
| System lock | 0.000020 |
| optimizing | 0.000037 |
| statistics | 0.000145 |
| preparing | 0.000045 |
| Sorting result | 0.000009 |
| executing | 0.000003 |
| Sending data | 0.000017 |
| Creating sort index | 0.820004 |
| end | 0.000017 |
| query end | 0.000010 |
| closing tables | 0.000023 |
| freeing items | 0.000073 |
| cleaning up | 0.000070 |
+----------------------+----------+
18 rows in set, 1 warning (0.00 sec)
mysql>