os: centos 7.4
db: postgresql 10.11
版本
# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core)
#
#
# yum list installed |grep -i postgresql
postgresql10.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-contrib.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-debuginfo.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-devel.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-docs.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-libs.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-odbc.x86_64 12.00.0000-1PGDG.rhel7 @pgdg10
postgresql10-plperl.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-plpython.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-pltcl.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-server.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-tcl.x86_64 2.4.0-1.rhel7 @pgdg10
postgresql10-tcl-debuginfo.x86_64 2.3.1-1.rhel7 @pgdg10
postgresql10-test.x86_64 10.11-2PGDG.rhel7 @pgdg10
# su - postgres
Last login: Wed Jan 15 18:34:12 CST 2020 on pts/0
$
$
$ psql -c "select version();"
version
----------------------------------------------------------------------------------------------------------
PostgreSQL 10.11 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)
測試數據
postgres=# create table tmp_t0(id varchar(100),name1 varchar(100),name2 varchar(100));
postgres=# create table tmp_t1(id varchar(100),name1 varchar(100),name2 varchar(100));
postgres=#
postgres=# insert into tmp_t0
select id::varchar,md5(id::varchar),md5(md5(id::varchar)) from generate_series(1,5000000) as id;
postgres=# insert into tmp_t1
select id::varchar,md5(id::varchar),md5(md5(id::varchar)) from generate_series(1,5000000) as id;
tmp_t0,tmp_t1 的 id 列上創建普通 btree 索引
postgres=# create index idx_tmp_t0_id on tmp_t0(id);
postgres=# create index idx_tmp_t1_id on tmp_t1(id);
postgres=# vacuum analyze tmp_t0;
postgres=# vacuum analyze tmp_t1;
排除並行
postgres=# set max_parallel_workers_per_gather=0;
like ‘xxoo’
like ‘xxoo’ 等價於 = ‘xxoo’
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and t0.id like '1000000';
QUERY PLAN
--------------------------------------------------------------------------------------
Nested Loop (cost=0.86..16.91 rows=1 width=146)
-> Index Scan using idx_tmp_t0_id on tmp_t0 t0 (cost=0.43..8.45 rows=1 width=73)
Index Cond: ((id)::text = '1000000'::text)
Filter: ((id)::text ~~ '1000000'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.45 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(6 rows)
like ‘xxoo%’
前模糊(有前綴的模糊)
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and t0.id like '1000000%';
QUERY PLAN
--------------------------------------------------------------------------------------
Nested Loop (cost=0.43..133262.84 rows=500 width=146)
-> Seq Scan on tmp_t0 t0 (cost=0.00..129068.84 rows=500 width=73)
Filter: ((id)::text ~~ '1000000%'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.38 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(5 rows)
可以看到 tmp_t0 走的是全表掃描(Seq Scan),並沒有使用到索引。
查看文檔(http://postgres.cn/docs/10/indexes-opclass.html )後發現:當數據庫的排序規則(locale)不是 C 時,postgresql 不會對 like ‘xxoo%’ 使用默認操作符類的普通 btree 索引,需要改用 varchar_pattern_ops 這類操作符類來建索引,看起來和 oracle 有差異。
新建個索引
postgres=# create index idx_tmp_t0_id_2 on tmp_t0(id varchar_pattern_ops);
postgres=# vacuum analyze tmp_t0;
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and t0.id like '1000000%';
QUERY PLAN
----------------------------------------------------------------------------------------------
Nested Loop (cost=0.86..4202.45 rows=500 width=146)
-> Index Scan using idx_tmp_t0_id_2 on tmp_t0 t0 (cost=0.43..8.45 rows=500 width=73)
Index Cond: (((id)::text ~>=~ '1000000'::text) AND ((id)::text ~<~ '1000001'::text))
Filter: ((id)::text ~~ '1000000%'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.38 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(6 rows)
可以看到已經使用了新建的 idx_tmp_t0_id_2 索引
like ‘%xxoo’
後模糊(有後綴的模糊)
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and t0.id like '%1000000';
QUERY PLAN
--------------------------------------------------------------------------------------
Nested Loop (cost=0.43..133264.25 rows=500 width=146)
-> Seq Scan on tmp_t0 t0 (cost=0.00..129070.25 rows=500 width=73)
Filter: ((id)::text ~~ '%1000000'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.38 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(5 rows)
沒有使用到之前創建的 idx_tmp_t0_id,idx_tmp_t0_id_2 這兩個索引。
這時需要做兩個調整
1,新建個反轉函數索引
2,調整 ‘%1000000’ 爲 ‘0000001%’
postgres=# create index idx_tmp_t0_id_3 on tmp_t0(reverse(id) varchar_pattern_ops);
postgres=# vacuum analyze tmp_t0;
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and reverse(t0.id) like '0000001%';
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.86..4202.46 rows=500 width=146)
-> Index Scan using idx_tmp_t0_id_3 on tmp_t0 t0 (cost=0.43..8.46 rows=500 width=73)
Index Cond: ((reverse((id)::text) ~>=~ '0000001'::text) AND (reverse((id)::text) ~<~ '0000002'::text))
Filter: (reverse((id)::text) ~~ '0000001%'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.38 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(6 rows)
或者下面形式更好看些
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and reverse(t0.id) like reverse('1000000')||'%';
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.86..4202.46 rows=500 width=146)
-> Index Scan using idx_tmp_t0_id_3 on tmp_t0 t0 (cost=0.43..8.46 rows=500 width=73)
Index Cond: ((reverse((id)::text) ~>=~ '0000001'::text) AND (reverse((id)::text) ~<~ '0000002'::text))
Filter: (reverse((id)::text) ~~ '0000001%'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.38 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(6 rows)
like ‘%xxoo%’
前後模糊(無前後綴的模糊)
3 個字或以上的模糊查詢,使用 pg_trgm 可以很好地解決。參考《pg_trgm 處理中間匹配 like ‘%xxoo%’》
pg_trgm 也能很好處理前後綴模糊的場景。
postgres=# create extension pg_trgm;
postgres=# create index idx_tmp_t0_id_4 on tmp_t0 using gin(id gin_trgm_ops);
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and t0.id like '%1000000%';
QUERY PLAN
---------------------------------------------------------------------------------------
Nested Loop (cost=28.31..6094.51 rows=500 width=146)
-> Bitmap Heap Scan on tmp_t0 t0 (cost=27.88..1900.51 rows=500 width=73)
Recheck Cond: ((id)::text ~~ '%1000000%'::text)
-> Bitmap Index Scan on idx_tmp_t0_id_4 (cost=0.00..27.75 rows=500 width=0)
Index Cond: ((id)::text ~~ '%1000000%'::text)
-> Index Scan using idx_tmp_t1_id on tmp_t1 t1 (cost=0.43..8.38 rows=1 width=73)
Index Cond: ((id)::text = (t0.id)::text)
(7 rows)
postgres=# \d+ tmp_t0
Table "public.tmp_t0"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+------------------------+-----------+----------+---------+----------+--------------+-------------
id | character varying(100) | | | | extended | |
name1 | character varying(100) | | | | extended | |
name2 | character varying(100) | | | | extended | |
Indexes:
"idx_tmp_t0_id" btree (id)
"idx_tmp_t0_id_2" btree (id varchar_pattern_ops)
"idx_tmp_t0_id_3" btree (reverse(id::text) varchar_pattern_ops)
"idx_tmp_t0_id_4" gin (id gin_trgm_ops)
like ‘%xxoo%’
1-2 個字的模糊查詢,由於關鍵字不足 3 個字符、無法從中提取出完整的 trigram,pg_trgm 類型索引無法有效過濾,優化器通常不會使用它,可以創建個自定義函數索引。
postgres=# create or replace function f_user_split(text)
returns text[] as
$$
-- Break the input string into search tokens for short (1-2 character) lookups.
--
-- Argument:
--   $1  the text to tokenize.
-- Returns:
--   a text[] holding every single character of $1 (in order), followed by
--   every two-character substring of $1 taken at positions 1 .. length($1)-1.
--   Repeated tokens are kept; nothing is de-duplicated here.
-- Notes:
--   Declared STRICT, so a NULL argument yields NULL without running the body.
--   Declared IMMUTABLE, which is what allows it to be used in an index expression.
declare
  -- Splitting on the empty pattern produces one array element per character.
  tokens text[] := regexp_split_to_array($1, '');
  -- Last position at which a full two-character substring can start.
  last_start constant int := length($1) - 1;
  pos int := 1;
begin
  -- Append each pair of adjacent characters after the single characters.
  while pos <= last_start loop
    tokens := array_append(tokens, substring($1, pos, 2));
    pos := pos + 1;
  end loop;
  return tokens;
end;
$$
language plpgsql strict immutable;
postgres=# create index idx_tmp_t0_id_5 on tmp_t0 using gin(f_user_split(id));
postgres=# explain select t0.*,t1.* from tmp_t0 t0,tmp_t1 t1 where t0.id=t1.id and f_user_split(t0.id) @> array['99'];
QUERY PLAN
------------------------------------------------------------------------------------------------
Hash Join (cost=55802.50..191371.63 rows=25000 width=146)
Hash Cond: ((t1.id)::text = (t0.id)::text)
-> Seq Scan on tmp_t1 t1 (cost=0.00..116568.82 rows=5000082 width=73)
-> Hash (cost=55490.00..55490.00 rows=25000 width=73)
-> Bitmap Heap Scan on tmp_t0 t0 (cost=241.75..55490.00 rows=25000 width=73)
Recheck Cond: (f_user_split((id)::text) @> '{99}'::text[])
-> Bitmap Index Scan on idx_tmp_t0_id_5 (cost=0.00..235.50 rows=25000 width=0)
Index Cond: (f_user_split((id)::text) @> '{99}'::text[])
(8 rows)
Time: 83.097 ms
參考:
http://postgres.cn/docs/10/indexes-opclass.html