os: centos 7.4
db: postgresql 10.11
Hash Join (散列連接) 的實現可以理解爲:先用驅動表(小表)建立 hash map,依次讀取驅動表的數據,對每一行根據連接條件中的列計算 hashkey,並把該行作爲一個元組存入 hash map;
再逐行掃描被驅動表,用同樣的方式計算 hashkey 並到 hash map 中查找,找不到相同的 hashkey 則直接丟棄,找到則再過一遍 join condition 和 filter,滿足條件的數據返回。
Hash Join (散列連接) 不依賴索引,一般會選擇較小的表當驅動表。
Hash Join (散列連接) 只適用於等值連接,在 hash map 能完全放入 work_mem 時通常都能取得很好的性能;放不下時會拆分成多個 batch 並使用臨時文件,性能隨之下降。
版本
# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core)
#
# su - postgres
$
$ psql -c "select version();"
version
----------------------------------------------------------------------------------------------------------
PostgreSQL 10.11 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)
create table
$ psql
psql (10.11)
Type "help" for help.
postgres=#
postgres=# drop table if exists tmp_t4;
drop table if exists tmp_t5;
postgres=# create table tmp_t4(
id int8 primary key,
name varchar(100)
);
create table tmp_t5(
id int8 primary key,
name varchar(100)
);
postgres=# insert into tmp_t4
select id,
md5(id::varchar)
from generate_series(1,1000000) as id;
insert into tmp_t5
select id,
md5(id::varchar)
from generate_series(1,1000000) as id;
postgres=#
Hash Join
postgres=# set max_parallel_workers_per_gather=0;
postgres=# show work_mem;
work_mem
----------
4MB
(1 row)
postgres=# explain analyze
select t4.*,t5.*
from tmp_t4 t4,
tmp_t5 t5
where 1=1
and t4.id = t5.id
and t4.id between 1
and 9999
;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Hash Join (cost=511.90..22482.91 rows=9553 width=82) (actual time=22.403..656.882 rows=9999 loops=1)
Hash Cond: (t5.id = t4.id)
-> Seq Scan on tmp_t5 t5 (cost=0.00..19346.00 rows=1000000 width=41) (actual time=0.007..243.948 rows=1000000 loops=1)
-> Hash (cost=392.48..392.48 rows=9553 width=41) (actual time=22.375..22.375 rows=9999 loops=1)
Buckets: 16384 Batches: 1 Memory Usage: 841kB
-> Index Scan using tmp_t4_pkey on tmp_t4 t4 (cost=0.42..392.48 rows=9553 width=41) (actual time=0.051..6.121 rows=9999 loops=1)
Index Cond: ((id >= 1) AND (id <= 9999))
Planning time: 0.448 ms
Execution time: 658.763 ms
(9 rows)
postgres=# set work_mem = "64MB";
SET
postgres=# show work_mem;
work_mem
----------
64MB
(1 row)
postgres=# explain analyze
select t4.*,t5.*
from tmp_t4 t4,
tmp_t5 t5
where 1=1
and t4.id = t5.id
and t4.id between 1
and 9999
;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Hash Join (cost=511.90..22482.91 rows=9553 width=82) (actual time=10.084..653.416 rows=9999 loops=1)
Hash Cond: (t5.id = t4.id)
-> Seq Scan on tmp_t5 t5 (cost=0.00..19346.00 rows=1000000 width=41) (actual time=0.005..255.684 rows=1000000 loops=1)
-> Hash (cost=392.48..392.48 rows=9553 width=41) (actual time=10.063..10.063 rows=9999 loops=1)
Buckets: 16384 Batches: 1 Memory Usage: 841kB
-> Index Scan using tmp_t4_pkey on tmp_t4 t4 (cost=0.42..392.48 rows=9553 width=41) (actual time=0.038..5.446 rows=9999 loops=1)
Index Cond: ((id >= 1) AND (id <= 9999))
Planning time: 0.320 ms
Execution time: 655.363 ms
(9 rows)
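小結: 兩次執行計劃中 Hash 節點都是 Batches: 1、Memory Usage: 841kB,說明 hash map 在 work_mem 爲 4MB 時已能完全放入內存,因此把 work_mem 調大到 64MB 後執行計劃和執行時間(658.763 ms 與 655.363 ms)基本沒有變化。
參考: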