- 數據
drop table test;
create table test(id int, name varchar, description varchar);
insert into test values (1,'a','a');
insert into test values (1,'a','b');
insert into test values (1,'a','c');
insert into test values (1,'a','c');
insert into test values (2,'a','a');
insert into test values (2,'a','b');
insert into test values (2,'a','c');
insert into test values (2,'a','c');
select * from test;
select DISTINCT * from test;
- 函數
CREATE OR REPLACE FUNCTION func_remove_duplicate_records(table_name varchar)
RETURNS INTEGER
AS
$BODY$
DECLARE
rec record;
column_list varchar;
partition_by_columns varchar;
sql_text varchar;
BEGIN
SELECT array_agg(attname)::varchar into column_list FROM pg_attribute WHERE attrelid = table_name::regclass and attname not in ('tableoid','cmax','cmin','xmin','xmax','ctid');
partition_by_columns = split_part(split_part(column_list,'{',2),'}',1);
sql_text = format('delete from test
where ctid =
any(
array(
select ctid from (
select row_number() over(partition by %1$s order by ctid) as rn, ctid from test
) t
where t.rn<>1
)
);
',partition_by_columns);
execute sql_text;
RETURN 0;
END;
$BODY$
LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER;
select * from test;
- 實驗:
postgres=# select * from test;
id | name | description
----+------+-------------
1 | a | a
1 | a | b
1 | a | c
1 | a | c
2 | a | a
2 | a | b
2 | a | c
2 | a | c
(8 rows)
postgres=# select DISTINCT * from test;
id | name | description
----+------+-------------
2 | a | b
1 | a | b
1 | a | a
1 | a | c
2 | a | c
2 | a | a
(6 rows)
postgres=#
postgres=# select * from func_remove_duplicate_records('test');
func_remove_duplicate_records
-------------------------------
0
(1 row)
postgres=# select * from test;
id | name | description
----+------+-------------
1 | a | a
1 | a | b
1 | a | c
2 | a | a
2 | a | b
2 | a | c
(6 rows)
- 總結:通過上面的方法就可以在沒有主鍵或者唯一鍵的情況下刪除重複記錄,如果有多張表,則可以配合shell腳本批量刪除表中的重複記錄
參考:https://github.com/digoal/blog/blob/master/201706/20170602_01.md