PG中刪除沒有主鍵與唯一鍵的表的重複記錄

  • 數據
-- Demo fixture: a table with no primary key or unique constraint, seeded with
-- eight rows of which two are exact duplicates of another row.
-- IF EXISTS keeps the script re-runnable: a plain DROP TABLE raises an error
-- on the first run, when the table has not been created yet.
DROP TABLE IF EXISTS test;
CREATE TABLE test (id int, name varchar, description varchar);

-- Rows are inserted in the same order as before so their physical order
-- is unchanged; the last row of each group duplicates the one before it.
INSERT INTO test (id, name, description) VALUES
    (1, 'a', 'a'),
    (1, 'a', 'b'),
    (1, 'a', 'c'),
    (1, 'a', 'c');

INSERT INTO test (id, name, description) VALUES
    (2, 'a', 'a'),
    (2, 'a', 'b'),
    (2, 'a', 'c'),
    (2, 'a', 'c');

-- All rows (8) versus the distinct rows (6) that should remain after cleanup.
SELECT * FROM test;
SELECT DISTINCT * FROM test;

  • 函數
-- Remove exact duplicate rows from a table that has no primary key or unique
-- constraint, keeping one physical copy (the lowest ctid) of each distinct row.
--
-- Parameters:
--   table_name  name of the table to clean, optionally schema-qualified.
--               It is resolved through regclass, so an unknown table raises
--               an error instead of producing a malformed statement.
-- Returns:
--   0 on completion (constant kept so existing callers see the same value).
-- Raises:
--   an exception when table_name is NULL or does not name an existing relation.
-- Notes:
--   * Rows are compared on every live user column; PARTITION BY puts NULLs
--     in the same partition, so rows that differ only by "NULL vs NULL" are
--     treated as duplicates.
--   * ctid identifies a row only within one physical table, so call this on
--     individual tables rather than on inheritance/partition parents.
--   * NOTE(review): column types without a default equality/ordering operator
--     (e.g. json) presumably cannot be used in PARTITION BY -- verify before
--     running against such tables.
CREATE OR REPLACE FUNCTION func_remove_duplicate_records(table_name varchar)
RETURNS INTEGER
AS
$BODY$
DECLARE
    target_rel regclass;
    partition_by_columns text;
    partition_clause text;
    sql_text text;
BEGIN
    -- The function is CALLED ON NULL INPUT, so reject NULL explicitly.
    IF table_name IS NULL THEN
        RAISE EXCEPTION 'table_name must not be NULL';
    END IF;

    -- Resolving to regclass validates the name; its text form is already
    -- quoted/schema-qualified as needed, so it is safe to splice into SQL.
    target_rel := table_name::regclass;

    -- Live user columns only: attnum > 0 skips system columns (ctid, xmin, ...)
    -- and attisdropped skips placeholders left behind by DROP COLUMN.
    SELECT string_agg(quote_ident(attname::text), ', ' ORDER BY attnum)
      INTO partition_by_columns
      FROM pg_attribute
     WHERE attrelid = target_rel
       AND attnum > 0
       AND NOT attisdropped;

    -- A table with no columns yields NULL here; every row is then a duplicate
    -- of every other, which is what an empty PARTITION BY clause expresses.
    partition_clause := coalesce('partition by ' || partition_by_columns || ' ', '');

    -- Number the rows inside each group of identical rows and delete all but
    -- the first, addressing the victims by ctid.
    sql_text := format('delete from %1$s
                where ctid =
                any(
                    array(
                        select ctid from (
                            select row_number() over(%2$s order by ctid) as rn, ctid from %1$s
                        ) t
                        where t.rn <> 1
                    )
                )',
                target_rel, partition_clause);
    EXECUTE sql_text;
    RETURN 0;
END;
$BODY$
LANGUAGE plpgsql VOLATILE CALLED ON NULL INPUT SECURITY INVOKER;
-- Show the current contents of test; the function above has only been
-- defined at this point, not called, so duplicates are still present.
SELECT id, name, description FROM test;
  • 實驗:
postgres=# select * from test;
 id | name | description 
----+------+-------------
  1 | a    | a
  1 | a    | b
  1 | a    | c
  1 | a    | c
  2 | a    | a
  2 | a    | b
  2 | a    | c
  2 | a    | c
(8 rows)

postgres=# select DISTINCT * from test;
 id | name | description 
----+------+-------------
  2 | a    | b
  1 | a    | b
  1 | a    | a
  1 | a    | c
  2 | a    | c
  2 | a    | a
(6 rows)

postgres=# 
postgres=# select * from func_remove_duplicate_records('test');
 func_remove_duplicate_records 
-------------------------------
                             0
(1 row)

postgres=# select * from test;
 id | name | description 
----+------+-------------
  1 | a    | a
  1 | a    | b
  1 | a    | c
  2 | a    | a
  2 | a    | b
  2 | a    | c
(6 rows)
  • 總結：上面的函數利用系統列 ctid 與 row_number() 窗口函數，在表沒有主鍵或唯一鍵的情況下也能刪除重複記錄（每組完全相同的記錄只保留一條）。如果有多張表，可以配合 shell 腳本逐表調用該函數，批量刪除各表中的重複記錄。

參考:https://github.com/digoal/blog/blob/master/201706/20170602_01.md

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章