首页 » PostgreSQL/GaussDB » Kingbase( PostgreSQL) 使用 “ON CONFLICT” /Merge 减少vacuum死元组量

Kingbase( PostgreSQL) 使用 “ON CONFLICT” /Merge 减少vacuum死元组量

我们的一位客户的计费系统大量依赖于Oracle数据库的主键(PK)进行去重操作,且其事务频率极高。基于ORA-1报错的编程习惯,这种业务逻辑在Oracle环境下虽然运作尚可,但并不理想。近年来,我一直从事Oracle数据库迁移到国产数据库的咨询、评估及实施工作。在此过程中,我习惯考虑各种场景,这就像在使用一些老品牌的汽车时,虽然耐用性强,但如果换成国产汽车,可能就会遇到不同的问题。若前期考虑不周,类似的场景切换到国产数据库后,可能会出现意想不到的困难。

老品牌的PostgreSQL或其衍生库(如Kingbase、OpenGauss等),同样也需要关注一些特定的注意事项,例如vacuum操作等。

我测试一下案例.

kingbase=# select version();
                                                          version
---------------------------------------------------------------------------------------------------------------------------
 KingbaseES V008R006C006B0013PS003 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-46), 64-bit
(1 row)

kingbase=# CREATE TABLE public.anbob_pk_test (
kingbase(#         id int PRIMARY KEY,
kingbase(#         value numeric,
kingbase(#         product_id int,
kingbase(#         effective_date timestamp(3)
kingbase(#         );
CREATE TABLE
kingbase=#
kingbase=# INSERT INTO public.anbob_pk_test VALUES (
kingbase(#         generate_series(0,10000),
kingbase(#         random()*1000,
kingbase(#         random()*100,
kingbase(#         current_timestamp(3));
INSERT 0 10001
kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'anbob_pk_test';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          0
(1 row)

kingbase=# INSERT INTO anbob_pk_test
kingbase-#     VALUES (0, 44.33893489873, 46, now());
ERROR:  duplicate key value violates unique constraint "anbob_pk_test_pkey"
DETAIL:  Key (id)=(0) already exists.

kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'anbob_pk_test';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          1
(1 row)

Note:
注意违反唯一性,同样产生了1条dead_tup 死元组。

增加使用ON CONFLICT

kingbase=# INSERT INTO anbob_pk_test
kingbase-#     VALUES (0, 44.33893489873, 46, now())
kingbase-# ON CONFLICT
kingbase-#     DO NOTHING;
INSERT 0 0

kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'anbob_pk_test';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          1 --no grow
(1 row)

Note:
相同的违反唯一报错,但是死元组数量没有增加,从而减少了所需的vacuum量!

insert select 增量死元组量

kingbase=# insert into anbob_pk_test select * from anbob_pk_test;
ERROR:  duplicate key value violates unique constraint "anbob_pk_test_pkey"
DETAIL:  Key (id)=(0) already exists.
kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'anbob_pk_test';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          2
(1 row)

Note:
使用insert select多行,违反后中断,死元组也仅增加1.

有些迁移应用中做如下判断,存在做更新,否则做insert,这种在oracle同样支持。如果使用自定义函数注意事项

kingbase=# CREATE OR REPLACE FUNCTION anbob_pk_test_func (p_id int, p_value numeric, p_product_id int)
kingbase-#     RETURNS VOID
kingbase-#     AS $$
kingbase$# BEGIN
kingbase$#     BEGIN
kingbase$#         INSERT INTO anbob_pk_test (id, value, product_id, effective_date)
kingbase$#             VALUES (p_id, p_value, p_product_id, now());
kingbase$#         RETURN;
kingbase$#     EXCEPTION
kingbase$#         WHEN unique_violation THEN
kingbase$#             -- try an update
kingbase$#             UPDATE
kingbase$#                 anbob_pk_test
kingbase$#             SET
kingbase$#                 value = p_value,
kingbase$#                 product_id = p_product_id,
kingbase$#                 effective_date = now()
kingbase$#             WHERE
kingbase$#                 id = p_id;
kingbase$#     IF found THEN
kingbase$#         RETURN;
kingbase$#         END IF;
kingbase$#     END;
kingbase$# END;
kingbase$#
kingbase$# $$
kingbase-# LANGUAGE plpgsql;
CREATE FUNCTION

kingbase=# vacuum anbob_pk_test;
VACUUM
kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'anbob_pk_test';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test    |      10001 |          0
(1 row)

kingbase=# select * from anbob_pk_test_func(0, 44.33893489873, 46);
 anbob_pk_test_func
------------------------

(1 row)

kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'anbob_pk_test';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          2
(1 row)

注意这里2个死元组。一个用于insert(失败),一个用于update. 如果应用上百万行的迁移,或断点续传或数据对比同步也采用这种方式,会产生大量不必要的vacuum。

PostgreSQL merge

D:\postgresql\pgsql\bin>psql -d postgres
psql (17rc1)
输入 "help" 来获取帮助信息.

postgres=# SELECT
postgres-#     schemaname,
postgres-#     relname,
postgres-#     n_live_tup,
postgres-#     n_dead_tup
postgres-# FROM
postgres-#     pg_stat_all_tables
postgres-# WHERE
postgres-#     relname = 'anbob_pk_test  ';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test    |      10001 |          0
(1 行记录)


postgres=# WITH insert_query AS (
postgres(#     SELECT
postgres(#         0 AS id,
postgres(#         44.33893489873 AS value,
postgres(#         46 AS product_id,
postgres(#         now() AS effective_date) MERGE INTO anbob_pk_test   pkt
postgres-#     USING insert_query i ON pkt.id = i.id
postgres-#     WHEN MATCHED THEN
postgres-#         UPDATE SET
postgres-#             value = i.value, product_id = i.product_id, effective_date = i.effective_date
postgres-#     WHEN NOT MATCHED THEN
postgres-#         INSERT (id, value, product_id, effective_date)
postgres-#             VALUES (i.id, i.value, i.product_id, i.effective_date);
MERGE 1
postgres=# SELECT
postgres-#     schemaname,
postgres-#     relname,
postgres-#     n_live_tup,
postgres-#     n_dead_tup
postgres-# FROM
postgres-#     pg_stat_all_tables
postgres-# WHERE
postgres-#     relname = 'anbob_pk_test  ';
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test    |      10001 |          1
(1 行记录)

NOTe:
从PG 15后引入的merge,比上面自定义函数anbob_pk_test_func 减少了insert冲突时产生的死元组。如同“ON CONFLICT”.   以后是减少vacuum量时注意影响。 下面的merge语法同样支持。

Kingbase Merge

kingbase=# WITH insert_query AS (
kingbase(#     SELECT
kingbase(#         0 AS id,
kingbase(#         44.33893489873 AS value,
kingbase(#         46 AS product_id,
kingbase(#         now() AS effective_date) MERGE INTO anbob_pk_test pkt
kingbase-#     USING insert_query i ON pkt.id = i.id
kingbase-#     WHEN MATCHED THEN
kingbase-#         UPDATE SET
kingbase-#             value = i.value, product_id = i.product_id, effective_date = i.effective_date
kingbase-#     WHEN NOT MATCHED THEN
kingbase-#         INSERT (id, value, product_id, effective_date)
kingbase-#             VALUES (i.id, i.value, i.product_id, i.effective_date);
ERROR:  syntax error at or near "MERGE"

kingbase=# create table test1(a int, b int);
CREATE TABLE
kingbase=# insert into test1 values(1, 1);
INSERT 0 1
kingbase=# insert into test1 values(2, 2);
INSERT 0 1
kingbase=# insert into test1 values(3, 3);
INSERT 0 1
kingbase=#
kingbase=# create table test2(x int, y int);
CREATE TABLE
kingbase=# insert into test2 values(1, 1);
INSERT 0 1
kingbase=# insert into test2 values(3, 3);
INSERT 0 1
kingbase=# insert into test2 values(5, 5);
INSERT 0 1
kingbase=# merge into test2 using test1
kingbase-# on (test1.a = test2.x)
kingbase-# when matched then update set y = y * -1 where test1.a > 1
kingbase-# when not matched then insert values(test1.a, test1.b);
MERGE 2
kingbase=# select * from test2;
 x | y
---+----
 1 |  1
 5 |  5
 2 |  2
 3 | -3
(4 rows)

kingbase=# SELECT
kingbase-#     schemaname,
kingbase-#     relname,
kingbase-#     n_live_tup,
kingbase-#     n_dead_tup
kingbase-# FROM
kingbase-#     pg_stat_all_tables
kingbase-# WHERE
kingbase-#     relname = 'test2';
 schemaname | relname | n_live_tup | n_dead_tup
------------+---------+------------+------------
 public     | test2   |          4 |          1
(1 row)

Note:
kingbase不支持with subquery merge的语法,但使用merge基于SQL也是符合预期的,并没有产生insert冲突时的死元组。

OpenGauss for oracle

[omm@localhost ~]$ sh show sql_compatibility
 sql_compatibility                      | A                  | Choose which SQL format to adapt.

openGauss=# select version();
                                                                          version
-----------------------------------------------------------------------------------------------------------------------------------------------------------
 (openGauss 6.0.0-RC1 build ed7f8e37) compiled at 2024-03-31 11:59:31 commit 0 last mr   on x86_64-unknown-linux-gnu, compiled by g++ (GCC) 10.3.0, 64-bit
(1 row)

-- opengauss 兼容度为A, for oracle, 不支持ON CONFLICT语法,但是我们可以测试一下上面的手段创建 的function anbob_pk_test_func 
openGauss=# vacuum anbob_pk_test;
VACUUM
openGauss=# SELECT
      schemaname,
      relname,
      n_live_tup,
      n_dead_tup
  FROM
      pg_stat_all_tables
  WHERE
      relname = 'anbob_pk_test'; 
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |          0 |          0
(1 row)

openGauss=# vacuum analyze anbob_pk_test;
VACUUM
openGauss=# SELECT
      schemaname,
      relname,
      n_live_tup,
      n_dead_tup
  FROM
      pg_stat_all_tables
  WHERE
      relname = 'anbob_pk_test'; 
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          0
(1 row)

openGauss=# select * from anbob_pk_test_func(0, 44.33893489873, 46);
 anbob_pk_test_func
--------------------

(1 row)

openGauss=# SELECT
      schemaname,
      relname,
      n_live_tup,
      n_dead_tup
  FROM
      pg_stat_all_tables
  WHERE
      relname = 'anbob_pk_test'; 
 schemaname |      relname      | n_live_tup | n_dead_tup
------------+-------------------+------------+------------
 public     | anbob_pk_test     |      10001 |          2
(1 row)

Note:
和pg,kingbase一样,函数中自己判断insert失败做update,也会因为insert失败增加1个dead tup(另1 dead tup为update产生). 注意vacuum和vacuum analyze区别,缺少统计信息更新。

openGauss V6 merge

 create table test1(a int, b int);
 
 insert into test1 values(1, 1);
 insert into test1 values(2, 2);
 insert into test1 values(3, 3);


 create table test2(x int, y int);

 insert into test2 values(1, 1);
 insert into test2 values(3, 3);
 insert into test2 values(5, 5);
 
 merge into test2 using test1
 on (test1.a = test2.x)
 when matched then update set y = y * -1 where test1.a > 1
when not matched then insert values(test1.a, test1.b);


openGauss=# SELECT
      schemaname,
      relname,
      n_live_tup,
      n_dead_tup
  FROM
      pg_stat_all_tables
  WHERE
      relname = 'test2';o
	  
	  
 schemaname | relname | n_live_tup | n_dead_tup
------------+---------+------------+------------
 public     | test2   |          4 |          1

Note:
openGauss 也和kingbase一样,不支持with query merge语法,但支持基础merge, 死元组是预期仅update产生的。

总结
如果在pg、og系,基于pk 的insert如果违反了唯一性,同样会产生1个死元组,如果失败基数比较多会产生大量不必要的死元组导致膨胀,在postgresql中建议使用“ON CONFLICT”可以减少,而如果在insert因为存在冲突,做一些其它事时,如udpate,建议尝试使用merge 语法,避免自己定义基于exception实现。

打赏

,

对不起,这篇文章暂时关闭评论。