本文共 14629 字,大约阅读时间需要 48 分钟。
digoal
2016-10-26
PostgreSQL , 分区表 , pg_pathman , custom scan api , sharding , FDW
可以阅读以下几篇文章先回顾一下FDW,基于FDW的shared以及高效的分区插件pg_pathman。
由于pg_pathman支持foreign table,所以拿它来做sharding也是理所当然的事情,同时它支持的HASH partition不需要在QUERY中带入constraint对应的clause,也能走分区查询。
依旧使用《PostgreSQL 9.6 sharding + 单元化 (based on postgres_fdw) 最佳实践 - 通用水平分库场景设计与实践》文章中的例子。
例子使用1个路由节点,4个数据节点。
1. 配置目标库的pg_hba.conf
使用md5认证
2. 创建用户
create role role0 nosuperuser login encrypted password 'pwd' ;create role role1 nosuperuser login encrypted password 'pwd' ;create role role2 nosuperuser login encrypted password 'pwd' ;create role role3 nosuperuser login encrypted password 'pwd' ;create role digoal nosuperuser login encrypted password 'pwd' ;
3. 创建数据库
create database db0 with template template0 ;create database db1 with template template0 ;create database db2 with template template0 ;create database db3 with template template0 ;create database mas1 with template template0 ;
4. 配置数据库权限
grant all on database db0 to role0 ;grant all on database db1 to role1 ;grant all on database db2 to role2 ;grant all on database db3 to role3 ;grant all on database mas1 to digoal ;
5. 创建schema, 建议与USER同名
\c db0 role0create schema role0;\c db1 role1create schema role1;\c db2 role2create schema role2;\c db3 role3create schema role3;\c mas1 digoalcreate schema digoal;
6. 配置路由节点,需要用到postgres_fdw与pg_pathman插件
\c mas1 postgrescreate extension postgres_fdw;create extension pg_pathman;grant usage on foreign data wrapper postgres_fdw to digoal;
7. ddl建表语句 , ddl-1.sql
create table userinfo(uid int8 primary key, info text, crt_time timestamp);create table user_log(uid int8 not null, logintime timestamp, logouttime timestamp, client_addr inet, client_port int);create table user_membership(uid1 int8 not null, uid2 int8, unique (uid1,uid2) );create table user_membership_rev(uid2 int8 not null, uid1 int8, unique (uid2,uid1) );
8. 初始化数据节点,创建数据表,注意表名最好全局唯一,方便使用import foreign schema的方式导入。
\c db0 role0ddl-1.sql然后执行 : do language plpgsql $$declare i int; mod int := 1; nod int := 0; sql text;begin for i in (mod*nod)..(mod*(nod+1)-1) loop sql := 'create table userinfo_'||i||'(like userinfo including all)'; execute sql; sql := 'create table user_log_'||i||'(like user_log including all)'; execute sql; sql := 'create table user_membership_'||i||'(like user_membership including all)'; execute sql; sql := 'create table user_membership_rev_'||i||'(like user_membership_rev including all)'; execute sql; end loop;end;$$;
\c db1 role1ddl-1.sql然后执行 : do language plpgsql $$declare i int; mod int := 1; nod int := 1; sql text;begin for i in (mod*nod)..(mod*(nod+1)-1) loop sql := 'create table userinfo_'||i||'(like userinfo including all)'; execute sql; sql := 'create table user_log_'||i||'(like user_log including all)'; execute sql; sql := 'create table user_membership_'||i||'(like user_membership including all)'; execute sql; sql := 'create table user_membership_rev_'||i||'(like user_membership_rev including all)'; execute sql; end loop;end;$$;
\c db2 role2ddl-1.sql然后执行 : do language plpgsql $$declare i int; mod int := 1; nod int := 2; sql text;begin for i in (mod*nod)..(mod*(nod+1)-1) loop sql := 'create table userinfo_'||i||'(like userinfo including all)'; execute sql; sql := 'create table user_log_'||i||'(like user_log including all)'; execute sql; sql := 'create table user_membership_'||i||'(like user_membership including all)'; execute sql; sql := 'create table user_membership_rev_'||i||'(like user_membership_rev including all)'; execute sql; end loop;end;$$;
\c db3 role3ddl-1.sql然后执行 : do language plpgsql $$declare i int; mod int := 1; nod int := 3; sql text;begin for i in (mod*nod)..(mod*(nod+1)-1) loop sql := 'create table userinfo_'||i||'(like userinfo including all)'; execute sql; sql := 'create table user_log_'||i||'(like user_log including all)'; execute sql; sql := 'create table user_membership_'||i||'(like user_membership including all)'; execute sql; sql := 'create table user_membership_rev_'||i||'(like user_membership_rev including all)'; execute sql; end loop;end;$$;
9. 初始化master
\c mas1 digoal-- 初始化foreign server-- or user host (本例使用unix socket)create server db0 foreign data wrapper postgres_fdw OPTIONS (host '/u02/digoal/pg_root5281', port '5281', dbname 'db0', use_remote_estimate 'false', fetch_size '5000000');create server db1 foreign data wrapper postgres_fdw OPTIONS (host '/u02/digoal/pg_root5281', port '5281', dbname 'db1', use_remote_estimate 'false', fetch_size '5000000');create server db2 foreign data wrapper postgres_fdw OPTIONS (host '/u02/digoal/pg_root5281', port '5281', dbname 'db2', use_remote_estimate 'false', fetch_size '5000000');create server db3 foreign data wrapper postgres_fdw OPTIONS (host '/u02/digoal/pg_root5281', port '5281', dbname 'db3', use_remote_estimate 'false', fetch_size '5000000');-- 设置use_remote_estimate 'false' 需收集外部表的统计信息, 但是可以节约explain的操作,对于高并发的小事务,建议使用FALSE-- 创建user mappingcreate user mapping FOR digoal SERVER db0 options (user 'role0', password 'pwd');create user mapping FOR digoal SERVER db1 options (user 'role1', password 'pwd');create user mapping FOR digoal SERVER db2 options (user 'role2', password 'pwd');create user mapping FOR digoal SERVER db3 options (user 'role3', password 'pwd');-- 导入FOREIGN TABLE,排除主表import foreign schema role0 EXCEPT (userinfo,user_log,user_membership,mv_user_membership,user_membership_rev,mv_user_membership_rev) from server db0 into digoal;import foreign schema role1 EXCEPT (userinfo,user_log,user_membership,mv_user_membership,user_membership_rev,mv_user_membership_rev) from server db1 into digoal;import foreign schema role2 EXCEPT (userinfo,user_log,user_membership,mv_user_membership,user_membership_rev,mv_user_membership_rev) from server db2 into digoal;import foreign schema role3 EXCEPT (userinfo,user_log,user_membership,mv_user_membership,user_membership_rev,mv_user_membership_rev) from server db3 into digoal;-- 创建主表create table userinfo(uid int8 primary key, info text, crt_time timestamp);create table user_log(uid int8 not null, logintime timestamp, logouttime timestamp, client_addr inet, client_port int);create table user_membership(uid1 int8 not null, uid2 int8, unique (uid1,uid2) );create table user_membership_rev(uid2 int8 not null, uid1 int8, unique (uid2,uid1) );-- 生成哈希分区select create_hash_partitions('userinfo'::regclass, 'uid', 0, true) ;select create_hash_partitions('user_log'::regclass, 'uid', 0, true) ;select create_hash_partitions('user_membership'::regclass, 'uid1', 0, true) ;select create_hash_partitions('user_membership_rev'::regclass, 'uid2', 0, true) ;-- 设置foreign table 约束,目前pg_pathman不支持attach hash 分区表,所以我把步骤分解了,先构建约束-- 注意约束名有固定的格式do language plpgsql $$declare i int; x int := 0; y int := 3; sql text;begin for i in x..y loop sql := 'alter foreign table userinfo_'||i||' add constraint pathman_userinfo_'||i||'_1_check check(get_hash_part_idx(hashint8(uid), 4) = '||i||' )'; execute sql; sql := 'alter foreign table user_log_'||i||' add constraint pathman_user_log_'||i||'_1_check check(get_hash_part_idx(hashint8(uid), 4) = '||i||' )'; execute sql; sql := 'alter foreign table user_membership_'||i||' add constraint pathman_user_membership_'||i||'_1_check check(get_hash_part_idx(hashint8(uid1), 4) = '||i||' )'; execute sql; sql := 'alter foreign table user_membership_rev_'||i||' add constraint pathman_user_membership_rev_'||i||'_1_check check(get_hash_part_idx(hashint8(uid2), 4) = '||i||' )'; execute sql; end loop;end;$$;-- 设置继承关系do language plpgsql $$declare i int; x int := 0; y int := 3; sql text;begin for i in x..y loop sql := 'alter foreign table userinfo_'||i||' inherit userinfo'; execute sql; sql := 'alter foreign table user_log_'||i||' inherit user_log'; execute sql; sql := 'alter foreign table user_membership_'||i||' inherit user_membership'; execute sql; sql := 'alter foreign table user_membership_rev_'||i||' inherit user_membership_rev'; execute sql; end loop;end;$$;-- 前面都准备好之后,可以设置pg_pathman的内存,刷新。do language plpgsql $$declare i int; x int := 0; y int := 3; tbls text[] := array['userinfo', 'user_log', 'user_membership', 'user_membership_rev']; tbl text; child text; sql text;begin foreach tbl in array tbls loop for i in x..y loop child := tbl||'_'||i; -- 将子表添加到pg_pathman hash memory中 perform public.invoke_on_partition_created_callback(tbl::regclass, child::REGCLASS, 0); end loop; -- 刷新 perform public.on_create_partitions(tbl); end loop;end;$$;
10. 插入压测
插入数据测试的函数
create or replace function f(int) returns void as $$declarebegin insert into userinfo values ($1, md5(random()::Text), now()); exception when others then return;end;$$ language plpgsql strict;
压测
$ vi test1.sql\set id random(1,50000000)select f(:id);$ pgbench -M simple -n -r -P 1 -f ./test1.sql -c 64 -j 64 -T 1200 -U digoal mas1...progress: 102.0 s, 42769.1 tps, lat 1.495 ms stddev 0.512progress: 103.0 s, 42633.0 tps, lat 1.500 ms stddev 0.519progress: 104.0 s, 42864.0 tps, lat 1.492 ms stddev 0.525progress: 105.0 s, 42724.0 tps, lat 1.497 ms stddev 0.537progress: 106.0 s, 42801.3 tps, lat 1.494 ms stddev 0.507progress: 107.0 s, 42547.7 tps, lat 1.503 ms stddev 0.524...
PerfTop: 54084 irqs/sec kernel:40.6% exact: 0.0% [1000Hz cycles], (all, 64 CPUs)--------------------------------------------------------------------------------------------- samples pcnt function DSO _______ _____ _____________________________ ____________________________________ 18991.00 2.5% AllocSetAlloc /home/digoal/pgsql9.6/bin/postgres 17321.00 2.3% _spin_lock [kernel.kallsyms] 15946.00 2.1% GetSnapshotData /home/digoal/pgsql9.6/bin/postgres 15049.00 2.0% hash_search_with_hash_value /home/digoal/pgsql9.6/bin/postgres 12864.00 1.7% base_yyparse /home/digoal/pgsql9.6/bin/postgres 10796.00 1.4% SearchCatCache /home/digoal/pgsql9.6/bin/postgres 9826.00 1.3% memcpy /lib64/libc-2.12.so 8840.00 1.2% _int_malloc /lib64/libc-2.12.so 8261.00 1.1% __strlen_sse42 /lib64/libc-2.12.so 8245.00 1.1% schedule [kernel.kallsyms] 7441.00 1.0% __GI_vfprintf /lib64/libc-2.12.so 7348.00 1.0% AtEOXact_GUC /home/digoal/pgsql9.6/bin/postgres 6679.00 0.9% __strcmp_sse42 /lib64/libc-2.12.so 6531.00 0.9% mutex_spin_on_owner [kernel.kallsyms] 5899.00 0.8% reschedule_interrupt [kernel.kallsyms]
直连测试
\c db0 role0create or replace function f(int) returns void as $$declarebegin insert into userinfo values ($1, md5(random()::Text), now()); exception when others then return;end;$$ language plpgsql strict;
$ pgbench -M prepared -n -r -P 1 -f ./test1.sql -c 64 -j 64 -T 1200 -U role0 db0progress: 6.0 s, 149615.0 tps, lat 0.427 ms stddev 0.389progress: 7.0 s, 149022.1 tps, lat 0.428 ms stddev 0.398progress: 8.0 s, 148586.6 tps, lat 0.430 ms stddev 0.430
11. query测试
压测
$ vi test1.sql\set id random(1,50000000)select * from userinfo where uid=:id;$ pgbench -M prepared -n -r -P 1 -f ./test.sql -c 64 -j 64 -T 1200 -U digoal mas1...progress: 158.0 s, 46046.8 tps, lat 1.388 ms stddev 0.960progress: 159.0 s, 46201.2 tps, lat 1.384 ms stddev 0.945progress: 160.0 s, 46176.9 tps, lat 1.385 ms stddev 0.957progress: 161.0 s, 46143.0 tps, lat 1.386 ms stddev 0.943progress: 162.0 s, 46316.0 tps, lat 1.381 ms stddev 0.955progress: 163.0 s, 46067.1 tps, lat 1.388 ms stddev 0.954...
PerfTop: 46994 irqs/sec kernel:40.7% exact: 0.0% [1000Hz cycles], (all, 64 CPUs)------------------------------------------------------------------------------------------------- samples pcnt function DSO _______ _____ _____________________________ ________________________________________ 17838.00 2.4% GetSnapshotData /home/digoal/pgsql9.6/bin/postgres 17711.00 2.4% _spin_lock [kernel.kallsyms] 17372.00 2.4% AllocSetAlloc /home/digoal/pgsql9.6/bin/postgres 15205.00 2.1% base_yyparse /home/digoal/pgsql9.6/bin/postgres 12210.00 1.7% SearchCatCache /home/digoal/pgsql9.6/bin/postgres 11821.00 1.6% hash_search_with_hash_value /home/digoal/pgsql9.6/bin/postgres 10729.00 1.5% __mutex_lock_slowpath [kernel.kallsyms] 10694.00 1.5% mutex_spin_on_owner [kernel.kallsyms] 10168.00 1.4% _int_malloc /lib64/libc-2.12.so 8217.00 1.1% schedule [kernel.kallsyms] 6977.00 1.0% __strlen_sse42 /lib64/libc-2.12.so 6918.00 0.9% __strcmp_sse42 /lib64/libc-2.12.so 6711.00 0.9% _spin_lock_irqsave [kernel.kallsyms] 6629.00 0.9% reschedule_interrupt [kernel.kallsyms] 6599.00 0.9% find_busiest_group [kernel.kallsyms] 5984.00 0.8% core_yylex /home/digoal/pgsql9.6/bin/postgres
直连测试
$ vi test.sql\set id random(1,50000000)select * from userinfo_0 where uid=:id;$ pgbench -M prepared -n -r -P 1 -f ./test.sql -c 64 -j 64 -T 1200 -U role0 db0progress: 3.0 s, 1065981.5 tps, lat 0.059 ms stddev 0.008progress: 4.0 s, 1084914.1 tps, lat 0.058 ms stddev 0.006progress: 5.0 s, 1093083.4 tps, lat 0.057 ms stddev 0.006
1. 基于fdw的shard,从功能上来讲还有哪些值得改进的点:
例如, 支持聚合下推(10.0已经支持)
append的并行化,目前如果要扫描多个分区,是串行的,并不是并行扫描,非常影响效率。
2. 从性能方面来讲,还有哪些值得改进的点:
从profile的结果来看,目前的锁较重,有非常大的性能提升空间,需要花点时间看看代码。
当然如果当前的性能能满足你,或者你通过构建多个对等的master加上负载均衡,也能解决这个问题。
现在与直连单节点的性能差距还是较大的。
转载地址:http://kguhl.baihongyu.com/