Case description:
Starting from a two-node cluster, a KingbaseES RAC is expanded online to three nodes.
Cluster version:
test=# select version();
       version
---------------------
 KingbaseES V008R006
(1 row)
Cluster architecture: a two-node KingbaseES RAC (node208, node209) on shared iSCSI storage (a GFS2 data disk plus a qdisk voting disk), expanded online with node210.
Operating system:
[root@node210 KingbaseHA]# cat /etc/os-release
NAME="openEuler"
VERSION="20.03 (LTS-SP4)"
ID="openEuler"
VERSION_ID="20.03"
PRETTY_NAME="openEuler 22.03 (LTS-SP4)"
ANSI_COLOR="0;31"
I. Prepare the system environment on the new node
1. Configure node information (all nodes)
[root@node210 ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.208 node208
192.168.1.209 node209
192.168.1.210 node210
192.168.1.203 node203
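As a simple sanity check (not part of the original procedure), confirm from the new node that the other hosts resolve and are reachable by hostname:

ping -c 1 node208
ping -c 1 node209
ping -c 1 node203    # iSCSI server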
2. Configure NTP
[kingbase@node210 bin]$ cat /etc/ntp.conf
server 192.168.1.208
fudge 192.168.1.209 stratum 10
[kingbase@node210 bin]$ ntpq -p
     remote           refid      st t when poll reach   delay   offset  jitter
==============================================================================
*node208 LOCAL(0) 11 u 255 512 377 0.154 +0.066 0.010
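If the NTP daemon is not yet enabled on the new node, it can be started and enabled at boot roughly as follows (assuming the service name is ntpd on this openEuler build; the '*' in front of node208 above indicates the selected synchronization peer):

systemctl enable --now ntpd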
3. Configure SELinux
[kingbase@node210 bin]$ cat /etc/sysconfig/selinux
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of these three values:
# targeted - Targeted processes are protected,
# minimum - Modification of targeted policy. Only selected processes are protected.
# mls - Multi Level Security protection.
SELINUXTYPE=targeted
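Note that editing /etc/sysconfig/selinux only takes effect after a reboot. If SELinux is currently enforcing, it can also be relaxed for the running system (an optional extra step, not shown in the original output):

setenforce 0    # switch the running kernel to permissive mode immediately
getenforce      # reports Permissive now, and Disabled after the next reboot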
4. Disable the firewall
[root@node210 data_gfs2]# systemctl status firewalld
○ firewalld.service - firewalld - dynamic firewall daemon
     Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled)
     Active: inactive (dead)
       Docs: man:firewalld(1)
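The output above shows firewalld is already inactive and disabled; if it were still running, it could be stopped and disabled in one step:

systemctl disable --now firewalld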
5. Create the kingbase user (the UID and GID must be identical on all nodes)
[root@node210 ~]# groupadd -g 2001 kingbase
[root@node210 ~]# useradd -u 201 -g kingbase kingbase
[root@node210 ~]# id kingbase
uid=201(kingbase) gid=2001(kingbase) groups=2001(kingbase)
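Since the UID/GID must match on every node, a quick cross-node check can be run from any host with root SSH access (hostnames as configured in /etc/hosts above):

for h in node208 node209 node210; do ssh $h id kingbase; done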
II. Configure shared storage
1. Configure the iSCSI initiator
[root@node210 ~]# cat /etc/iscsi/initiatorname.iscsi
#InitiatorName=iqn.2012-01.com.openeuler:11775b69af4
InitiatorName=iqn.2024-08.pip.cc:client
node.session.auth.authmethod = CHAP
node.session.auth.username = root
node.session.auth.password = 123456
2. Connect to the iSCSI server
[root@node210 ~]# iscsiadm -m discovery -t st -p 192.168.1.203
192.168.1.203:3260,1 iqn.2024-08.pip.cc:server
[root@node210 ~]# cat iscsi.sh
iscsiadm -m node -T iqn.2024-08.pip.cc:server -p 192.168.1.203 --login
[root@node210 ~]# sh iscsi.sh
Logging in to [iface: default, target: iqn.2024-08.pip.cc:server, portal: 192.168.1.203,3260]
Login to [iface: default, target: iqn.2024-08.pip.cc:server, portal: 192.168.1.203,3260] successful.
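To ensure the LUNs are re-attached automatically after a reboot, the node startup mode can optionally be switched to automatic (an extra step not shown in the original; adjust the target and portal to your environment):

iscsiadm -m node -T iqn.2024-08.pip.cc:server -p 192.168.1.203 --op update -n node.startup -v automatic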
3. Check the shared storage devices
[root@node210 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
sda 8:0 0 156.2G 0 disk
├─sda1 8:1 0 2G 0 part /boot
└─sda2 8:2 0 154.2G 0 part
  ├─openeuler-root 253:0 0 60G 0 lvm /
  ├─openeuler-swap 253:1 0 4G 0 lvm [SWAP]
  ├─openeuler-opt 253:2 0 50G 0 lvm /opt
  └─openeuler-home 253:3 0 40.2G 0 lvm /home
sdb 8:16 0 512M 0 disk
sdc 8:32 0 60.6G 0 disk
sdd 8:48 0 128M 0 disk
sde 8:64 0 128M 0 disk
sdf 8:80 0 10.7G 0 disk
sdg 8:96 0 2.2G 0 disk
sdh 8:112 0 128M 0 disk
sr0 11:0 1 1024M 0 rom
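The udev rules in the next step match devices by their SCSI WWID. The WWIDs used below (voting disk and data disk) can be read directly from the devices with the same scsi_id call that the rules themselves use, for example:

/lib/udev/scsi_id -g -u -d /dev/sdd    # expected to return the voting disk WWID (360014058...)
/lib/udev/scsi_id -g -u -d /dev/sdf    # expected to return the data disk WWID (360014050...)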
4. Configure udev bindings
[root@node210 rules.d]# pwd
/etc/udev/rules.d
[root@node210 rules.d]# cat 75-persist-iscsi.rules
KERNEL=="sd*",SUBSYSTEM=="block",PROGRAM=="/lib/udev/scsi_id -g -u -d /dev/$name",RESULT=="360014058606671082694fca897a2404d",SYMLINK+="qdsk",OWNER="root",GROUP="disk",MODE="0660"
KERNEL=="sd*",SUBSYSTEM=="block",PROGRAM=="/lib/udev/scsi_id -g -u -d /dev/$name",RESULT=="360014050da191d8d53b4d04a277aa8f5",SYMLINK+="kdata",OWNER="root",GROUP="disk",MODE="0660"# 重启udev服务
[root@node210 rules.d]# udevadm control --reload
[root@node210 rules.d]# udevadm trigger --type=devices --action=change

# Shared storage devices after binding
[root@node210 rules.d]# ls -l /dev/kdata
lrwxrwxrwx. 1 root root 3 Jan 8 16:13 /dev/kdata -> sdf
[root@node210 rules.d]# ls -l /dev/qdsk
lrwxrwxrwx. 1 root root 3 Jan 8 16:13 /dev/qdsk -> sdd
III. Configure the cluster
1. Current cluster status
[root@node208 KingbaseHA]# crm status
Cluster Summary:
  * Stack: corosync
  * Current DC: node208 Pacemaker (Kingbase) V008R006B1108 (2.0.3.0.0 4b1f869f0f:1268c00dfa83) - partition with quorum
  * Last updated: Wed Jan 8 16:19:24 2025
  * Last change: Wed Jan 8 14:07:22 2025 by root via cibadmin on node209
  * 2 nodes configured
  * 8 resource instances configured

Node List:
  * Online: [ node208 node209 ]

Full List of Resources:
  * fence_qdisk_0 (stonith:fence_qdisk): Started node209
  * fence_qdisk_1 (stonith:fence_qdisk): Started node208
  * Clone Set: clone-dlm [dlm]:
    * Started: [ node208 node209 ]
  * Clone Set: clone-gfs2 [gfs2]:
    * Started: [ node208 node209 ]
  * Clone Set: clone-DB [DB]:
    * Started: [ node208 node209 ]
2. Database instance information
test=# select sys_rac_nodelist;
             sys_rac_nodelist
-------------------------------------------
 (1,NODESTATE_MEMBER_ACTIVE,192.168.1.208)
 (2,NODESTATE_MEMBER_ACTIVE,192.168.1.209)
(2 rows)
3. Cluster configuration (add the new node)
As shown below, add the new node's information to cluster_manager.conf:
[root@node208 KingbaseHA]# cat cluster_manager.conf |more
######################################## Basic Configuration ####################################
################# install #################
##cluster node information
cluster_name=krac
node_name=(node208 node209 node210)
node_ip=(192.168.1.208 192.168.1.209 192.168.1.210)

##voting disk, used for qdevice
#check votingdisk path carefully before you use silent/--silent or -y to skip confirmation of
#qdisk_init, unverified execution may cause permanent data loss.
enable_qdisk=1
votingdisk=/dev/qdsk

##shared data disk, used for gfs2
#check sharedata_disk path carefully before you use silent/--silent or -y to skip confirmation of
#cluster_disk_init, unverified execution may cause permanent data loss.
sharedata_dir=/sharedata/data_gfs2
sharedata_disk=/dev/kdata
.......

# Sync the file to all nodes
[root@node208 KingbaseHA]# scp cluster_manager.conf node209:`pwd`
[root@node208 KingbaseHA]# scp cluster_manager.conf node210:`pwd`
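A simple consistency check after copying (assuming cluster_manager.conf lives under /opt/KingbaseHA on every node, as the prompts above suggest):

for h in node208 node209 node210; do ssh $h md5sum /opt/KingbaseHA/cluster_manager.conf; done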
With this, the new node entries have been added to the configuration on all nodes.
IV. Add the new node online (executed on the new node)
1. Cluster initialization
[root@node210 KingbaseHA]# ./cluster_manager.sh --base_configure_init
init kernel soft watchdog start
init kernel soft watchdog success
config host start
Host entry 192.168.1.208 node208 found, skiping...
Host entry 192.168.1.209 node209 found, skiping...
Host entry 192.168.1.210 node210 found, skiping...
config host success
add env varaible in /root/.bashrc
add env variable success
config corosync.conf start
config corosync.conf success
Starting Corosync Cluster Engine (corosync): [WARNING]
add pacemaker daemon user start
add pacemaker daemon user success
config pacemaker success
Starting Pacemaker Cluster Manager[ OK ]
config qdevice start
config qdevice success
clean qdisk fence flag start
clean qdisk fence flag success
Starting Qdisk Fenced daemon (qdisk-fenced): [ OK ]
Starting Corosync Qdevice daemon (corosync-qdevice): [ OK ]
config kingbase rac start
/opt/Kingbase/ES/V8/Server//log already exist
config kingbase rac success
add_udev_rule start
add_udev_rule success
insmod dlm.ko success
check and mknod for dlm start
check and mknod for dlm success
2. Check the cluster status
# Environment variable configuration
[root@node210 KingbaseHA]# cat /root/.bashrc
# .bashrc

# User specific aliases and functions
alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'

# Source global definitions
if [ -f /etc/bashrc ]; then
	. /etc/bashrc
fi
export install_dir=/opt/KingbaseHA
export PATH=/opt/KingbaseHA/python2.7/bin:/opt/KingbaseHA/pacemaker/sbin/:$PATH
export PATH=/opt/KingbaseHA/crmsh/bin:/opt/KingbaseHA/pacemaker/libexec/pacemaker/:$PATH
export PATH=/opt/KingbaseHA/corosync/sbin:/opt/KingbaseHA/corosync-qdevice/sbin:$PATH
export PYTHONPATH=/opt/KingbaseHA/python2.7/lib/python2.7/site-packages/:/opt/KingbaseHA/crmsh/lib/python2.7/site-packages:$PYTHONPATH
export COROSYNC_MAIN_CONFIG_FILE=/opt/KingbaseHA/corosync/etc/corosync/corosync.conf
export CRM_CONFIG_FILE=/opt/KingbaseHA/crmsh/etc/crm/crm.conf
export OCF_ROOT=/opt/KingbaseHA/pacemaker/ocf
export HA_SBIN_DIR=/opt/KingbaseHA/pacemaker/sbin/
export QDEVICE_SBIN_DIR=/opt/KingbaseHA/corosync-qdevice/sbin/
export LD_LIBRARY_PATH=/opt/KingbaseHA/lib64/:$LD_LIBRARY_PATH
export HA_INSTALL_PATH=/opt/KingbaseHA
export PATH=/opt/KingbaseHA/dlm-dlm/sbin:/opt/KingbaseHA/gfs2-utils/sbin:$PATH
export LD_LIBRARY_PATH=/opt/KingbaseHA/corosync/lib/:$LD_LIBRARY_PATH

# Apply the environment variables
[root@node210 KingbaseHA]# source /root/.bashrc
[root@node210 KingbaseHA]# crm status
Cluster Summary:
  * Stack: corosync
  * Current DC: node210 Pacemaker (Kingbase) V008R006B1108 (2.0.3.0.0 4b1f869f0f:1268c00dfa83) - partition WITHOUT quorum
  * Last updated: Wed Jan 8 16:26:26 2025
  * Last change: Wed Jan 8 16:26:10 2025 by hacluster via crmd on node210
  * 3 nodes configured
  * 0 resource instances configured

Node List:
  * Node node208: UNCLEAN (offline)
  * Node node209: UNCLEAN (offline)
  * Online: [ node210 ]
3. Initialize GFS2
[root@node210 KingbaseHA]# ./cluster_manager.sh --init_gfs2
init gfs2 start
current OS kernel version does not support updating gfs2, please confirm whether to continue? (Y/N):
y
init the OS native gfs2 success
4. Edit the resource configuration on an existing node (add the new node entries)
[root@node208 ~]# crm configure edit
node 1: node208
node 2: node209
node 3: node210
........
location fence_qdisk_0-on-node209 fence_qdisk_0 1800: node209
location fence_qdisk_0-on-node210 fence_qdisk_0 1800: node210
location fence_qdisk_1-on-node208 fence_qdisk_1 1800: node208
location fence_qdisk_1-on-node210 fence_qdisk_1 1800: node210
location fence_qdisk_2-on-node208 fence_qdisk_2 1800: node208
location fence_qdisk_2-on-node209 fence_qdisk_2 1800: node209
......
The newly added entries are node 3 (node210) and the appended fence_qdisk location constraints shown above (note that each of the three fence_qdisk resources is bound to the other nodes, not to its own); a quick verification is sketched below.
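After saving the edit, the new entries can be double-checked from any node before syncing corosync.conf (an optional check, not shown in the original):

crm configure show | grep -E 'node210|fence_qdisk_2'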
5. Copy the new node's corosync.conf to the existing nodes
1) corosync.conf on the new node
[root@node210 KingbaseHA]# cd corosync/etc/corosync/
[root@node210 corosync]# cat corosync.conf
totem {
    version: 2
    cluster_name: krac
    token: 12000
    token_retransmits_before_loss_const: 12
    join: 10000
    crypto_hash: none
    crypto_cipher: none
    interface {
        knet_ping_interval: 1500
        knet_ping_timeout: 6000
    }
}
quorum {
    provider: corosync_votequorum
    expected_votes: 5
    device {
        timeout: 60000
        sync_timeout: 70000
        master_wins: 1
        votes: 2
        model: disk
        disk {
            debug: 0
            interval: 1000
            tko: 30
            tko_up: 2
            upgrade_wait: 1
            master_wait: 3
            label: krac
            io_timeout: 1
            fence_timeout: 50000
            enable_qdisk_fence: 1
            watchdog_dev: /dev/watchdog
            watchdog_timeout: 30
        }
        heuristics {
            mode: off
            interval: 1000
            timeout: 10000
            exec_ping: /bin/ping -q -c 1 192.168.4.1
        }
    }
}
logging {
    debug: off
    to_logfile: yes
    logfile: /opt/KingbaseHA/corosync/var/log/cluster/corosync.log
    logger_subsys {
        subsys: QDEVICE
        debug: off
    }
}
nodelist {
    node {
        ring0_addr: node208
        nodeid: 1
    }
    node {
        ring0_addr: node209
        nodeid: 2
    }
    node {
        ring0_addr: node210
        nodeid: 3
    }
}
2) Sync the file to the existing nodes
[root@node210 corosync]# scp corosync.conf node208:`pwd`
[root@node210 corosync]# scp corosync.conf node209:`pwd`
6. Reload the corosync configuration (on any existing node)
[root@node208 KingbaseHA]# corosync-cfgtool -R
Reloading corosync.conf...
Done

# Cluster status
[root@node208 KingbaseHA]# crm status
  * Current DC: node208 Pacemaker (Kingbase) V008R006B1108 (2.0.3.0.0 4b1f869f0f:1268c00dfa83) - partition with quorum
  * Last updated: Wed Jan 8 16:34:20 2025
  * Last change: Wed Jan 8 16:30:09 2025 by root via cibadmin on node208
  * 3 nodes configured
  * 11 resource instances configured

Node List:
  * Node node210: UNCLEAN (offline)
  * Online: [ node208 node209 ]

Full List of Resources:
  * fence_qdisk_0 (stonith:fence_qdisk): Started node209
  * fence_qdisk_1 (stonith:fence_qdisk): Started node208
  * Clone Set: clone-dlm [dlm]:
    * Started: [ node208 node209 ]
    * Stopped: [ node210 ]
  * Clone Set: clone-gfs2 [gfs2]:
    * Started: [ node208 node209 ]
    * Stopped: [ node210 ]
  * Clone Set: clone-DB [DB]:
    * Started: [ node208 node209 ]
    * Stopped: [ node210 ]
7. Start the cluster service on the new node
[root@node210 KingbaseHA]# ./cluster_manager.sh start
Waiting for node failover handling:[ OK ]
Starting Corosync Cluster Engine (corosync): [ OK ]
clean qdisk fence flag start
clean qdisk fence flag success
Starting Qdisk Fenced daemon (qdisk-fenced): [ OK ]
Starting Corosync Qdevice daemon (corosync-qdevice): [ OK ]
Waiting for quorate:[ OK ]
Starting Pacemaker Cluster Manager[ OK ]
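Before checking the resource status, quorum and membership can be confirmed on the new node (corosync-quorumtool is on the PATH set up in /root/.bashrc above; shown here as an optional check):

corosync-quorumtool -s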
8. Check the cluster status
As shown below, the resources on all nodes are in a normal state:
[root@node208 KingbaseHA]# crm status
Cluster Summary:
  * Stack: corosync
  * Current DC: node208 Pacemaker (Kingbase) V008R006B1108 (2.0.3.0.0 4b1f869f0f:1268c00dfa83) - partition with quorum
  * Last updated: Fri Jan 10 15:53:37 2025
  * Last change: Wed Jan 8 17:55:45 2025 by root via cibadmin on node208
  * 3 nodes configured
  * 12 resource instances configured

Node List:
  * Online: [ node208 node209 node210 ]

Full List of Resources:
  * fence_qdisk_0 (stonith:fence_qdisk): Started node209
  * fence_qdisk_1 (stonith:fence_qdisk): Started node208
  * Clone Set: clone-dlm [dlm]:
    * Started: [ node208 node209 node210 ]
  * Clone Set: clone-gfs2 [gfs2]:
    * Started: [ node208 node209 node210 ]
  * Clone Set: clone-DB [DB]:
    * Started: [ node208 node209 node210 ]
  * fence_qdisk_2 (stonith:fence_qdisk): Started node210

# Database resource status
[root@node208 KingbaseHA]# crm resource status clone-DB
resource clone-DB is running on: node208
resource clone-DB is running on: node209
resource clone-DB is running on: node210
9. Check the database instances
test=# select sys_rac_nodelist;
             sys_rac_nodelist
-------------------------------------------
 (1,NODESTATE_MEMBER_ACTIVE,192.168.1.208)
 (2,NODESTATE_MEMBER_ACTIVE,192.168.1.209)
 (3,NODESTATE_MEMBER_ACTIVE,192.168.1.210)
(3 rows)
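As a final optional check, the instance on the new node can be connected to directly; the client name ksql, port 54321, and the system user below are KingbaseES defaults assumed here and may differ in your environment:

ksql -h 192.168.1.210 -p 54321 -U system -d test -c 'select version();'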
V. Summary
The above is the manual procedure for expanding a KingbaseES RAC cluster online from two nodes to three.