Skip to content

Commit

Permalink
robot: add metaserver shutdown test
Browse files Browse the repository at this point in the history
  • Loading branch information
YunhuiChen committed Jun 22, 2022
1 parent 07a8e3f commit c1877be
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 11 deletions.
6 changes: 3 additions & 3 deletions robot/Resources/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@
fs_mount_path = "/home/nbs/failover/"
fs_mount_dir = ["test1","test2"]
fs_md5check_dir = ["test1"]
fs_mds = ["10.182.2.34","10.182.2.35","10.182.2.36"]
fs_metaserver = ["10.182.2.34","10.182.2.35","10.182.2.36"]
fs_etcd = ["10.182.2.34","10.182.2.35","10.182.2.36"]
fs_mds = ["10.182.26.34","10.182.26.35","10.182.26.36"]
fs_metaserver = ["10.182.26.34","10.182.26.35","10.182.26.36"]
fs_etcd = ["10.182.26.34","10.182.26.35","10.182.26.36"]
md5_check = []
fs_md5check_thread = ""
fs_mount_thread = ""
Expand Down
48 changes: 46 additions & 2 deletions robot/Resources/keywords/fs_fault_inject.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,29 @@ def check_fs_cluster_ok():
logger.debug("cluster is %s"%rs[1])
return False

def wait_fs_cluster_ok(timeout=1200, interval=60):
    """Poll `curvefs_tool status` until nothing is reported as unhealthy.

    Connects to the first host in ``config.fs_mds``, looks up the curvefs
    container id via ``docker ps`` and then repeatedly runs
    ``curvefs_tool status | grep unhealthy`` inside that container.

    Args:
        timeout: Maximum number of seconds to keep polling (default 1200).
        interval: Seconds to sleep between two polls (default 60).

    Returns:
        True as soon as a poll finds no "unhealthy" line.

    Raises:
        AssertionError: if no curvefs container is found on the mds host, or
            if the cluster is still unhealthy once ``timeout`` has elapsed.
    """
    mds = config.fs_mds[0]
    ssh = shell_operator.create_ssh_connect(mds, 1046, config.abnormal_user)
    ori_cmd = "sudo docker ps |grep curvefs | awk '{print $1}'"
    rs = shell_operator.ssh_exec(ssh, ori_cmd)
    logger.info("docker is %s"%rs[1])
    # rs[1] is indexed like a list of output lines; fail with a clear message
    # instead of an IndexError when no curvefs container is running.
    if not rs[1]:
        raise AssertionError("no curvefs container found on %s"%mds)
    docker_id = rs[1][0].strip()
    ori_cmd = "sudo docker exec -i %s curvefs_tool status |grep unhealthy"%docker_id
    starttime = time.time()
    while time.time() - starttime < timeout:
        rs = shell_operator.ssh_exec(ssh, ori_cmd)
        logger.info("status is %s"%rs[1])
        # grep exits non-zero and prints nothing when no line matches, i.e.
        # when no component is reported as unhealthy.
        # NOTE(review): the pipeline status is grep's, so a failing
        # `docker exec` would presumably also look "healthy" here - confirm.
        if rs[0] != 0 and rs[1] == []:
            logger.info("cluster is healthy")
            return True
        logger.info("cluster is unhealthy")
        time.sleep(interval)
    # Timed out: dump the full status at debug level to help diagnosis.
    ori_cmd = "sudo docker exec -i %s curvefs_tool status"%docker_id
    rs = shell_operator.ssh_exec(ssh, ori_cmd)
    logger.debug("cluster is %s"%rs[1])
    # Explicit raise (not `assert False`) so the failure survives `python -O`,
    # and the message reports the timeout that was actually used.
    raise AssertionError("cluster metaserver not recover finish in %d s"%timeout)

def check_fs_copyset_status():
mds = config.fs_mds[0]
ssh = shell_operator.create_ssh_connect(mds, 1046, config.abnormal_user)
Expand Down Expand Up @@ -280,9 +303,12 @@ def test_kill_process(process_name,num=1):
raise
return host

def test_start_process(process_name):
def test_start_process(process_name,host=None):
try:
cmd = "/home/nbs/.curveadm/bin/curveadm start --role=%s"%process_name
if host == None:
cmd = "/home/nbs/.curveadm/bin/curveadm start --role=%s"%process_name
else:
cmd = "/home/nbs/.curveadm/bin/curveadm start --role=%s --host=%s"%(process_name,host)
ret = shell_operator.run_exec(cmd)
assert ret == 0 ,"start %s fail"%process_name
except Exception as e:
Expand Down Expand Up @@ -494,6 +520,24 @@ def test_in_metaserver_copyset():
logger.error("error is %s"%e)
raise

def test_ipmitool_restart_metaserver():
    """Power-cycle a random metaserver host and start its metaserver again.

    Picks one host from ``config.fs_metaserver``, triggers
    ``fault_inject.ipmitool_cycle_restart_host`` over ssh, waits 60 seconds,
    then polls ``fault_inject.check_host_connect`` every 5 seconds for up to
    600 seconds. Once the host is reachable, the metaserver role is started
    on that host through ``test_start_process``.

    Raises:
        AssertionError: if the host is not reachable again within the polling
            window.
    """
    metaserver_host = random.choice(config.fs_metaserver)
    logger.info("|------begin test metaserver ipmitool cycle,host %s------|"%(metaserver_host))
    ssh = shell_operator.create_ssh_connect(metaserver_host, 1046, config.abnormal_user)
    fault_inject.ipmitool_cycle_restart_host(ssh)
    # Give the host time to actually go down before probing it.
    time.sleep(60)
    starttime = time.time()
    status = False
    while time.time() - starttime < 600:
        status = fault_inject.check_host_connect(metaserver_host)
        # Same truthiness test as the final check below, so the loop and the
        # verdict can never disagree about whether the host is up.
        if status:
            break
        logger.debug("wait host up")
        time.sleep(5)
    # Explicit raise so the check is not stripped under `python -O`.
    if not status:
        raise AssertionError("restart host %s fail"%metaserver_host)
    test_start_process("metaserver",metaserver_host)

def wait_fuse_exit(fusename=""):
test_client = config.fs_test_client[0]
ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
Expand Down
20 changes: 14 additions & 6 deletions robot/curve_fs_robot.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ test fs mds loss package 5%
test fs process loss package mds ${percent}
check fuse mount success
check fuse iops
[Teardown] check fs cluster ok
[Teardown] wait fs cluster ok

test fs metaserver loss package 5%
[Tags] P1 base first release failover
Expand Down Expand Up @@ -135,14 +135,22 @@ inject fuse client mem stress 90%
del mem stress ${ssh}
check fuse mount success
check fuse iops
[Teardown] del mem stress ${ssh}
[Teardown] wait fs cluster ok

test metaserver copyset migrates out in
[Tags] P1 base first release failover
test out metaserver copyset
test in metaserver copyset
check fuse mount success

test ipmitool restart metaserver node
[Tags] P1 base first release failover ipmitool
test ipmitool restart metaserver
sleep 60
check fuse mount success
check fuse iops
[Teardown] wait fs cluster ok

test loop mount umount
[Tags] P0 base first release failover
sleep 30
Expand Down Expand Up @@ -171,7 +179,7 @@ inject kill all mds
check fuse mount success
check fuse iops
check fs cluster ok
[Teardown] test start process mds
[Teardown] wait fs cluster ok

inject kill two metaserver
[Tags] P1 base first release failover
Expand All @@ -183,7 +191,7 @@ inject kill two metaserver
check fuse mount success
check fuse iops
check fs cluster ok
[Teardown] test start process metaserver
[Teardown] wait fs cluster ok

inject kill all metaserver
[Tags] P1 base first release failover
Expand All @@ -195,7 +203,7 @@ inject kill all metaserver
check fuse mount success
check fuse iops
check fs cluster ok
[Teardown] test start process metaserver
[Teardown] wait fs cluster ok

inject kill two etcd
[Tags] P1 base first release failover
Expand Down Expand Up @@ -247,7 +255,7 @@ check umount date consistency
sleep 60
check test dir file md5
check cto dir file md5 test5
# [Teardown] mount test dir
# [Teardown] wait fs cluster ok

*** Keywords ***

Expand Down

0 comments on commit c1877be

Please sign in to comment.