Skip to content

Commit

Permalink
robot:add P1 level failover test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
YunhuiChen committed Nov 19, 2021
1 parent 3394df8 commit ce47495
Show file tree
Hide file tree
Showing 7 changed files with 279 additions and 63 deletions.
5 changes: 5 additions & 0 deletions robot/Resources/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@
fs_metaserver = ["10.182.2.34","10.182.2.35","10.182.2.36"]
fs_etcd = ["10.182.2.34","10.182.2.35","10.182.2.36"]
md5_check = []
# Slot for a background mdtest thread handle; starts as an empty string.
# NOTE(review): nothing in the visible diff reads or writes this — confirm usage.
fs_mdtest_thread = ""
# Slot for the background mount/umount thread: loop_mount_umount() stores the
# runThread object here and stop_loop_mount() reads it back.
fs_mount_thread = ""
# Loop flag polled by mount_umount_test(); stop_loop_mount() sets it to False.
thrash_fs_mount = True
# presumably the matching loop flag for mdtest thrashing — TODO confirm
thrash_fs_mdtest = True
# Host passed as "hosts=..." to "make mount only=client" in mount_umount_test().
thrash_mount_host = "pubbeta2-nova48-3"
# chunkserver mount point
cs_0 = curve_workspace + "0"
cs_1 = curve_workspace + "1"
Expand Down
3 changes: 3 additions & 0 deletions robot/Resources/config/profile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fsd=fsd1,anchor=/home/nbs/failover/test2,depth=1,width=10,files=10,sizes=(100m,0),shared=yes,openflags=o_direct
fwd=fwd1,fsd=fsd1,threads=10,xfersize=(512,20,4k,20,64k,20,512k,20,1024k,20),fileio=random,fileselect=random,rdpct=50
rd=rd1,fwd=fwd*,fwdrate=max,format=restart,elapsed=2000,interval=1
13 changes: 0 additions & 13 deletions robot/Resources/keywords/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,19 +381,6 @@ def umount_test_dir():
logger.error("mount dir fail.")
raise

def wait_fuse_exit():
    """Wait until no fuse process is listed on the first fs test client.

    Runs ``ps -ef|grep fuse`` over SSH every 10 seconds, for at most 300
    seconds in total.

    Raises:
        AssertionError: if the last ``ps`` output is still non-empty.
    """
    test_client = config.fs_test_client[0]
    ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
    ori_cmd = "ps -ef|grep fuse | grep -v grep"
    poll_interval = 10
    waited = 0
    while waited < 300:
        rs = shell_operator.ssh_exec(ssh, ori_cmd)
        # rs[1] is compared against [] throughout this file; presumably it is
        # the list of stdout lines — verify against shell_operator.ssh_exec.
        if rs[1] == []:
            break
        # Count the time actually slept. The counter used to advance by 5 per
        # 10-second sleep, which let the loop run ~600s instead of the 300s
        # that the bound and the assertion message state.
        waited += poll_interval
        time.sleep(poll_interval)
    assert rs[1] == [],"fuse client not exit in 300s,process is %s"%rs[1]

def install_deb():
try:
# mkdeb_url = config.curve_workspace + "mk-deb.sh"
Expand Down
2 changes: 1 addition & 1 deletion robot/Resources/keywords/fault_inject.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import threading
import time
import mythread
import test_curve_stability_nbd
#import test_curve_stability_nbd
import re
import string
import types
Expand Down
161 changes: 150 additions & 11 deletions robot/Resources/keywords/fs_fault_inject.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
import re
import string
import types
import fault_inject
import mythread

def check_fs_cluster_ok():
    """Report the fs cluster as healthy.

    Placeholder check: it performs no inspection and always returns 1.
    """
    return 1

def check_fuse_mount_success():
def check_fuse_mount_success(fs_mount_dir=config.fs_mount_dir):
test_client = config.fs_test_client[0]
ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
for mount_dir in config.fs_mount_dir:
for mount_dir in fs_mount_dir:
grep_cmd = "ps -ef | grep curve-fuse | grep %s | grep -v grep | awk '{print $2}' " % mount_dir
rs = shell_operator.ssh_exec(ssh,grep_cmd)
pid = "".join(rs[1]).strip()
Expand Down Expand Up @@ -50,6 +52,9 @@ def get_fuse_pid(mount_dir):

def start_fs_vdbench():
test_client = config.fs_test_client[0]
cmd = "scp -i %s -o StrictHostKeyChecking=no -P 1046 robot/Resources/config/profile \
%s:~/tools/vdbench/"%(config.pravie_key_path,test_client)
shell_operator.run_exec2(cmd)
ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
ori_cmd = "cd /home/nbs/tools/vdbench && sudo nohup ./vdbench -jn -f profile &"
rs = shell_operator.ssh_background_exec2(ssh, ori_cmd)
Expand Down Expand Up @@ -114,10 +119,16 @@ def start_fs_fio():
def check_fuse_iops(limit=1):
test_client = config.fs_test_client[0]
ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
ori_cmd = "sudo netstat -lntp |grep curve-fuse |awk '{print $4}'"
rs = shell_operator.ssh_exec(ssh, ori_cmd)
for port in rs[1]:
port = port.strip()
port_list = []
for mnt in config.fs_mount_dir:
ori_cmd = "ps -ef|grep %s | grep curve-fuse | grep -v grep | awk '{print $2}'"%mnt
rs = shell_operator.ssh_exec(ssh, ori_cmd)
pid = "".join(rs[1]).strip()
ori_cmd = "sudo netstat -lntp |grep %s |awk '{print $4}'"%pid
rs = shell_operator.ssh_exec(ssh, ori_cmd)
port_list.append("".join(rs[1]).strip())
for port in port_list:
# port = port.strip()
logger.info("get port %s ops" %port)
ori_cmd = "sudo curl -s http://" + port + "/vars" + " | grep \'user_write_bps :\'"
rs = shell_operator.ssh_exec(ssh, ori_cmd)
Expand Down Expand Up @@ -191,7 +202,7 @@ def wait_op_finish():
ori_cmd1 = "ps -ef|grep -v grep | grep fio"
ori_cmd2 = "ps -ef|grep -v grep | grep vdbench"
starttime = time.time()
while time.time() - starttime < 7200:
while time.time() - starttime < 2400:
rs1 = shell_operator.ssh_exec(ssh, ori_cmd1)
rs2 = shell_operator.ssh_exec(ssh, ori_cmd2)
if rs1[1] == [] and rs2[1] == []:
Expand Down Expand Up @@ -223,7 +234,7 @@ def clean_corefile():
def check_corefile():
test_client = config.fs_test_client[0]
ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
ori_cmd = "sudo find /corefile -name core*"
ori_cmd = "sudo ls /corefile |grep core"
rs = shell_operator.ssh_exec(ssh, ori_cmd)
if rs[1] != []:
assert False,"/corefile have coredump file,is %s"%rs[1]
Expand All @@ -248,10 +259,138 @@ def check_test_dir_file_md5():
assert rs[3] == 0,"diff md5 file fail, output %s"%rs[1]
assert rs[1] == [],"check fio test dir file md5 fail,diff is %s"%rs[1]

def multi_mdtest_exec(numjobs,filenum,filesize):

def test_fuse_client_mem_stress(stress=80):
    """Inject memory stress on the first fs test client.

    Args:
        stress: percentage of the total memory reported by ``free -g`` that
            is handed to ``fault_inject.inject_mem_stress``.

    Returns:
        The SSH connection on which the stress was injected.
    """
    host = config.fs_test_client[0]
    logger.info("|------begin test fuse mem stress,host %s------|"%(host))
    total_mem_cmd = "free -g |grep Mem|awk \'{print $2}\'"
    conn = shell_operator.create_ssh_connect(host, 1046, config.abnormal_user)
    output = shell_operator.ssh_exec(conn, total_mem_cmd)[1]
    total_mem = int("".join(output).strip())
    # Convert the percentage into an absolute amount in the unit free -g prints.
    target = total_mem * stress / 100
    fault_inject.inject_mem_stress(conn, target)
    return conn

def test_fuse_client_cpu_stress(stress=80):
    """Inject CPU stress on the first fs test client.

    Copies ``cpu_stress.py`` to the client's home directory with scp, then
    calls ``fault_inject.inject_cpu_stress`` over a fresh SSH connection.

    Args:
        stress: value forwarded unchanged to ``fault_inject.inject_cpu_stress``.

    Returns:
        The SSH connection on which the stress was injected.
    """
    host = config.fs_test_client[0]
    logger.info("|------begin test fuse client cpu stress,host %s------|"%(host))
    copy_cmd = (
        "scp -i %s -o StrictHostKeyChecking=no -P 1046 robot/Resources/keywords/cpu_stress.py "
        "%s:~/"
    ) % (config.pravie_key_path, host)
    shell_operator.run_exec2(copy_cmd)
    conn = shell_operator.create_ssh_connect(host, 1046, config.abnormal_user)
    fault_inject.inject_cpu_stress(conn, stress)
    return conn

def test_fs_process_delay_package(process_name,ms):
    """Inject packet delay on one randomly chosen host of the given role.

    The delay is applied with ``fault_inject.package_delay_all`` and is
    always cancelled again after a 60 second hold, even when the injection
    itself fails.

    Args:
        process_name: one of "mds", "metaserver", "etcd" or "fuseclient";
            selects the config host list the target host is drawn from.
        ms: delay value forwarded to ``fault_inject.package_delay_all``.

    Raises:
        ValueError: if ``process_name`` is not one of the supported roles.
    """
    if process_name == "mds":
        process_list = list(config.fs_mds)
    elif process_name == "metaserver":
        process_list = list(config.fs_metaserver)
    elif process_name == "etcd":
        process_list = list(config.fs_etcd)
    elif process_name == "fuseclient":
        process_list = list(config.fs_test_client)
    else:
        # Without this branch an unknown role left process_list unbound and
        # failed later with an unhelpful UnboundLocalError.
        raise ValueError(
            "unknown process_name %r, expected mds/metaserver/etcd/fuseclient"
            % (process_name,))
    test_host = random.choice(process_list)
    ssh = shell_operator.create_ssh_connect(test_host, 1046, config.abnormal_user)
    dev = fault_inject.get_hostip_dev(ssh,test_host)
    logger.info("|------begin test host %s dev %s delay package------|"%(test_host,dev))
    try:
        fault_inject.package_delay_all(ssh, dev, ms)
        fault_inject.show_tc_inject(ssh,dev)
    finally:
        # Keep the fault active for a minute, then always remove the tc rule
        # so a failed run does not leave the host degraded.
        time.sleep(60)
        fault_inject.cancel_tc_inject(ssh,dev)

def test_fs_process_loss_package(process_name,percent):
    """Inject packet loss on one randomly chosen host of the given role.

    The loss is applied with ``fault_inject.package_loss_all`` and is always
    cancelled again after a 60 second hold, even when the injection itself
    fails.

    Args:
        process_name: one of "mds", "metaserver", "etcd" or "fuseclient";
            selects the config host list the target host is drawn from.
        percent: loss value forwarded to ``fault_inject.package_loss_all``.

    Raises:
        ValueError: if ``process_name`` is not one of the supported roles.
    """
    if process_name == "mds":
        process_list = list(config.fs_mds)
    elif process_name == "metaserver":
        process_list = list(config.fs_metaserver)
    elif process_name == "etcd":
        process_list = list(config.fs_etcd)
    elif process_name == "fuseclient":
        process_list = list(config.fs_test_client)
    else:
        # Without this branch an unknown role left process_list unbound and
        # failed later with an unhelpful UnboundLocalError.
        raise ValueError(
            "unknown process_name %r, expected mds/metaserver/etcd/fuseclient"
            % (process_name,))
    test_host = random.choice(process_list)
    ssh = shell_operator.create_ssh_connect(test_host, 1046, config.abnormal_user)
    dev = fault_inject.get_hostip_dev(ssh,test_host)
    logger.info("|------begin test host %s dev %s loss package------|"%(test_host,dev))
    try:
        fault_inject.package_loss_all(ssh, dev, percent)
        fault_inject.show_tc_inject(ssh,dev)
    finally:
        # Keep the fault active for a minute, then always remove the tc rule
        # so a failed run does not leave the host degraded.
        time.sleep(60)
        fault_inject.cancel_tc_inject(ssh,dev)

def wait_fuse_exit(fusename=""):
    """Wait until the matching fuse process is gone from the first test client.

    Runs ``ps -ef | grep <pattern>`` over SSH every 10 seconds, for at most
    300 seconds in total.

    Args:
        fusename: pattern to grep for in the process list; when empty, any
            process matching "fuse" is waited for.

    Raises:
        AssertionError: if the last ``ps`` output is still non-empty.
    """
    test_client = config.fs_test_client[0]
    ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
    # The mdtest command that used to be assembled here referenced names
    # (numjobs, filenum, filesize) that this function never defines and was
    # overwritten before use, so it has been dropped.
    if fusename == "":
        ori_cmd = "ps -ef|grep fuse | grep -v grep"
    else:
        ori_cmd = "ps -ef|grep %s | grep -v grep"%fusename
    poll_interval = 10
    waited = 0
    while waited < 300:
        rs = shell_operator.ssh_exec(ssh, ori_cmd)
        if rs[1] == []:
            break
        # Count the time actually slept. The counter used to advance by 5 per
        # 10-second sleep, which let the loop run ~600s instead of the 300s
        # that the bound and the assertion message state.
        waited += poll_interval
        time.sleep(poll_interval)
    assert rs[1] == [],"fuse client not exit in 300s,process is %s"%rs[1]

def multi_mdtest_exec(ssh,test_dir):
    """Run one mpirun/mdtest pass inside a directory under the fs mount path.

    The file count and file size are picked at random from fixed candidate
    lists on every call.

    Args:
        ssh: SSH connection handed to ``shell_operator.ssh_exec``.
        test_dir: directory name, joined onto ``config.fs_mount_path``.

    Raises:
        AssertionError: if ``rs[3]`` of the command result is not 0.
    """
    filenum = random.choice([100,200,300,500])
    filesize = random.choice([1024,4096,10240,20480,102400])
    target_dir = os.path.join(config.fs_mount_path,test_dir)
    mdtest_cmd = "mpirun --allow-run-as-root -np 8 mdtest -n %d -w %d -e %d -y -u -i 3 -N 1 -F -R -d %s"%(
        filenum,filesize,filesize,target_dir)
    result = shell_operator.ssh_exec(ssh, mdtest_cmd)
    logger.debug("mpirun mdtest output is %s"%result[1])
    assert result[3] == 0,"mdtest error, output %s"%result[1]

def mount_umount_test():
    """Repeat mdtest / umount / remount cycles until thrashing is switched off.

    Each cycle runs mdtest in the "test3" directory, unmounts it, waits for
    the fuse process to exit, remounts via ``make mount`` and checks that the
    mount came back. The loop runs while ``config.thrash_fs_mount`` is true.

    Returns:
        The number of completed cycles.

    Raises:
        AssertionError: if the umount command reports a non-zero ``rs[3]``,
            or if one of the called checks fails.
    """
    test_client = config.fs_test_client[0]
    ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user)
    # Kept as a list because check_fuse_mount_success iterates its argument.
    test_dir = ["test3"]
    # Build the path the same way multi_mdtest_exec does, so both agree even
    # when config.fs_mount_path has no trailing slash.
    mount_point = os.path.join(config.fs_mount_path, test_dir[0])
    t = 0
    while config.thrash_fs_mount:
        multi_mdtest_exec(ssh,test_dir[0])
        ori_cmd = "sudo umount " + mount_point
        rs = shell_operator.ssh_exec(ssh, ori_cmd)
        assert rs[3] == 0,"umount %s fail,error is %s"%(test_dir,rs[1])
        wait_fuse_exit(test_dir[0])
        time.sleep(5)
        cmd = "cd curvefs && make mount only=client hosts=%s"%config.thrash_mount_host
        # The command result is not inspected; the mount check below is what
        # decides whether the remount succeeded.
        shell_operator.run_exec(cmd)
        time.sleep(10)
        check_fuse_mount_success(test_dir)
        time.sleep(2)
        t += 1
    return t

def loop_mount_umount():
    """Start mount_umount_test in a background thread.

    The thread object is stored in ``config.fs_mount_thread`` before it is
    started.
    """
    worker = mythread.runThread(mount_umount_test)
    # NOTE(review): the message carries a "%s" placeholder but no argument,
    # so the placeholder is logged literally.
    logger.debug("thrash mount %s")
    config.fs_mount_thread = worker
    worker.start()

def stop_loop_mount():
    """Stop the background mount/umount loop and check its outcome.

    Sets ``config.thrash_fs_mount`` to False so the loop ends after its
    current cycle, then checks the thread's exit code and cycle count.

    Raises:
        AssertionError: if no loop thread was stored, if the thread's
            ``exitcode`` is not 0, or if it reports no completed cycle.
    """
    # The config default for this slot is an empty string, so test truthiness;
    # the old comparison against [] never matched and the failure surfaced as
    # an AttributeError on "".exitcode instead of this message.
    if not config.fs_mount_thread:
        raise AssertionError(" loop mount umount not up")
    t = config.fs_mount_thread
    config.thrash_fs_mount = False
    logger.info("set thrash_fs_mount to false")
    # NOTE(review): exitcode is read without an explicit join; presumably
    # get_result() waits for the thread — confirm in mythread.runThread.
    assert t.exitcode == 0,"mount/umount error"
    result = t.get_result()
    logger2.info("mount umount test time is %d"%result)
    assert result > 0,"test mount fail,result is %d"%result
1 change: 0 additions & 1 deletion robot/Resources/keywords/mythread.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import threading, traceback, sys
import base_operate

class runThread(threading.Thread):
def __init__(self, funcName, *args):
Expand Down
Loading

0 comments on commit ce47495

Please sign in to comment.