diff --git a/robot/Resources/config/config.py b/robot/Resources/config/config.py index c950266d83..29474ac29c 100644 --- a/robot/Resources/config/config.py +++ b/robot/Resources/config/config.py @@ -90,9 +90,9 @@ fs_mount_path = "/home/nbs/failover/" fs_mount_dir = ["test1","test2"] fs_md5check_dir = ["test1"] -fs_mds = ["10.182.2.34","10.182.2.35","10.182.2.36"] -fs_metaserver = ["10.182.2.34","10.182.2.35","10.182.2.36"] -fs_etcd = ["10.182.2.34","10.182.2.35","10.182.2.36"] +fs_mds = ["10.182.26.34","10.182.26.35","10.182.26.36"] +fs_metaserver = ["10.182.26.34","10.182.26.35","10.182.26.36"] +fs_etcd = ["10.182.26.34","10.182.26.35","10.182.26.36"] md5_check = [] fs_md5check_thread = "" fs_mount_thread = "" diff --git a/robot/Resources/keywords/deploy.py b/robot/Resources/keywords/deploy.py index f037bb9a90..93d7926a45 100644 --- a/robot/Resources/keywords/deploy.py +++ b/robot/Resources/keywords/deploy.py @@ -313,7 +313,7 @@ def destroy_curvefs(): ret = shell_operator.run_exec(cmd) assert ret == 0 ,"checkout fail" for mountpoint in config.fs_mount_dir: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm umount %s%s"%(config.fs_mount_path,mountpoint) + cmd = "/home/nbs/.curveadm/bin/curveadm umount %s%s"%(config.fs_mount_path,mountpoint) shell_operator.ssh_exec(ssh, cmd) cmd = "echo 'yes' | /home/nbs/.curveadm/bin/curveadm stop" ret = shell_operator.run_exec(cmd) @@ -371,10 +371,10 @@ def mount_test_dir(mountpoint="",mountfile=""): if mountpoint == "": for mountpoint in config.fs_mount_dir: if config.fs_use_curvebs: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-bs-%s.yaml \ + cmd = "/home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-bs-%s.yaml \ --fstype volume"%(mountpoint,config.fs_mount_path,mountpoint,mountpoint) else: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-%s.yaml\ + cmd = "/home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-%s.yaml\ "%(mountpoint,config.fs_mount_path,mountpoint,mountpoint) rs = shell_operator.ssh_exec(ssh, cmd) assert rs[3] == 0,"mount %s dir fail,error is %s"%(mountpoint,rs[2]) @@ -382,10 +382,10 @@ def mount_test_dir(mountpoint="",mountfile=""): if mountfile == "": mountfile = mountpoint if config.fs_use_curvebs: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-bs-%s.yaml \ + cmd = "/home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-bs-%s.yaml \ --fstype volume"%(mountpoint,config.fs_mount_path,mountfile,mountfile) else: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-%s.yaml\ + cmd = "/home/nbs/.curveadm/bin/curveadm mount %s %s%s -c client-%s.yaml\ "%(mountpoint,config.fs_mount_path,mountfile,mountfile) rs = shell_operator.ssh_exec(ssh, cmd) assert rs[3] == 0,"mount %s dir fail,error is %s"%(mountpoint,rs[2]) @@ -399,11 +399,11 @@ def umount_test_dir(mountpoint=""): ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user) if mountpoint == "": for mountpoint in config.fs_mount_dir: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm umount %s%s"%(config.fs_mount_path,mountpoint) + cmd = "/home/nbs/.curveadm/bin/curveadm umount %s%s"%(config.fs_mount_path,mountpoint) rs = shell_operator.ssh_exec(ssh, cmd) assert rs[3] == 0,"umount %s dir fail,error is %s"%(mountpoint,rs[2]) else: - cmd = "sudo /home/nbs/.curveadm/bin/curveadm umount %s%s"%(config.fs_mount_path,mountpoint) + cmd = "/home/nbs/.curveadm/bin/curveadm umount %s%s"%(config.fs_mount_path,mountpoint) rs = shell_operator.ssh_exec(ssh, cmd) assert rs[3] == 0,"umount %s dir fail,error is %s"%(mountpoint,rs[2]) except Exception: diff --git a/robot/Resources/keywords/fs_fault_inject.py b/robot/Resources/keywords/fs_fault_inject.py index 2ef0de129d..dacb0710db 100644 --- a/robot/Resources/keywords/fs_fault_inject.py +++ b/robot/Resources/keywords/fs_fault_inject.py @@ -36,6 +36,29 @@ def check_fs_cluster_ok(): logger.debug("cluster is %s"%rs[1]) return False +def wait_fs_cluster_ok(): + mds = config.fs_mds[0] + ssh = shell_operator.create_ssh_connect(mds, 1046, config.abnormal_user) + ori_cmd = "sudo docker ps |grep curvefs | awk '{print $1}'" + rs = shell_operator.ssh_exec(ssh, ori_cmd) + docker_id = rs[1][0].strip() + logger.info("docker is %s"%rs[1]) + ori_cmd = "sudo docker exec -i %s curvefs_tool status |grep unhealthy"%docker_id + starttime = time.time() + while time.time() - starttime < 1200: + rs = shell_operator.ssh_exec(ssh, ori_cmd) + logger.info("status is %s"%rs[1]) + if rs[0] != 0 and rs[1] == []: + logger.info("cluster is healthy") + return True + else: + logger.info("cluster is unhealthy") + time.sleep(60) + ori_cmd = "sudo docker exec -i %s curvefs_tool status"%docker_id + rs = shell_operator.ssh_exec(ssh, ori_cmd) + logger.debug("cluster is %s"%rs[1]) + assert False,"cluster metaserver not recover finish in %d"%(120) + def check_fs_copyset_status(): mds = config.fs_mds[0] ssh = shell_operator.create_ssh_connect(mds, 1046, config.abnormal_user) @@ -280,9 +303,12 @@ def test_kill_process(process_name,num=1): raise return host -def test_start_process(process_name): +def test_start_process(process_name,host=None): try: - cmd = "/home/nbs/.curveadm/bin/curveadm start --role=%s"%process_name + if host == None: + cmd = "echo 'yes' | /home/nbs/.curveadm/bin/curveadm start --role=%s"%process_name + else: + cmd = "echo 'yes' | /home/nbs/.curveadm/bin/curveadm start --role=%s --host=%s"%(process_name,host) ret = shell_operator.run_exec(cmd) assert ret == 0 ,"start %s fail"%process_name except Exception as e: @@ -494,6 +520,24 @@ def test_in_metaserver_copyset(): logger.error("error is %s"%e) raise +def test_ipmitool_restart_metaserver(): + metaserver_host = random.choice(config.fs_metaserver) + logger.info("|------begin test metaserver ipmitool cycle,host %s------|"%(metaserver_host)) + ssh = shell_operator.create_ssh_connect(metaserver_host, 1046, config.abnormal_user) + fault_inject.ipmitool_cycle_restart_host(ssh) + time.sleep(60) + starttime = time.time() + i = 0 + while time.time() - starttime < 600: + status = fault_inject.check_host_connect(metaserver_host) + if status == True: + break + else: + logger.debug("wait host up") + time.sleep(5) + assert status,"restart host %s fail"%metaserver_host + test_start_process("metaserver",metaserver_host) + def wait_fuse_exit(fusename=""): test_client = config.fs_test_client[0] ssh = shell_operator.create_ssh_connect(test_client, 1046, config.abnormal_user) @@ -531,7 +575,7 @@ def mount_umount_test(): time.sleep(30) check_fuse_mount_success(test_dir) multi_mdtest_exec(ssh,test_dir[0]) - ori_cmd = "sudo /home/nbs/.curveadm/bin/curveadm umount " + config.fs_mount_path + test_dir[0] + ori_cmd = "/home/nbs/.curveadm/bin/curveadm umount " + config.fs_mount_path + test_dir[0] rs = shell_operator.ssh_exec(ssh, ori_cmd) assert rs[3] == 0,"umount %s fail,error is %s"%(test_dir,rs[1]) wait_fuse_exit(test_dir[0]) diff --git a/robot/curve_fs_robot.txt b/robot/curve_fs_robot.txt index 321d718db6..f5e1120646 100644 --- a/robot/curve_fs_robot.txt +++ b/robot/curve_fs_robot.txt @@ -59,7 +59,7 @@ test fs mds loss package 5% test fs process loss package mds ${percent} check fuse mount success check fuse iops - [Teardown] check fs cluster ok + [Teardown] wait fs cluster ok test fs metaserver loss package 5% [Tags] P1 base first release failover @@ -135,7 +135,7 @@ inject fuse client mem stress 90% del mem stress ${ssh} check fuse mount success check fuse iops - [Teardown] del mem stress ${ssh} + [Teardown] wait fs cluster ok test metaserver copyset migrates out in [Tags] P1 base first release failover @@ -143,6 +143,14 @@ test metaserver copyset migrates out in test in metaserver copyset check fuse mount success +test ipmitool restart metaserver node + [Tags] P1 base first release failover ipmitool + test ipmitool restart metaserver + sleep 60 + check fuse mount success + check fuse iops + [Teardown] wait fs cluster ok + test loop mount umount [Tags] P0 base first release failover sleep 30 @@ -171,7 +179,7 @@ inject kill all mds check fuse mount success check fuse iops check fs cluster ok - [Teardown] test start process mds + [Teardown] wait fs cluster ok inject kill two metaserver [Tags] P1 base first release failover @@ -183,7 +191,7 @@ inject kill two metaserver check fuse mount success check fuse iops check fs cluster ok - [Teardown] test start process metaserver + [Teardown] wait fs cluster ok inject kill all metaserver [Tags] P1 base first release failover @@ -195,7 +203,7 @@ inject kill all metaserver check fuse mount success check fuse iops check fs cluster ok - [Teardown] test start process metaserver + [Teardown] wait fs cluster ok inject kill two etcd [Tags] P1 base first release failover @@ -247,7 +255,7 @@ check umount date consistency sleep 60 check test dir file md5 check cto dir file md5 test5 -# [Teardown] mount test dir +# [Teardown] wait fs cluster ok *** Keywords ***